aboutsummaryrefslogtreecommitdiff
path: root/part2.py
diff options
context:
space:
mode:
authornunzip <np_scarh@e4-pattern-vm.europe-west4-a.c.electric-orbit-223819.internal>2018-12-02 15:17:48 +0000
committernunzip <np_scarh@e4-pattern-vm.europe-west4-a.c.electric-orbit-223819.internal>2018-12-02 15:17:48 +0000
commit37a8f636d6cfce1b930b0a99269779e497b3f0ac (patch)
treed4c5aaea305b9cf64bef2761a4a91669bc69c170 /part2.py
parentc6fbcaed9c00992692427173f22ce0d05f2391c2 (diff)
downloadvz215_np1915-37a8f636d6cfce1b930b0a99269779e497b3f0ac.tar.gz
vz215_np1915-37a8f636d6cfce1b930b0a99269779e497b3f0ac.tar.bz2
vz215_np1915-37a8f636d6cfce1b930b0a99269779e497b3f0ac.zip
Part 2 knn kmean
Diffstat (limited to 'part2.py')
-rwxr-xr-xpart2.py283
1 files changed, 283 insertions, 0 deletions
diff --git a/part2.py b/part2.py
new file mode 100755
index 0000000..299fdcd
--- /dev/null
+++ b/part2.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python
+# Author: Vasil Zlatanov, Nunzio Pucci
+# EE4 Pattern Recognition coursework
+#
+# usage: part2.py [-h] [-R RANDOM] [-n NEIGHBORS] [-c] [-s SEED] [-t SPLIT]
+#                 [-2] [-cm] [-q] [-pr] [-km] [-ma] [-e] [-ka RERANKA]
+#                 [-kb RERANKB]
+
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import sys
+import random
+import os
+import json
+import scipy.io
+from random import randint
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neighbors import DistanceMetric
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+import argparse
+import numpy as np
+from numpy import genfromtxt
+from numpy import linalg as LA
+from timeit import default_timer as timer
+from scipy.spatial.distance import cdist
+
+#prob query, gal train
def _normalised_squared_dist(feat, batch_size):
    """Return squared Euclidean distances with each row scaled to [0, 1].

    The squaring and the normalisation are carried out in float64, one block
    of ``batch_size`` rows at a time, and only the normalised block is stored
    as float16.  Squaring in float16 overflows to ``inf`` as soon as a
    distance exceeds roughly 256, which turns the normalised matrix into NaN.

    The full distance matrix is symmetric, so dividing every row by its own
    maximum gives the same matrix as dividing every column by its maximum and
    transposing afterwards.
    """
    # Convert once so that cdist does not re-convert the whole array per batch.
    feat = np.asarray(feat, dtype=np.float64)
    num = feat.shape[0]
    dist = np.zeros((num, num), dtype=np.float16)
    for start in range(0, num, batch_size):
        stop = min(start + batch_size, num)
        block = np.square(cdist(feat[start:stop], feat))
        row_max = block.max(axis=1, keepdims=True)
        # All-identical features give a zero maximum; avoid 0/0 -> NaN.
        row_max[row_max == 0] = 1.0
        dist[start:stop] = block / row_max
    return dist


def re_ranking(probFea, galFea, k1, k2, lambda_value, MemorySave=False, Minibatch=2000):
    """Re-rank query-to-gallery distances using k-reciprocal neighbours.

    Args:
        probFea: 2-D array of query features, one row per sample.
        galFea: 2-D array of gallery features with the same number of columns.
        k1: neighbourhood size used to build the k-reciprocal sets.
        k2: number of top-ranked neighbours whose weight rows are averaged;
            the averaging step is skipped when ``k2 == 1``.
        lambda_value: weight of the normalised original distance in the final
            mix; the Jaccard distance gets ``1 - lambda_value``.
        MemorySave: when true, the distance matrix is built ``Minibatch`` rows
            at a time instead of in a single call.
        Minibatch: row-block size used when ``MemorySave`` is true.

    Returns:
        Array of shape ``(probFea.shape[0], galFea.shape[0])`` holding the
        re-ranked distance between every query and every gallery sample.
    """
    query_num = probFea.shape[0]
    all_num = query_num + galFea.shape[0]
    # Features are still quantised to float16, as before; only the distance
    # arithmetic is done in higher precision.
    feat = np.append(probFea, galFea, axis=0).astype(np.float16)

    print('computing original distance')
    batch = Minibatch if MemorySave else max(all_num, 1)
    original_dist = _normalised_squared_dist(feat, batch)
    del feat
    gallery_num = original_dist.shape[0]
    V = np.zeros_like(original_dist)
    initial_rank = np.argsort(original_dist).astype(np.int32)
    # Explicit float division keeps these values correct under Python 2 too.
    half_k1 = int(np.around(k1 / 2.0)) + 1
    two_thirds = 2.0 / 3.0

    print('starting re_ranking')
    for i in range(all_num):
        # k-reciprocal neighbors: j is kept when i is also in j's top-k1 list
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        k_reciprocal_expansion_index = k_reciprocal_index
        for candidate in k_reciprocal_index:
            candidate_forward = initial_rank[candidate, :half_k1]
            candidate_backward = initial_rank[candidate_forward, :half_k1]
            fi_candidate = np.where(candidate_backward == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward[fi_candidate]
            overlap = len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index))
            # Absorb the candidate's set only when most of it is already shared.
            if overlap > two_thirds * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(
                    k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)

    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        # Replace every weight row by the mean over its k2 nearest rows.
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank

    # For every column, the rows that have a non-zero weight in it.
    invIndex = [np.where(V[:, i] != 0)[0] for i in range(gallery_num)]

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)
    for i in range(query_num):
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        for col in indNonZero:
            rows = invIndex[col]
            temp_min[0, rows] = temp_min[0, rows] + np.minimum(V[i, col], V[rows, col])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    # Keep only the query rows against the gallery columns.
    final_dist = final_dist[:query_num, query_num:]

    return final_dist
+
def draw_results(args, target_test, target_pred):
    """Print the accuracy of a prediction and optionally plot its confusion matrix.

    Args:
        args: parsed command-line options; only ``args.conf_mat`` is read.
        target_test: true labels.
        target_pred: predicted labels, in the same order as ``target_test``.
    """
    accuracy = accuracy_score(target_test, target_pred)
    confusion = confusion_matrix(target_test, target_pred)
    print('Accuracy: ', accuracy)
    if not args.conf_mat:
        return
    # The plot is only shown when the user asked for it on the command line.
    plt.matshow(confusion, cmap='Blues')
    plt.colorbar()
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()
+
def test_model(train_data, test_data, target_train, target_test, args):
    """Fit a Euclidean k-NN classifier and return its predictions for ``test_data``.

    Args:
        train_data: training samples, one row per sample.
        test_data: samples to classify.
        target_train: labels of ``train_data``.
        target_test: unused; kept so existing callers keep working.
        args: parsed command-line options; only ``args.neighbors`` is read.

    Returns:
        The labels predicted for ``test_data``.
    """
    classifier = KNeighborsClassifier(n_neighbors=args.neighbors, metric='euclidean')
    classifier.fit(train_data, target_train)
    # TODO: call classifier.kneighbors(test_data) here once the neighbour
    # indices are actually needed; running it without using the result only
    # repeated the whole neighbour search.
    return classifier.predict(test_data)
+
def _split_by_camera(indices, cam_ids, labels, features):
    """Split the samples picked by ``indices`` into camera 1 and all other cameras.

    Returns ``((data_1, labels_1), (data_2, labels_2))`` as float64 arrays,
    with the samples of each half kept in the order given by ``indices``.
    """
    idx = np.asarray(indices).ravel()
    flat_labels = np.asarray(labels).ravel()
    from_cam_1 = np.asarray(cam_ids).ravel()[idx] == 1
    halves = []
    for mask in (from_cam_1, ~from_cam_1):
        chosen = idx[mask]
        halves.append((features[chosen].astype(np.float64),
                       flat_labels[chosen].astype(np.float64)))
    return halves[0], halves[1]


def _majority_labels(cluster_of_sample, sample_labels, n_classes):
    """Return, for each of ``n_classes`` clusters, the most frequent label in it.

    Labels are treated as integers starting at 1.  A cluster without members
    gets label 1 because ``argmax`` of an all-zero vote vector is 0.
    """
    majority = np.arange(1, n_classes + 1)
    class_idx = sample_labels.astype(int) - 1
    for cluster in range(n_classes):
        votes = np.zeros(n_classes)
        # add.at counts repeated indices, unlike plain fancy-index assignment.
        np.add.at(votes, class_idx[cluster_of_sample == cluster], 1)
        majority[cluster] = np.argmax(votes) + 1
    return majority


def main():
    """Parse the command line, load the data set and run the selected experiment.

    Reads ``data/cuhk03_new_protocol_config_labeled.mat`` and
    ``data/feature_data.json``, splits the gallery and query samples by
    camera, and evaluates queries from one camera against the gallery of the
    other using re-ranking (``-ma``), k-means centroids (``-km``) or plain
    nearest neighbours (``-e``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-R", "--random", help="Number of eigen value to randomise", type=int)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=1)
    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.3)
    parser.add_argument("-2", "--grapheigen", help="Show 2D graph of targets versus principal components",
                        action='store_true')
    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true')
    parser.add_argument("-km", "--kmean", help="Perform Kmeans", action='store_true')
    # The flag name is historical: this branch runs re-ranking, not Mahalanobis.
    parser.add_argument("-ma", "--mala", help="Perform re-ranking of the distances", action='store_true')
    parser.add_argument("-e", "--eucl", help="Standard euclidean", action='store_true')
    parser.add_argument("-ka", "--reranka", help="Parameter 1 for Rerank", type=int, default=20)
    parser.add_argument("-kb", "--rerankb", help="Parameter 2 for rerank", type=int, default=6)
    args = parser.parse_args()

    ###PART2 INPUT DATA
    mat = scipy.io.loadmat('data/cuhk03_new_protocol_config_labeled.mat')
    camId = mat['camId']
    labels = mat['labels']
    gallery_idx = mat['gallery_idx']
    query_idx = mat['query_idx']
    with open("data/feature_data.json", "r") as read_file:
        feature_vectors = np.array(json.load(read_file))

    # NOTE(review): the indices are used exactly as stored in the file. If the
    # .mat file holds 1-based MATLAB indices they need a -1 offset before
    # indexing the NumPy arrays -- confirm against the data set.
    (train_data_1, train_label_1), (train_data_2, train_label_2) = _split_by_camera(
        gallery_idx, camId, labels, feature_vectors)
    (test_data_1, test_label_1), (test_data_2, test_label_2) = _split_by_camera(
        query_idx, camId, labels, feature_vectors)

    if args.mala:
        # Each query takes the label of the gallery sample with the smallest
        # re-ranked distance.
        final_dist = re_ranking(test_data_1, train_data_2, args.reranka, args.rerankb, 0.3)
        target_pred = train_label_2[np.argmin(final_dist, axis=1)]
        draw_results(args, test_label_1, target_pred)

        final_dist2 = re_ranking(test_data_2, train_data_1, args.reranka, args.rerankb, 0.3)
        target_pred2 = train_label_1[np.argmin(final_dist2, axis=1)]
        draw_results(args, test_label_2, target_pred2)

    elif args.kmean:
        # One cluster per identity; every centroid is labelled by majority vote.
        n_classes = int(np.max(labels))
        km_train_data_1 = KMeans(n_clusters=n_classes, random_state=0).fit(train_data_1)
        km_train_data_2 = KMeans(n_clusters=n_classes, random_state=0).fit(train_data_2)

        km_labels_1 = _majority_labels(km_train_data_1.labels_, train_label_1, n_classes)
        target_pred = test_model(km_train_data_1.cluster_centers_, test_data_2, km_labels_1, test_label_2, args)
        draw_results(args, test_label_2, target_pred)

        km_labels_2 = _majority_labels(km_train_data_2.labels_, train_label_2, n_classes)
        target_pred = test_model(km_train_data_2.cluster_centers_, test_data_1, km_labels_2, test_label_1, args)
        draw_results(args, test_label_1, target_pred)

    elif args.eucl:
        target_pred = test_model(train_data_2, test_data_1, train_label_2, test_label_1, args)
        draw_results(args, test_label_1, target_pred)
        target_pred = test_model(train_data_1, test_data_2, train_label_1, test_label_2, args)
        draw_results(args, test_label_2, target_pred)

    print('N-Query from cam 1:', test_data_1.shape)
    print('N-Query from cam 2:', test_data_2.shape)
    print('Complete')
+
+if __name__ == "__main__":
+ main()
+ \ No newline at end of file