From 925176b50390c5eba974d3609abc203527ae8ba6 Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Wed, 5 Dec 2018 18:23:40 +0000 Subject: Move to lib folder and fix kmeans --- evaluate.py | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kmean.py | 62 ------------------ lib/kmean.py | 48 ++++++++++++++ lib/rerank.py | 82 ++++++++++++++++++++++++ rerank.py | 82 ------------------------ 5 files changed, 327 insertions(+), 144 deletions(-) create mode 100755 evaluate.py delete mode 100644 kmean.py create mode 100644 lib/kmean.py create mode 100644 lib/rerank.py delete mode 100644 rerank.py diff --git a/evaluate.py b/evaluate.py new file mode 100755 index 0000000..ace647f --- /dev/null +++ b/evaluate.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +# Author: Vasil Zlatanov, Nunzio Pucci +# EE4 Pattern Recognition coursework +# +# usage: part2.py [-h] [-t] [-cm] [-km] [-ma] [-e] [-r] [-ka RERANKA] +# [-kb RERANKB] [-v] + +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import sys +import random +import os +import json +import scipy.io +from random import randint +from sklearn.neighbors import KNeighborsClassifier +from sklearn.neighbors import DistanceMetric +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import confusion_matrix +from sklearn.metrics import accuracy_score +import argparse +import numpy as np +from numpy import genfromtxt +from numpy import linalg as LA +from timeit import default_timer as timer +from scipy.spatial.distance import cdist +sys.path.append('lib') +from rerank import re_ranking +from kmean import create_kmean_clusters + +parser = argparse.ArgumentParser() +parser.add_argument("-t", "--train", help="Use test data instead of query", action='store_true') +parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true') +parser.add_argument("-k", "--kmean", help="Perform Kmeans", action='store_true', default=0) +parser.add_argument("-m", "--mahalanobis", help="Perform Mahalanobis Distance metric", action='store_true', default=0) +parser.add_argument("-e", "--euclidean", help="Standard euclidean", action='store_true', default=0) +parser.add_argument("-r", "--rerank", help="Use k-reciprocal rernaking", action='store_true') +parser.add_argument("-p", "--reranka", help="Parameter 1 for Rerank", type=int, default = 20) +parser.add_argument("-q", "--rerankb", help="Parameter 2 for rerank", type=int, default = 6) +parser.add_argument("-l", "--rerankl", help="Coefficient to combine distances", type=int, default = 0.3) +parser.add_argument("-n", "--neighbors", help="Number of neighbors", type=int, default = 1) +parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true') +parser.add_argument("-s", "--showrank", help="Save ranklist pic id in a txt file", type=int, default = 0) +parser.add_argument("-2", "--graphspace", help="Graph space", action='store_true', default=0) +parser.add_argument("-1", "--normalise", help="Normalized features", action='store_true', default=0) +parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to M", type=int, default=1) +parser.add_argument("-C", "--comparison", help="Set to 2 to obtain a comparison of baseline and Improved metric", type=int, default=1) +parser.add_argument("--data", help="Data folder with features data", default='data') + + +args = parser.parse_args() + +def verbose(*text): + if args.verbose: + print(text) + +def draw_results(test_label, pred_label): + acc_sc = accuracy_score(test_label, pred_label) + cm = confusion_matrix(test_label, pred_label) + print('Accuracy: ', acc_sc) + if (args.conf_mat): + plt.matshow(cm, cmap='Blues') + plt.colorbar() + plt.ylabel('Actual') + plt.xlabel('Predicted') + plt.show() + return acc_sc + +def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam, probe_cam, showfiles_train, showfiles_test, args): + + verbose("probe shape:", probe_data.shape) + verbose("gallery shape:", gallery_data.shape) + + if args.rerank: + distances = re_ranking(probe_data, gallery_data, + args.reranka ,args.rerankb , 0.3, + MemorySave = False, Minibatch = 2000) + else: + if args.mahalanobis: + # metric = 'jaccard' is also valid + distances = cdist(probe_data, gallery_data, 'sqeuclidean') + else: + distances = cdist(probe_data, gallery_data, 'euclidean') + + ranklist = np.argsort(distances, axis=1) + + test_table = np.arange(1, args.multrank+1) + target_pred = np.zeros((args.multrank, ranklist.shape[0])) + nsize = args.neighbors + if (args.multrank != 1): + nsize = test_table[args.multrank-1] + nneighbors = np.zeros((ranklist.shape[0],nsize)) + nnshowrank = (np.zeros((ranklist.shape[0],nsize))).astype(object) + + + for i in range(args.multrank): + if args.multrank!= 1: + args.neighbors = test_table[i] + for probe_idx in range(probe_data.shape[0]): + row = ranklist[probe_idx] + n = 0 + q = 0 + while (q < args.neighbors): + while (probe_cam[probe_idx] == gallery_cam[row[n]] and + probe_label[probe_idx] == gallery_label[row[n]]): + n += 1 + nneighbors[probe_idx][q] = gallery_label[row[n]] + nnshowrank[probe_idx][q] = showfiles_train[row[n]] # + q += 1 + n += 1 + + if (args.neighbors) and (probe_label[probe_idx] in nneighbors[probe_idx]): + target_pred[i][probe_idx] = probe_label[probe_idx] + else: + target_pred[i][probe_idx] = nneighbors[probe_idx][0] + + + if (args.showrank): + with open("ranklist.txt", "w") as text_file: + text_file.write(np.array2string(nnshowrank[:args.showrank])) + with open("query.txt", "w") as text_file: + text_file.write(np.array2string(showfiles_test[:args.showrank])) + + if args.graphspace: + # Colors for distinct individuals + cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(1467)] + gallery_label_tmp = np.subtract(gallery_label, 1) + pltCol = [cols[int(k)] for k in gallery_label_tmp] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.scatter(gallery_data[:, 0], gallery_data[:, 1], gallery_data[:, 2], marker='o', color=pltCol) + plt.show() + return target_pred + +def main(): + mat = scipy.io.loadmat(os.path.join(args.data,'cuhk03_new_protocol_config_labeled.mat')) + camId = mat['camId'] + filelist = mat['filelist'] + labels = mat['labels'] + gallery_idx = mat['gallery_idx'] - 1 + query_idx = mat['query_idx'] - 1 + train_idx = mat['train_idx'] - 1 + with open(os.path.join(args.data,'feature_data.json'), 'r') as read_file: + feature_vectors = np.array(json.load(read_file)) + if args.train: + query_idx = train_idx.reshape(train_idx.shape[0]) + gallery_idx = train_idx.reshape(train_idx.shape[0]) + else: + query_idx = query_idx.reshape(query_idx.shape[0]) + gallery_idx = gallery_idx.reshape(gallery_idx.shape[0]) + camId = camId.reshape(camId.shape[0]) + + showfiles_train = filelist[gallery_idx] + showfiles_test = filelist[query_idx] + train_data = feature_vectors[gallery_idx] + test_data = feature_vectors[query_idx] + train_label = labels[gallery_idx] + test_label = labels[query_idx] + train_cam = camId[gallery_idx] + test_cam = camId[query_idx] + + accuracy = np.zeros((2, args.multrank)) + test_table = np.arange(1, args.multrank+1) + + if (args.normalise): + train_data = np.divide(train_data,LA.norm(train_data, axis=0)) + test_data = np.divide(test_data, LA.norm(test_data, axis=0)) + if(args.kmean): + train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, + labels, + gallery_idx, + camId) + for q in range(args.comparison): + target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args) + for i in range(args.multrank): + accuracy[q][i] = draw_results(test_label, target_pred[i]) + args.rerank = True + args.neighbors = 1 + + if(args.multrank != 1): + plt.plot(test_table[:(args.multrank)], 100*accuracy[0]) + if(args.comparison!=1): + plt.plot(test_table[:(args.multrank)], 100*accuracy[1]) + plt.legend(['Baseline kNN', 'Improved metric'], loc='upper left') + plt.xlabel('k rank') + plt.ylabel('Recognition Accuracy (%)') + plt.grid(True) + plt.show() + + +if __name__ == "__main__": + main() + diff --git a/kmean.py b/kmean.py deleted file mode 100644 index 4e9d03e..0000000 --- a/kmean.py +++ /dev/null @@ -1,62 +0,0 @@ -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D -import sys -import random -import os -import json -import scipy.io -from random import randint -from sklearn.neighbors import KNeighborsClassifier -from sklearn.neighbors import DistanceMetric -from sklearn.cluster import KMeans -from sklearn.decomposition import PCA -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.metrics import confusion_matrix -from sklearn.metrics import accuracy_score -import argparse -import numpy as np -from numpy import genfromtxt -from numpy import linalg as LA -from timeit import default_timer as timer -from scipy.spatial.distance import cdist -from rerank import re_ranking - -def create_kmean_clusters(feature_vectors, labels, gallery_idx, camId): - for i in range(gallery_idx.size): - cam = camId[gallery_idx[i]] - 1 - gallery[cam].append(feature_vectors[gallery_idx[i]]) - gallerylab[cam].append(labels[gallery_idx[i]]) - - - train = np.array(gallery) - tlabel = np.array(gallerylab) - - for i in range(2): - km_train_data[i] = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train[i]) - km_labels[i] = np.zeros(int(np.max(labels))) # clusters size - km_idx[i] = km_train_data[i].labels_ - - for j in range(np.max(labels)): - class_vote = np.zeros(np.max(labels)) - for q in range(km_idx[i].size): - if km_idx[i][q]==j: - class_vote[int(tlabel[i][q])-1] += 1 - km_labels[i][j] = np.argmax(class_vote) + 1 - - #MERGE CLUSTERS - cl = [] - cllab = [] - clustercam[0] = np.ones(km_labels_1.size) - clustercam[1] = np.add(np.ones(km_labels_2.size), 1) - for j in range(2): - for j in range(km_labels_1.size): - cl.append(km_train_data[i].cluster_centers_[j]) - cllab.append(km_labels[i][j]) - - train_data = np.array(cl) - train_label = np.array(cllab) - train_cam = np.concatenate(clustercam, axis=1) - return train_data, train_label, train_cam - diff --git a/lib/kmean.py b/lib/kmean.py new file mode 100644 index 0000000..2e34702 --- /dev/null +++ b/lib/kmean.py @@ -0,0 +1,48 @@ +import numpy as np +from sklearn.cluster import KMeans + +def create_kmean_clusters(feature_vectors, labels, gallery_idx, camId): + gallery = ([],[]) + gallerylab = ([],[]) + for i in range(gallery_idx.size): + cam = camId[gallery_idx[i]] - 1 + gallery[cam].append(feature_vectors[gallery_idx[i]]) + gallerylab[cam].append(labels[gallery_idx[i]]) + + + train = np.array(gallery) + tlabel = np.array(gallerylab) + + km_train_data = [] + km_idx = [] + km_labels = [] + + for i in range(2): + km_train_data.append(KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train[i])) + km_idx.append(km_train_data[i].labels_) + km_labels.append(range(np.max(labels))) + for j in range(np.max(labels)): + class_vote = np.zeros(np.max(labels)) + for q in range(km_idx[i].size): + if km_idx[i][q]==j: + class_vote[int(tlabel[i][q])-1] += 1 + km_labels[i][j] = np.argmax(class_vote) + 1 + + #MERGE CLUSTERS + cl = [] + cllab = [] + clustercam = [] + for i in range(2): + clustercam.append(np.add(np.ones(len(km_labels[i])), i)) + print(len(km_labels[i])) + for j in range(len(km_labels[i])): + cl.append(km_train_data[i].cluster_centers_[j]) + cllab.append(km_labels[i][j]) + + train_data = np.array(cl) + print(train_data.shape) + train_label = np.array(cllab) + print(train_label.shape) + train_cam = np.array([clustercam[i] for i in range(2)]).reshape(train_label.shape[0]) + print(train_cam.shape) + return train_data, train_label, train_cam diff --git a/lib/rerank.py b/lib/rerank.py new file mode 100644 index 0000000..6b20f53 --- /dev/null +++ b/lib/rerank.py @@ -0,0 +1,82 @@ +from scipy.spatial.distance import cdist +import numpy as np + +def re_ranking(probFea,galFea,k1,k2,lambda_value, MemorySave = False, Minibatch = 2000): + + query_num = probFea.shape[0] + all_num = query_num + galFea.shape[0] + feat = np.append(probFea,galFea,axis = 0) + feat = feat.astype(np.float16) + print('computing original distance') + if MemorySave: + original_dist = np.zeros(shape = [all_num,all_num],dtype = np.float16) + i = 0 + while True: + it = i + Minibatch + if it < np.shape(feat)[0]: + original_dist[i:it,] = np.power(cdist(feat[i:it,],feat),2).astype(np.float16) + else: + original_dist[i:,:] = np.power(cdist(feat[i:,],feat),2).astype(np.float16) + break + i = it + else: + original_dist = cdist(feat,feat).astype(np.float16) + original_dist = np.power(original_dist,2).astype(np.float16) + del feat + gallery_num = original_dist.shape[0] + original_dist = np.transpose(original_dist/np.max(original_dist,axis = 0)) + V = np.zeros_like(original_dist).astype(np.float16) + initial_rank = np.argsort(original_dist).astype(np.int32) + + + print('starting re_ranking') + for i in range(all_num): + # k-reciprocal neighbors + forward_k_neigh_index = initial_rank[i,:k1+1] + backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] + fi = np.where(backward_k_neigh_index==i)[0] + k_reciprocal_index = forward_k_neigh_index[fi] + k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_forward_k_neigh_index = initial_rank[candidate,:int(np.around(k1/2))+1] + candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:int(np.around(k1/2))+1] + fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] + candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] + if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2/3*len(candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) + + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) + V[i,k_reciprocal_expansion_index] = weight/np.sum(weight) + original_dist = original_dist[:query_num,] + if k2 != 1: + V_qe = np.zeros_like(V,dtype=np.float16) + for i in range(all_num): + V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) + V = V_qe + del V_qe + del initial_rank + invIndex = [] + for i in range(gallery_num): + invIndex.append(np.where(V[:,i] != 0)[0]) + + jaccard_dist = np.zeros_like(original_dist,dtype = np.float16) + + for i in range(query_num): + temp_min = np.zeros(shape=[1,gallery_num],dtype=np.float16) + indNonZero = np.where(V[i,:] != 0)[0] + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + for j in range(len(indNonZero)): + temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) + jaccard_dist[i] = 1-temp_min/(2-temp_min) + + final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value + del original_dist + del V + del jaccard_dist + final_dist = final_dist[:query_num,query_num:] + + return final_dist + diff --git a/rerank.py b/rerank.py deleted file mode 100644 index 6b20f53..0000000 --- a/rerank.py +++ /dev/null @@ -1,82 +0,0 @@ -from scipy.spatial.distance import cdist -import numpy as np - -def re_ranking(probFea,galFea,k1,k2,lambda_value, MemorySave = False, Minibatch = 2000): - - query_num = probFea.shape[0] - all_num = query_num + galFea.shape[0] - feat = np.append(probFea,galFea,axis = 0) - feat = feat.astype(np.float16) - print('computing original distance') - if MemorySave: - original_dist = np.zeros(shape = [all_num,all_num],dtype = np.float16) - i = 0 - while True: - it = i + Minibatch - if it < np.shape(feat)[0]: - original_dist[i:it,] = np.power(cdist(feat[i:it,],feat),2).astype(np.float16) - else: - original_dist[i:,:] = np.power(cdist(feat[i:,],feat),2).astype(np.float16) - break - i = it - else: - original_dist = cdist(feat,feat).astype(np.float16) - original_dist = np.power(original_dist,2).astype(np.float16) - del feat - gallery_num = original_dist.shape[0] - original_dist = np.transpose(original_dist/np.max(original_dist,axis = 0)) - V = np.zeros_like(original_dist).astype(np.float16) - initial_rank = np.argsort(original_dist).astype(np.int32) - - - print('starting re_ranking') - for i in range(all_num): - # k-reciprocal neighbors - forward_k_neigh_index = initial_rank[i,:k1+1] - backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] - fi = np.where(backward_k_neigh_index==i)[0] - k_reciprocal_index = forward_k_neigh_index[fi] - k_reciprocal_expansion_index = k_reciprocal_index - for j in range(len(k_reciprocal_index)): - candidate = k_reciprocal_index[j] - candidate_forward_k_neigh_index = initial_rank[candidate,:int(np.around(k1/2))+1] - candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:int(np.around(k1/2))+1] - fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] - candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] - if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2/3*len(candidate_k_reciprocal_index): - k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) - - k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) - weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) - V[i,k_reciprocal_expansion_index] = weight/np.sum(weight) - original_dist = original_dist[:query_num,] - if k2 != 1: - V_qe = np.zeros_like(V,dtype=np.float16) - for i in range(all_num): - V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) - V = V_qe - del V_qe - del initial_rank - invIndex = [] - for i in range(gallery_num): - invIndex.append(np.where(V[:,i] != 0)[0]) - - jaccard_dist = np.zeros_like(original_dist,dtype = np.float16) - - for i in range(query_num): - temp_min = np.zeros(shape=[1,gallery_num],dtype=np.float16) - indNonZero = np.where(V[i,:] != 0)[0] - indImages = [] - indImages = [invIndex[ind] for ind in indNonZero] - for j in range(len(indNonZero)): - temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) - jaccard_dist[i] = 1-temp_min/(2-temp_min) - - final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value - del original_dist - del V - del jaccard_dist - final_dist = final_dist[:query_num,query_num:] - - return final_dist - -- cgit v1.2.3-54-g00ecf