From bcd380b631184e9d4e58c0aa80afb17727581066 Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Wed, 5 Dec 2018 16:32:23 +0000 Subject: Move kmean to separate file and refactor code * Other small quality of life improvements and changes * to the flags --- kmean.py | 62 ++++++++++++++++++++++++++++++ part2.py | 129 +++++++++++++++++---------------------------------------------- 2 files changed, 96 insertions(+), 95 deletions(-) create mode 100755 kmean.py diff --git a/kmean.py b/kmean.py new file mode 100755 index 0000000..4e9d03e --- /dev/null +++ b/kmean.py @@ -0,0 +1,62 @@ +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import sys +import random +import os +import json +import scipy.io +from random import randint +from sklearn.neighbors import KNeighborsClassifier +from sklearn.neighbors import DistanceMetric +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import confusion_matrix +from sklearn.metrics import accuracy_score +import argparse +import numpy as np +from numpy import genfromtxt +from numpy import linalg as LA +from timeit import default_timer as timer +from scipy.spatial.distance import cdist +from rerank import re_ranking + +def create_kmean_clusters(feature_vectors, labels, gallery_idx, camId): + for i in range(gallery_idx.size): + cam = camId[gallery_idx[i]] - 1 + gallery[cam].append(feature_vectors[gallery_idx[i]]) + gallerylab[cam].append(labels[gallery_idx[i]]) + + + train = np.array(gallery) + tlabel = np.array(gallerylab) + + for i in range(2): + km_train_data[i] = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train[i]) + km_labels[i] = np.zeros(int(np.max(labels))) # clusters size + km_idx[i] = km_train_data[i].labels_ + + for j in range(np.max(labels)): + class_vote = 
np.zeros(np.max(labels)) + for q in range(km_idx[i].size): + if km_idx[i][q]==j: + class_vote[int(tlabel[i][q])-1] += 1 + km_labels[i][j] = np.argmax(class_vote) + 1 + + #MERGE CLUSTERS + cl = [] + cllab = [] + clustercam[0] = np.ones(km_labels_1.size) + clustercam[1] = np.add(np.ones(km_labels_2.size), 1) + for j in range(2): + for j in range(km_labels_1.size): + cl.append(km_train_data[i].cluster_centers_[j]) + cllab.append(km_labels[i][j]) + + train_data = np.array(cl) + train_label = np.array(cllab) + train_cam = np.concatenate(clustercam, axis=1) + return train_data, train_label, train_cam + diff --git a/part2.py b/part2.py index 82a8cdd..679a54d 100755 --- a/part2.py +++ b/part2.py @@ -44,9 +44,10 @@ parser.add_argument("-n", "--neighbors", help="Number of neighbors", type=int, d parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true') parser.add_argument("-s", "--showrank", help="Save ranklist pic id in a txt file", type=int, default = 0) parser.add_argument("-2", "--graphspace", help="Graph space", action='store_true', default=0) -parser.add_argument("-1", "--norm", help="Normalized features", action='store_true', default=0) +parser.add_argument("-1", "--normalise", help="Normalized features", action='store_true', default=0) parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to M", type=int, default=1) parser.add_argument("-C", "--comparison", help="Set to 2 to obtain a comparison of baseline and Improved metric", type=int, default=1) +parser.add_argument("--data", help="Data folder with features data", default='data') args = parser.parse_args() @@ -68,23 +69,23 @@ def draw_results(test_label, pred_label): return acc_sc def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam, probe_cam, showfiles_train, showfiles_test, args): - + verbose("probe shape:", probe_data.shape) verbose("gallery shape:", gallery_data.shape) - + if args.rerank: - distances = 
re_ranking(probe_data, gallery_data, - args.reranka ,args.rerankb , 0.3, + distances = re_ranking(probe_data, gallery_data, + args.reranka ,args.rerankb , 0.3, MemorySave = False, Minibatch = 2000) else: if args.mahalanobis: # metric = 'jaccard' is also valid distances = cdist(probe_data, gallery_data, 'sqeuclidean') else: - distances = cdist(probe_data, gallery_data, 'euclidean') + distances = cdist(probe_data, gallery_data, 'euclidean') ranklist = np.argsort(distances, axis=1) - + test_table = np.arange(1, args.multrank+1) target_pred = np.zeros((args.multrank, ranklist.shape[0])) nsize = args.neighbors @@ -92,8 +93,8 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam nsize = test_table[args.multrank-1] nneighbors = np.zeros((ranklist.shape[0],nsize)) nnshowrank = (np.zeros((ranklist.shape[0],nsize))).astype(object) - - + + for i in range(args.multrank): if args.multrank!= 1: args.neighbors = test_table[i] @@ -109,19 +110,19 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam nnshowrank[probe_idx][q] = showfiles_train[row[n]] # q += 1 n += 1 - + if (args.neighbors) and (probe_label[probe_idx] in nneighbors[probe_idx]): target_pred[i][probe_idx] = probe_label[probe_idx] else: target_pred[i][probe_idx] = nneighbors[probe_idx][0] - - - if (args.showrank): + + + if (args.showrank): with open("ranklist.txt", "w") as text_file: text_file.write(np.array2string(nnshowrank[:args.showrank])) with open("query.txt", "w") as text_file: text_file.write(np.array2string(showfiles_test[:args.showrank])) - + if args.graphspace: # Colors for distinct individuals cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(1467)] @@ -134,14 +135,14 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam return target_pred def main(): - mat = scipy.io.loadmat('data/cuhk03_new_protocol_config_labeled.mat') + mat = 
scipy.io.loadmat(os.path.join(args.data,'cuhk03_new_protocol_config_labeled.mat')) camId = mat['camId'] filelist = mat['filelist'] labels = mat['labels'] gallery_idx = mat['gallery_idx'] - 1 - query_idx = mat['query_idx'] - 1 + query_idx = mat['query_idx'] - 1 train_idx = mat['train_idx'] - 1 - with open("data/feature_data.json", "r") as read_file: + with open(os.path.join(args.data,'feature_data.json'), 'r') as read_file: feature_vectors = np.array(json.load(read_file)) if args.train: query_idx = train_idx.reshape(train_idx.shape[0]) @@ -150,7 +151,7 @@ def main(): query_idx = query_idx.reshape(query_idx.shape[0]) gallery_idx = gallery_idx.reshape(gallery_idx.shape[0]) camId = camId.reshape(camId.shape[0]) - + showfiles_train = filelist[gallery_idx] showfiles_test = filelist[query_idx] train_data = feature_vectors[gallery_idx] @@ -159,98 +160,36 @@ def main(): test_label = labels[query_idx] train_cam = camId[gallery_idx] test_cam = camId[query_idx] - + accuracy = np.zeros((2, args.multrank)) test_table = np.arange(1, args.multrank+1) - if (args.norm): + if (args.normalise): train_data = np.divide(train_data,LA.norm(train_data, axis=0)) test_data = np.divide(test_data, LA.norm(test_data, axis=0)) if(args.kmean): - gallery1 = [] - gallery2 = [] - gallery1lab = [] - gallery2lab = [] - for i in range(gallery_idx.size): - if camId[gallery_idx[i]] == 1: - gallery1.append(feature_vectors[gallery_idx[i]]) - gallery1lab.append(labels[gallery_idx[i]]) - else: - gallery2.append(feature_vectors[gallery_idx[i]]) - gallery2lab.append(labels[gallery_idx[i]]) - - train1 = np.array(gallery1) - train2 = np.array(gallery2) - tlabel1 = np.array(gallery1lab) - tlabel2 = np.array(gallery2lab) + train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, + labels, + gallery_idx, + camId) + for q in range(args.comparison): + target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args) + for i in 
range(args.multrank): + accuracy[q][i] = draw_results(test_label, target_pred[i]) + args.rerank = True + args.neighbors = 1 - km_train_data_1 = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train1) - km_train_data_2 = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train2) - - ###REMAP LABELS - km_labels_1 = np.zeros(int(np.max(labels))) # clusters size - km_labels_2 = np.zeros(int(np.max(labels))) - km_idx_1 = km_train_data_1.labels_ - for i in range(np.max(labels)): - class_vote = np.zeros(np.max(labels)) - for q in range(km_idx_1.size): - if km_idx_1[q]==i: - class_vote[int(tlabel1[q])-1] += 1 - km_labels_1[i] = np.argmax(class_vote) + 1 - - km_idx_2 = km_train_data_2.labels_ - for i in range(np.max(labels)): - class_vote = np.zeros(np.max(labels)) - for q in range(km_idx_2.size): - if km_idx_2[q]==i: - class_vote[int(tlabel2[q])-1] += 1 - km_labels_2[i] = np.argmax(class_vote) + 1 - - #MERGE CLUSTERS - cl = [] - cllab = [] - clcam = [] - clustercam1 = np.ones(km_labels_1.size) - clustercam2 = np.add(np.ones(km_labels_2.size), 1) - for i in range(km_labels_1.size): - cl.append(km_train_data_1.cluster_centers_[i]) - cllab.append(km_labels_1[i]) - clcam.append(clustercam1[i]) - for i in range(km_labels_2.size): - cl.append(km_train_data_2.cluster_centers_[i]) - cllab.append(km_labels_2[i]) - clcam.append(clustercam2[i]) - - cluster = np.array(cl) - clusterlabel = np.array(cllab) - clustercam = np.array(clcam) - - for q in range(args.comparison): - target_pred = test_model(cluster, test_data, clusterlabel, test_label, clustercam, test_cam, showfiles_train, showfiles_test, args) - for i in range(args.multrank): - accuracy[q][i] = draw_results(test_label, target_pred[i]) - args.rerank = True - args.neighbors = 1 - - else: - for q in range(args.comparison): - target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args) - for i in range(args.multrank): - accuracy[q][i] = 
draw_results(test_label, target_pred[i]) - args.rerank = True - args.neighbors = 1 - if(args.multrank != 1): plt.plot(test_table[:(args.multrank)], 100*accuracy[0]) if(args.comparison!=1): plt.plot(test_table[:(args.multrank)], 100*accuracy[1]) - plt.legend(['Baseline kNN', 'Improved metric'], loc='upper left') + plt.legend(['Baseline kNN', 'Improved metric'], loc='upper left') plt.xlabel('k rank') plt.ylabel('Recognition Accuracy (%)') plt.grid(True) plt.show() - + if __name__ == "__main__": main() - + -- cgit v1.2.3-54-g00ecf