author | nunzip <np.scarh@gmail.com> | 2018-12-11 13:07:36 +0000 |
---|---|---|
committer | nunzip <np.scarh@gmail.com> | 2018-12-11 13:07:36 +0000 |
commit | b1c5d8a82395a8545794b0ce22cccb16ca380647 (patch) | |
tree | 8980806822be3836e635ca6a27b5df6ae123e560 | |
parent | 46bdc8b2ea4618efc606d509d4de37dc8f50a929 (diff) | |
download | vz215_np1915-b1c5d8a82395a8545794b0ce22cccb16ca380647.tar.gz vz215_np1915-b1c5d8a82395a8545794b0ce22cccb16ca380647.tar.bz2 vz215_np1915-b1c5d8a82395a8545794b0ce22cccb16ca380647.zip |
Optimizer (run with no flags)
-rw-r--r-- | opt.py | 358 |
1 file changed, 358 insertions, 0 deletions
@@ -0,0 +1,358 @@
+#!/usr/bin/env python
+# Author: Vasil Zlatanov, Nunzio Pucci
+# EE4 Pattern Recognition coursework
+#
+# usage: opt.py [-h] [-t] [-cm] [-km] [-ma] [-e] [-r] [-ka RERANKA]
+#               [-kb RERANKB] [-v]
+
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import sys
+import random
+import os
+import json
+import scipy.io
+from random import randint
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neighbors import NearestNeighbors
+from sklearn.neighbors import DistanceMetric
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+import argparse
+import numpy as np
+from numpy import genfromtxt
+from numpy import linalg as LA
+from timeit import default_timer as timer
+from scipy.spatial.distance import cdist
+sys.path.append('lib')
+from rerank import re_ranking
+from kmean import create_kmean_clusters
+import logging
+from logging import debug
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-t", "--train", help="Use train data instead of query and gallery", action='store_true')
+parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true')
+parser.add_argument("-k", "--kmean_alt", help="Perform clustering with generalized labels (not actual kmean)", action='store_true', default=0)
+parser.add_argument("-m", "--mahalanobis", help="Use the Mahalanobis distance metric", action='store_true', default=0)
+parser.add_argument("-e", "--euclidean", help="Use standard euclidean distance", action='store_true', default=0)
+parser.add_argument("-r", "--rerank", help="Use k-reciprocal reranking", action='store_true')
+parser.add_argument("-p", "--reranka", help="Parameter k1 for rerank -p '$k1val' -ARGUMENT REQUIRED, default=9-", type=int, default=9)
+parser.add_argument("-q", "--rerankb", help="Parameter k2 for rerank -q '$k2val' -ARGUMENT REQUIRED, default=3-", type=int, default=3)
+parser.add_argument("-l", "--rerankl", help="Coefficient to combine distances (lambda) -l '$lambdaval' -ARGUMENT REQUIRED, default=0.3-", type=float, default=0.3)
+parser.add_argument("-n", "--neighbors", help="Use customized ranklist size -n 'size' -ARGUMENT REQUIRED, default=1-", type=int, default=1)
+parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
+parser.add_argument("-s", "--showrank", help="Save ranklist pic IDs in a txt file. Number of ranklists saved specified as -s '$number' -ARGUMENT REQUIRED, default=0-", type=int, default=0)
+parser.add_argument("-1", "--normalise", help="Normalise features", action='store_true', default=0)
+parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to M -ARGUMENT REQUIRED, default=1-", type=int, default=1)
+parser.add_argument("-C", "--comparison", help="Set to 2 to obtain a comparison of baseline and improved metric -ARGUMENT REQUIRED, default=1-", type=int, default=1)
+parser.add_argument("--data", help="You can either put the data in a folder called 'data', or specify the location with --data 'path' -ARGUMENT REQUIRED, default='data'-", default='data')
+parser.add_argument("-K", "--kmean", help="Perform Kmean clustering of size specified through -K '$size' -ARGUMENT REQUIRED, default=0-", type=int, default=0)
+parser.add_argument("-P", "--mAP", help="Display Mean Average Precision for ranklist of size -n '$size'", action='store_true')
+parser.add_argument("-2", "--PCA", help="Use PCA with -2 '$n_components' -ARGUMENT REQUIRED, default=0-", type=int, default=0)
+
+args = parser.parse_args()
+
+if args.verbose:
+    logging.basicConfig(level=logging.DEBUG)
+
+def draw_results(test_label, pred_label):
+    acc_sc = accuracy_score(test_label, pred_label)
+    cm = confusion_matrix(test_label, pred_label)
+    print('Accuracy: ', acc_sc)
+    if (args.conf_mat):
+        plt.matshow(cm, cmap='Blues')
+        plt.colorbar()
+        plt.ylabel('Actual')
+        plt.xlabel('Predicted')
+        plt.show()
+    return acc_sc
+
+def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam, probe_cam, showfiles_train, showfiles_test, args):
+
+    debug("probe shape: %s", probe_data.shape)
+    debug("gallery shape: %s", gallery_data.shape)
+
+    if args.rerank:
+        distances = re_ranking(probe_data, gallery_data,
+                               args.reranka, args.rerankb, args.rerankl,
+                               MemorySave=False, Minibatch=2000)
+    else:
+        if args.mahalanobis:
+            # metric = 'jaccard' is also valid
+            cov_inv = np.linalg.inv(np.cov(gallery_data.T))
+            distances = np.zeros((probe_data.shape[0], gallery_data.shape[0]))
+            # compute in chunks of 10 probe rows to bound memory use
+            for i in range(int(probe_data.shape[0]/10)):
+                print("Computing from", i*10, "to", (i+1)*10-1)
+                distances[i*10:(i+1)*10] = cdist(probe_data[i*10:(i+1)*10], gallery_data, 'mahalanobis', VI=cov_inv)
+        else:
+            distances = cdist(probe_data, gallery_data, 'euclidean')
+
+    ranklist = np.argsort(distances, axis=1)
+
+    test_table = np.arange(1, args.multrank+1)
+    target_pred = np.zeros((args.multrank, ranklist.shape[0]))
+    nsize = args.neighbors
+    if (args.multrank != 1):
+        nsize = test_table[args.multrank-1]
+    nneighbors = np.zeros((ranklist.shape[0], nsize))
+    nnshowrank = (np.zeros((ranklist.shape[0], nsize))).astype(object)
+
+    for i in range(args.multrank):
+        if args.multrank != 1:
+            args.neighbors = test_table[i]
+        for probe_idx in range(probe_data.shape[0]):
+            row = ranklist[probe_idx]
+            n = 0
+            q = 0
+            while (q < args.neighbors):
+                # skip gallery entries of the same identity seen by the same camera
+                while (probe_cam[probe_idx] == gallery_cam[row[n]] and
+                       probe_label[probe_idx] == gallery_label[row[n]]):
+                    n += 1
+                nneighbors[probe_idx][q] = gallery_label[row[n]]
+                nnshowrank[probe_idx][q] = showfiles_train[row[n]]
+                q += 1
+                n += 1
+
+            if (args.neighbors) and (probe_label[probe_idx] in nneighbors[probe_idx]):
+                target_pred[i][probe_idx] = probe_label[probe_idx]
+            else:
+                target_pred[i][probe_idx] = nneighbors[probe_idx][0]
+
+    if (args.showrank):
+        with open("ranklist.txt", "w") as text_file:
+            text_file.write(np.array2string(nnshowrank[:args.showrank]))
+        with open("query.txt", "w") as text_file:
+            text_file.write(np.array2string(showfiles_test[:args.showrank]))
+
+    if args.mAP:
+        precision = np.zeros((probe_label.shape[0], args.neighbors))
+        recall = np.zeros((probe_label.shape[0], args.neighbors))
+        mAP = np.zeros(probe_label.shape[0])
+        max_level_precision = np.zeros((probe_label.shape[0], 11))
+
+        for i in range(probe_label.shape[0]):
+            truth_count = 0
+            false_count = 0
+            for j in range(args.neighbors):
+                if probe_label[i] == nneighbors[i][j]:
+                    truth_count += 1
+                    precision[i][j] = truth_count/(j+1)
+                else:
+                    false_count += 1
+                    precision[i][j] = 1 - false_count/(j+1)
+            if truth_count != 0:
+                recall_step = 1/truth_count
+                for j in range(args.neighbors):
+                    if probe_label[i] == nneighbors[i][j]:
+                        recall[i][j:] += recall_step
+            else:
+                recall[i][:] = 1
+        for i in range(probe_label.shape[0]):
+            for j in range(11):
+                max_level_precision[i][j] = np.max(precision[i][np.where(recall[i] >= (j/10))])
+        for i in range(probe_label.shape[0]):
+            #mAP[i] = sum(max_level_precision[i])/11
+            mAP[i] = sum(precision[i])/args.neighbors
+        print('mAP:', np.mean(mAP))
+
+    return target_pred
+
+def eval(camId, filelist, labels, gallery_idx, query_idx, train_idx, feature_vectors, args):
+
+    if args.train:
+        cam = camId[train_idx]
+        cam = cam.reshape((cam.shape[0], 1))
+        labs = labels[train_idx].reshape((labels[train_idx].shape[0], 1))
+        tt = np.hstack((train_idx, cam))
+        # split the training identities again to obtain a smaller held-out query set
+        train, test, train_label, test_label = train_test_split(tt, labs, test_size=0.3, random_state=0)
+        del labs
+        del cam
+        train_data = feature_vectors[train[:, 0]]
+        test_data = feature_vectors[test[:, 0]]
+        train_cam = train[:, 1]
+        test_cam = test[:, 1]
+        showfiles_train = filelist[train[:, 0]]
+        showfiles_test = filelist[test[:, 0]]
+        del train
+        del test
+        del tt
+    else:
+        query_idx = query_idx.reshape(query_idx.shape[0])
+        gallery_idx = gallery_idx.reshape(gallery_idx.shape[0])
+        camId = camId.reshape(camId.shape[0])
+
+        showfiles_train = filelist[gallery_idx]
+        showfiles_test = filelist[query_idx]
+        train_data = feature_vectors[gallery_idx]
+        test_data = feature_vectors[query_idx]
+        train_label = labels[gallery_idx]
+        test_label = labels[query_idx]
+        train_cam = camId[gallery_idx]
+        test_cam = camId[query_idx]
+
+    train_idx = train_idx.reshape(train_idx.shape[0])
+    train_model = feature_vectors[train_idx]
+
+    if (args.PCA):
+        pca = PCA(n_components=args.PCA)  # data variance at 100 components is 94%
+        train_model = pca.fit_transform(train_model)
+        train_data = pca.transform(train_data)
+        test_data = pca.transform(test_data)
+
+    accuracy = np.zeros((2, args.multrank))
+    test_table = np.arange(1, args.multrank+1)
+
+    if (args.normalise):
+        debug("Normalising data")
+        train_data = np.divide(train_data, LA.norm(train_data, axis=0))
+        test_data = np.divide(test_data, LA.norm(test_data, axis=0))
+    if (args.kmean_alt):
+        debug("Using Kmeans")
+        train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, labels, gallery_idx, camId)
+
+    if args.kmean:
+        kmeans = KMeans(n_clusters=args.kmean, random_state=0).fit(train_data)
+        neigh = NearestNeighbors(n_neighbors=1)
+        neigh.fit(kmeans.cluster_centers_)
+        neighbors = neigh.kneighbors(test_data, return_distance=False)
+        target_pred = np.zeros(test_data.shape[0])
+
+        for i in range(test_data.shape[0]):
+            td = test_data[i].reshape(1, test_data.shape[1])
+            tc = np.array([test_cam[i]])
+            tl = np.array([test_label[i]])
+            target_pred[i] = (test_model(train_data[np.where(kmeans.labels_ == neighbors[i])], td,
+                                         train_label[np.where(kmeans.labels_ == neighbors[i])], tl,
+                                         train_cam[np.where(kmeans.labels_ == neighbors[i])], tc,
+                                         showfiles_train[np.where(kmeans.labels_ == neighbors[i])], showfiles_test[i], args))
+
+        accuracy[0] = draw_results(test_label, target_pred)
+    else:
+        for q in range(args.comparison):
+            target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args)
+            for i in range(args.multrank):
+                # the optimizer only needs the top-1 accuracy, so it is returned directly;
+                # the comparison/multrank plotting below is reached only via the kmean branch
                return draw_results(test_label, target_pred[i])
+            args.rerank = True
+            args.neighbors = 1
+
+    if (args.multrank != 1):
+        plt.plot(test_table[:(args.multrank)], 100*accuracy[0])
+        if (args.comparison != 1):
+            plt.plot(test_table[:(args.multrank)], 100*accuracy[1])
+            plt.legend(['Baseline NN', 'NN+Reranking'], loc='upper left')
+        plt.xlabel('Top k')
+        plt.ylabel('Identification Accuracy (%)')
+        plt.grid(True)
+        plt.show()
+
+def main():
+    mat = scipy.io.loadmat(os.path.join(args.data, 'cuhk03_new_protocol_config_labeled.mat'))
+    camId = mat['camId']
+    filelist = mat['filelist']
+    labels = mat['labels']
+    gallery_idx = mat['gallery_idx'] - 1
+    query_idx = mat['query_idx'] - 1
+    train_idx = mat['train_idx'] - 1
+    with open(os.path.join(args.data, 'feature_data.json'), 'r') as read_file:
+        feature_vectors = np.array(json.load(read_file))
+
+    # pattern search over the re-ranking parameters p (k1) and q (k2):
+    # expanding axial probes in both directions, then a diagonal restart
+    axis = 0
+    search = 0
+    steps = 0
+    vertical = True
+    neg = False
+    outofaxis = False
+    start = np.array([1, 1])
+    args.PCA = 10
+    args.train = True
+    args.rerank = True
+    args.reranka = 1
+    args.rerankb = 1
+    opt = np.array([1, 1])
+    max_acc = eval(camId, filelist, labels, gallery_idx, query_idx, train_idx, feature_vectors, args)
+    print('origin')
+    print('vertical')
+    while steps < 3:
+        steps += 1
+        while axis < 4:
+            axis += 1
+            p = start[0]
+            q = start[1]
+            while search < 5:
+                search += 1
+                if vertical:
+                    if neg:
+                        p = start[0] - 2*search
+                        if p < 1:
+                            p = 1
+                            search = 5
+                            outofaxis = True
+                    else:
+                        p = search*2 + start[0]
+                    args.reranka = p
+                    if not outofaxis:
+                        print('p:', p, ' q:', q)
+                        acc = eval(camId, filelist, labels, gallery_idx, query_idx, train_idx, feature_vectors, args)
+                        if acc > max_acc:
+                            print('new p:', p, ' for accuracy:', acc)
+                            max_acc = acc
+                            opt[0] = p
+                            start[0] = p
+                            axis = 0
+                            steps = 0
+                            search = 6
+                else:
+                    if neg:
+                        q = start[1] - 2*search
+                        if q < 1:
+                            q = 1
+                            search = 5
+                            outofaxis = True
+                    else:
+                        q = search*2 + start[1]
+                    args.rerankb = q
+                    if not outofaxis:
+                        print('p:', p, ' q:', q)
+                        acc = eval(camId, filelist, labels, gallery_idx, query_idx, train_idx, feature_vectors, args)
+                        if acc > max_acc:
+                            print('new q:', q, ' for accuracy:', acc)
+                            max_acc = acc
+                            opt[1] = q
+                            start[1] = q
+                            axis = 0
+                            steps = 0
+                            search = 6
+            if search == 5:
+                outofaxis = False
+                vertical = not vertical
+                print('vertical:', vertical)
+            search = 0
+            if axis == 2 or axis == 4:
+                neg = not neg
+        axis = 0
+        start[0] += 2
+        start[1] += 2
+        p = start[0]
+        q = start[1]
+        args.reranka = start[0]
+        args.rerankb = start[1]
+        print('p:', p, ' q:', q)
+        acc = eval(camId, filelist, labels, gallery_idx, query_idx, train_idx, feature_vectors, args)
+        if acc > max_acc:
+            print('new p:', p, 'new q:', q, ' for accuracy:', acc)
+            max_acc = acc
+            opt[0] = start[0]
+            opt[1] = start[1]
+            steps = 0
+            vertical = True
+    print('Maximum Accuracy:', max_acc, ' found at p:', opt[0], '|q:', opt[1])
+
+if __name__ == "__main__":
+    main()
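Two implementation details in `opt.py` may be worth seeing in isolation. First, the Mahalanobis branch of `test_model` calls `cdist` in fixed-size chunks so the probe-by-gallery distance matrix is filled a few rows at a time. A minimal, self-contained sketch of that batching pattern (the chunk size of 10 mirrors the script; the function name and random data are only stand-ins):

```python
import numpy as np
from scipy.spatial.distance import cdist

def mahalanobis_batched(probe, gallery, chunk=10):
    """Probe-by-gallery Mahalanobis distances, computed `chunk` rows at a time."""
    cov_inv = np.linalg.inv(np.cov(gallery.T))     # inverse covariance estimated on the gallery
    out = np.empty((probe.shape[0], gallery.shape[0]))
    for start in range(0, probe.shape[0], chunk):
        stop = min(start + chunk, probe.shape[0])  # final chunk may be short
        out[start:stop] = cdist(probe[start:stop], gallery,
                                'mahalanobis', VI=cov_inv)
    return out

# Stand-in data, just to show the call shape.
rng = np.random.default_rng(0)
distances = mahalanobis_batched(rng.normal(size=(25, 8)), rng.normal(size=(40, 8)))
print(distances.shape)  # (25, 40)
```

Taking `min(start + chunk, ...)` keeps the tail rows when the probe count is not a multiple of the chunk size, which a hard-coded `i*10:(i+1)*10` loop over `shape[0]//10` chunks would silently drop.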
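Second, the `--mAP` branch interleaves precision and recall bookkeeping per query. A more conventional average-precision formulation, shown here as a hedged sketch (`ranked_labels` is a hypothetical input: gallery labels sorted by ascending distance, after the same-camera filtering the script applies), averages precision@k over the ranks that hold a true match:

```python
import numpy as np

def average_precision(query_label, ranked_labels):
    """AP for one query: mean precision@k over the ranks k holding a true match."""
    hits = ranked_labels == query_label
    if not hits.any():
        return 0.0
    cumulative_hits = np.cumsum(hits)                        # matches found up to each rank
    precision_at_k = cumulative_hits / np.arange(1, hits.size + 1)
    return float(precision_at_k[hits].mean())

# mAP over all queries would then be:
# mAP = np.mean([average_precision(l, r) for l, r in zip(query_labels, ranklists)])
print(average_precision(7, np.array([7, 3, 7, 5])))  # (1/1 + 2/3) / 2 = 0.8333...
```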
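Finally, the search in `main()` is a hand-rolled pattern search: it probes each parameter axis in expanding steps of 2 in both directions, restarts the sweep whenever a move improves accuracy, and falls back to a diagonal step when both axes are exhausted. The same idea in miniature, as a simplified coordinate ascent where a hypothetical `score(p, q)` stands in for the full `eval(...)` call:

```python
def coordinate_ascent(score, p=1, q=1, steps=(8, 4, 2, 1), min_val=1):
    """Greedy axis-aligned search: try +/- step moves on p and q, keep improvements."""
    best = score(p, q)
    improved = True
    while improved:
        improved = False
        for step in steps:                         # coarse-to-fine step sizes
            for dp, dq in ((step, 0), (-step, 0), (0, step), (0, -step)):
                cand_p = max(min_val, p + dp)      # k1 and k2 must stay >= 1
                cand_q = max(min_val, q + dq)
                acc = score(cand_p, cand_q)
                if acc > best:                     # keep the move and sweep again
                    best, p, q = acc, cand_p, cand_q
                    improved = True
    return p, q, best

# Toy objective peaking at (9, 3), the script's default k1/k2.
opt_p, opt_q, best = coordinate_ascent(lambda p, q: -((p - 9)**2 + (q - 3)**2))
print(opt_p, opt_q, best)  # 9 3 0
```

Like the script's search, this is a local method: it can stall on a plateau or a local maximum, which is why `opt.py` adds the diagonal restart after three unsuccessful sweeps.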