#!/usr/bin/env python
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
#
# Recovered from the patch "Part 2 knn kmean" (commit
# 37a8f636d6cfce1b930b0a99269779e497b3f0ac, nunzip, Sun, 2 Dec 2018), which
# created part2.py.
#
# usage: part2.py [-h] [-R RANDOM] [-n NEIGHBORS] [-c] [-s SEED] [-t SPLIT]
#                 [-2] [-cm] [-q] [-pr] [-km] [-ma] [-e] [-ka RERANKA]
#                 [-kb RERANKB]

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import sys
import random
import os
import json
import scipy.io
from random import randint
from sklearn.neighbors import KNeighborsClassifier
try:
    # Recent scikit-learn releases expose DistanceMetric from sklearn.metrics;
    # the sklearn.neighbors location only exists in older releases.
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import argparse
import numpy as np
from numpy import genfromtxt
from numpy import linalg as LA
from timeit import default_timer as timer
from scipy.spatial.distance import cdist

# Weight given to the original distance when it is blended with the Jaccard
# distance by re_ranking(); main() has always passed this value.
RERANK_LAMBDA = 0.3


def re_ranking(probFea, galFea, k1, k2, lambda_value, MemorySave=False, Minibatch=2000):
    """Return re-ranked query-to-gallery distances.

    The query (probe) and gallery features are stacked, all pairwise squared
    Euclidean distances are computed and normalised per column, and each
    sample is encoded by weights over its expanded k-reciprocal neighbour set.
    The result blends a Jaccard distance on those encodings with the
    normalised original distance.

    Args:
        probFea: 2-D array of query features, one sample per row.
        galFea: 2-D array of gallery features, one sample per row.
        k1: neighbourhood size used to build the k-reciprocal sets.
        k2: number of top-ranked neighbours whose encodings are averaged;
            the averaging step is skipped when ``k2 == 1``.
        lambda_value: weight of the original distance; the Jaccard distance
            gets ``1 - lambda_value``.
        MemorySave: when true, fill the distance matrix ``Minibatch`` rows at
            a time instead of in one ``cdist`` call.
        Minibatch: number of rows per chunk when ``MemorySave`` is true.

    Returns:
        float16 array of shape ``(len(probFea), len(galFea))``; smaller means
        more similar.
    """
    query_num = probFea.shape[0]
    all_num = query_num + galFea.shape[0]
    feat = np.append(probFea, galFea, axis=0)
    feat = feat.astype(np.float16)
    print('computing original distance')
    # NOTE(review): everything below is stored as float16, whose largest
    # finite value is 65504; larger squared distances would become inf.
    # Confirm the feature scale keeps distances in range.
    if MemorySave:
        original_dist = np.zeros(shape=[all_num, all_num], dtype=np.float16)
        # Slicing clips at the end of the array, so the last chunk may be short.
        for start in range(0, all_num, Minibatch):
            stop = start + Minibatch
            original_dist[start:stop, :] = np.power(
                cdist(feat[start:stop, ], feat), 2).astype(np.float16)
    else:
        original_dist = cdist(feat, feat).astype(np.float16)
        original_dist = np.power(original_dist, 2).astype(np.float16)
    del feat
    # The matrix covers query + gallery samples, so this equals all_num.
    gallery_num = original_dist.shape[0]
    # Normalise every column by its maximum, then transpose.
    original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float16)
    initial_rank = np.argsort(original_dist).astype(np.int32)

    print('starting re_ranking')
    half_k1 = int(np.around(k1 / 2)) + 1
    for i in range(all_num):
        # k-reciprocal neighbors: j is kept when i is also in j's top k1 + 1.
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        k_reciprocal_expansion_index = k_reciprocal_index
        for candidate in k_reciprocal_index:
            candidate_forward_k_neigh_index = initial_rank[candidate, :half_k1]
            candidate_backward_k_neigh_index = initial_rank[
                candidate_forward_k_neigh_index, :half_k1]
            fi_candidate = np.where(
                candidate_backward_k_neigh_index == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
            # Merge the candidate's own reciprocal set when more than two
            # thirds of it overlaps with the set of sample i.
            overlap = len(np.intersect1d(candidate_k_reciprocal_index,
                                         k_reciprocal_index))
            if overlap > 2 / 3 * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(
                    k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)

    # Only the query rows of the original distance are needed from here on.
    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        # Replace each encoding by the mean encoding of its k2 nearest samples.
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank

    # invIndex[c] lists the samples whose encoding has a non-zero entry in
    # column c.
    invIndex = [np.where(V[:, i] != 0)[0] for i in range(gallery_num)]

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)

    for i in range(query_num):
        # temp_min accumulates sum(min(V[i], V[j])) for every sample j.
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
                V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    # Drop the query-to-query columns; keep query rows against gallery columns.
    final_dist = final_dist[:query_num, query_num:]

    return final_dist


def draw_results(args, target_test, target_pred):
    """Print the accuracy and, if requested, plot the confusion matrix.

    Args:
        args: parsed command-line namespace; only ``conf_mat`` is read.
        target_test: true labels.
        target_pred: predicted labels, same length as ``target_test``.
    """
    acc_sc = accuracy_score(target_test, target_pred)
    print('Accuracy: ', acc_sc)
    if args.conf_mat:
        # Only build the matrix when it is actually going to be shown.
        cm = confusion_matrix(target_test, target_pred)
        plt.matshow(cm, cmap='Blues')
        plt.colorbar()
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()


def test_model(train_data, test_data, target_train, target_test, args):
    """Fit a Euclidean k-NN classifier and return its predictions.

    Args:
        train_data: training feature matrix, one sample per row.
        test_data: feature matrix to classify.
        target_train: labels for ``train_data``.
        target_test: unused; kept so existing callers keep working.
        args: parsed command-line namespace; only ``neighbors`` is read.

    Returns:
        Predicted label for every row of ``test_data``.
    """
    # TODO: a Mahalanobis variant (metric='mahalanobis' with the inverse
    # covariance of train_data as 'VI') was sketched here but never enabled.
    classifier = KNeighborsClassifier(n_neighbors=args.neighbors, metric='euclidean')
    classifier.fit(train_data, target_train)
    # TODO: call classifier.kneighbors(test_data) here if the neighbour
    # indices are needed; the result used to be computed and thrown away.
    return classifier.predict(test_data)


def _split_by_camera(features, labels, cam_ids, indices):
    """Split the samples selected by ``indices`` into camera 1 and the rest.

    Returns ``(data_1, labels_1, data_2, labels_2)`` as float64 arrays, each
    in the order the samples appear in ``indices``.
    """
    indices = np.asarray(indices).ravel()
    flat_labels = np.asarray(labels).ravel()
    is_cam_1 = np.asarray(cam_ids).ravel()[indices] == 1
    idx_1 = indices[is_cam_1]
    idx_2 = indices[~is_cam_1]
    return (
        np.asarray(features[idx_1], dtype=np.float64),
        flat_labels[idx_1].astype(np.float64),
        np.asarray(features[idx_2], dtype=np.float64),
        flat_labels[idx_2].astype(np.float64),
    )


def _vote_cluster_labels(cluster_idx, train_labels, n_classes):
    """Give each cluster the most frequent 1-based label among its members.

    Clusters without members, and ties, resolve to the lowest label because
    ``argmax`` returns the first maximum.
    """
    votes = np.zeros((n_classes, n_classes), dtype=np.int64)
    class_idx = np.asarray(train_labels).astype(np.int64) - 1
    # add.at accumulates repeated (cluster, class) pairs instead of
    # overwriting them.
    np.add.at(votes, (np.asarray(cluster_idx), class_idx), 1)
    return np.argmax(votes, axis=1) + 1


def _rerank_predict(query_data, gallery_data, gallery_labels, args):
    """Label each query with the label of its nearest re-ranked gallery sample."""
    final_dist = re_ranking(query_data, gallery_data, args.reranka,
                            args.rerankb, RERANK_LAMBDA)
    return gallery_labels[np.argmin(final_dist, axis=1)]


def _kmeans_predict(train_data, train_labels, test_data, test_labels, n_classes, args):
    """Classify ``test_data`` by k-NN against labelled k-means centroids."""
    km = KMeans(n_clusters=n_classes, random_state=0).fit(train_data)
    centroid_labels = _vote_cluster_labels(km.labels_, train_labels, n_classes)
    return test_model(km.cluster_centers_, test_data, centroid_labels,
                      test_labels, args)


def main():
    """Load the dataset, split it by camera and run the selected evaluation.

    Exactly one of ``-ma`` (re-ranking), ``-km`` (k-means centroids + k-NN) or
    ``-e`` (plain Euclidean k-NN) is acted on, checked in that order. Each
    mode evaluates the camera-1 queries against the other camera's gallery
    and vice versa, printing one accuracy per direction.
    """
    parser = argparse.ArgumentParser()
    # -R, -c, -s, -t, -2, -q and -pr are accepted but not read anywhere in
    # this file.
    parser.add_argument("-R", "--random", help="Number of eigen value to randomise", type=int)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=1)
    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.3)
    parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components",
                        action='store_true')
    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true')
    parser.add_argument("-km", "--kmean", help="Perform Kmeans", action='store_true')
    parser.add_argument("-ma", "--mala", help="Perform Mahalanobis Distance metric", action='store_true')
    parser.add_argument("-e", "--eucl", help="Standard euclidean", action='store_true')
    parser.add_argument("-ka", "--reranka", help="Parameter 1 for Rerank", type=int, default=20)
    parser.add_argument("-kb", "--rerankb", help="Parameter 2 for rerank", type=int, default=6)
    args = parser.parse_args()

    ###PART2 INPUT DATA
    mat = scipy.io.loadmat('data/cuhk03_new_protocol_config_labeled.mat')
    camId = mat['camId']
    gallery_idx = mat['gallery_idx']
    labels = mat['labels']
    query_idx = mat['query_idx']
    with open("data/feature_data.json", "r") as read_file:
        feature_vectors = np.array(json.load(read_file))

    # NOTE(review): gallery_idx / query_idx are used directly as 0-based row
    # numbers. Index vectors saved from MATLAB are often 1-based - confirm
    # against the dataset and subtract 1 here if so.
    train_data_1, train_label_1, train_data_2, train_label_2 = _split_by_camera(
        feature_vectors, labels, camId, gallery_idx)
    test_data_1, test_label_1, test_data_2, test_label_2 = _split_by_camera(
        feature_vectors, labels, camId, query_idx)

    if args.mala:
        # Despite the flag name, this branch runs re_ranking().
        target_pred = _rerank_predict(test_data_1, train_data_2, train_label_2, args)
        draw_results(args, test_label_1, target_pred)

        target_pred2 = _rerank_predict(test_data_2, train_data_1, train_label_1, args)
        draw_results(args, test_label_2, target_pred2)

    elif args.kmean:
        # One cluster per identity; int() avoids doing arithmetic in the
        # (possibly narrow) integer dtype of the label array.
        n_classes = int(np.max(labels))

        target_pred = _kmeans_predict(train_data_1, train_label_1,
                                      test_data_2, test_label_2, n_classes, args)
        draw_results(args, test_label_2, target_pred)

        target_pred = _kmeans_predict(train_data_2, train_label_2,
                                      test_data_1, test_label_1, n_classes, args)
        draw_results(args, test_label_1, target_pred)

    elif args.eucl:
        target_pred = test_model(train_data_2, test_data_1, train_label_2, test_label_1, args)
        draw_results(args, test_label_1, target_pred)
        target_pred = test_model(train_data_1, test_data_2, train_label_1, test_label_2, args)
        draw_results(args, test_label_2, target_pred)

    print('N-Query from cam 1:', test_data_1.shape)
    print('N-Query from cam 2:', test_data_2.shape)
    print('Complete')


if __name__ == "__main__":
    main()