author     Vasil Zlatanov <vasko@e4-pattern-vm.europe-west4-a.c.electric-orbit-223819.internal>  2018-12-05 16:32:23 +0000
committer  Vasil Zlatanov <vasko@e4-pattern-vm.europe-west4-a.c.electric-orbit-223819.internal>  2018-12-05 16:32:23 +0000
commit     bcd380b631184e9d4e58c0aa80afb17727581066 (patch)
tree       d6fbba240f7d22bdfc922351ac96b22aee80965e
parent     769a50e70ac253531229e1639db6bc9e401a0c43 (diff)
download   vz215_np1915-bcd380b631184e9d4e58c0aa80afb17727581066.tar.gz
           vz215_np1915-bcd380b631184e9d4e58c0aa80afb17727581066.tar.bz2
           vz215_np1915-bcd380b631184e9d4e58c0aa80afb17727581066.zip
Move kmean to separate file and refactor code
* Other small quality-of-life improvements and changes to the flags
-rwxr-xr-x  kmean.py   62
-rwxr-xr-x  part2.py  129
2 files changed, 96 insertions, 95 deletions
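
For reference, a minimal sketch of how part2.py is expected to call the new helper after this refactor; the import line is an assumption, since the diff below does not add one to part2.py:

    from kmean import create_kmean_clusters

    # Replace the per-camera gallery with k-means cluster centres before kNN matching
    train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, labels,
                                                                gallery_idx, camId)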
diff --git a/kmean.py b/kmean.py
new file mode 100755
index 0000000..4e9d03e
--- /dev/null
+++ b/kmean.py
@@ -0,0 +1,62 @@
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import sys
+import random
+import os
+import json
+import scipy.io
+from random import randint
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neighbors import DistanceMetric
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+import argparse
+import numpy as np
+from numpy import genfromtxt
+from numpy import linalg as LA
+from timeit import default_timer as timer
+from scipy.spatial.distance import cdist
+from rerank import re_ranking
+
+def create_kmean_clusters(feature_vectors, labels, gallery_idx, camId):
+    # Split the gallery features and labels per camera (camId is 1 or 2)
+    gallery = [[], []]
+    gallerylab = [[], []]
+    for i in range(gallery_idx.size):
+        cam = camId[gallery_idx[i]] - 1
+        gallery[cam].append(feature_vectors[gallery_idx[i]])
+        gallerylab[cam].append(labels[gallery_idx[i]])
+
+    train = [np.array(gallery[0]), np.array(gallery[1])]
+    tlabel = [np.array(gallerylab[0]), np.array(gallerylab[1])]
+
+    n_clusters = int(np.max(labels))
+    km_train_data = [None, None]
+    km_labels = [None, None]
+    km_idx = [None, None]
+
+    for i in range(2):
+        km_train_data[i] = KMeans(n_clusters=n_clusters, random_state=0).fit(train[i])
+        km_labels[i] = np.zeros(n_clusters)  # one identity label per cluster
+        km_idx[i] = km_train_data[i].labels_
+
+        # Relabel each cluster with the majority identity among its members
+        for j in range(n_clusters):
+            class_vote = np.zeros(n_clusters)
+            for q in range(km_idx[i].size):
+                if km_idx[i][q] == j:
+                    class_vote[int(tlabel[i][q]) - 1] += 1
+            km_labels[i][j] = np.argmax(class_vote) + 1
+
+    # MERGE CLUSTERS: stack the cluster centres of both cameras into one gallery
+    cl = []
+    cllab = []
+    clustercam = [np.ones(km_labels[0].size), np.add(np.ones(km_labels[1].size), 1)]
+    for i in range(2):
+        for j in range(km_labels[i].size):
+            cl.append(km_train_data[i].cluster_centers_[j])
+            cllab.append(km_labels[i][j])
+
+    train_data = np.array(cl)
+    train_label = np.array(cllab)
+    train_cam = np.concatenate(clustercam)
+    return train_data, train_label, train_cam
+
diff --git a/part2.py b/part2.py
index 82a8cdd..679a54d 100755
--- a/part2.py
+++ b/part2.py
@@ -44,9 +44,10 @@ parser.add_argument("-n", "--neighbors", help="Number of neighbors", type=int, d
parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
parser.add_argument("-s", "--showrank", help="Save ranklist pic id in a txt file", type=int, default = 0)
parser.add_argument("-2", "--graphspace", help="Graph space", action='store_true', default=0)
-parser.add_argument("-1", "--norm", help="Normalized features", action='store_true', default=0)
+parser.add_argument("-1", "--normalise", help="Normalized features", action='store_true', default=0)
parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to M", type=int, default=1)
parser.add_argument("-C", "--comparison", help="Set to 2 to obtain a comparison of baseline and Improved metric", type=int, default=1)
+parser.add_argument("--data", help="Data folder with features data", default='data')
args = parser.parse_args()
@@ -68,23 +69,23 @@ def draw_results(test_label, pred_label):
return acc_sc
def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam, probe_cam, showfiles_train, showfiles_test, args):
-
+
verbose("probe shape:", probe_data.shape)
verbose("gallery shape:", gallery_data.shape)
-
+
if args.rerank:
- distances = re_ranking(probe_data, gallery_data,
- args.reranka ,args.rerankb , 0.3,
+ distances = re_ranking(probe_data, gallery_data,
+ args.reranka ,args.rerankb , 0.3,
MemorySave = False, Minibatch = 2000)
else:
if args.mahalanobis:
# metric = 'jaccard' is also valid
distances = cdist(probe_data, gallery_data, 'sqeuclidean')
else:
- distances = cdist(probe_data, gallery_data, 'euclidean')
+ distances = cdist(probe_data, gallery_data, 'euclidean')
ranklist = np.argsort(distances, axis=1)
-
+
test_table = np.arange(1, args.multrank+1)
target_pred = np.zeros((args.multrank, ranklist.shape[0]))
nsize = args.neighbors
@@ -92,8 +93,8 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam
nsize = test_table[args.multrank-1]
nneighbors = np.zeros((ranklist.shape[0],nsize))
nnshowrank = (np.zeros((ranklist.shape[0],nsize))).astype(object)
-
-
+
+
for i in range(args.multrank):
if args.multrank!= 1:
args.neighbors = test_table[i]
@@ -109,19 +110,19 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam
nnshowrank[probe_idx][q] = showfiles_train[row[n]] #
q += 1
n += 1
-
+
if (args.neighbors) and (probe_label[probe_idx] in nneighbors[probe_idx]):
target_pred[i][probe_idx] = probe_label[probe_idx]
else:
target_pred[i][probe_idx] = nneighbors[probe_idx][0]
-
-
- if (args.showrank):
+
+
+ if (args.showrank):
with open("ranklist.txt", "w") as text_file:
text_file.write(np.array2string(nnshowrank[:args.showrank]))
with open("query.txt", "w") as text_file:
text_file.write(np.array2string(showfiles_test[:args.showrank]))
-
+
if args.graphspace:
# Colors for distinct individuals
cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(1467)]
@@ -134,14 +135,14 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam
return target_pred
def main():
- mat = scipy.io.loadmat('data/cuhk03_new_protocol_config_labeled.mat')
+ mat = scipy.io.loadmat(os.path.join(args.data,'cuhk03_new_protocol_config_labeled.mat'))
camId = mat['camId']
filelist = mat['filelist']
labels = mat['labels']
gallery_idx = mat['gallery_idx'] - 1
- query_idx = mat['query_idx'] - 1
+ query_idx = mat['query_idx'] - 1
train_idx = mat['train_idx'] - 1
- with open("data/feature_data.json", "r") as read_file:
+ with open(os.path.join(args.data,'feature_data.json'), 'r') as read_file:
feature_vectors = np.array(json.load(read_file))
if args.train:
query_idx = train_idx.reshape(train_idx.shape[0])
@@ -150,7 +151,7 @@ def main():
query_idx = query_idx.reshape(query_idx.shape[0])
gallery_idx = gallery_idx.reshape(gallery_idx.shape[0])
camId = camId.reshape(camId.shape[0])
-
+
showfiles_train = filelist[gallery_idx]
showfiles_test = filelist[query_idx]
train_data = feature_vectors[gallery_idx]
@@ -159,98 +160,36 @@ def main():
test_label = labels[query_idx]
train_cam = camId[gallery_idx]
test_cam = camId[query_idx]
-
+
accuracy = np.zeros((2, args.multrank))
test_table = np.arange(1, args.multrank+1)
- if (args.norm):
+ if (args.normalise):
train_data = np.divide(train_data,LA.norm(train_data, axis=0))
test_data = np.divide(test_data, LA.norm(test_data, axis=0))
if(args.kmean):
- gallery1 = []
- gallery2 = []
- gallery1lab = []
- gallery2lab = []
- for i in range(gallery_idx.size):
- if camId[gallery_idx[i]] == 1:
- gallery1.append(feature_vectors[gallery_idx[i]])
- gallery1lab.append(labels[gallery_idx[i]])
- else:
- gallery2.append(feature_vectors[gallery_idx[i]])
- gallery2lab.append(labels[gallery_idx[i]])
-
- train1 = np.array(gallery1)
- train2 = np.array(gallery2)
- tlabel1 = np.array(gallery1lab)
- tlabel2 = np.array(gallery2lab)
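+        # NOTE: assumes 'from kmean import create_kmean_clusters' at the top of part2.py (an import this diff does not show)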
+ train_data, train_label, train_cam = create_kmean_clusters(feature_vectors,
+ labels,
+ gallery_idx,
+ camId)
+ for q in range(args.comparison):
+ target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args)
+ for i in range(args.multrank):
+ accuracy[q][i] = draw_results(test_label, target_pred[i])
+ args.rerank = True
+ args.neighbors = 1
- km_train_data_1 = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train1)
- km_train_data_2 = KMeans(n_clusters=int(np.max(labels)),random_state=0).fit(train2)
-
- ###REMAP LABELS
- km_labels_1 = np.zeros(int(np.max(labels))) # clusters size
- km_labels_2 = np.zeros(int(np.max(labels)))
- km_idx_1 = km_train_data_1.labels_
- for i in range(np.max(labels)):
- class_vote = np.zeros(np.max(labels))
- for q in range(km_idx_1.size):
- if km_idx_1[q]==i:
- class_vote[int(tlabel1[q])-1] += 1
- km_labels_1[i] = np.argmax(class_vote) + 1
-
- km_idx_2 = km_train_data_2.labels_
- for i in range(np.max(labels)):
- class_vote = np.zeros(np.max(labels))
- for q in range(km_idx_2.size):
- if km_idx_2[q]==i:
- class_vote[int(tlabel2[q])-1] += 1
- km_labels_2[i] = np.argmax(class_vote) + 1
-
- #MERGE CLUSTERS
- cl = []
- cllab = []
- clcam = []
- clustercam1 = np.ones(km_labels_1.size)
- clustercam2 = np.add(np.ones(km_labels_2.size), 1)
- for i in range(km_labels_1.size):
- cl.append(km_train_data_1.cluster_centers_[i])
- cllab.append(km_labels_1[i])
- clcam.append(clustercam1[i])
- for i in range(km_labels_2.size):
- cl.append(km_train_data_2.cluster_centers_[i])
- cllab.append(km_labels_2[i])
- clcam.append(clustercam2[i])
-
- cluster = np.array(cl)
- clusterlabel = np.array(cllab)
- clustercam = np.array(clcam)
-
- for q in range(args.comparison):
- target_pred = test_model(cluster, test_data, clusterlabel, test_label, clustercam, test_cam, showfiles_train, showfiles_test, args)
- for i in range(args.multrank):
- accuracy[q][i] = draw_results(test_label, target_pred[i])
- args.rerank = True
- args.neighbors = 1
-
- else:
- for q in range(args.comparison):
- target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam, showfiles_train, showfiles_test, args)
- for i in range(args.multrank):
- accuracy[q][i] = draw_results(test_label, target_pred[i])
- args.rerank = True
- args.neighbors = 1
-
if(args.multrank != 1):
plt.plot(test_table[:(args.multrank)], 100*accuracy[0])
if(args.comparison!=1):
plt.plot(test_table[:(args.multrank)], 100*accuracy[1])
- plt.legend(['Baseline kNN', 'Improved metric'], loc='upper left')
+ plt.legend(['Baseline kNN', 'Improved metric'], loc='upper left')
plt.xlabel('k rank')
plt.ylabel('Recognition Accuracy (%)')
plt.grid(True)
plt.show()
-
+
if __name__ == "__main__":
main()
-
+