diff options
| -rwxr-xr-x | part2.py | 282 | ||||
| -rw-r--r-- | rerank.py | 82 | 
2 files changed, 158 insertions, 206 deletions
@@ -1,11 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/python -W ignore::DeprecationWarning  # Author: Vasil Zlatanov, Nunzio Pucci  # EE4 Pattern Recognition coursework  # -# usage: train.py [-h] -i DATA [-m EIGEN] [-M REIGEN] [-e ENSEMBLE] [-b] -#                 [-R RANDOM] [-n NEIGHBORS] [-f FACES] [-c] [-s SEED] -#                 [-t SPLIT] [-2] [-p] [-l] [-r RECONSTRUCT] [-cm] [-q] [-pr] -#                 [-alt] +# usage: part2.py [-h] [-t] [-cm] [-km] [-ma] [-e] [-r] [-ka RERANKA] +#                [-kb RERANKB] [-v]  import matplotlib.pyplot as plt  from mpl_toolkits.mplot3d import Axes3D @@ -30,90 +28,28 @@ from numpy import genfromtxt  from numpy import linalg as LA  from timeit import default_timer as timer  from scipy.spatial.distance import cdist +from rerank import re_ranking -#prob query, gal train -def re_ranking(probFea,galFea,k1,k2,lambda_value, MemorySave = False, Minibatch = 2000): - -    query_num = probFea.shape[0] -    all_num = query_num + galFea.shape[0]     -    feat = np.append(probFea,galFea,axis = 0) -    feat = feat.astype(np.float16) -    print('computing original distance') -    if MemorySave: -        original_dist = np.zeros(shape = [all_num,all_num],dtype = np.float16) -        i = 0 -        while True: -            it = i + Minibatch -            if it < np.shape(feat)[0]: -                original_dist[i:it,] = np.power(cdist(feat[i:it,],feat),2).astype(np.float16) -            else: -                original_dist[i:,:] = np.power(cdist(feat[i:,],feat),2).astype(np.float16) -                break -            i = it -    else: -        original_dist = cdist(feat,feat).astype(np.float16)   -        original_dist = np.power(original_dist,2).astype(np.float16) -    del feat     -    gallery_num = original_dist.shape[0] -    original_dist = np.transpose(original_dist/np.max(original_dist,axis = 0)) -    V = np.zeros_like(original_dist).astype(np.float16) -    initial_rank = np.argsort(original_dist).astype(np.int32) - -     -    print('starting re_ranking') -    for i in range(all_num): -        # k-reciprocal neighbors -        forward_k_neigh_index = initial_rank[i,:k1+1] -        backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] -        fi = np.where(backward_k_neigh_index==i)[0] -        k_reciprocal_index = forward_k_neigh_index[fi] -        k_reciprocal_expansion_index = k_reciprocal_index -        for j in range(len(k_reciprocal_index)): -            candidate = k_reciprocal_index[j] -            candidate_forward_k_neigh_index = initial_rank[candidate,:int(np.around(k1/2))+1] -            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:int(np.around(k1/2))+1] -            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] -            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] -            if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2/3*len(candidate_k_reciprocal_index): -                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) -             -        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) -        weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) -        V[i,k_reciprocal_expansion_index] = weight/np.sum(weight) -    original_dist = original_dist[:query_num,]     -    if k2 != 1: -        V_qe = np.zeros_like(V,dtype=np.float16) -        for i in range(all_num): -            V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) -        V = V_qe -        del V_qe -    del initial_rank -    invIndex = [] -    for i in range(gallery_num): -        invIndex.append(np.where(V[:,i] != 0)[0]) -     -    jaccard_dist = np.zeros_like(original_dist,dtype = np.float16) +parser = argparse.ArgumentParser() +parser.add_argument("-t", "--test", help="Use test data instead of query", action='store_true') +parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true') +parser.add_argument("-km", "--kmean", help="Perform Kmeans", action='store_true', default=0) +parser.add_argument("-ma", "--mahalanobis", help="Perform Mahalanobis Distance metric", action='store_true', default=0) +parser.add_argument("-e", "--euclidean", help="Standard euclidean", action='store_true', default=0) +parser.add_argument("-r", "--rerank", help="Use k-reciprocal rernaking", action='store_true') +parser.add_argument("-ka", "--reranka", help="Parameter 1 for Rerank", type=int, default = 20) +parser.add_argument("-kb", "--rerankb", help="Parameter 2 for rerank", type=int, default = 6) +parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true') +args = parser.parse_args() -    for i in range(query_num): -        temp_min = np.zeros(shape=[1,gallery_num],dtype=np.float16) -        indNonZero = np.where(V[i,:] != 0)[0] -        indImages = [] -        indImages = [invIndex[ind] for ind in indNonZero] -        for j in range(len(indNonZero)): -            temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) -        jaccard_dist[i] = 1-temp_min/(2-temp_min) -     -    final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value -    del original_dist -    del V -    del jaccard_dist -    final_dist = final_dist[:query_num,query_num:] -         -    return final_dist +def verbose(*text): +    if args.verbose: +        print(text) -def draw_results(args, target_test, target_pred): -    acc_sc = accuracy_score(target_test, target_pred) -    cm = confusion_matrix(target_test, target_pred) +#prob query, gal train +def draw_results(test_label, pred_label): +    acc_sc = accuracy_score(test_label, pred_label) +    cm = confusion_matrix(test_label, pred_label)      print('Accuracy: ', acc_sc)      if (args.conf_mat):          plt.matshow(cm, cmap='Blues') @@ -123,121 +59,62 @@ def draw_results(args, target_test, target_pred):          plt.show()      return -def test_model(train_data, test_data, target_train, target_test, args): -    classifier = KNeighborsClassifier(n_neighbors=args.neighbors, metric='euclidean') -#    else: -#        S = LA.inv(np.cov(train_data, rowvar=False)) -#        print(S.shape) -#        classifier = KNeighborsClassifier(n_neighbors=args.neighbors, metric='mahalanobis', metric_params={'VI':S}) -    classifier.fit(train_data, target_train) -    target_pred = classifier.predict(test_data) -    dist, nn_idx = classifier.kneighbors(test_data) -    #USE NN_IDX TO RECOVER NEIGHBORS +def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam, probe_cam): +    # metric = 'jaccard' is also valid +    if args.mahalanobis: +        metric = 'sqeuclidean' +    else: +        metric = 'euclidean' + +    verbose("probe shape:", probe_data.shape) +    verbose("gallery shape:", gallery_data.shape) + +    if args.rerank: +        distances = re_ranking(probe_data, gallery_data,  +                               args.reranka ,args.rerankb , 0.3,  +                               MemorySave = False, Minibatch = 2000) +    else: +        distances = cdist(probe_data, gallery_data, metric)  + +    ranklist = np.argsort(distances, axis=1) + +    target_pred = np.zeros(ranklist.shape[0]) +    for probe_idx in range(probe_data.shape[0]): +        row = ranklist[probe_idx] +        n = 0  +        while (probe_cam[probe_idx] == gallery_cam[row[n]] and +          probe_label[probe_idx] == gallery_label[row[n]]): +            n += 1 +        target_pred[probe_idx] = gallery_label[row[n]] +      return target_pred  def main(): -    parser = argparse.ArgumentParser() -    parser.add_argument("-R", "--random", help="Number of eigen value to randomise", type=int) -    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 1) -    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true') -    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0) -    parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.3) -    parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", -            action='store_true') -    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true') -    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true') -    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true') -    parser.add_argument("-km", "--kmean", help="Perform Kmeans", action='store_true', default=0) -    parser.add_argument("-ma", "--mala", help="Perform Mahalanobis Distance metric", action='store_true', default=0) -    parser.add_argument("-e", "--eucl", help="Standard euclidean", action='store_true', default=0) -    parser.add_argument("-ka", "--reranka", help="Parameter 1 for Rerank", type=int, default = 20) -    parser.add_argument("-kb", "--rerankb", help="Parameter 2 for rerank", type=int, default = 6) -    args = parser.parse_args() - -    ###PART2 INPUT DATA      mat = scipy.io.loadmat('data/cuhk03_new_protocol_config_labeled.mat')      camId = mat['camId']      filelist = mat['filelist'] -    gallery_idx = mat['gallery_idx']      labels = mat['labels'] -    query_idx = mat['query_idx'] -    train_idx = mat['train_idx'] +    gallery_idx = mat['gallery_idx'] - 1 +    query_idx = mat['query_idx'] - 1  +    train_idx = mat['train_idx'] - 1      with open("data/feature_data.json", "r") as read_file:          feature_vectors = np.array(json.load(read_file)) + +    gallery_idx = gallery_idx.reshape(gallery_idx.shape[0]) +    if args.test: +        query_idx = train_idx.reshape(train_idx.shape[0]) +    else: +        query_idx = query_idx.reshape(query_idx.shape[0]) +    camId = camId.reshape(camId.shape[0]) + +    train_data = feature_vectors[gallery_idx] +    test_data = feature_vectors[query_idx] +    train_label = labels[gallery_idx] +    test_label = labels[query_idx] +    train_cam = camId[gallery_idx] +    test_cam = camId[query_idx] -    query_cam_1 = 0 -    for i in range(query_idx.size): -        if camId[query_idx[i]] == 1: -            query_cam_1 = query_cam_1 + 1 -    query_cam_2 = query_idx.size - query_cam_1 -     -    train_cam_1 = 0 -    for i in range(gallery_idx.size): -        if camId[gallery_idx[i]] == 1: -            train_cam_1 = train_cam_1 + 1 -    train_cam_2 = gallery_idx.size - train_cam_1 -     -    train_data_1 = np.zeros(((train_cam_1),(feature_vectors.shape[1]))) -    train_label_1 = np.zeros(train_cam_1) -    test_data_1 = np.zeros(((query_cam_1),(feature_vectors.shape[1]))) -    test_label_1 = np.zeros(query_cam_1) -     -    train_data_2 = np.zeros(((train_cam_2),(feature_vectors.shape[1]))) -    train_label_2 = np.zeros(train_cam_2) -    test_data_2 = np.zeros(((query_cam_2),(feature_vectors.shape[1]))) -    test_label_2 = np.zeros(query_cam_2) -     -    i_1 = 0 -    i_2 = 0 -    for i in range(gallery_idx.size): -        if camId[gallery_idx[i]] == 1: -            train_data_1[i_1] = feature_vectors[gallery_idx[i]] -            i_1 = i_1 + 1 -        else: -            train_data_2[i_2] = feature_vectors[gallery_idx[i]] -            i_2 = i_2 + 1 -    i_1 = 0 -    i_2 = 0            -    for i in range(query_idx.size): -        if camId[query_idx[i]] == 1: -            test_data_1[i_1] = feature_vectors[query_idx[i]] -            i_1 = i_1 + 1 -        else: -            test_data_2[i_2] = feature_vectors[query_idx[i]] -            i_2 = i_2 + 1 -    i_1 = 0 -    i_2 = 0               -    for i in range(gallery_idx.size): -        if camId[gallery_idx[i]] == 1: -            train_label_1[i_1] = labels[gallery_idx[i]] -            i_1 = i_1 + 1 -        else: -            train_label_2[i_2] = labels[gallery_idx[i]] -            i_2 = i_2 + 1 -    i_1 = 0 -    i_2 = 0             -    for i in range(query_idx.size): -        if camId[query_idx[i]] == 1: -            test_label_1[i_1] = labels[query_idx[i]] -            i_1 = i_1 + 1 -        else: -            test_label_2[i_2] = labels[query_idx[i]] -            i_2 = i_2 + 1 -     -    if (args.mala): -        final_dist = re_ranking(test_data_1, train_data_2, args.reranka, args.rerankb, 0.3) -        target_pred = np.zeros(final_dist.shape[0]) -        for i in range(test_label_1.size): -            target_pred[i] = train_label_2[np.argmin(final_dist[i])] -        draw_results(args, test_label_1, target_pred) -     -        final_dist2 = re_ranking(test_data_2, train_data_1, args.reranka, args.rerankb, 0.3) -        target_pred2 = np.zeros(final_dist2.shape[0]) -        for i in range(test_label_2.size): -            target_pred2[i] = train_label_1[np.argmin(final_dist2[i])] -        draw_results(args, test_label_2, target_pred2) -         -    elif(args.kmean): +    if(args.kmean):          km_labels_1 = np.arange(1,np.max(labels)+1)          km_labels_2 = np.arange(1,np.max(labels)+1)          km_train_data_1 = np.zeros(((km_labels_1.size),(feature_vectors.shape[1]))) @@ -253,8 +130,8 @@ def main():                      class_vote[int(train_label_1[q])-1] = class_vote[int(train_label_1[q])-1] + 1              km_labels_1[i] = np.argmax(class_vote) + 1 -        target_pred = test_model(km_train_data_1.cluster_centers_, test_data_2, km_labels_1, test_label_2, args)     -        draw_results(args, test_label_2, target_pred) +        target_pred = test_model(km_train_data_1.cluster_centers_, test_data_2, km_labels_1, test_label_2)     +        draw_results(test_label_2, target_pred)          km_idx_2 = km_train_data_2.labels_           for i in range(np.max(labels)): @@ -264,20 +141,13 @@ def main():                      class_vote[int(train_label_2[q])-1] = class_vote[int(train_label_2[q])-1] + 1              km_labels_2[i] = np.argmax(class_vote) + 1 -        target_pred = test_model(km_train_data_2.cluster_centers_, test_data_1, km_labels_2, test_label_1, args)     -        draw_results(args, test_label_1, target_pred) +        target_pred = test_model(km_train_data_2.cluster_centers_, test_data_1, km_labels_2, test_label_1)     +        draw_results(test_label_1, target_pred) -    elif(args.eucl):     -        target_pred = test_model(train_data_2, test_data_1, train_label_2, test_label_1, args)     -        draw_results(args, test_label_1, target_pred) -        target_pred = test_model(train_data_1, test_data_2, train_label_1, test_label_2, args)     -        draw_results(args, test_label_2, target_pred) -         -     -    print('N-Query from cam 1:', test_data_1.shape) -    print('N-Query from cam 2:', test_data_2.shape) -    print('Complete') +    else:     +        target_pred = test_model(train_data, test_data, train_label, test_label, train_cam, test_cam) +        draw_results(test_label, target_pred)  if __name__ == "__main__":      main() -    
\ No newline at end of file +     diff --git a/rerank.py b/rerank.py new file mode 100644 index 0000000..6b20f53 --- /dev/null +++ b/rerank.py @@ -0,0 +1,82 @@ +from scipy.spatial.distance import cdist +import numpy as np + +def re_ranking(probFea,galFea,k1,k2,lambda_value, MemorySave = False, Minibatch = 2000): + +    query_num = probFea.shape[0] +    all_num = query_num + galFea.shape[0]     +    feat = np.append(probFea,galFea,axis = 0) +    feat = feat.astype(np.float16) +    print('computing original distance') +    if MemorySave: +        original_dist = np.zeros(shape = [all_num,all_num],dtype = np.float16) +        i = 0 +        while True: +            it = i + Minibatch +            if it < np.shape(feat)[0]: +                original_dist[i:it,] = np.power(cdist(feat[i:it,],feat),2).astype(np.float16) +            else: +                original_dist[i:,:] = np.power(cdist(feat[i:,],feat),2).astype(np.float16) +                break +            i = it +    else: +        original_dist = cdist(feat,feat).astype(np.float16)   +        original_dist = np.power(original_dist,2).astype(np.float16) +    del feat     +    gallery_num = original_dist.shape[0] +    original_dist = np.transpose(original_dist/np.max(original_dist,axis = 0)) +    V = np.zeros_like(original_dist).astype(np.float16) +    initial_rank = np.argsort(original_dist).astype(np.int32) + +     +    print('starting re_ranking') +    for i in range(all_num): +        # k-reciprocal neighbors +        forward_k_neigh_index = initial_rank[i,:k1+1] +        backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] +        fi = np.where(backward_k_neigh_index==i)[0] +        k_reciprocal_index = forward_k_neigh_index[fi] +        k_reciprocal_expansion_index = k_reciprocal_index +        for j in range(len(k_reciprocal_index)): +            candidate = k_reciprocal_index[j] +            candidate_forward_k_neigh_index = initial_rank[candidate,:int(np.around(k1/2))+1] +            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:int(np.around(k1/2))+1] +            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] +            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] +            if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2/3*len(candidate_k_reciprocal_index): +                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) +             +        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) +        weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) +        V[i,k_reciprocal_expansion_index] = weight/np.sum(weight) +    original_dist = original_dist[:query_num,]     +    if k2 != 1: +        V_qe = np.zeros_like(V,dtype=np.float16) +        for i in range(all_num): +            V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) +        V = V_qe +        del V_qe +    del initial_rank +    invIndex = [] +    for i in range(gallery_num): +        invIndex.append(np.where(V[:,i] != 0)[0]) +     +    jaccard_dist = np.zeros_like(original_dist,dtype = np.float16) + +    for i in range(query_num): +        temp_min = np.zeros(shape=[1,gallery_num],dtype=np.float16) +        indNonZero = np.where(V[i,:] != 0)[0] +        indImages = [] +        indImages = [invIndex[ind] for ind in indNonZero] +        for j in range(len(indNonZero)): +            temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) +        jaccard_dist[i] = 1-temp_min/(2-temp_min) +     +    final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value +    del original_dist +    del V +    del jaccard_dist +    final_dist = final_dist[:query_num,query_num:] +         +    return final_dist +  | 
