From 0a71765565b2c00f3b1c8ace9caef60b55d1d828 Mon Sep 17 00:00:00 2001 From: nunzip Date: Thu, 13 Dec 2018 16:53:43 +0000 Subject: Add standardisation --- README.md | 6 +++++- evaluate.py | 13 ++++++++++++- opt.py | 3 ++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2aa6aaa..e89f6db 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ``` usage: evaluate.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA] [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v] [-s SHOWRANK] - [-1] [-M MULTRANK] [-C] [--data DATA] [-K KMEAN] [-A] + [-1] [-2] [-M MULTRANK] [-C] [--data DATA] [-K KMEAN] [-A] [-P PCA] optional arguments: @@ -26,6 +26,7 @@ optional arguments: Save ranklist pics id in a txt file for first SHOWRANK queries -1, --normalise Normalise features + -2, --standardise Standardise features -M MULTRANK, --multrank MULTRANK Run for different ranklist sizes equal to MULTRANK -C, --comparison Compare baseline and improved metric @@ -59,6 +60,9 @@ EXAMPLES for `evaluate.py`: EXAMPLE 7: Run on the training set with euclidean distance and normalize feature vectors. Draw confusion matrix at the end. `evaluate.py -t -1 -c` + EXAMPLE 8: Run euclidean distance standardising the feature data for the first 10 values of top n and graph them. + `evaluate.py -2 -M 10` + EXAMPLE 8: Run for rerank top 10 and save the names of the images that compose the ranklist for the first 5 queries: query.txt, ranklist.txt. `evaluate.py -r -s 5 -n 10` diff --git a/evaluate.py b/evaluate.py index 9d41424..1f54b95 100755 --- a/evaluate.py +++ b/evaluate.py @@ -4,7 +4,7 @@ # # usage: evaluate.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA] # [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v] -# [-s SHOWRANK] [-1] [-M MULTRANK] [-C] [DATA] +# [-s SHOWRANK] [-1] [-2] [-M MULTRANK] [-C] [DATA] # [-K KMEAN] [-A] [-P PCA] import matplotlib.pyplot as plt @@ -29,6 +29,7 @@ from rerank import re_ranking from kmean import create_kmean_clusters import logging from logging import debug +from sklearn.preprocessing import StandardScaler parser = argparse.ArgumentParser() parser.add_argument("-t", "--train", help="Use train data instead of query and gallery", action='store_true') @@ -44,6 +45,7 @@ parser.add_argument("-n", "--neighbors", help="Use customized ranklist size NEIG parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true') parser.add_argument("-s", "--showrank", help="Save ranklist pics id in a txt file for first SHOWRANK queries", type=int, default = 0) parser.add_argument("-1", "--normalise", help="Normalise features", action='store_true') +parser.add_argument("-2", "--standardise", help="Standardise features", action='store_true') parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to MULTRANK", type=int, default=1) parser.add_argument("-C", "--comparison", help="Compare baseline and improved metric", action='store_true') parser.add_argument("--data", help="Folder containing data", default='data') @@ -220,6 +222,15 @@ def main(): debug("Normalising data") train_data = np.divide(train_data,LA.norm(train_data,axis=0)) test_data = np.divide(test_data, LA.norm(test_data,axis=0)) + train_model = np.divide(train_model, LA.norm(train_model,axis=0)) + + if (args.standardise): + debug("Standardising data") + scaler = StandardScaler() + train_data=scaler.fit_transform(train_data) + test_data=scaler.fit_transform(test_data) + train_model=scaler.fit_transform(train_model) + if(args.kmean_alt): debug("Using Kmeans") train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, labels, gallery_idx, camId) diff --git a/opt.py b/opt.py index 873b14d..6175aaa 100755 --- a/opt.py +++ b/opt.py @@ -4,7 +4,7 @@ # # usage: opt.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA] # [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v] -# [-s SHOWRANK] [-1] [-M MULTRANK] [-C] [DATA] +# [-s SHOWRANK] [-1] [-2] [-M MULTRANK] [-C] [DATA] # [-K KMEAN] [-A] [-P PCA] import matplotlib.pyplot as plt @@ -47,6 +47,7 @@ parser.add_argument("-n", "--neighbors", help="Use customized ranklist size NEIG parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true') parser.add_argument("-s", "--showrank", help="Save ranklist pics id in a txt file for first SHOWRANK queries", type=int, default = 0) parser.add_argument("-1", "--normalise", help="Normalise features", action='store_true') +parser.add_argument("-2", "--standardise", help="Standardise features", action='store_true') parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to MULTRANK", type=int, default=1) parser.add_argument("-C", "--comparison", help="Compare baseline and improved metric", action='store_true') parser.add_argument("--data", help="Folder containing data", default='data') -- cgit v1.2.3