From 0a71765565b2c00f3b1c8ace9caef60b55d1d828 Mon Sep 17 00:00:00 2001
From: nunzip <np.scarh@gmail.com>
Date: Thu, 13 Dec 2018 16:53:43 +0000
Subject: Add standardisation

---
 README.md   |  6 +++++-
 evaluate.py | 13 ++++++++++++-
 opt.py      |  3 ++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2aa6aaa..e89f6db 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ```
 usage: evaluate.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA]
                    [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v] [-s SHOWRANK]
-                   [-1] [-M MULTRANK] [-C] [--data DATA] [-K KMEAN] [-A]
+                   [-1] [-2] [-M MULTRANK] [-C] [--data DATA] [-K KMEAN] [-A]
                    [-P PCA]
 
 optional arguments:
@@ -26,6 +26,7 @@ optional arguments:
                         Save ranklist pics id in a txt file for first SHOWRANK
                         queries
   -1, --normalise       Normalise features
+  -2, --standardise     Standardise features
   -M MULTRANK, --multrank MULTRANK
                         Run for different ranklist sizes equal to MULTRANK
   -C, --comparison      Compare baseline and improved metric
@@ -59,6 +60,9 @@ EXAMPLES for `evaluate.py`:
 	EXAMPLE 7: Run on the training set with euclidean distance and normalize feature vectors. Draw confusion matrix at the end.
 		`evaluate.py -t -1 -c`
 
+	EXAMPLE 8: Run with euclidean distance, standardising the feature vectors, for the first 10 values of top n and plot the results.
+		`evaluate.py -2 -M 10`
+
 	EXAMPLE 8: Run for rerank top 10 and save the names of the images that compose the ranklist for the first 5 queries: query.txt, ranklist.txt.
 		`evaluate.py -r -s 5 -n 10`
 
diff --git a/evaluate.py b/evaluate.py
index 9d41424..1f54b95 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -4,7 +4,7 @@
 #
 # usage: evaluate.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA]
 #               [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v]
-#               [-s SHOWRANK] [-1] [-M MULTRANK] [-C] [DATA]
+#               [-s SHOWRANK] [-1] [-2] [-M MULTRANK] [-C] [--data DATA]
 #               [-K KMEAN] [-A] [-P PCA]
 
 import matplotlib.pyplot as plt
@@ -29,6 +29,7 @@ from rerank import re_ranking
 from kmean import create_kmean_clusters
 import logging
 from logging import debug
+from sklearn.preprocessing import StandardScaler
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-t", "--train", help="Use train data instead of query and gallery", action='store_true')
@@ -44,6 +45,7 @@ parser.add_argument("-n", "--neighbors", help="Use customized ranklist size NEIG
 parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
 parser.add_argument("-s", "--showrank", help="Save ranklist pics id in a txt file for first SHOWRANK queries", type=int, default = 0)
 parser.add_argument("-1", "--normalise", help="Normalise features", action='store_true')
+parser.add_argument("-2", "--standardise", help="Standardise features", action='store_true')
 parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to MULTRANK", type=int, default=1)
 parser.add_argument("-C", "--comparison", help="Compare baseline and improved metric", action='store_true')
 parser.add_argument("--data", help="Folder containing data", default='data')
@@ -220,6 +222,15 @@ def main():
         debug("Normalising data")
         train_data = np.divide(train_data,LA.norm(train_data,axis=0))
         test_data = np.divide(test_data, LA.norm(test_data,axis=0))
+        train_model = np.divide(train_model, LA.norm(train_model,axis=0))
+
+    if (args.standardise):
+        debug("Standardising data")
+        scaler = StandardScaler()
+        train_data=scaler.fit_transform(train_data)  # learn per-feature mean/variance from train_data only
+        test_data=scaler.transform(test_data)  # reuse those statistics so both sets share one feature space
+        train_model=scaler.fit_transform(train_model)  # NOTE(review): refits on train_model alone; confirm it should not reuse the statistics above
+
     if(args.kmean_alt):
         debug("Using Kmeans")
         train_data, train_label, train_cam = create_kmean_clusters(feature_vectors, labels, gallery_idx, camId)
diff --git a/opt.py b/opt.py
index 873b14d..6175aaa 100755
--- a/opt.py
+++ b/opt.py
@@ -4,7 +4,7 @@
 #
 # usage: opt.py [-h] [-t] [-c] [-k] [-m] [-e] [-r] [-a RERANKA]
 #               [-b RERANKB] [-l RERANKL] [-n NEIGHBORS] [-v]
-#               [-s SHOWRANK] [-1] [-M MULTRANK] [-C] [DATA]
+#               [-s SHOWRANK] [-1] [-2] [-M MULTRANK] [-C] [--data DATA]
 #               [-K KMEAN] [-A] [-P PCA]
 
 import matplotlib.pyplot as plt
@@ -47,6 +47,7 @@ parser.add_argument("-n", "--neighbors", help="Use customized ranklist size NEIG
 parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
 parser.add_argument("-s", "--showrank", help="Save ranklist pics id in a txt file for first SHOWRANK queries", type=int, default = 0)
 parser.add_argument("-1", "--normalise", help="Normalise features", action='store_true')
+parser.add_argument("-2", "--standardise", help="Standardise features", action='store_true')
 parser.add_argument("-M", "--multrank", help="Run for different ranklist sizes equal to MULTRANK", type=int, default=1)
 parser.add_argument("-C", "--comparison", help="Compare baseline and improved metric", action='store_true')
 parser.add_argument("--data", help="Folder containing data", default='data')
-- 
cgit v1.2.3-70-g09d2