From 18a589bd030ce10133b512bccf716783984abe01 Mon Sep 17 00:00:00 2001 From: nunzip Date: Fri, 7 Dec 2018 19:05:37 +0000 Subject: Add second ranklist --- report2/fig/eucranklist.png | Bin 0 -> 3880590 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 report2/fig/eucranklist.png diff --git a/report2/fig/eucranklist.png b/report2/fig/eucranklist.png new file mode 100644 index 0000000..9daaa9c Binary files /dev/null and b/report2/fig/eucranklist.png differ -- cgit v1.2.3-54-g00ecf From 939e4e1333f1740ede3af8de639a69eb47a2b817 Mon Sep 17 00:00:00 2001 From: nunzip Date: Fri, 7 Dec 2018 21:10:20 +0000 Subject: Add introduction --- report2/paper.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/report2/paper.md b/report2/paper.md index f70f77a..14f2f98 100755 --- a/report2/paper.md +++ b/report2/paper.md @@ -1,12 +1,57 @@ # Summary +In this report we analysed how distance metric learning affects classification +accuracy for the dataset CUHK03. The baseline method used for classification is +Nearest Neighbors based on Euclidean distance. The improved approach we propose +mixes Jaccard and Mahalanobis metrics to obtain a ranklist that also takes into +account the reciprocal neighbors. This approach is computationally more +complex, since the matrices representing distances are effectively calculated +twice. However, it is possible to observe a significant accuracy improvement of +around 10% for the $@rank1$ case. Accuracy improves overall, especially for +$@rankn$ cases with low n. + +# Formulation of the Addressed Machine Learning Problem + +## CUHK03 + +The dataset CUHK03 contains 14096 pictures of people captured from two +different cameras. The feature vectors used come from passing the +rescaled images through ResNet50. Each feature vector contains 2048 +features that we use for classification. 
+The pictures represent 1467 different +people and each of them appears between 9 and 10 times. The separation of +train_idx, query_idx and gallery_idx allows us to perform training and validation +on a training set (train_idx, adequately split between test, train and +validation keeping the same number of identities). This prevents overfitting +the algorithm to the specific data associated with query_idx and gallery_idx. + +## Problem to solve + +The problem to solve is to create a ranklist for each image of the query set +by finding the nearest neighbor(s) within a gallery set. However, gallery images +with the same label and taken from the same camera as the query image should +not be considered when forming the ranklist. + +## Nearest Neighbor ranklist + +Nearest Neighbor aims to find the gallery image whose features are the closest to +the ones of a query image, predicting the class of the query image as the same +as that of its nearest neighbor(s). The distance between images can be calculated through +different distance metrics; however, one of the most commonly used is Euclidean +distance, represented as $d(x,y)=\sqrt{\sum_{i} (x_{i}-y_{i})^{2}}$. 
+ +EXPLAIN KNN BRIEFLY -# Baseline Formulation # Baseline Evaluation -# Formulation of Suggested Improvement +\begin{figure} +\begin{center} +\includegraphics[width=17em]{fig/baseline.pdf} +\caption{Recognition accuracy of baseline Nearest Neighbor @rank k} +\label{fig:baselineacc} +\end{center} +\end{figure} -# Suggested Improvement Evaluation +# Suggested Improvement # Conclusion -- cgit v1.2.3-54-g00ecf From dd475ccd3dde8949ca168f09ce048865fe0f5b40 Mon Sep 17 00:00:00 2001 From: nunzip Date: Fri, 7 Dec 2018 21:10:41 +0000 Subject: Add baseline topn graph --- report2/fig/baseline.pdf | Bin 0 -> 11539 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 report2/fig/baseline.pdf diff --git a/report2/fig/baseline.pdf b/report2/fig/baseline.pdf new file mode 100644 index 0000000..e6a4794 Binary files /dev/null and b/report2/fig/baseline.pdf differ -- cgit v1.2.3-54-g00ecf From f6ee52c3f7f424ab8e7ce972281f0f6199b8262d Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Sat, 8 Dec 2018 17:31:55 +0000 Subject: Fix mahalanobis --- evaluate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evaluate.py b/evaluate.py index 6561b81..e8468a2 100755 --- a/evaluate.py +++ b/evaluate.py @@ -84,7 +84,8 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam else: if args.mahalanobis: # metric = 'jaccard' is also valid - distances = cdist(probe_data, gallery_data, 'jaccard') + cov_inv = np.linalg.inv(np.cov(gallery_data.T)) + distances = cdist(probe_data, gallery_data, 'mahalanobis', VI=cov_inv) else: distances = cdist(probe_data, gallery_data, 'euclidean') -- cgit v1.2.3-54-g00ecf From aaee9b10aab7c78c7a02d5caec4ed37b85dff1bd Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Sat, 8 Dec 2018 17:37:25 +0000 Subject: Use correct transpose for mahalanobis --- evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluate.py b/evaluate.py index e8468a2..38b20bb 100755 --- a/evaluate.py +++ 
b/evaluate.py @@ -84,7 +84,7 @@ def test_model(gallery_data, probe_data, gallery_label, probe_label, gallery_cam else: if args.mahalanobis: # metric = 'jaccard' is also valid - cov_inv = np.linalg.inv(np.cov(gallery_data.T)) + cov_inv = np.linalg.inv(np.cov(gallery_data.T)).T distances = cdist(probe_data, gallery_data, 'mahalanobis', VI=cov_inv) else: distances = cdist(probe_data, gallery_data, 'euclidean') -- cgit v1.2.3-54-g00ecf