From 6ecc84b2081e2ec9da71e774a273cb668baa7140 Mon Sep 17 00:00:00 2001
From: nunzip <np.scarh@gmail.com>
Date: Fri, 16 Nov 2018 17:55:17 +0000
Subject: Remove useless figure1. Correct small mistakes

---
 report/paper.md | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

(limited to 'report')

diff --git a/report/paper.md b/report/paper.md
index 6291273..8dc0421 100755
--- a/report/paper.md
+++ b/report/paper.md
@@ -12,15 +12,8 @@ accuracy is obtained when using a 90% of the data for
 training. Despite such results we will be using 70% of the data
 for training as a standard. This will allow to give more than one
 example of success and failure for each class when classifying the 
-test_data.
-
-\begin{figure}
-\begin{center}
-\includegraphics[width=20em]{fig/partition.pdf}
-\label{accuracy}
-\caption{NN Recognition Accuracies for different data partitions}
-\end{center}
-\end{figure}
+test_data. Moreover, using 90% of the data for training would make the results
+obtained heavily dependent on the seed chosen.
 
 After partitioning the data into training and testing sets,
 PCA is applied. The covariance matrix, S, of dimension
@@ -118,7 +111,7 @@ From here it follows that AA\textsuperscript{T} and A\textsuperscript{T}A have t
 
 It can be noticed that we effectively don't lose any data calculating the eigenvectors
 for PCA with the second method. The main advantages of it are in terms of speed,
-(since the two methods require on average respectively 3.4s and 0.14s), and complexity of computation
+(since the two methods require on average 3.4s and 0.11s respectively), and complexity of computation
 (since the eigenvectors found with the first method are extracted from a significantly 
 bigger matrix).
 
@@ -131,7 +124,7 @@ the covariance matrix, whereas method 2 requires an additional projection step.
 
 Using the computational method for fast PCA, face reconstruction is then performed.
 The quality of reconstruction will depend on the amount of eigenvectors picked.
-The results of varying M can be observed in the picture in fig.\ref{face160rec}. Two faces from classes 
+The results of varying M can be observed in fig.\ref{face160rec}. Two faces from classes 
 number 21 and 2 respectively, are reconstructed as shown in fig.\ref{face10rec} with respective M values 
 of M=10, M=100, M=200, M=300. The last picture is the original face.
 
@@ -198,7 +191,7 @@ classification.
 It is possible to use a NN classification that takes into account majority voting.
 With this method recognition is based on the K closest neighbors of the projected
 test image. Such method anyways showed the best recognition accuracies for PCA with
-K=1, as it can be observed from the figure \ref{k-diff}.
+K=1, as can be observed from figure \ref{k-diff}.
 
 \begin{figure}
 \begin{center}
@@ -237,7 +230,6 @@ can be observed in figure \ref{cm-alt}.
 \end{figure}
 
 Similarly to the NN case, we present two cases, respectively failure and success.
-The pictures on the right show the reconstructed images.
 
 \begin{figure}
 \begin{center}
@@ -281,7 +273,7 @@ $$ S\textsubscript{W} = \sum\limits_{c}\sum\limits_{i\in c}(x\textsubscript{i} -
 
 To maximize J(W) we differentiate with respect to W and equate to zero:
 
-$$ \frac{d}{dW}J(W) = \frac{d}{dW}(\frac{W\textsuperscript{T}S\textsubscript{B}W}{W\textsuperscript{T}S\textsubscript{W}W}) = 0 $$
+$$ \frac{d}{dW}J(W) = \frac{d}{dW}\left(\frac{W\textsuperscript{T}S\textsubscript{B}W}{W\textsuperscript{T}S\textsubscript{W}W}\right) = 0 $$
 $$ (W\textsuperscript{T}S\textsubscript{W}W)\frac{d(W\textsuperscript{T}S\textsubscript{B}W)}{dW} -  (W\textsuperscript{T}S\textsubscript{B}W)\frac{d(W\textsuperscript{T}S\textsubscript{W}W)}{dW} = 0 $$
 $$ (W\textsuperscript{T}S\textsubscript{W}W)2S\textsubscript{B}W - (W\textsuperscript{T}S\textsubscript{B}W)2S\textsubscript{W}W = 0 $$ 
 $$ S\textsubscript{B}W - JS\textsubscript{W}W = 0 $$
@@ -291,15 +283,15 @@ $$ S\textsubscript{W}\textsuperscript{-1}S\textsubscript{B}W - JW = 0 $$
 
 From here it follows:
 
-$$ W\textsubscript{opt} = arg\underset{W}max|\frac{W\textsuperscript{T}S\textsubscript{B}W}{W\textsuperscript{T}S\textsubscript{W}W}| = S\textsubscript{W}\textsuperscript{-1}(\mu\textsubscript{1} - \mu\textsubscript{2}) $$
+$$ W\textsubscript{opt} = \arg\underset{W}{\max}\frac{|W\textsuperscript{T}S\textsubscript{B}W|}{|W\textsuperscript{T}S\textsubscript{W}W|} = S\textsubscript{W}\textsuperscript{-1}(\mu\textsubscript{1} - \mu\textsubscript{2}) $$
 
 However S\textsubscript{W} is often singular since the rank of S\textsubscript{W}
 is at most N-c and usually N is smaller than D.
 
 In such case it is possible to use Fisherfaces. The optimal solution to such
-problem lays in W\textsuperscript{T}\textsubscript{opt} =  W\textsuperscript{T}\textsubscript{lda}W\textsuperscript{T}\textsubscript{pca}
+problem lies in W\textsuperscript{T}\textsubscript{opt} =  W\textsuperscript{T}\textsubscript{lda}W\textsuperscript{T}\textsubscript{pca},
 
-Where W\textsubscript{pca} is chosen to maximize the determinant of the total scatter matrix
+where W\textsubscript{pca} is chosen to maximize the determinant of the total scatter matrix
 of the projected samples: $$ W\textsuperscript{T}\textsubscript{pca} = arg\underset{W}max|W\textsuperscript{T}S\textsubscript{T}W| $$
 $$ And $$
 $$ W\textsubscript{lda} = arg\underset{W}max\frac{|W\textsuperscript{T}W\textsuperscript{T}\textsubscript{pca}S\textsubscript{B}W\textsubscript{pca}W|}{|W\textsuperscript{T}W\textsuperscript{T}\textsubscript{pca}S\textsubscript{W}W\textsubscript{pca}W|} $$
@@ -332,7 +324,7 @@ vaying between 0.11s(low M_pca) and 0.19s(high M_pca).
 \end{center}
 \end{figure}
 
-DD RANK OF SCATTER MATRICES
+ADD RANK OF SCATTER MATRICES
 
 Testing with M_lda=50 and M_pca=115 gives 92.9% accuracy. The results of such test can be
 observed in the confusion matrix shown in figure \ref{ldapca_cm}.
-- 
cgit v1.2.3-70-g09d2