-rwxr-xr-x  train.py  250
1 file changed, 136 insertions, 114 deletions
@@ -2,6 +2,8 @@
 # Train a model from sample data
 # Author: Vasil Zlatanov, Nunzio Pucci
 # EE4 Pattern Recognition coursework
+#
+# usage: train.py [-h] -i DATA [-m M]
 
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
@@ -14,7 +16,7 @@ from sklearn.decomposition import PCA
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import confusion_matrix  
+from sklearn.metrics import confusion_matrix
 from sklearn.metrics import accuracy_score
 
 import argparse
@@ -23,16 +25,21 @@ import numpy as np
 from numpy import genfromtxt
 from numpy import linalg as LA
+from timeit import default_timer as timer
+
+n_faces = 52
+n_cases = 10
+n_pixels = 2576
+
 # subtract the normal face from each row of the face matrix
 def normalise_faces(average_face, faces):
     faces = np.subtract(faces, np.tile(average_face, (faces.shape[0],1)))
     return np.divide(faces.T, np.std(faces.T, axis=0)).T
+
 # Split data into training and testing sets
 def test_split(n_faces, raw_faces, split, seed):
     random.seed(seed)
-    n_cases = 10
-    n_pixels = 2576
-    
+
     raw_faces_split = np.split(raw_faces,n_cases)
     n_training_faces = int(round(n_cases*(1 - split)))
     n_test_faces = n_cases - n_training_faces
@@ -40,139 +47,154 @@ def test_split(n_faces, raw_faces, split, seed):
     faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
     target_train = np.repeat(np.arange(n_faces), n_training_faces)
     target_test = np.repeat(np.arange(n_faces), n_test_faces)
-    
-    for x in range (n_faces):
+
+    for x in range(n_faces):
         samples = random.sample(range(n_cases), n_training_faces)
         faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
-        faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples]
 
+        faces_test[x] = [raw_faces[i+n_cases*x] for i in range(n_cases) if i not in samples]
     faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
     faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
     return faces_train, faces_test, target_train, target_test
 
-# usage: train.py [-h] -i DATA -o MODEL [-m M]
-parser = argparse.ArgumentParser()
-parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
-parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
-parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
-parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
-parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
-parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.22)
-### best split for lda = 22
-### best plit for pca = 20
-parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true')
-parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
-parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
-parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
-parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
+def draw_conf_mat(target_test, target_pred, args):
+    cm = confusion_matrix(target_test, target_pred)
+    print(cm)
+    if (args.conf_mat):
+        plt.matshow(cm, cmap='Blues')
+        plt.colorbar()
+        plt.ylabel('Actual')
+        plt.xlabel('Predicted')
+        plt.show()
+    print('Accuracy %f' % accuracy_score(target_test, target_pred))
 
-parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
+def test_model(M, faces_train, faces_test, target_train, target_test, args):
+    raw_faces_train = faces_train
 
-args = parser.parse_args()
+    explained_variances = ()
 
-M = args.eigen
+    if args.pca or args.pca_r:
+        # faces_pca contains the principal components, i.e. the M most significant eigenvectors
+        average_face = np.mean(faces_train, axis=0)
+        deviations_tr = np.std(faces_train, axis=0)
+        deviations_tst = np.std(faces_test, axis=0)
+        faces_train = normalise_faces(average_face, faces_train)
+        faces_test = normalise_faces(average_face, faces_test)
+        if (args.pca_r):
+            print('Reduced PCA')
+            e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
+            e_vecs = np.dot(faces_train.T, e_vecs)
+            e_vecs = e_vecs/LA.norm(e_vecs, axis=0)
+        else:
+            print('Standard PCA')
+            e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
+            # e_vecs = normalise_faces(np.mean(e_vecs,axis=0), e_vecs)
 
-raw_faces = genfromtxt(args.data, delimiter=',')
-targets = np.repeat(np.arange(52),10)
+        e_vals = np.flip(e_vals)[:M]
+        e_vecs = np.fliplr(e_vecs).T[:M]
+        deviations_tr = np.flip(deviations_tr)
+        deviations_tst = np.flip(deviations_tst)
 
-n_faces = 52
+        faces_train = np.dot(faces_train, e_vecs.T)
+        faces_test = np.dot(faces_test, e_vecs.T)
 
-faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
+        if (args.reconstruct):
+            rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
+            rec_faces_test = np.add(average_face, np.dot(faces_test, e_vecs) * deviations_tst)
+            rec_error = LA.norm(np.subtract(raw_faces_train[args.reconstruct], rec_vec))
+            ar = plt.subplot(2, 1, 1)
+            ar.imshow(rec_vec.reshape([46,56]).T, cmap = 'gist_gray')
+            ar = plt.subplot(2, 1, 2)
+            ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap = 'gist_gray')
+            plt.show()
 
-# This remove the mean and scales to unit variance
-sc = StandardScaler()
-#faces_train = sc.fit_transform(faces_train)
-#faces_test = sc.transform(faces_test)
-raw_faces_train = faces_train
+    if args.lda or (args.pca and args.lda):
+        lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen')
+        faces_train = lda.fit_transform(faces_train, target_train)
+        faces_test = lda.transform(faces_test)
+        class_means = lda.means_
+        e_vals = lda.explained_variance_ratio_
 
-explained_variances = ()
+    if args.faces:
+        if args.lda:
+            for i in range(10):
+                ax = plt.subplot(2, 5, i + 1)
+                ax.imshow(class_means[i].reshape([46,56]).T)
+        else:
+            for i in range(args.faces):
+                ax = plt.subplot(2, args.faces//2, i + 1)
+                ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray')
+        plt.show()
+
+    if args.principal:
+        e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
+        plt.bar(np.arange(M), e_vals[:M])
+        plt.ylabel('Variance ratio (%)')
+        plt.xlabel('Eigenface Number')
+        plt.show()
 
-if args.pca or args.pca_r:
-    # faces_pca containcts the principial components or the M most variant eigenvectors
-    average_face = np.mean(faces_train, axis=0) 
-    deviations_tr =  np.std(faces_train, axis=0)
-    deviations_tst = np.std(faces_train, axis=0)
-    faces_train = normalise_faces(average_face, faces_train)
-    faces_test = normalise_faces(average_face, faces_test)
-    if (args.pca_r):
-        print('Reduced PCA')
-        e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
-        e_vecs = np.dot(faces_train.T, e_vecs) 
-        e_vecs = e_vecs/LA.norm(e_vecs, axis = 0)
+    if args.grapheigen:
+        # Colors for distinct individuals
+        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
+        pltCol = [cols[int(k)] for k in target_train]
+        fig = plt.figure()
+        ax = fig.add_subplot(111, projection='3d')
+        ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
+        plt.show()
+
+    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
+    if (args.reconstruct):
+        classifier.fit(raw_faces_train, target_train)
+        target_pred = classifier.predict(rec_faces_test)
+        #Better Passing n_neighbors = 1
     else:
-        print('Standard PCA')
-        e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
-        # e_vecs = normalise_faces(np.mean(e_vecs,axis=0), e_vecs)
-        # e_vecs = sc.fit_transform(e_vecs)
+        classifier.fit(faces_train, target_train)
+        target_pred = classifier.predict(faces_test)
+        #Better n_neighbors = 2
+    draw_conf_mat(target_test, target_pred, args)
 
-    e_vals = np.flip(e_vals)[:M]
-    e_vecs = np.fliplr(e_vecs).T[:M]
-    deviations_tr = np.flip(deviations_tr)
-    deviations_tst = np.flip(deviations_tst)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
+    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
+    parser.add_argument("-M", "--reigen", help="Sweep the number of eigenvalues from -m up to this value", type=int)
+    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
+    parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
+    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
+    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
+    parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.22)
+    ### best split for lda = 22
+    ### best split for pca = 20
+    parser.add_argument("-2", "--grapheigen", help="Show 3D graph of targets versus principal components", action='store_true')
+    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
+    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
+    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
 
-    faces_train = np.dot(faces_train, e_vecs.T)
-    faces_test = np.dot(faces_test, e_vecs.T)
+    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
 
-    if (args.reconstruct): 
-        rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
-        rec_faces_test = np.add(average_face, np.dot(faces_test, e_vecs) * deviations_tst)
-        rec_error = LA.norm(np.subtract(raw_faces_train[args.reconstruct], rec_vec))
-        ar = plt.subplot(2, 1, 1)
-        ar.imshow(rec_vec.reshape([46,56]).T, cmap = 'gist_gray')
-        ar = plt.subplot(2, 1, 2)
-        ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap = 'gist_gray')
-        plt.show()
+    args = parser.parse_args()
 
-if args.lda or (args.pca and args.lda):
-    lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen')
-    faces_train = lda.fit_transform(faces_train, target_train)
-    faces_test = lda.transform(faces_test)
-    class_means = lda.means_
-    e_vals = lda.explained_variance_ratio_
+    raw_faces = genfromtxt(args.data, delimiter=',')
+    targets = np.repeat(np.arange(n_faces),n_cases)
 
-if args.faces:
-    if args.lda:
-        for i in range (10):
-            ax = plt.subplot(2, 5, i + 1)
-            ax.imshow(class_means[i].reshape([46,56]).T)
-    else:
-        for i in range(args.faces):
-            ax = plt.subplot(2, args.faces/2, i + 1)
-            ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray')
-    plt.show()
-if args.principal:
-    e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
-    plt.bar(np.arange(M), e_vals[:M])
-    plt.ylabel('Varaiance ratio (%)');plt.xlabel('Eigenface Number')
-    plt.show()
+    faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
 
-if args.grapheigen:
-    # Colors for distinct individuals
-    cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(52)]
-    pltCol = [cols[int(k)] for k in target_train]
-    fig = plt.figure()
-    ax = fig.add_subplot(111, projection='3d')
-    ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
-    plt.show()
-classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
-if (args.reconstruct):
-    classifier.fit(raw_faces_train, target_train)
-    target_pred = classifier.predict(rec_faces_test)
-    #Better Passing n_neighbors = 1
-else:
-    classifier.fit(faces_train, target_train)
-    target_pred = classifier.predict(faces_test)
-    #Better n_neighbors = 2
+    if args.reigen:
+        for M in range(args.eigen, args.reigen):
+            start = timer()
+            test_model(M, faces_train, faces_test, target_train, target_test, args)
+            end = timer()
+            print("Run with", M, "eigenvalues completed in %.2f" % (end - start), "seconds")
+    else:
+        M = args.eigen
+        start = timer()
+        test_model(M, faces_train, faces_test, target_train, target_test, args)
+        end = timer()
+        print("Run with", M, "eigenvalues completed in %.2f" % (end - start), "seconds")
 
-cm = confusion_matrix(target_test, target_pred)  
-print(cm)  
-if (args.conf_mat):
-    plt.matshow(cm, cmap='Blues')
-    plt.colorbar()
-    plt.ylabel('Actual')
-    plt.xlabel('Predicted')
-    plt.show()
-print('Accuracy %fl' % accuracy_score(target_test, target_pred)) 
+-cm = confusion_matrix(target_test, target_pred)
+-print(cm)
+-if (args.conf_mat):
+-    plt.matshow(cm, cmap='Blues')
+-    plt.colorbar()
+-    plt.ylabel('Actual')
+-    plt.xlabel('Predicted')
+-    plt.show()
+-print('Accuracy %fl' % accuracy_score(target_test, target_pred))
+
+if __name__ == "__main__":
+    main()
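A note on the `--pca_r` branch added above: it uses the classic low-dimensional eigenface trick. With N training images of D = 2576 pixels and N much smaller than D, the small N x N matrix A A^T has the same nonzero eigenvalues as the D x D scatter matrix A^T A, and every eigenvector u of A A^T maps to an eigenvector A^T u of A^T A, because (A^T A)(A^T u) = A^T (A A^T) u = lambda (A^T u). The self-contained sketch below demonstrates that equivalence; the dimensions and variable names are illustrative only and are not part of train.py:

import numpy as np
from numpy import linalg as LA

rng = np.random.default_rng(0)
A = rng.standard_normal((10, 200))   # N=10 samples, D=200 features, N << D
A = A - A.mean(axis=0)               # subtract the average sample, as normalise_faces does

# Reduced problem: eigendecompose the small N x N Gram matrix ...
lam, u = LA.eigh(np.dot(A, A.T))     # 10 x 10 instead of 200 x 200
v = np.dot(A.T, u)                   # ... then map eigenvectors back to D dimensions
v = v / LA.norm(v, axis=0)           # renormalise each mapped column

# Direct D x D problem, for comparison (what the 'Standard PCA' branch computes)
lam_full, v_full = LA.eigh(np.dot(A.T, A))

# A centred 10-row matrix has rank 9, so compare the nine nonzero eigenvalues
print(np.allclose(lam[1:], lam_full[-9:]))    # True: identical nonzero spectrum
print(abs(np.dot(v[:, -1], v_full[:, -1])))   # ~1.0: same leading direction, up to sign

For the coursework data this replaces a 2576 x 2576 eigenproblem with one the size of the training set (at most 520 x 520). A plausible invocation of the new interface, assuming the input CSV is named faces.csv, would be: python train.py -i faces.csv -q -m 50 -cm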
