Diffstat (limited to 'train.py')
-rwxr-xr-x train.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/train.py b/train.py
index 6c17cc7..b76f5e2 100755
--- a/train.py
+++ b/train.py
@@ -3,11 +3,22 @@
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
+import matplotlib.pyplot as plt
+import sys
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix, accuracy_score
+
import argparse
import numpy as np
from numpy import genfromtxt
-from numpy import linalg as LA
+# from numpy import linalg as LA
# subtract the normal face from each row of the face matrix
def normalise_faces(average_face, raw_faces):
@@ -17,26 +28,51 @@ def normalise_faces(average_face, raw_faces):
-# usage: train.py [-h] -i DATA -o MODEL [-m M]
+# usage: train.py [-h] -i DATA -m M [-g] [-p | -l]
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-o", "--model", help="Output model file", required=True)
parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", required=True, type=int)
+parser.add_argument("-g", "--graph", help="Should we show graphs", action='store_true')
+parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
args = parser.parse_args()
-assert args.data, "No input CSV data (-i, --input-data)"
-assert args.model, "No model specified (-o, --model)"
+if args.pca and args.lda:
+    sys.exit("Flags -p and -l are mutually exclusive")
M = args.eigen
-raw_faces = genfromtxt(args.data, delimiter=',').T
+raw_faces = genfromtxt(args.data, delimiter=',')
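+# integer labels: classes 0-9, each repeated 52 times (one label per row of raw_faces)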
+targets = np.repeat(np.arange(10), 52)
+
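+# hold out half of the samples for testing; a fixed random_state makes the split reproducible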
+faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=0.5, random_state=0)
+
+
+# This removes the mean and scales each feature to unit variance
+sc = StandardScaler()
+faces_train = sc.fit_transform(faces_train)
+faces_test = sc.transform(faces_test)
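+# the scaler is fitted on the training split only, so no test statistics leak into training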
-average_face = np.average(raw_faces, axis=1)
-normal_faces = normalise_faces(average_face, raw_faces)
+explained_variances = ()
+if args.pca:
+    # pca holds the M principal components, i.e. the M eigenvectors
+    # of the covariance matrix with the largest eigenvalues
+    pca = PCA(n_components=M)
+    faces_train = pca.fit_transform(faces_train)
+    faces_test = pca.transform(faces_test)
+    explained_variances = pca.explained_variance_ratio_
+else:
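+    # fall back to LDA whenever -p is not given (the -l flag is the default behaviour)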
+    lda = LinearDiscriminantAnalysis(n_components=M)
+    faces_train = lda.fit_transform(faces_train, target_train)
+    faces_test = lda.transform(faces_test)
+    explained_variances = lda.explained_variance_ratio_
-e_vals, e_vecs = LA.eig(np.cov(normal_faces))
+# Plot the explained variance ratio of each retained component
+if args.graph:
+    plt.bar(np.arange(len(explained_variances)), explained_variances)
+    plt.ylabel('Variance ratio')
+    plt.xlabel('Face Number')
+    plt.show()
-np.savez(args.model,
- average_face=average_face,
- e_vals=e_vals[:M],
- e_vecs=e_vecs[:M],
- projections=np.dot(e_vecs[:M], raw_faces)
- )
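+# classify the projected faces with a 3-nearest-neighbour vote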
+classifier = KNeighborsClassifier(n_neighbors=3)
+classifier.fit(faces_train, target_train)
+target_pred = classifier.predict(faces_test)
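+# confusion matrix: rows are true classes, columns are predicted classes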
+cm = confusion_matrix(target_test, target_pred)
+print(cm)
+print('Accuracy %f' % accuracy_score(target_test, target_pred))
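
For reference, typical invocations of the script after this change might look as follows (the CSV path and component counts are illustrative, not taken from the repository):

    python train.py -i faces.csv -m 100 -p -g   # PCA with 100 components, plot the variance ratios
    python train.py -i faces.csv -m 9 -l        # LDA projection instead of PCA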