From 9411ec33b05e6d540c58171bb6a30f172d4bef5b Mon Sep 17 00:00:00 2001
From: Vasil Zlatanov
Date: Thu, 18 Oct 2018 18:17:58 +0100
Subject: Use sklearn libs

---
 train.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/train.py b/train.py
index 6c17cc7..b76f5e2 100755
--- a/train.py
+++ b/train.py
@@ -3,11 +3,22 @@
 # Author: Vasil Zlatanov, Nunzio Pucci
 # EE4 Pattern Recognition coursework
 
+import matplotlib.pyplot as plt
+import sys
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+
 import argparse
 import numpy as np
 
 from numpy import genfromtxt
-from numpy import linalg as LA
+# from numpy import linalg as LA
 
 # subtract the normal face from each row of the face matrix
 def normalise_faces(average_face, raw_faces):
@@ -17,26 +28,51 @@ def normalise_faces(average_face, raw_faces):
 
 # usage: train.py [-h] -i DATA -o MODEL [-m M]
 parser = argparse.ArgumentParser()
 parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-o", "--model", help="Output model file", required=True)
 parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", required=True, type=int)
+parser.add_argument("-g", "--graph", help="Should we show graphs", action='store_true')
+parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
 args = parser.parse_args()
 
-assert args.data, "No input CSV data (-i, --input-data)"
-assert args.model, "No model specified (-o, --model)"
+if args.pca and args.lda:
+    sys.exit("Flags -p and -l are mutually exclusive")
 
 M = args.eigen
 
-raw_faces = genfromtxt(args.data, delimiter=',').T
+raw_faces = genfromtxt(args.data, delimiter=',')
+targets = np.repeat(np.arange(10), 52)
+
+faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=0.5, random_state=0)
+
+# This removes the mean and scales each feature to unit variance
+sc = StandardScaler()
+faces_train = sc.fit_transform(faces_train)
+faces_test = sc.transform(faces_test)
 
-average_face = np.average(raw_faces, axis=1)
-normal_faces = normalise_faces(average_face, raw_faces)
+explained_variances = ()
+if args.pca:
+    # pca holds the principal components: the M eigenvectors with the largest variance
+    pca = PCA(n_components=M)
+    faces_train = pca.fit_transform(faces_train)
+    faces_test = pca.transform(faces_test)
+    explained_variances = pca.explained_variance_ratio_
+else:
+    lda = LinearDiscriminantAnalysis(n_components=M)
+    faces_train = lda.fit_transform(faces_train, target_train)
+    faces_test = lda.transform(faces_test)
+    explained_variances = lda.explained_variance_ratio_
 
-e_vals, e_vecs = LA.eig(np.cov(normal_faces))
+# Plot the explained variance ratio of each retained component
+if args.graph:
+    plt.bar(np.arange(M), explained_variances)
+    plt.ylabel('Variance ratio')
+    plt.xlabel('Face number')
+    plt.show()
 
-np.savez(args.model,
-        average_face=average_face,
-        e_vals=e_vals[:M],
-        e_vecs=e_vecs[:M],
-        projections=np.dot(e_vecs[:M], raw_faces)
-        )
+classifier = KNeighborsClassifier(n_neighbors=3)
+classifier.fit(faces_train, target_train)
+target_pred = classifier.predict(faces_test)
+cm = confusion_matrix(target_test, target_pred)
+print(cm)
+print('Accuracy: %f' % accuracy_score(target_test, target_pred))
--
cgit v1.2.3-54-g00ecf
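
Example usage of the patched script, as a sketch only: "faces.csv" is a
hypothetical input file name, and its rows are assumed to be flattened
face images matching the 10-classes-by-52-images labelling hard-coded
in targets. The flags come straight from the argparse block above.

    # PCA with 50 components, KNN classification, and the variance plot
    ./train.py -i faces.csv -m 50 -p -g

    # LDA with 9 components (LDA is also the default branch when -p is absent)
    ./train.py -i faces.csv -m 9 -l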