diff options
author | Vasil Zlatanov <v@skozl.com> | 2018-10-18 18:17:58 +0100 |
---|---|---|
committer | Vasil Zlatanov <v@skozl.com> | 2018-10-18 18:17:58 +0100 |
commit | 9411ec33b05e6d540c58171bb6a30f172d4bef5b (patch) | |
tree | 40d98d9436c59498aff5118d5debb7e7c87f80e7 | |
parent | 1a7d30e24d5213b1ae7f60c0d20fce7f2d277bf7 (diff) | |
download | vz215_np1915-9411ec33b05e6d540c58171bb6a30f172d4bef5b.tar.gz vz215_np1915-9411ec33b05e6d540c58171bb6a30f172d4bef5b.tar.bz2 vz215_np1915-9411ec33b05e6d540c58171bb6a30f172d4bef5b.zip |
Use sklearn libs
-rwxr-xr-x | train.py | 64 |
1 files changed, 50 insertions, 14 deletions
```diff
@@ -3,11 +3,22 @@
 # Author: Vasil Zlatanov, Nunzio Pucci
 # EE4 Pattern Recognition coursework
 
+import matplotlib.pyplot as plt
+import sys;
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+
 import argparse
 import numpy as np
 
 from numpy import genfromtxt
-from numpy import linalg as LA
+# from numpy import linalg as LA
 
 # subtract the normal face from each row of the face matrix
 def normalise_faces(average_face, raw_faces):
@@ -17,26 +28,51 @@ def normalise_faces(average_face, raw_faces):
 # usage: train.py [-h] -i DATA -o MODEL [-m M]
 parser = argparse.ArgumentParser()
 parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-o", "--model", help="Output model file", required=True)
 parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", required=True, type=int)
+parser.add_argument("-g", "--graph", help="Should we show graphs", action='store_true')
+parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
 args = parser.parse_args()
 
 
-assert args.data, "No input CSV data (-i, --input-data)"
-assert args.model, "No model specified (-o, --model)"
+if args.pca and args.lda:
+    sys.exit("Flags -p and -l are mutually exclusive")
 
 M = args.eigen
 
-raw_faces = genfromtxt(args.data, delimiter=',').T
+raw_faces = genfromtxt(args.data, delimiter=',')
+targets = np.repeat(np.arange(10),52)
+
+faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=0.5, random_state=0)
+
+
+# This remove the mean and scales to unit variance
+sc = StandardScaler()
+faces_train = sc.fit_transform(faces_train)
+faces_test = sc.transform(faces_test)
 
-average_face = np.average(raw_faces, axis=1)
-normal_faces = normalise_faces(average_face, raw_faces)
+explained_variances = ()
+if args.pca:
+    # faces_pca containcts the principial components or the M most variant eigenvectors
+    pca = PCA(n_components=M)
+    faces_train = pca.fit_transform(faces_train)
+    faces_test = pca.transform(faces_test)
+    explained_variances = pca.explained_variance_ratio_
+else:
+    lda = LinearDiscriminantAnalysis(n_components=M)
+    faces_train = lda.fit_transform(faces_train, target_train)
+    faces_test = lda.transform(faces_test)
+    explained_variances = lda.explained_variance_ratio_
 
-e_vals, e_vecs = LA.eig(np.cov(normal_faces))
+# Plot the variances (eigenvalues) from the pca object
+if args.graph:
+    plt.bar(np.arange(M), explained_variances)
+    plt.ylabel('Varaiance ratio');plt.xlabel('Face Number')
+    plt.show()
 
-np.savez(args.model,
-         average_face=average_face,
-         e_vals=e_vals[:M],
-         e_vecs=e_vecs[:M],
-         projections=np.dot(e_vecs[:M], raw_faces)
-         )
+classifier = KNeighborsClassifier(n_neighbors=3)
+classifier.fit(faces_train, target_train)
+target_pred = classifier.predict(faces_test)
+cm = confusion_matrix(target_test, target_pred)
+print(cm)
+print('Accuracy %fl' % accuracy_score(target_test, target_pred))
```