#!/usr/bin/env python
"""Train a face-recognition model from sample data.

Authors: Vasil Zlatanov, Nunzio Pucci
EE4 Pattern Recognition coursework

usage: train.py -i DATA -m M [-g] [-p | -l]
"""
import argparse
import sys

import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


def normalise_faces(average_face, raw_faces):
    """Subtract the average face from each row of the face matrix.

    NOTE(review): tiles over raw_faces.shape[1] and transposes — this
    assumes one face per column; confirm against the CSV orientation.
    """
    return np.subtract(
        raw_faces, np.tile(average_face, (raw_faces.shape[1], 1)).T)


parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model",
                    required=True, type=int)
parser.add_argument("-g", "--graph", help="Should we show graphs",
                    action='store_true')
parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
args = parser.parse_args()

if args.pca and args.lda:
    sys.exit("Flags -p and -l are mutually exclusive")

M = args.eigen

raw_faces = genfromtxt(args.data, delimiter=',')
# 10 subjects with 52 consecutive samples each in the CSV.
targets = np.repeat(np.arange(10), 52)

faces_train, faces_test, target_train, target_test = train_test_split(
    raw_faces, targets, test_size=0.2, random_state=0)

# Remove the mean and scale to unit variance.  Fit on the training split
# only so no test-set statistics leak into the transform.
sc = StandardScaler()
faces_train = sc.fit_transform(faces_train)
faces_test = sc.transform(faces_test)

explained_variances = ()
if args.pca:
    # Project onto the M most variant eigenvectors (principal components).
    pca = PCA(n_components=M)
    faces_train = pca.fit_transform(faces_train)
    faces_test = pca.transform(faces_test)
    explained_variances = pca.explained_variance_ratio_
else:
    # LDA is the default whenever -p was not given.
    # NOTE(review): LDA allows at most n_classes - 1 components (9 here);
    # a larger M raises at fit time.
    lda = LinearDiscriminantAnalysis(n_components=M)
    faces_train = lda.fit_transform(faces_train, target_train)
    faces_test = lda.transform(faces_test)
    explained_variances = lda.explained_variance_ratio_

# Plot the variance ratios (eigenvalue spectrum) of the chosen projection.
if args.graph:
    plt.bar(np.arange(explained_variances.size), explained_variances)
    plt.ylabel('Variance ratio')
    plt.xlabel('Face Number')
    plt.show()

# 3-nearest-neighbour classification in the reduced space.
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(faces_train, target_train)
target_pred = classifier.predict(faces_test)

cm = confusion_matrix(target_test, target_pred)
print(cm)
# Bug fix: original format string was 'Accuracy %fl' — stray trailing 'l'.
print('Accuracy %f' % accuracy_score(target_test, target_pred))