#!/usr/bin/env python
# Train a model from sample data
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
"""Project face data with PCA or LDA and classify it with k-nearest neighbours.

The script reads a CSV of samples, splits it into train and test sets,
standardises the features, reduces them to a handful of components (PCA by
default, LDA with ``-l``), optionally shows diagnostic plots, and finally
prints the confusion matrix and accuracy of a KNN classifier evaluated on the
held-out samples.
"""

import argparse
import sys
from random import randint

import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
# from numpy import linalg as LA
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


def normalise_faces(average_face, raw_faces):
    """Subtract the average face from every column of the face matrix.

    ``average_face`` supplies one value per row of ``raw_faces``; that value
    is removed from every entry of the corresponding row, i.e. each column
    (one face) has the average face subtracted from it.
    """
    # Broadcasting a column vector avoids materialising a tiled copy of the
    # average face for every column.
    return np.subtract(raw_faces, np.reshape(average_face, (-1, 1)))


def build_parser():
    """Return the command-line parser for this script.

    usage: train.py [-h] -i DATA [-m EIGEN] [-n NEIGHBORS] [-f FACES] [-c]
                    [-s SEED] [-t SPLIT] [-2] [-p] [-l]
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model",
                        type=int, default=8)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use",
                        type=int, default=6)
    parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
    parser.add_argument("-c", "--principal", help="Show principal components",
                        action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    parser.add_argument("-t", "--split",
                        help="Fraction of data to use for testing",
                        type=float, default=0.25)
    parser.add_argument("-2", "--grapheigen",
                        help="Show 2D graph of targets versus principal components",
                        action='store_true')
    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
    return parser


def main():
    """Run the full pipeline: load, split, scale, reduce, plot, classify."""
    args = build_parser().parse_args()

    if args.pca and args.lda:
        sys.exit("Flags -p and -l are mutually exclusive")

    n_components = args.eigen

    raw_faces = genfromtxt(args.data, delimiter=',')
    # NOTE(review): labels are hard-coded as 10 classes of 52 consecutive rows
    # each (520 samples) -- confirm this matches the row order of the CSV.
    targets = np.repeat(np.arange(10), 52)

    faces_train, faces_test, target_train, target_test = train_test_split(
        raw_faces, targets, test_size=args.split, random_state=args.seed)

    # This removes the mean and scales to unit variance. The scaler is fitted
    # on the training set only and then applied to the test set.
    scaler = StandardScaler()
    faces_train = scaler.fit_transform(faces_train)
    faces_test = scaler.transform(faces_test)

    # PCA is the default reduction; -p only exists to make the choice explicit.
    if args.lda:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        faces_train = lda.fit_transform(faces_train, target_train)
        faces_test = lda.transform(faces_test)
        explained_variances = lda.explained_variance_ratio_
    else:
        # pca.components_ holds the principal components, i.e. the
        # n_components eigenvectors with the largest variance.
        pca = PCA(svd_solver='full', n_components=n_components)
        faces_train = pca.fit_transform(faces_train)
        faces_test = pca.transform(faces_test)
        explained_variances = pca.explained_variance_ratio_

    # Show the first args.faces principal components as images (eigenfaces).
    if args.faces:
        if args.lda:
            sys.exit("Can not plot eigenfaces when using LDA")
        available = pca.components_.shape[0]
        if not 0 < args.faces <= available:
            sys.exit("Can only plot between 1 and %d eigenfaces" % available)
        # subplot() needs integer grid sizes; round the column count up so an
        # odd number of faces still fits in the two rows.
        n_cols = (args.faces + 1) // 2
        for i in range(args.faces):
            ax = plt.subplot(2, n_cols, i + 1)
            # Assumes every sample has 46 * 56 features -- TODO confirm
            # against the input data.
            ax.imshow(pca.components_[i].reshape([46, 56]))
        plt.show()

    # Plot the explained variance ratio of each retained component.
    if args.principal:
        plt.bar(np.arange(explained_variances.size), explained_variances)
        plt.ylabel('Variance ratio')
        plt.xlabel('Face Number')
        plt.show()

    # Scatter the training samples over the first two components.
    if args.grapheigen:
        # One random colour per label value (10 distinct labels).
        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for _ in range(10)]
        point_colours = [cols[int(k)] for k in target_train]
        plt.scatter(faces_train[:, 0], faces_train[:, 1], color=point_colours)
        plt.show()

    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
    classifier.fit(faces_train, target_train)
    target_pred = classifier.predict(faces_test)

    print(confusion_matrix(target_test, target_pred))
    print('Accuracy %f' % accuracy_score(target_test, target_pred))


if __name__ == "__main__":
    main()