#!/usr/bin/env python
# Train a model from sample data
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework

import matplotlib.pyplot as plt
import sys
import random
from random import randint

from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

import argparse
import numpy as np
from numpy import genfromtxt

# Subtract the average face from each row of the face matrix
def normalise_faces(average_face, raw_faces):
    return np.subtract(raw_faces, np.tile(average_face, (raw_faces.shape[1], 1)).T)

# usage: train.py [-h] -i DATA [-m M] [-n N] [-f FACES] [-c] [-s SEED] [-t SPLIT] [-2] [-p] [-l]
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default=140)
parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=3)
parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.22)
### best split for lda = 0.22
### best split for pca = 0.20
parser.add_argument("-2", "--grapheigen", help="Show 2D graph of targets versus principal components", action='store_true')
parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
args = parser.parse_args()

if args.pca and args.lda:
    sys.exit("Flags -p and -l are mutually exclusive")

M = args.eigen

raw_faces = genfromtxt(args.data, delimiter=',')
targets = np.repeat(np.arange(10), 52)

#faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed)

### Splitter
n_faces = 10

def test_split(n_faces, raw_faces, split, seed):
    """Split the data so every person keeps the same train/test ratio."""
    random.seed(seed)

    n_cases = 52
    n_pixels = 2576

    n_training_faces = int(round(n_cases * (1 - split)))
    n_test_faces = n_cases - n_training_faces

    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)

    for x in range(n_faces):
        # Pick a random subset of this person's images for training
        # and keep the remainder for testing
        samples = random.sample(range(n_cases), n_training_faces)
        faces_train[x] = [raw_faces[i + n_cases * x] for i in samples]
        faces_test[x] = [raw_faces[i + n_cases * x] for i in range(n_cases) if i not in samples]

    faces_train = faces_train.reshape(n_faces * n_training_faces, n_pixels)
    faces_test = faces_test.reshape(n_faces * n_test_faces, n_pixels)
    return faces_train, faces_test, target_train, target_test

faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)

# This removes the mean and scales to unit variance
sc = StandardScaler()
faces_train = sc.fit_transform(faces_train)
faces_test = sc.transform(faces_test)

explained_variances = None
if args.lda:
    # LDA yields at most n_classes - 1 components, so clamp M accordingly
    lda = LinearDiscriminantAnalysis(n_components=min(M, n_faces - 1))
    faces_train = lda.fit_transform(faces_train, target_train)
    faces_test = lda.transform(faces_test)
    explained_variances = lda.explained_variance_ratio_
else:
    # faces_pca contains the principal components, i.e. the M most significant eigenvectors
    pca = PCA(svd_solver='full', n_components=M)
    faces_train = pca.fit_transform(faces_train)
    faces_test = pca.transform(faces_test)
    explained_variances = pca.explained_variance_ratio_

# Plot the first args.faces eigenfaces on a 2-row grid
if args.faces:
    if args.lda:
        sys.exit("Can not plot eigenfaces when using LDA")
    for i in range(args.faces):
        ax = plt.subplot(2, (args.faces + 1) // 2, i + 1)
        ax.imshow(pca.components_[i].reshape([46, 56]).T)
    plt.show()

# Plot the explained variances (eigenvalue ratios) from the fitted model
if args.principal:
    plt.bar(np.arange(explained_variances.size), explained_variances)
    plt.ylabel('Variance ratio')
    plt.xlabel('Face Number')
    plt.show()

if args.grapheigen:
    # Random colours for distinct individuals
    cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(10)]
    pltCol = [cols[int(k)] for k in target_train]
    plt.scatter(faces_train[:, 0], faces_train[:, 1], color=pltCol)
    plt.show()

classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
classifier.fit(faces_train, target_train)
target_pred = classifier.predict(faces_test)

cm = confusion_matrix(target_test, target_pred)
print(cm)
print('Accuracy %f' % accuracy_score(target_test, target_pred))
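
# Example invocations (a sketch; assumes the input CSV holds 520 face vectors
# of 2576 pixels each, 52 consecutive rows per person, as the splitter above
# expects -- the filename faces.csv is hypothetical):
#   ./train.py -i faces.csv -p              # PCA (default M=140) + 3-NN
#   ./train.py -i faces.csv -l -t 0.22      # LDA with a 22% test split
#   ./train.py -i faces.csv -p -f 10        # additionally show 10 eigenfaces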