#!/usr/bin/env python
# Train a model from sample data
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
"""Classify face images with k-nearest neighbours after optional PCA and/or LDA.

usage: train.py [-h] -i DATA [-m EIGEN] [-n NEIGHBORS] [-f FACES] [-c]
                [-s SEED] [-t SPLIT] [-2] [-p] [-l] [-r RECONSTRUCT] [-q]
"""

import argparse
import random
import sys
from random import randint

import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
from numpy import linalg as LA
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Number of classes the script labels the data with.
N_FACES = 10
# Number of consecutive rows of the input that belong to each class.
CASES_PER_FACE = 52
# Shape every image row is reshaped to before being displayed.
IMAGE_SHAPE = (46, 56)


def normalise_faces(average_face, faces):
    """Subtract ``average_face`` from every row and scale rows to unit std.

    ``faces`` is a 2-D array with one image per row; ``average_face`` is a
    single row of the same width. Each row is divided by its own standard
    deviation, computed after the subtraction. Returns a new array with the
    same shape as ``faces``.
    """
    centred = np.subtract(faces, average_face)
    return np.divide(centred, np.std(centred, axis=1, keepdims=True))


def test_split(n_faces, raw_faces, split, seed):
    """Split the rows of ``raw_faces`` into training and testing sets.

    The first ``n_faces * 52`` rows are read as ``n_faces`` consecutive groups
    of 52 rows, one group per class. From every group,
    ``round(52 * (1 - split))`` rows are drawn at random for training (kept in
    the order they were drawn) and the remaining rows, in ascending order, go
    to the test set.

    Args:
        n_faces: number of classes (groups of rows) to read.
        raw_faces: 2-D array with one image per row.
        split: fraction of each group to hold out for testing.
        seed: value passed to ``random.seed`` so the split is repeatable.

    Returns:
        ``(faces_train, faces_test, target_train, target_test)`` where the
        face arrays are 2-D float arrays and the targets hold the class index
        of every row.

    Raises:
        ValueError: if ``raw_faces`` is not 2-D or has too few rows.
    """
    random.seed(seed)
    n_cases = CASES_PER_FACE

    raw_faces = np.asarray(raw_faces)
    if raw_faces.ndim != 2 or raw_faces.shape[0] < n_faces * n_cases:
        raise ValueError(
            'expected a 2-D array with at least %d rows, got shape %s'
            % (n_faces * n_cases, raw_faces.shape)
        )
    # Taken from the data rather than hard-coded, so any image size works.
    n_pixels = raw_faces.shape[1]

    n_training_faces = int(round(n_cases * (1 - split)))
    n_test_faces = n_cases - n_training_faces

    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)

    for x in range(n_faces):
        group = raw_faces[x * n_cases:(x + 1) * n_cases]
        samples = random.sample(range(n_cases), n_training_faces)
        chosen = set(samples)
        held_out = [i for i in range(n_cases) if i not in chosen]
        faces_train[x] = group[np.array(samples, dtype=int)]
        faces_test[x] = group[np.array(held_out, dtype=int)]

    faces_train = faces_train.reshape(n_faces * n_training_faces, n_pixels)
    faces_test = faces_test.reshape(n_faces * n_test_faces, n_pixels)
    return faces_train, faces_test, target_train, target_test


def _build_parser():
    """Create the command-line parser for the script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default=10)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=3)
    parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    # Authors' notes: best split for LDA = 22, best split for PCA = 20.
    parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.22)
    parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true')
    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
    return parser


def _apply_pca(faces_train, faces_test, args):
    """Normalise both sets and project them onto the first ``args.eigen`` eigenvectors.

    Returns ``(faces_train, faces_test, e_vals, e_vecs)`` with the eigenvalues
    in descending order and one eigenvector per row of ``e_vecs``.
    """
    average_face = np.mean(faces_train, axis=0)
    faces_train = normalise_faces(average_face, faces_train)
    faces_test = normalise_faces(average_face, faces_test)

    if args.pca_r:
        # np.cov treats every row as a variable, so this decomposes the
        # (images x images) matrix instead of the (pixels x pixels) one.
        e_vals, e_vecs = LA.eigh(np.cov(faces_train))
        e_vecs_original = e_vecs
        # Map the small eigenvectors back into pixel space.
        e_vecs = np.dot(faces_train.T, e_vecs)
        # Remove the mean and scale every column to unit variance.
        # TODO: maybe replace with our own normalising function.
        e_vecs = StandardScaler().fit_transform(e_vecs)

        if args.reconstruct:
            # NOTE(review): this runs before the reordering below, so it uses
            # the first args.eigen components in the order eigh returned them
            # (ascending eigenvalue), and indexes e_vecs_original as
            # [i][face]. Confirm both are intended.
            rec_vec = np.divide(average_face, np.std(average_face)).T
            e_vecs_t = e_vecs.T
            for i in range(args.eigen):
                rec_vec = np.add(rec_vec, np.dot(e_vecs_original[i][args.reconstruct], e_vecs_t[i]))
            plt.imshow(rec_vec.reshape(IMAGE_SHAPE).T, cmap='gist_gray')
            plt.show()
    else:
        e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))

    # eigh returns ascending eigenvalues; put the largest first and store one
    # eigenvector per row.
    e_vals = np.flip(e_vals)
    e_vecs = np.fliplr(e_vecs).T

    basis = e_vecs[:args.eigen].T
    return np.dot(faces_train, basis), np.dot(faces_test, basis), e_vals, e_vecs


def main():
    """Parse the command line, train the classifier and report its accuracy."""
    parser = _build_parser()
    args = parser.parse_args()
    M = args.eigen

    uses_pca = args.pca or args.pca_r
    if (args.faces or args.principal) and not (uses_pca or args.lda):
        # Without a decomposition there is nothing to plot; fail before
        # loading the data instead of hitting an undefined name later.
        parser.error("--faces and --principal require --pca, --pca_r or --lda")

    raw_faces = genfromtxt(args.data, delimiter=',')
    faces_train, faces_test, target_train, target_test = test_split(
        N_FACES, raw_faces, args.split, args.seed)

    e_vals = None
    e_vecs = None
    class_means = None

    if uses_pca:
        faces_train, faces_test, e_vals, e_vecs = _apply_pca(faces_train, faces_test, args)

    # For the assessment: print eigenvalues and eigenvectors of both cases and
    # compare the results with physical explanations.

    if args.lda:
        # LDA yields at most min(n_classes - 1, n_features) components, and
        # scikit-learn rejects a larger request.
        lda_components = min(M, N_FACES - 1, faces_train.shape[1])
        lda = LinearDiscriminantAnalysis(n_components=lda_components)
        faces_train = lda.fit_transform(faces_train, target_train)
        faces_test = lda.transform(faces_test)
        class_means = lda.means_
        e_vals = lda.explained_variance_ratio_

    if args.faces:
        if args.lda:
            # NOTE(review): when PCA ran first the class means live in the
            # reduced space and cannot be reshaped to an image.
            for i in range(N_FACES):
                ax = plt.subplot(2, (N_FACES + 1) // 2, i + 1)
                ax.imshow(class_means[i].reshape(IMAGE_SHAPE).T)
        else:
            # Ceiling division: subplot needs integer grid sizes and enough
            # cells for an odd number of faces.
            n_cols = (args.faces + 1) // 2
            for i in range(args.faces):
                ax = plt.subplot(2, n_cols, i + 1)
                ax.imshow(e_vecs[i].reshape(IMAGE_SHAPE).T, cmap='gist_gray')
        plt.show()

    if args.principal:
        e_vals = e_vals / np.sum(e_vals) * 100
        # Fewer than M values may exist (e.g. after LDA), so size the x axis
        # from what is actually plotted.
        shown = e_vals[:M]
        plt.bar(np.arange(len(shown)), shown)
        plt.ylabel('Varaiance ratio (%)')
        plt.xlabel('Eigenface Number')
        plt.show()

    if args.grapheigen:
        # One random colour per class.
        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for _ in range(N_FACES)]
        point_colours = [cols[int(k)] for k in target_train]
        plt.scatter(faces_train[:, 0], faces_train[:, 1], color=point_colours)
        plt.show()

    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
    classifier.fit(faces_train, target_train)
    target_pred = classifier.predict(faces_test)

    cm = confusion_matrix(target_test, target_pred)
    print(cm)
    print('Accuracy %f' % accuracy_score(target_test, target_pred))


if __name__ == "__main__":
    main()