From e42170b70bb9710d73ff22fcd06ae8724a78cbd1 Mon Sep 17 00:00:00 2001
From: Vasil Zlatanov
Date: Wed, 5 Dec 2018 16:36:15 +0000
Subject: Move part1 parts to separate folder

---
 part1/train.py | 286 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100755 part1/train.py

diff --git a/part1/train.py b/part1/train.py
new file mode 100755
index 0000000..c5d4389
--- /dev/null
+++ b/part1/train.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+# Author: Vasil Zlatanov, Nunzio Pucci
+# EE4 Pattern Recognition coursework
+#
+# usage: train.py [-h] -i DATA [-m EIGEN] [-M REIGEN] [-e ENSEMBLE] [-b]
+#                 [-R RANDOM] [-n NEIGHBORS] [-f FACES] [-c] [-s SEED]
+#                 [-t SPLIT] [-2] [-p] [-l] [-r RECONSTRUCT] [-cm] [-q] [-pr]
+#                 [-alt]
+
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import sys
+import random
+import os
+import psutil
+from random import randint
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+
+import argparse
+import numpy as np
+
+from numpy import genfromtxt
+from numpy import linalg as LA
+
+from timeit import default_timer as timer
+
+n_faces = 52
+n_cases = 10
+n_pixels = 2576
+
+# Subtract the average face from each row of the face matrix and divide
+# by the per-pixel standard deviation of the training set
+def normalise_faces(deviations_tr, average_face, faces):
+    faces = np.subtract(faces, np.tile(average_face, (faces.shape[0], 1)))
+    return np.divide(faces, deviations_tr)
+
+# Split data into training and testing sets
+def test_split(n_faces, raw_faces, split, seed):
+    random.seed(seed)
+
+    n_training_faces = int(round(n_cases*(1 - split)))
+    n_test_faces = n_cases - n_training_faces
+    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
+    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
+    target_train = np.repeat(np.arange(n_faces), n_training_faces)
+    target_test = np.repeat(np.arange(n_faces), n_test_faces)
+
+    for x in range(n_faces):
+        samples = random.sample(range(n_cases), n_training_faces)
+        faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
+        faces_test[x] = [raw_faces[i+n_cases*x] for i in range(n_cases) if i not in samples]
+
+    faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
+    faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
+    return faces_train, faces_test, target_train, target_test
+
+def draw_results(args, target_test, target_pred):
+    acc_sc = accuracy_score(target_test, target_pred)
+    cm = confusion_matrix(target_test, target_pred)
+    print('Accuracy: ', acc_sc)
+    if args.conf_mat:
+        plt.matshow(cm, cmap='Blues')
+        plt.colorbar()
+        plt.ylabel('Actual')
+        plt.xlabel('Predicted')
+        plt.show()
+    return
+
+def test_model(M, faces_train, faces_test, target_train, target_test, args):
+    raw_faces_train = faces_train
+    raw_faces_test = faces_test
+
+    distances = np.zeros(faces_test.shape[0])
+
+    if args.pca or args.pca_r:
+        # project the data onto the M most significant eigenvectors
+        # (principal components) of the training covariance matrix
+        average_face = np.mean(faces_train, axis=0)
+        if args.classifyalt:
+            deviations_tr = np.ones(n_pixels)
+        else:
+            deviations_tr = np.std(faces_train, axis=0)
+        faces_train = normalise_faces(deviations_tr, average_face, faces_train)
+        faces_test = normalise_faces(deviations_tr, average_face, faces_test)
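+        # Note on the reduced ("snapshot") PCA branch below: for N training
+        # images of D pixels with N << D, the N x N matrix A.A^T has the same
+        # non-zero eigenvalues as the D x D covariance A^T.A, and every
+        # eigenvector v of A.A^T maps to an eigenvector A^T.v of the
+        # covariance (normalised afterwards). This makes -q far cheaper than
+        # eigendecomposing the full 2576 x 2576 covariance matrix.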
+        if args.pca_r:
+            # eigendecomposition of the small N x N matrix A.A^T
+            e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
+            e_vecs = np.dot(faces_train.T, e_vecs)
+            e_vecs = e_vecs/LA.norm(e_vecs, axis=0)
+        else:
+            e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
+
+        # LA.eigh returns eigenvalues in ascending order; flip to descending
+        e_vals = np.flip(e_vals)
+        e_vecs = np.fliplr(e_vecs).T
+
+        if args.random:
+            # randomly permute the last args.random of the kept eigenvectors
+            random_features = random.sample(range(M-args.random, M), args.random)
+            e_vals[M-args.random:M] = e_vals[random_features]
+            e_vecs[M-args.random:M] = e_vecs[random_features]
+
+        e_vals = e_vals[:M]
+        e_vecs = e_vecs[:M]
+
+        faces_train = np.dot(faces_train, e_vecs.T)
+        faces_test = np.dot(faces_test, e_vecs.T)
+
+        # reconstruct in pixel space, undoing the normalisation, and use the
+        # reconstruction error as a distance measure
+        rec_vecs = np.add(np.tile(average_face,
+            (faces_test.shape[0], 1)), np.dot(faces_test, e_vecs) * deviations_tr)
+        distances = LA.norm(raw_faces_test - rec_vecs, axis=1)
+
+        if args.reconstruct:
+            rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
+            ar = plt.subplot(2, 1, 1)
+            ar.imshow(rec_vec.reshape([46,56]).T, cmap='gist_gray')
+            ar = plt.subplot(2, 1, 2)
+            ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap='gist_gray')
+            plt.show()
+
+    if args.lda:
+        # the within-class scatter has rank at most N - n_faces, so fall back
+        # to the svd solver when M exceeds that
+        n_training_faces = faces_train.shape[0] // n_faces
+        if args.pca_r or (args.pca and M > n_training_faces - n_faces):
+            lda = LinearDiscriminantAnalysis(n_components=M, solver='svd')
+        else:
+            lda = LinearDiscriminantAnalysis(n_components=M, store_covariance=True)
+
+        faces_train = lda.fit_transform(faces_train, target_train)
+        faces_test = lda.transform(faces_test)
+        class_means = lda.means_
+        e_vals = lda.explained_variance_ratio_
+
+    if args.faces:
+        if args.lda:
+            for i in range(10):
+                ax = plt.subplot(2, 5, i + 1)
+                ax.imshow(class_means[i].reshape([46,56]).T)
+        else:
+            for i in range(args.faces):
+                ax = plt.subplot(2, args.faces//2, i + 1)
+                ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap='gist_gray')
+        plt.show()
+
+    if args.principal:
+        e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
+        plt.bar(np.arange(M), e_vals[:M])
+        plt.ylabel('Variance ratio (%)')
+        plt.xlabel('Number')
+        plt.show()
+
+    if args.grapheigen:
+        # Colors for distinct individuals
+        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
+        pltCol = [cols[int(k)] for k in target_train]
+        fig = plt.figure()
+        ax = fig.add_subplot(111, projection='3d')
+        ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
+        plt.show()
+
+    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
+    classifier.fit(faces_train, target_train)
+    target_pred = classifier.predict(faces_test)
+    if args.prob:
+        # average the classifier's confidence in the correct class per person
+        target_prob = classifier.predict_proba(faces_test)
+        n_test_faces = faces_test.shape[0] // n_faces
+        target_prob_vec = np.zeros(faces_test.shape[0])
+        for i in range(faces_test.shape[0]):
+            target_prob_vec[i] = target_prob[i][i // n_test_faces]
+        avg_target_prob = np.zeros(n_faces)
+        for i in range(n_faces):
+            avg_target_prob[i] = np.mean(target_prob_vec[n_test_faces*i:n_test_faces*(i+1)])
+        plt.bar(range(n_faces), avg_target_prob)
+        plt.show()
+
+    return target_pred, distances
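+
+# Hypothetical cross-check (not part of the pipeline) of the manual
+# eigendecomposition in test_model, using the already-imported sklearn PCA;
+# the components should match e_vecs up to sign, e.g.
+#
+#   pca = PCA(n_components=M)
+#   projected = pca.fit_transform(faces_train)
+#   # pca.components_ ~ e_vecs, pca.explained_variance_ ~ e_vals
+#   # (np.cov and sklearn PCA both normalise by 1/(N-1))
+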
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
+    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default=10)
+    parser.add_argument("-M", "--reigen", help="Sweep from EIGEN up to this number of eigenvalues", type=int)
+    parser.add_argument("-e", "--ensemble", help="Number of ensembles to use", type=int, default=0)
+    parser.add_argument("-b", "--bagging", help="Use bagging when building ensembles", action='store_true')
+    parser.add_argument("-R", "--random", help="Number of eigenvalues to randomise", type=int)
+    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=1)
+    parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
+    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
+    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
+    parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.3)
+    parser.add_argument("-2", "--grapheigen", help="Show 3D scatter of the first three principal components",
+                        action='store_true')
+    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
+    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face number", type=int, default=0)
+    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
+    parser.add_argument("-q", "--pca_r", help="Use reduced PCA", action='store_true')
+    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true')
+    parser.add_argument("-alt", "--classifyalt", help="Alternative method ON", action='store_true')
+    args = parser.parse_args()
+
+    if args.lda and args.classifyalt:
+        sys.exit("LDA and Alt PCA cannot be performed together")
+
+    raw_faces = genfromtxt(args.data, delimiter=',')
+    targets = np.repeat(np.arange(n_faces), n_cases)
+
+    faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
+
+    if args.ensemble:
+        # build args.ensemble training sets, optionally bagged per class
+        n_training_faces = int(round(n_cases*(1 - args.split)))
+        faces_train_ens = np.zeros((args.ensemble, n_faces, n_training_faces, n_pixels))
+        for x in range(args.ensemble):
+            if args.bagging:
+                for k in range(n_faces):
+                    samples = random.choices(range(n_training_faces), k=n_training_faces)
+                    faces_train_ens[x][k] = [faces_train[i+n_training_faces*k] for i in samples]
+            else:
+                faces_train_ens[x] = faces_train.reshape((n_faces, n_training_faces, n_pixels))
+
+        faces_train_ens = faces_train_ens.reshape(args.ensemble, n_faces*n_training_faces, n_pixels)
+
+    if args.classifyalt:
+        # fit one PCA model per class and classify by reconstruction error
+        faces_train = faces_train.reshape(n_faces, int(faces_train.shape[0]/n_faces), n_pixels)
+        target_train = target_train.reshape(n_faces, int(target_train.shape[0]/n_faces))
+
+        distances = np.zeros((n_faces, faces_test.shape[0]))
+        for i in range(n_faces):
+            target_pred, distances[i] = test_model(args.eigen, faces_train[i],
+                    faces_test, target_train[i], target_test, args)
+        target_pred = np.argmin(distances, axis=0)
+    elif args.reigen:
+        target_pred = np.zeros((args.reigen-args.eigen, target_test.shape[0]))
+        accuracy = np.zeros(args.reigen-args.eigen)
+        rec_error = np.zeros((args.reigen-args.eigen, target_test.shape[0]))
+
+        for M in range(args.eigen, args.reigen):
+            start = timer()
+            target_pred[M - args.eigen], rec_error[M - args.eigen] = test_model(M, faces_train,
+                    faces_test, target_train, target_test, args)
+            end = timer()
+            print("Run with", M, "eigenvalues completed in", end-start, "seconds")
+            print("Memory Used:", psutil.Process(os.getpid()).memory_info().rss)
+            accuracy[M - args.eigen] = accuracy_score(target_test, target_pred[M-args.eigen])
+        # Plot accuracy against M
+        print('Best accuracy of', 100*max(accuracy), '% for M =', np.argmax(accuracy) + args.eigen)
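+        # Example sweep invocation (the CSV path here is a placeholder):
+        #   ./train.py -i data/faces.csv -p -m 10 -M 100
+        # plots recognition accuracy for every M in [10, 100).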
+        plt.plot(range(args.eigen, args.reigen), 100*accuracy)
+        plt.xlabel('Number of Eigenvectors used (M)')
+        plt.ylabel('Recognition Accuracy (%)')
+        plt.grid(True)
+        plt.show()
+    elif args.ensemble:
+        rec_error = np.zeros((args.ensemble, faces_test.shape[0]))
+        target_pred = np.zeros((args.ensemble, target_test.shape[0]))
+        for i in range(args.ensemble):
+            target_pred[i], rec_error[i] = test_model(args.eigen, faces_train_ens[i],
+                    faces_test, target_train, target_test, args)
+
+        target_pred_comb = np.zeros(target_pred.shape[1])
+        target_pred = target_pred.astype(int).T
+        if args.conf_mat:
+            cm = confusion_matrix(np.tile(target_test, args.ensemble), target_pred.flatten('F'))
+            plt.matshow(cm, cmap='Blues')
+            plt.colorbar()
+            plt.ylabel('Actual')
+            plt.xlabel('Predicted')
+            plt.show()
+
+        # combine the ensemble by majority vote over each test sample
+        for i in range(target_pred.shape[0]):
+            target_pred_comb[i] = np.bincount(target_pred[i]).argmax()
+        target_pred = target_pred_comb
+    else:
+        M = args.eigen
+        start = timer()
+        target_pred, distances = test_model(M, faces_train, faces_test, target_train, target_test, args)
+        end = timer()
+
+    draw_results(args, target_test, target_pred)
+
+if __name__ == "__main__":
+    main()
--
cgit v1.2.3-54-g00ecf