#!/usr/bin/env python
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
#
# NOTE: recovered from the diff of commit
# e42170b70bb9710d73ff22fcd06ae8724a78cbd1 ("Move part1 parts to seperate
# folder", Vasil Zlatanov, Wed, 5 Dec 2018 16:36:15 +0000), which deleted
# train.py (mode 100755, 286 lines).
#
# usage: train.py [-h] -i DATA [-m EIGEN] [-M REIGEN] [-e ENSEMBLE] [-b]
#                 [-R RANDOM] [-n NEIGHBORS] [-f FACES] [-c] [-s SEED]
#                 [-t SPLIT] [-2] [-p] [-l] [-r RECONSTRUCT] [-cm] [-q] [-pr]
#                 [-alt]

import argparse
import os
import random
import sys
from random import randint
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import numpy as np
import psutil
# Not referenced by name; presumably imported so that older matplotlib
# versions register the '3d' projection used by --grapheigen.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from numpy import genfromtxt
from numpy import linalg as LA
from sklearn.decomposition import PCA  # noqa: F401
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split  # noqa: F401
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler  # noqa: F401

n_faces = 52
n_cases = 10
n_pixels = 2576
# Every image is displayed as a 46x56 array, transposed (46 * 56 == n_pixels).
face_shape = (46, 56)


def normalise_faces(deviations_tr, average_face, faces):
    """Centre and scale each row of ``faces``.

    ``average_face`` is subtracted from every row and the result is divided
    element-wise by ``deviations_tr``.  Entries of ``deviations_tr`` that are
    exactly zero are treated as 1 so that constant pixels do not produce
    NaN/inf values.

    Returns a new array; ``faces`` is not modified.
    """
    deviations_tr = np.asarray(deviations_tr, dtype=float)
    safe_deviations = np.where(deviations_tr == 0, 1.0, deviations_tr)
    # Broadcasting applies the 1-D vectors to every row.
    return (np.asarray(faces) - average_face) / safe_deviations


def test_split(n_faces, raw_faces, split, seed):
    """Split the data into training and testing sets, per identity.

    ``raw_faces`` is read as ``n_faces`` consecutive groups of ``n_cases``
    rows.  For each group, ``round(n_cases * (1 - split))`` rows are drawn at
    random (after seeding ``random`` with ``seed``) for training, in the order
    drawn; the remaining rows go to the test set in ascending order.

    Returns ``(faces_train, faces_test, target_train, target_test)`` where
    the face arrays are 2-D (one row per image) and the targets are the
    group indices ``0 .. n_faces - 1`` repeated once per image.

    Raises ``ValueError`` if ``raw_faces`` has too few rows.
    """
    random.seed(seed)

    n_training_faces = int(round(n_cases * (1 - split)))
    n_test_faces = n_cases - n_training_faces

    if raw_faces.shape[0] < n_faces * n_cases:
        raise ValueError(
            "expected at least %d rows (%d faces x %d cases), got %d"
            % (n_faces * n_cases, n_faces, n_cases, raw_faces.shape[0]))

    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)

    for x in range(n_faces):
        samples = random.sample(range(n_cases), n_training_faces)
        chosen = set(samples)
        held_out = [i for i in range(n_cases) if i not in chosen]
        first_row = n_cases * x
        # Integer index arrays also work when one side of the split is empty.
        train_rows = first_row + np.array(samples, dtype=int)
        test_rows = first_row + np.array(held_out, dtype=int)
        faces_train[x] = raw_faces[train_rows]
        faces_test[x] = raw_faces[test_rows]

    faces_train = faces_train.reshape(n_faces * n_training_faces, n_pixels)
    faces_test = faces_test.reshape(n_faces * n_test_faces, n_pixels)
    return faces_train, faces_test, target_train, target_test


def draw_results(args, target_test, target_pred):
    """Print the accuracy and, with ``--conf_mat``, plot the confusion matrix."""
    acc_sc = accuracy_score(target_test, target_pred)
    print('Accuracy: ', acc_sc)
    if args.conf_mat:
        cm = confusion_matrix(target_test, target_pred)
        plt.matshow(cm, cmap='Blues')
        plt.colorbar()
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()


def test_model(M, faces_train, faces_test, target_train, target_test, args):
    """Fit the selected model on the training faces and classify the test faces.

    Depending on ``args`` the faces are projected with PCA (``--pca``, or the
    low-dimensional variant ``--pca_r``) and/or LDA (``--lda``), keeping ``M``
    components, and then classified with a k-nearest-neighbour classifier
    (``--neighbors``).  Optional plots are shown for ``--reconstruct``,
    ``--faces``, ``--principal``, ``--grapheigen`` and ``--prob``.

    Returns ``(target_pred, distances)``: the predicted label of every test
    face and the norm of the PCA reconstruction error of every test face
    (all zeros when no PCA was requested).
    """
    raw_faces_train = faces_train
    raw_faces_test = faces_test

    # Stay None when the corresponding model is not fitted, so the optional
    # plots below can be skipped instead of failing with a NameError.
    e_vals = None
    e_vecs = None
    class_means = None

    distances = np.zeros(faces_test.shape[0])

    if args.pca or args.pca_r:
        # e_vecs ends up holding the M most variant eigenvectors, one per row.
        average_face = np.mean(faces_train, axis=0)
        if args.classifyalt:
            deviations_tr = np.ones(n_pixels)
        else:
            deviations_tr = np.std(faces_train, axis=0)
        faces_train = normalise_faces(deviations_tr, average_face, faces_train)
        faces_test = normalise_faces(deviations_tr, average_face, faces_test)
        if args.pca_r:
            # Decompose the small (samples x samples) matrix and map its
            # eigenvectors back to pixel space, normalised to unit length.
            e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
            e_vecs = np.dot(faces_train.T, e_vecs)
            e_vecs = e_vecs / LA.norm(e_vecs, axis=0)
        else:
            e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))

        # eigh returns ascending eigenvalues; put the largest first and store
        # the eigenvectors as rows.
        e_vals = np.flip(e_vals)
        e_vecs = np.fliplr(e_vecs).T

        if args.random:
            # Overwrite the last args.random of the M retained components.
            # Targets are M-R .. M-1 (the original wrote to index M, outside
            # the retained slice); fancy indexing on the right-hand side
            # copies, so sources are not clobbered while assigning.
            # NOTE(review): the candidates are drawn from the same M-R .. M-1
            # range, so this only shuffles those components.  Presumably the
            # intent was to draw from the components beyond M-R -- confirm.
            random_features = random.sample(range(M - args.random, M), args.random)
            replaced = np.arange(M - args.random, M)
            e_vals[replaced] = e_vals[random_features]
            e_vecs[replaced] = e_vecs[random_features]

        e_vals = e_vals[:M]
        e_vecs = e_vecs[:M]

        faces_train = np.dot(faces_train, e_vecs.T)
        faces_test = np.dot(faces_test, e_vecs.T)

        # Undo the normalisation with the same per-pixel deviations that were
        # used to apply it (they must not be reordered).
        rec_vecs = average_face + np.dot(faces_test, e_vecs) * deviations_tr
        distances = LA.norm(raw_faces_test - rec_vecs, axis=1)

        if args.reconstruct:
            rec_vec = average_face + np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr
            ar = plt.subplot(2, 1, 1)
            ar.imshow(rec_vec.reshape(face_shape).T, cmap='gist_gray')
            ar = plt.subplot(2, 1, 2)
            ar.imshow(raw_faces_train[args.reconstruct].reshape(face_shape).T, cmap='gist_gray')
            plt.show()

    if args.lda:
        # LDA yields at most min(n_features, n_classes - 1) components.
        n_classes = len(np.unique(target_train))
        n_components = max(1, min(M, n_classes - 1, faces_train.shape[1]))
        # The original compared M against an undefined `n_training_faces`;
        # the number of training samples is used here.
        if args.pca_r or (args.pca and M > faces_train.shape[0] - n_faces):
            lda = LinearDiscriminantAnalysis(n_components=n_components, solver='svd')
        else:
            lda = LinearDiscriminantAnalysis(n_components=n_components, store_covariance=True)

        faces_train = lda.fit_transform(faces_train, target_train)
        faces_test = lda.transform(faces_test)
        class_means = lda.means_
        e_vals = lda.explained_variance_ratio_

    if args.faces:
        if args.lda:
            # NOTE(review): always shows the first 10 class means and reshapes
            # them as images, which only fits when LDA ran on raw pixels
            # (no PCA beforehand) -- confirm the intended usage.
            for i in range(10):
                ax = plt.subplot(2, 5, i + 1)
                ax.imshow(class_means[i].reshape(face_shape).T)
        elif e_vecs is not None:
            n_cols = (args.faces + 1) // 2  # subplot needs an integer grid
            for i in range(args.faces):
                ax = plt.subplot(2, n_cols, i + 1)
                ax.imshow(e_vecs[i].reshape(face_shape).T, cmap='gist_gray')
        plt.show()

    if args.principal and e_vals is not None:
        shown = (100 * e_vals / np.sum(e_vals))[:M]
        plt.bar(np.arange(len(shown)), shown)
        plt.ylabel('Variance ratio (%)')
        plt.xlabel('Number')
        plt.show()

    if args.grapheigen:
        # Colors for distinct individuals
        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
        pltCol = [cols[int(k)] for k in target_train]
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
        plt.show()

    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
    classifier.fit(faces_train, target_train)
    target_pred = classifier.predict(faces_test)

    if args.prob:
        target_prob = classifier.predict_proba(faces_test)
        labels = np.asarray(target_test).astype(int)
        # predict_proba columns follow classifier.classes_; pick, for every
        # test face, the probability given to its true identity (0 when that
        # identity was not among the training labels).
        column_of = {int(label): col for col, label in enumerate(classifier.classes_)}
        true_prob = np.array([
            target_prob[row, column_of[label]] if label in column_of else 0.0
            for row, label in enumerate(labels)
        ])
        # Average per identity for any number of test faces per identity.
        counts = np.bincount(labels, minlength=n_faces)
        sums = np.bincount(labels, weights=true_prob, minlength=n_faces)
        avg_target_prob = sums / np.maximum(counts, 1)
        plt.bar(range(len(avg_target_prob)), avg_target_prob)
        plt.show()

    return target_pred, distances


def main():
    """Parse the command line, run the requested experiment, report accuracy."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default=10)
    parser.add_argument("-M", "--reigen",
                        help="Sweep the number of eigenvalues from EIGEN up to (excluding) this value",
                        type=int)
    parser.add_argument("-e", "--ensemble", help="Number of ensembles to use", type=int, default=0)
    parser.add_argument("-b", "--bagging",
                        help="Resample the training faces with replacement for each ensemble member",
                        action='store_true')
    parser.add_argument("-R", "--random", help="Number of eigen value to randomise", type=int)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=1)
    parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.3)
    parser.add_argument("-2", "--grapheigen",
                        help="Show 3D graph of targets versus principal components",
                        action='store_true')
    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true')
    parser.add_argument("-alt", "--classifyalt", help="Alternative method ON", action='store_true')
    args = parser.parse_args()

    if args.lda and args.classifyalt:
        sys.exit("LDA and Alt PCA can not be performed together")
    if args.classifyalt and not (args.pca or args.pca_r):
        # The alternative method ranks identities by PCA reconstruction
        # error, which is all zeros when no PCA is run.
        sys.exit("Alt PCA requires --pca or --pca_r")

    raw_faces = genfromtxt(args.data, delimiter=',')

    faces_train, faces_test, target_train, target_test = test_split(
        n_faces, raw_faces, args.split, args.seed)

    if args.ensemble:
        n_training_faces = int(round(n_cases * (1 - args.split)))
        faces_train_ens = np.zeros((args.ensemble, n_faces, n_training_faces, n_pixels))
        for x in range(args.ensemble):
            if args.bagging:
                # Resample within each identity so target_train stays valid.
                for k in range(n_faces):
                    samples = random.choices(range(n_training_faces), k=n_training_faces)
                    faces_train_ens[x][k] = [faces_train[i + n_training_faces * k] for i in samples]
            else:
                faces_train_ens[x] = faces_train.reshape((n_faces, n_training_faces, n_pixels))

        faces_train_ens = faces_train_ens.reshape(args.ensemble, n_faces * n_training_faces, n_pixels)

    if args.classifyalt:
        # One PCA subspace per identity; a test face is assigned to the
        # identity whose subspace reconstructs it with the smallest error.
        faces_train = faces_train.reshape(n_faces, faces_train.shape[0] // n_faces, n_pixels)
        target_train = target_train.reshape(n_faces, target_train.shape[0] // n_faces)

        distances = np.zeros((n_faces, faces_test.shape[0]))
        for i in range(n_faces):
            _, distances[i] = test_model(args.eigen, faces_train[i],
                                         faces_test, target_train[i], target_test, args)
        target_pred = np.argmin(distances, axis=0)
    elif args.reigen:
        if args.reigen <= args.eigen:
            sys.exit("--reigen must be greater than --eigen")

        sweep = range(args.eigen, args.reigen)
        sweep_pred = np.zeros((len(sweep), target_test.shape[0]))
        accuracy = np.zeros(len(sweep))

        for row, M in enumerate(sweep):
            start = timer()
            sweep_pred[row], _ = test_model(M, faces_train, faces_test,
                                            target_train, target_test, args)
            end = timer()
            print("Run with", M, "eigenvalues completed in ", end - start, "seconds")
            print("Memory Used:", psutil.Process(os.getpid()).memory_info().rss)
            accuracy[row] = accuracy_score(target_test, sweep_pred[row])

        # Plot
        best = int(np.argmax(accuracy))
        print('Max efficiency of ', 100 * accuracy[best], '% for M =', sweep[best])
        plt.plot(sweep, 100 * accuracy)
        plt.xlabel('Number of Eigenvectors used (M)')
        plt.ylabel('Recognition Accuracy (%)')
        plt.grid(True)
        plt.show()

        # Report the final results for the best-performing M.
        target_pred = sweep_pred[best]
    elif args.ensemble:
        target_pred = np.zeros((args.ensemble, target_test.shape[0]))
        for i in range(args.ensemble):
            target_pred[i], _ = test_model(args.eigen, faces_train_ens[i],
                                           faces_test, target_train, target_test, args)

        # One row per test face, one column per ensemble member.
        target_pred = target_pred.astype(int).T
        if args.conf_mat:
            # Confusion matrix over the pooled predictions of all members.
            cm = confusion_matrix(np.tile(target_test, args.ensemble), target_pred.flatten('F'))
            plt.matshow(cm, cmap='Blues')
            plt.colorbar()
            plt.ylabel('Actual')
            plt.xlabel('Predicted')
            plt.show()

        # Majority vote across the ensemble members.
        target_pred = np.array([np.bincount(votes).argmax() for votes in target_pred])
    else:
        target_pred, _ = test_model(args.eigen, faces_train, faces_test,
                                    target_train, target_test, args)

    draw_results(args, target_test, target_pred)


if __name__ == "__main__":
    main()