#!/usr/bin/env python
# Train a model from sample data
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
#
# usage: train.py [-h] -i DATA -o MODEL [-m M]

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import sys
import random
from random import randint

from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

import argparse
import numpy as np

from numpy import genfromtxt
from numpy import linalg as LA

from timeit import default_timer as timer

n_faces = 52
n_cases = 10
n_pixels = 2576

# subtract the normal face from each row of the face matrix
def normalise_faces(average_face, faces):
    faces = np.subtract(faces, np.tile(average_face, (faces.shape[0],1)))
    return np.divide(faces.T, np.std(faces.T, axis=0)).T

# Split data into training and testing sets
def test_split(n_faces, raw_faces, split, seed):
    random.seed(seed)

    raw_faces_split = np.split(raw_faces,n_cases)
    n_training_faces = int(round(n_cases*(1 - split)))
    n_test_faces = n_cases - n_training_faces
    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)

    for x in range(n_faces):
        samples = random.sample(range(n_cases), n_training_faces)
        faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
        faces_test[x] = [raw_faces[i+n_cases*x] for i in range(n_cases) if i not in samples]

    faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
    faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
    return faces_train, faces_test, target_train, target_test

def draw_conf_mat(args, target_test, target_pred):
    cm = confusion_matrix(target_test, target_pred)
    print(cm)
    if (args.conf_mat):
        plt.matshow(cm, cmap='Blues')
        plt.colorbar()
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()
    return accuracy_score(target_test, target_pred)

def test_model(M, faces_train, faces_test, target_train, target_test, args):
    raw_faces_train = faces_train

    explained_variances = ()

    if args.pca or args.pca_r:
        # faces_pca containcts the principial components or the M most variant eigenvectors
        average_face = np.mean(faces_train, axis=0)
        deviations_tr =  np.std(faces_train, axis=0)
        deviations_tst = np.std(faces_train, axis=0)
        faces_train = normalise_faces(average_face, faces_train)
        faces_test = normalise_faces(average_face, faces_test)
        if (args.pca_r):
            print('Reduced PCA')
            e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
            e_vecs = np.dot(faces_train.T, e_vecs)
            e_vecs = e_vecs/LA.norm(e_vecs, axis = 0)
        else:
            print('Standard PCA')
            e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
            # e_vecs = normalise_faces(np.mean(e_vecs,axis=0), e_vecs)

        e_vals = np.flip(e_vals)[:M]
        e_vecs = np.fliplr(e_vecs).T[:M]
        deviations_tr = np.flip(deviations_tr)
        deviations_tst = np.flip(deviations_tst)

        faces_train = np.dot(faces_train, e_vecs.T)
        faces_test = np.dot(faces_test, e_vecs.T)

        if (args.reconstruct):
            rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
            rec_faces_test = np.add(average_face, np.dot(faces_test, e_vecs) * deviations_tst)
#THERE MIGHT BE A RECONSTRUCTION PROBLEM DUE TO DEVIATIONS_TST
            rec_error = LA.norm(np.subtract(raw_faces_train[args.reconstruct], rec_vec))
            ar = plt.subplot(2, 1, 1)
            ar.imshow(rec_vec.reshape([46,56]).T, cmap = 'gist_gray')
            ar = plt.subplot(2, 1, 2)
            ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap = 'gist_gray')
            plt.show()

    if args.lda:
        if args.pca_r or (args.pca and M > n_training_faces - n_faces):
            lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen')
        else:
            lda = LinearDiscriminantAnalysis(n_components=M, store_covariance='True')

        faces_train = lda.fit_transform(faces_train, target_train)
        faces_test = lda.transform(faces_test)
        class_means = lda.means_
        e_vals = lda.explained_variance_ratio_
        scatter_matrix = lda.covariance_
        print(LA.matrix_rank(scatter_matrix))

    if args.faces:
        if args.lda:
            for i in range(10):
                ax = plt.subplot(2, 5, i + 1)
                ax.imshow(class_means[i].reshape([46,56]).T)
        else:
            for i in range(args.faces):
                ax = plt.subplot(2, args.faces/2, i + 1)
                ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray')
        plt.show()

    if args.principal:
        e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
        plt.bar(np.arange(M), e_vals[:M])
        plt.ylabel('Varaiance ratio (%)');plt.xlabel('Number')
        plt.show()

    if args.grapheigen:
        #graph_eigen()
        # Colors for distinct individuals
        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
        pltCol = [cols[int(k)] for k in target_train]
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
        plt.show()

    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
    if (args.reconstruct):
        classifier.fit(raw_faces_train, target_train)
        target_pred = classifier.predict(rec_faces_test)
        #Better Passing n_neighbors = 1
    else:
        classifier.fit(faces_train, target_train)
        target_pred = classifier.predict(faces_test)
        if args.prob:
            targer_prob = classifier.predict_proba(faces_test)
            targer_prob_vec = np.zeros(104)
            for i in range (104):
                j = int(np.floor(i/2))
                targer_prob_vec [i] = targer_prob[i][j]
            avg_targer_prob = np.zeros(52)
            for i in range (52):
                avg_targer_prob[i] = (targer_prob_vec[2*i] + targer_prob_vec[2*i + 1])/2
        #WE CAN FIX THIS BY RESHAPING TARGER_PROB_VEC AND TAKING THE MEAN ON THE RIGHT AXIS
            plt.bar(range(52), avg_targer_prob)
            plt.show()

        #Better n_neighbors = 2
    return draw_conf_mat(args, target_test, target_pred)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
    parser.add_argument("-M", "--reigen", help="Number of eigenvalues in model", type=int)
    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 1)
##USING STANDARD 1 FOR NN ACCURACY
    parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
    parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.22)
    ### best split for lda = 22
    ### best plit for pca = 20
    parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true')
    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')

    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
    parser.add_argument("-pr", "--prob", help="Certainty on each guess", action='store_true')

    args = parser.parse_args()

    raw_faces = genfromtxt(args.data, delimiter=',')
    targets = np.repeat(np.arange(n_faces),n_cases)


    faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)


    if args.reigen:
        for M in range(args.eigen, args,reigen):
            start = timer()
            accuracy[M] = test_model(M, faces_train, faces_test, target_train, target_test, args)
            end = timer()
            print("Run with", M, "eigenvalues completed in ", end-start, "seconds")

    else:
        M = args.eigen
        start = timer()
        test_model(M, faces_train, faces_test, target_train, target_test, args)
        end = timer()

if __name__ == "__main__":
    main()