-rwxr-xr-x  train.py  250
1 file changed, 136 insertions, 114 deletions
@@ -2,6 +2,8 @@
 # Train a model from sample data
 # Author: Vasil Zlatanov, Nunzio Pucci
 # EE4 Pattern Recognition coursework
+#
+# usage: train.py [-h] -i DATA [-m M]
 
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
@@ -14,7 +16,7 @@ from sklearn.decomposition import PCA
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import confusion_matrix  
+from sklearn.metrics import confusion_matrix
 from sklearn.metrics import accuracy_score
 
 import argparse
@@ -23,16 +25,21 @@ import numpy as np
 from numpy import genfromtxt
 from numpy import linalg as LA
+from timeit import default_timer as timer
+
+n_faces = 52
+n_cases = 10
+n_pixels = 2576
+
 # subtract the normal face from each row of the face matrix
 def normalise_faces(average_face, faces):
     faces = np.subtract(faces, np.tile(average_face, (faces.shape[0],1)))
     return np.divide(faces.T, np.std(faces.T, axis=0)).T
+
 # Split data into training and testing sets
 def test_split(n_faces, raw_faces, split, seed):
     random.seed(seed)
-    n_cases = 10
-    n_pixels = 2576
-    
+
     raw_faces_split = np.split(raw_faces,n_cases)
     n_training_faces = int(round(n_cases*(1 - split)))
     n_test_faces = n_cases - n_training_faces
@@ -40,139 +47,154 @@ def test_split(n_faces, raw_faces, split, seed):
     faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
     target_train = np.repeat(np.arange(n_faces), n_training_faces)
     target_test = np.repeat(np.arange(n_faces), n_test_faces)
-    
-    for x in range (n_faces):
+
+    for x in range(n_faces):
         samples = random.sample(range(n_cases), n_training_faces)
         faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
-        faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples]
 
+        faces_test[x] = [raw_faces[i+n_cases*x] for i in range(n_cases) if i not in samples]
     faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
     faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
     return faces_train, faces_test, target_train, target_test
 
-# usage: train.py [-h] -i DATA -o MODEL [-m M]
-parser = argparse.ArgumentParser()
-parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
-parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
-parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
-parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
-parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
-parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.22)
-### best split for lda = 22
-### best plit for pca = 20
-parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true')
-parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
-parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
-parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
-parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
+def draw_conf_mat(target_test, target_pred, args):
+    cm = confusion_matrix(target_test, target_pred)
+    print(cm)
+    if (args.conf_mat):
+        plt.matshow(cm, cmap='Blues')
+        plt.colorbar()
+        plt.ylabel('Actual')
+        plt.xlabel('Predicted')
+        plt.show()
+    print('Accuracy %f' % accuracy_score(target_test, target_pred))
 
-parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
+def test_model(M, faces_train, faces_test, target_train, target_test, args):
+    raw_faces_train = faces_train
 
-args = parser.parse_args()
+    explained_variances = ()
 
-M = args.eigen
+    if args.pca or args.pca_r:
+        # faces_pca contains the principal components, i.e. the M most significant eigenvectors
+        average_face = np.mean(faces_train, axis=0)
+        deviations_tr = np.std(faces_train, axis=0)
+        deviations_tst = np.std(faces_test, axis=0)
+        faces_train = normalise_faces(average_face, faces_train)
+        faces_test = normalise_faces(average_face, faces_test)
+        if (args.pca_r):
+            print('Reduced PCA')
+            e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
+            e_vecs = np.dot(faces_train.T, e_vecs)
+            e_vecs = e_vecs/LA.norm(e_vecs, axis=0)
+        else:
+            print('Standard PCA')
+            e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
+            # e_vecs = normalise_faces(np.mean(e_vecs,axis=0), e_vecs)
 
-raw_faces = genfromtxt(args.data, delimiter=',')
-targets = np.repeat(np.arange(52),10)
+        e_vals = np.flip(e_vals)[:M]
+        e_vecs = np.fliplr(e_vecs).T[:M]
+        deviations_tr = np.flip(deviations_tr)
+        deviations_tst = np.flip(deviations_tst)
 
-n_faces = 52
+        faces_train = np.dot(faces_train, e_vecs.T)
+        faces_test = np.dot(faces_test, e_vecs.T)
 
-faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
+        if (args.reconstruct):
+            rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
+            rec_faces_test = np.add(average_face, np.dot(faces_test, e_vecs) * deviations_tst)
+            rec_error = LA.norm(np.subtract(raw_faces_train[args.reconstruct], rec_vec))
+            ar = plt.subplot(2, 1, 1)
+            ar.imshow(rec_vec.reshape([46,56]).T, cmap = 'gist_gray')
+            ar = plt.subplot(2, 1, 2)
+            ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap = 'gist_gray')
+            plt.show()
 
-# This remove the mean and scales to unit variance
-sc = StandardScaler()
-#faces_train = sc.fit_transform(faces_train)
-#faces_test = sc.transform(faces_test)
-raw_faces_train = faces_train
+    if args.lda or (args.pca and args.lda):
+        lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen')
+        faces_train = lda.fit_transform(faces_train, target_train)
+        faces_test = lda.transform(faces_test)
+        class_means = lda.means_
+        e_vals = lda.explained_variance_ratio_
 
-explained_variances = ()
+    if args.faces:
+        if args.lda:
+            for i in range(10):
+                ax = plt.subplot(2, 5, i + 1)
+                ax.imshow(class_means[i].reshape([46,56]).T)
+        else:
+            for i in range(args.faces):
+                ax = plt.subplot(2, args.faces//2, i + 1)
+                ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray')
+        plt.show()
+
+    if args.principal:
+        e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
+        plt.bar(np.arange(M), e_vals[:M])
+        plt.ylabel('Variance ratio (%)')
+        plt.xlabel('Eigenface Number')
+        plt.show()
 
-if args.pca or args.pca_r:
-    # faces_pca containcts the principial components or the M most variant eigenvectors
-    average_face = np.mean(faces_train, axis=0) 
-    deviations_tr =  np.std(faces_train, axis=0)
-    deviations_tst = np.std(faces_train, axis=0)
-    faces_train = normalise_faces(average_face, faces_train)
-    faces_test = normalise_faces(average_face, faces_test)
-    if (args.pca_r):
-        print('Reduced PCA')
-        e_vals, e_vecs = LA.eigh(np.dot(faces_train, faces_train.T))
-        e_vecs = np.dot(faces_train.T, e_vecs) 
-        e_vecs = e_vecs/LA.norm(e_vecs, axis = 0)
+    if args.grapheigen:
+        # Colors for distinct individuals
+        cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
+        pltCol = [cols[int(k)] for k in target_train]
+        fig = plt.figure()
+        ax = fig.add_subplot(111, projection='3d')
+        ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
+        plt.show()
+
+    classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
+    if (args.reconstruct):
+        classifier.fit(raw_faces_train, target_train)
+        target_pred = classifier.predict(rec_faces_test)
+        #Better Passing n_neighbors = 1
     else:
-        print('Standard PCA')
-        e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
-        # e_vecs = normalise_faces(np.mean(e_vecs,axis=0), e_vecs)
-        # e_vecs = sc.fit_transform(e_vecs)
+        classifier.fit(faces_train, target_train)
+        target_pred = classifier.predict(faces_test)
+        #Better n_neighbors = 2
+    draw_conf_mat(target_test, target_pred, args)
 
-    e_vals = np.flip(e_vals)[:M]
-    e_vecs = np.fliplr(e_vecs).T[:M]
-    deviations_tr = np.flip(deviations_tr)
-    deviations_tst = np.flip(deviations_tst)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--data", help="Input CSV file", required=True)
+    parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
+    parser.add_argument("-M", "--reigen", help="Sweep the number of eigenvalues from -m up to this value", type=int)
+    parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
+    parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
+    parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
+    parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
+    parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.22)
+    ### best split for lda = 22
+    ### best split for pca = 20
+    parser.add_argument("-2", "--grapheigen", help="Show 3D graph of targets versus principal components", action='store_true')
+    parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
+    parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
+    parser.add_argument("-r", "--reconstruct", help="Use PCA reconstruction, specify face NR", type=int, default=0)
+    parser.add_argument("-cm", "--conf_mat", help="Show visual confusion matrix", action='store_true')
 
-    faces_train = np.dot(faces_train, e_vecs.T)
-    faces_test = np.dot(faces_test, e_vecs.T)
+    parser.add_argument("-q", "--pca_r", help="Use Reduced PCA", action='store_true')
 
-    if (args.reconstruct): 
-        rec_vec = np.add(average_face, np.dot(faces_train[args.reconstruct], e_vecs) * deviations_tr)
-        rec_faces_test = np.add(average_face, np.dot(faces_test, e_vecs) * deviations_tst)
-        rec_error = LA.norm(np.subtract(raw_faces_train[args.reconstruct], rec_vec))
-        ar = plt.subplot(2, 1, 1)
-        ar.imshow(rec_vec.reshape([46,56]).T, cmap = 'gist_gray')
-        ar = plt.subplot(2, 1, 2)
-        ar.imshow(raw_faces_train[args.reconstruct].reshape([46,56]).T, cmap = 'gist_gray')
-        plt.show()
+    args = parser.parse_args()
 
-if args.lda or (args.pca and args.lda):
-    lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen')
-    faces_train = lda.fit_transform(faces_train, target_train)
-    faces_test = lda.transform(faces_test)
-    class_means = lda.means_
-    e_vals = lda.explained_variance_ratio_
+    raw_faces = genfromtxt(args.data, delimiter=',')
+    targets = np.repeat(np.arange(n_faces),n_cases)
 
-if args.faces:
-    if args.lda:
-        for i in range (10):
-            ax = plt.subplot(2, 5, i + 1)
-            ax.imshow(class_means[i].reshape([46,56]).T)
-    else:
-        for i in range(args.faces):
-            ax = plt.subplot(2, args.faces/2, i + 1)
-            ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray')
-    plt.show()
-if args.principal:
-    e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
-    plt.bar(np.arange(M), e_vals[:M])
-    plt.ylabel('Varaiance ratio (%)');plt.xlabel('Eigenface Number')
-    plt.show()
+    faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
 
-if args.grapheigen:
-    # Colors for distinct individuals
-    cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(52)]
-    pltCol = [cols[int(k)] for k in target_train]
-    fig = plt.figure()
-    ax = fig.add_subplot(111, projection='3d')
-    ax.scatter(faces_train[:, 0], faces_train[:, 1], faces_train[:, 2], marker='o', color=pltCol)
-    plt.show()
-classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
-if (args.reconstruct):
-    classifier.fit(raw_faces_train, target_train)
-    target_pred = classifier.predict(rec_faces_test)
-    #Better Passing n_neighbors = 1
-else:
-    classifier.fit(faces_train, target_train)
-    target_pred = classifier.predict(faces_test)
-    #Better n_neighbors = 2
+    if args.reigen:
+        for M in range(args.eigen, args.reigen):
+            start = timer()
+            test_model(M, faces_train, faces_test, target_train, target_test, args)
+            end = timer()
+            print("Run with", M, "eigenvalues completed in %.2f" % (end - start), "seconds")
+    else:
+        M = args.eigen
+        start = timer()
+        test_model(M, faces_train, faces_test, target_train, target_test, args)
+        end = timer()
+        print("Run with", M, "eigenvalues completed in %.2f" % (end - start), "seconds")
 
-cm = confusion_matrix(target_test, target_pred)  
-print(cm)  
-if (args.conf_mat):
-    plt.matshow(cm, cmap='Blues')
-    plt.colorbar()
-    plt.ylabel('Actual')
-    plt.xlabel('Predicted')
-    plt.show()
-print('Accuracy %fl' % accuracy_score(target_test, target_pred)) 
+-cm = confusion_matrix(target_test, target_pred)
+-print(cm)
+-if (args.conf_mat):
+-    plt.matshow(cm, cmap='Blues')
+-    plt.colorbar()
+-    plt.ylabel('Actual')
+-    plt.xlabel('Predicted')
+-    plt.show()
+-print('Accuracy %fl' % accuracy_score(target_test, target_pred))
+
+if __name__ == "__main__":
+    main()
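A note on the `--pca_r` branch added above: it uses the classic low-dimensional eigenface trick. With N training images of D = 2576 pixels and N much smaller than D, the small N x N matrix A A^T has the same nonzero eigenvalues as the D x D scatter matrix A^T A, and every eigenvector u of A A^T maps to an eigenvector A^T u of A^T A, because (A^T A)(A^T u) = A^T (A A^T) u = lambda (A^T u). The self-contained sketch below demonstrates that equivalence; the dimensions and variable names are illustrative only and are not part of train.py:

import numpy as np
from numpy import linalg as LA

rng = np.random.default_rng(0)
A = rng.standard_normal((10, 200))   # N=10 samples, D=200 features, N << D
A = A - A.mean(axis=0)               # subtract the average sample, as normalise_faces does

# Reduced problem: eigendecompose the small N x N Gram matrix ...
lam, u = LA.eigh(np.dot(A, A.T))     # 10 x 10 instead of 200 x 200
v = np.dot(A.T, u)                   # ... then map eigenvectors back to D dimensions
v = v / LA.norm(v, axis=0)           # renormalise each mapped column

# Direct D x D problem, for comparison (what the 'Standard PCA' branch computes)
lam_full, v_full = LA.eigh(np.dot(A.T, A))

# A centred 10-row matrix has rank 9, so compare the nine nonzero eigenvalues
print(np.allclose(lam[1:], lam_full[-9:]))    # True: identical nonzero spectrum
print(abs(np.dot(v[:, -1], v_full[:, -1])))   # ~1.0: same leading direction, up to sign

For the coursework data this replaces a 2576 x 2576 eigenproblem with one the size of the training set (at most 520 x 520). A plausible invocation of the new interface, assuming the input CSV is named faces.csv, would be: python train.py -i faces.csv -q -m 50 -cm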
