From 9240c60a514521758581f8f20d64c49b5dd20f13 Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Fri, 19 Oct 2018 18:20:58 +0100 Subject: Lots of new improvements --- train.py | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/train.py b/train.py index 26567e8..b3624e8 100755 --- a/train.py +++ b/train.py @@ -4,7 +4,9 @@ # EE4 Pattern Recognition coursework import matplotlib.pyplot as plt -import sys; +import sys + +from random import randint from sklearn.neighbors import KNeighborsClassifier from sklearn.decomposition import PCA @@ -29,8 +31,12 @@ def normalise_faces(average_face, raw_faces): parser = argparse.ArgumentParser() parser.add_argument("-i", "--data", help="Input CSV file", required=True) parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 8 ) -parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3) -parser.add_argument("-g", "--graph", help="Should we show graphs", action='store_true') +parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 6) +parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0) +parser.add_argument("-c", "--principal", help="Show principal components", action='store_true') +parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0) +parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.25) +parser.add_argument("-2", "--grapheigen", help="Show 2D graph of targets versus principal components", action='store_true') parser.add_argument("-p", "--pca", help="Use PCA", action='store_true') parser.add_argument("-l", "--lda", help="Use LDA", action='store_true') args = parser.parse_args() @@ -43,7 +49,9 @@ M = args.eigen raw_faces = genfromtxt(args.data, delimiter=',') targets = np.repeat(np.arange(10),52) -faces_train, faces_test, target_train, target_test = 
train_test_split(raw_faces, targets, test_size=0.2, random_state=0) +split = 100 + +faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed) # This remove the mean and scales to unit variance @@ -52,25 +60,39 @@ faces_train = sc.fit_transform(faces_train) faces_test = sc.transform(faces_test) explained_variances = () -if args.pca: - # faces_pca containcts the principial components or the M most variant eigenvectors - pca = PCA(n_components=M) - faces_train = pca.fit_transform(faces_train) - faces_test = pca.transform(faces_test) - explained_variances = pca.explained_variance_ratio_ -else: +if args.lda: lda = LinearDiscriminantAnalysis(n_components=M) faces_train = lda.fit_transform(faces_train, target_train) faces_test = lda.transform(faces_test) explained_variances = lda.explained_variance_ratio_ - +else: + # faces_pca contains the principal components or the M most variant eigenvectors + pca = PCA(svd_solver='full', n_components=M) + faces_train = pca.fit_transform(faces_train) + faces_test = pca.transform(faces_test) + explained_variances = pca.explained_variance_ratio_ # Plot the variances (eigenvalues) from the pca object -if args.graph: +if args.faces: + if args.lda: + sys.exit("Can not plot eigenfaces when using LDA") + for i in range(args.faces): + ax = plt.subplot(2, args.faces/2, i + 1) + ax.imshow(pca.components_[i].reshape([46, 56])) + plt.show() + +if args.principal: plt.bar(np.arange(explained_variances.size), explained_variances) plt.ylabel('Varaiance ratio');plt.xlabel('Face Number') plt.show() -classifier = KNeighborsClassifier(n_neighbors=3) +if args.grapheigen: + # Colors for distinct individuals + cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(10)] + pltCol = [cols[int(k)] for k in target_train] + plt.scatter(faces_train[:, 0], faces_train[:, 1], color=pltCol) + plt.show() + +classifier = KNeighborsClassifier(n_neighbors=args.neighbors) 
classifier.fit(faces_train, target_train) target_pred = classifier.predict(faces_test) -- cgit v1.2.3-54-g00ecf