1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#!/usr/bin/env python
# Train a model from sample data
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
import matplotlib.pyplot as plt
import sys
import random
from random import randint
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import argparse
import numpy as np
from numpy import genfromtxt
# from numpy import linalg as LA
def normalise_faces(average_face, raw_faces):
    """Centre a face matrix by subtracting the average face from every column.

    Args:
        average_face: the mean face, holding one value per row of
            ``raw_faces``. A flat vector, a row vector and a column vector
            are all accepted.
        raw_faces: 2-D array with one face per column.

    Returns:
        A new array shaped like ``raw_faces``; the inputs are not modified.
    """
    # A column vector broadcasts across every column of raw_faces, so the mean
    # never has to be materialised as a full-size matrix with np.tile.
    mean_column = np.asarray(average_face).reshape(-1, 1)
    return np.subtract(raw_faces, mean_column)
# usage: train.py [-h] -i DATA [-m EIGEN] [-n NEIGHBORS] [-f FACES] [-c]
#                 [-s SEED] [-t SPLIT] [-2] [-p] [-l]
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default=140)
parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default=3)
parser.add_argument("-f", "--faces", help="Show faces", type=int, default=0)
parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.22)
### best split for lda = 22
### best split for pca = 20
parser.add_argument("-2", "--grapheigen", help="Show 2D graph of targets versus principal components", action='store_true')
parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
args = parser.parse_args()

if args.pca and args.lda:
    sys.exit("Flags -p and -l are mutually exclusive")

# A test fraction of 0 or 1 (or outside that range) leaves the training or the
# test set empty, which only surfaces later as an obscure array-shape error.
if not 0.0 < args.split < 1.0:
    parser.error("--split must be a fraction strictly between 0 and 1")

# Number of components kept by the PCA/LDA projection.
M = args.eigen

# One image per CSV row.
raw_faces = genfromtxt(args.data, delimiter=',')

# Labels for a 10-identity x 52-image layout; only needed by the alternative
# scikit-learn splitter kept below for reference.
targets = np.repeat(np.arange(10), 52)
#faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed)

### Splitter
# Number of distinct identities handed to the splitter.
n_faces = 10
def test_split(n_faces, raw_faces, split, seed, n_cases=52):
    """Split a face matrix into per-identity random training and test sets.

    ``raw_faces`` holds one image per row, grouped by identity: rows
    ``[k * n_cases, (k + 1) * n_cases)`` belong to identity ``k``. The same
    number of images is drawn at random from every identity for training and
    the remaining ones are kept for testing, so both sets stay balanced.

    Calling this seeds the global ``random`` generator with ``seed``.

    Args:
        n_faces: number of identities to take from the start of ``raw_faces``.
        raw_faces: 2-D array-like with at least ``n_faces * n_cases`` rows;
            the number of columns (pixels per image) is read from the data.
        split: fraction of each identity's images reserved for testing.
        seed: seed for the ``random`` module; the same seed reproduces the
            same split.
        n_cases: number of images stored per identity.

    Returns:
        ``(faces_train, faces_test, target_train, target_test)``. The face
        arrays are float copies with one image per row, ordered by identity.
        Within an identity the training rows follow the random draw order and
        the test rows keep their original order. The target arrays hold the
        identity index of each row.

    Raises:
        ValueError: if ``raw_faces`` is not 2-D, has too few rows, or
            ``split`` leaves a negative number of training or test images.
    """
    random.seed(seed)
    raw_faces = np.asarray(raw_faces, dtype=float)
    # Diagnostic output kept from the original script.
    print(raw_faces.shape)

    if raw_faces.ndim != 2:
        raise ValueError("raw_faces must be a 2-D array with one image per row")
    if raw_faces.shape[0] < n_faces * n_cases:
        raise ValueError(
            "raw_faces has %d rows but %d identities x %d images are required"
            % (raw_faces.shape[0], n_faces, n_cases))

    n_training_faces = int(round(n_cases * (1 - split)))
    if not 0 <= n_training_faces <= n_cases:
        raise ValueError("split must be a fraction between 0 and 1")
    n_test_faces = n_cases - n_training_faces

    train_rows = []
    test_rows = []
    for person in range(n_faces):
        first_row = person * n_cases
        picked = random.sample(range(n_cases), n_training_faces)
        # Set lookup keeps the complement computation linear in n_cases.
        picked_lookup = set(picked)
        train_rows.extend(first_row + i for i in picked)
        test_rows.extend(first_row + i for i in range(n_cases)
                         if i not in picked_lookup)

    # Explicit integer index arrays so that an empty selection still yields a
    # (0, n_pixels) result; fancy indexing always returns a copy.
    faces_train = raw_faces[np.array(train_rows, dtype=int)]
    faces_test = raw_faces[np.array(test_rows, dtype=int)]
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)
    return faces_train, faces_test, target_train, target_test


# The public name starts with "test_", so stop pytest from collecting this
# helper as a test case if the module is ever star-imported into a test file.
test_split.__test__ = False
faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)

# Remove the mean and scale to unit variance. The scaler is fitted on the
# training set only and then applied unchanged to the test set.
sc = StandardScaler()
faces_train = sc.fit_transform(faces_train)
faces_test = sc.transform(faces_test)

if args.lda:
    # LDA can produce at most (number of classes - 1) components; asking for
    # more (e.g. the default M of 140) is rejected by recent scikit-learn.
    max_lda_components = len(np.unique(target_train)) - 1
    lda = LinearDiscriminantAnalysis(n_components=min(M, max_lda_components))
    faces_train = lda.fit_transform(faces_train, target_train)
    faces_test = lda.transform(faces_test)
    explained_variances = lda.explained_variance_ratio_
else:
    # Project onto the M principal components fitted on the training set.
    pca = PCA(svd_solver='full', n_components=M)
    faces_train = pca.fit_transform(faces_train)
    faces_test = pca.transform(faces_test)
    explained_variances = pca.explained_variance_ratio_

# Show the first args.faces principal components as images on a two-row grid.
if args.faces:
    if args.lda:
        sys.exit("Can not plot eigenfaces when using LDA")
    # subplot() needs integer grid sizes; round up so an odd count still fits.
    n_cols = (args.faces + 1) // 2
    for i in range(args.faces):
        ax = plt.subplot(2, n_cols, i + 1)
        # Each component is reshaped to 46 x 56 (= 2576 values) for display.
        ax.imshow(pca.components_[i].reshape([46, 56]))
    plt.show()

# Bar chart of the explained variance ratio of every kept component.
if args.principal:
    plt.bar(np.arange(explained_variances.size), explained_variances)
    plt.ylabel('Variance ratio')
    plt.xlabel('Face Number')
    plt.show()

# Scatter the training samples on the first two projected axes.
if args.grapheigen:
    # One random colour per individual
    cols = ['#{:06x}'.format(randint(0, 0xffffff)) for i in range(n_faces)]
    pltCol = [cols[int(k)] for k in target_train]
    plt.scatter(faces_train[:, 0], faces_train[:, 1], color=pltCol)
    plt.show()

# Classify the projected test faces with k-nearest neighbours and report.
classifier = KNeighborsClassifier(n_neighbors=args.neighbors)
classifier.fit(faces_train, target_train)
target_pred = classifier.predict(faces_test)

cm = confusion_matrix(target_test, target_pred)
print(cm)
print('Accuracy %f' % accuracy_score(target_test, target_pred))
|