path: root/train.py
author    nunzip <np.scarh@gmail.com>  2018-10-24 20:45:33 +0100
committer Vasil Zlatanov <v@skozl.com>  2018-10-29 15:41:27 +0000
commit    f9f575808e93c5d38a2caa35ac85dc39054e3104 (patch)
tree      043f339361835c90f24dc3f17d3f16bb761f8a48 /train.py
parent    dda17ddbcd73ef0add77f69a6f752cec7e58a583 (diff)
download  vz215_np1915-f9f575808e93c5d38a2caa35ac85dc39054e3104.tar.gz
          vz215_np1915-f9f575808e93c5d38a2caa35ac85dc39054e3104.tar.bz2
          vz215_np1915-f9f575808e93c5d38a2caa35ac85dc39054e3104.zip
Add PCA and NORM (no sklearn)
Try to fix LDA
Diffstat (limited to 'train.py')
-rwxr-xr-x  train.py  106
1 files changed, 57 insertions(+), 49 deletions(-)
diff --git a/train.py b/train.py
index 46f76a4..93139b7 100755
--- a/train.py
+++ b/train.py
@@ -24,12 +24,37 @@ from numpy import linalg as LA
# subtract the average face from each row of the face matrix and scale each face to unit variance
def normalise_faces(average_face, faces):
-    return np.subtract(faces, np.tile(average_face, (faces.shape[0],1)))
+    faces = np.subtract(faces, np.tile(average_face, (faces.shape[0],1)))
+    # scale each face (row) to unit variance across its pixels
+    return np.divide(faces.T, np.std(faces.T, axis=0)).T
+
+# Split data into training and testing sets
+def test_split(n_faces, raw_faces, split, seed):
+    random.seed(seed)
+    n_cases = 52     # images per class
+    n_pixels = 2576  # 46x56 pixels per flattened image
+
+    n_training_faces = int(round(n_cases*(1 - split)))
+    n_test_faces = n_cases - n_training_faces
+    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
+    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
+    target_train = np.repeat(np.arange(n_faces), n_training_faces)
+    target_test = np.repeat(np.arange(n_faces), n_test_faces)
+
+    for x in range(n_faces):
+        samples = random.sample(range(n_cases), n_training_faces)
+        faces_train[x] = [raw_faces[i + n_cases*x] for i in samples]
+        faces_test[x] = [raw_faces[i + n_cases*x] for i in range(n_cases) if i not in samples]
+
+    faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
+    faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
+    return faces_train, faces_test, target_train, target_test
+
# usage: train.py [-h] -i DATA -o MODEL [-m M]
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 140 )
+parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 )
parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
@@ -50,33 +75,8 @@ M = args.eigen
raw_faces = genfromtxt(args.data, delimiter=',')
targets = np.repeat(np.arange(10),52)
-#faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed)
-
-### Splitter
n_faces = 10
-def test_split(n_faces, raw_faces, split, seed):
-    random.seed(seed)
-    n_cases = 52
-    n_pixels = 2576
-
-    raw_faces_split = np.split(raw_faces,n_cases)
-    n_training_faces = int(round(n_cases*(1 - split)))
-    n_test_faces = n_cases - n_training_faces
-    faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
-    faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
-    target_train = np.repeat(np.arange(n_faces), n_training_faces)
-    target_test = np.repeat(np.arange(n_faces), n_test_faces)
-
-    for x in range (n_faces):
-        samples = random.sample(range(n_cases), n_training_faces)
-        faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
-        faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples]
-
-    faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
-    faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
-    return faces_train, faces_test, target_train, target_test
-
faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
# This removes the mean and scales to unit variance
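
As a quick smoke test for the new splitter (a sketch, not part of the commit; it assumes test_split is in scope and fabricates random data in the 10-classes-of-52-images layout train.py expects):

    import random
    import numpy as np

    # Synthetic stand-in for the CSV data: 10 classes x 52 images x 2576 pixels,
    # stacked class by class.
    raw_faces = np.random.rand(10 * 52, 2576)
    faces_train, faces_test, target_train, target_test = test_split(
        n_faces=10, raw_faces=raw_faces, split=0.2, seed=42)

    # With split=0.2, each class keeps round(52 * 0.8) = 42 training images.
    assert faces_train.shape == (10 * 42, 2576)
    assert faces_test.shape == (10 * 10, 2576)
    assert target_train.shape == (10 * 42,)
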
@@ -86,42 +86,50 @@ faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_fac
explained_variances = ()
if args.lda:
-    lda = LinearDiscriminantAnalysis(n_components=M)
-    faces_train = lda.fit_transform(faces_train, target_train)
-    faces_test = lda.transform(faces_test)
-    explained_variances = lda.explained_variance_ratio_
+    average_face = np.mean(faces_train, axis=0)
+    n_cases = 52
+
+#    lda = LinearDiscriminantAnalysis(n_components=M)
+#    faces_train = lda.fit_transform(faces_train, target_train)
+#    faces_test = lda.transform(faces_test)
+#    explained_variances = lda.explained_variance_ratio_
+### FIND MEAN OF EACH CLASS
+    n_training_faces = int(round(n_cases*(1 - args.split)))
+    n_test_faces = n_cases - n_training_faces
+    # one mean face per class; class n is the contiguous block of
+    # n_training_faces rows starting at n*n_training_faces
+    mean_vector = np.zeros((n_faces, faces_train.shape[1]))
+    for n in range(n_faces):
+        mean_vector[n] = np.mean(
+            faces_train[n*n_training_faces:(n+1)*n_training_faces], axis=0)
+    print(mean_vector)
+### SCATTER MATRIX
+    for n in range(n_faces):
+        block = slice(n*n_training_faces, (n+1)*n_training_faces)
+        faces_train[block] = normalise_faces(mean_vector[n], faces_train[block])
else:
# faces_pca contains the principal components, i.e. the M most significant eigenvectors
-### FROM SKLEARN
-# pca = PCA(svd_solver='full', n_components=M)
-# faces_train = pca.fit_transform(faces_train)
-# faces_test = pca.transform(faces_test)
-# explained_variances = pca.explained_variance_ratio_
-
-### FROM OLD CODE
    average_face = np.mean(faces_train, axis=0)
-    plt.imshow(average_face.reshape(46,56))
-    plt.show()
    faces_train = normalise_faces(average_face, faces_train)
    faces_test = normalise_faces(average_face, faces_test)
-    e_vals, e_vecs = LA.eigh(np.dot(faces_train.T, faces_train))
-    print(e_vecs.shape)
-    explained_variances = e_vals[:M]
-    e_vecs =np.divide(e_vecs, LA.norm(e_vecs))
-    faces_train = np.dot(faces_train, e_vecs[:M])
-    faces_test = np.dot(faces_test, e_vecs[:M])
+    # eigh returns eigenvalues in ascending order; flip both to descending
+    e_vals, e_vecs = LA.eigh(np.cov(faces_train.T))
+    e_vals = np.flip(e_vals)
+    e_vecs = np.fliplr(e_vecs).T
+    # project onto the M leading eigenvectors (rows of e_vecs)
+    faces_train = np.dot(faces_train, e_vecs[:M].T)
+    faces_test = np.dot(faces_test, e_vecs[:M].T)
+
# Plot the explained variances (eigenvalues)
if args.faces:
    if args.lda:
        sys.exit("Cannot plot eigenfaces when using LDA")
    for i in range(args.faces):
        ax = plt.subplot(2, args.faces//2, i + 1)
-        ax.imshow(e_vecs[i].reshape([46, 56]), cmap = 'gist_gray')
+        ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap='gist_gray')
    plt.show()
if args.principal:
-    plt.bar(np.arange(explained_variances.size), explained_variances)
-    plt.ylabel('Varaiance ratio');plt.xlabel('Face Number')
+    # express each eigenvalue as a percentage of the total variance
+    e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100)
+    plt.bar(np.arange(M), e_vals[:M])
+    plt.ylabel('Variance ratio (%)')
+    plt.xlabel('Eigenface Number')
    plt.show()
if args.grapheigen:
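
For the new eigendecomposition path above, the flip-to-descending ordering can be sanity-checked against sklearn's PCA, which this commit replaces. This sketch assumes sklearn is still installed; it works on centred data (without the per-image scaling) and compares projections up to the arbitrary per-component sign:

    import numpy as np
    from numpy import linalg as LA
    from sklearn.decomposition import PCA

    X = np.random.rand(40, 8)    # small stand-in for the face matrix
    Xc = X - np.mean(X, axis=0)  # centre on the average row
    M = 3

    # eigh gives ascending eigenvalues; flip columns to descending order
    e_vals, e_vecs = LA.eigh(np.cov(Xc.T))
    e_vecs = np.fliplr(e_vecs).T
    proj = np.dot(Xc, e_vecs[:M].T)

    pca = PCA(n_components=M).fit(Xc)
    # projections agree up to an arbitrary sign per component
    print(np.allclose(np.abs(proj), np.abs(pca.transform(Xc))))
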
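The LDA branch remains unfinished ("Try to fix LDA"): its ### SCATTER MATRIX step stops after the per-class normalisation. One plausible continuation, sketched here with a hypothetical helper lda_scatter and the assumption that class n occupies a contiguous block of n_per_class rows of faces_train, builds the within- and between-class scatter matrices and takes the leading eigenvectors of pinv(S_W) S_B:

    import numpy as np
    from numpy import linalg as LA

    def lda_scatter(faces_train, mean_vector, n_faces, n_per_class):
        # Within-class scatter S_W and between-class scatter S_B.
        d = faces_train.shape[1]
        overall_mean = np.mean(faces_train, axis=0)
        s_w = np.zeros((d, d))
        s_b = np.zeros((d, d))
        for n in range(n_faces):
            block = faces_train[n*n_per_class:(n+1)*n_per_class]
            dev = block - mean_vector[n]
            s_w += np.dot(dev.T, dev)
            diff = (mean_vector[n] - overall_mean).reshape(d, 1)
            s_b += n_per_class * np.dot(diff, diff.T)
        return s_w, s_b

    # Fisher directions are eigenvectors of pinv(S_W) S_B; pinv is needed
    # because S_W is singular when pixels (2576) outnumber training images.
    # s_w, s_b = lda_scatter(faces_train, mean_vector, 10, n_training_faces)
    # e_vals, e_vecs = LA.eig(np.dot(LA.pinv(s_w), s_b))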