aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xtrain.py44
1 files changed, 36 insertions, 8 deletions
diff --git a/train.py b/train.py
index b3624e8..cdf3f5e 100755
--- a/train.py
+++ b/train.py
@@ -5,6 +5,7 @@
import matplotlib.pyplot as plt
import sys
+import random
from random import randint
@@ -26,16 +27,17 @@ from numpy import genfromtxt
def normalise_faces(average_face, raw_faces):
return np.subtract(raw_faces, np.tile(average_face, (raw_faces.shape[1],1)).T)
-
# usage: train.py [-h] -i DATA -o MODEL [-m M]
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--data", help="Input CSV file", required=True)
-parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 8 )
-parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 6)
+parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 140 )
+parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3)
parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0)
parser.add_argument("-c", "--principal", help="Show principal components", action='store_true')
parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0)
-parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.25)
+parser.add_argument("-t", "--split", help="Fraction of data to use for testing", type=float, default=0.22)
+### best split for lda = 22
+### best split for pca = 20
parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true')
parser.add_argument("-p", "--pca", help="Use PCA", action='store_true')
parser.add_argument("-l", "--lda", help="Use LDA", action='store_true')
@@ -49,10 +51,36 @@ M = args.eigen
raw_faces = genfromtxt(args.data, delimiter=',')
targets = np.repeat(np.arange(10),52)
-split = 100
-
-faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed)
-
+#faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed)
+
+### Splitter
+n_faces = 10
+
+def test_split(n_faces, raw_faces, split, seed):
+ random.seed(seed)
+ n_cases = 52
+ n_pixels = 2576
+
+ print(raw_faces.shape)
+
+ raw_faces_split = np.split(raw_faces,n_cases)
+ n_training_faces = int(round(n_cases*(1 - split)))
+ n_test_faces = n_cases - n_training_faces
+ faces_train = np.zeros((n_faces, n_training_faces, n_pixels))
+ faces_test = np.zeros((n_faces, n_test_faces, n_pixels))
+ target_train = np.repeat(np.arange(n_faces), n_training_faces)
+ target_test = np.repeat(np.arange(n_faces), n_test_faces)
+
+ for x in range (n_faces):
+ samples = random.sample(range(n_cases), n_training_faces)
+ faces_train[x] = [raw_faces[i+n_cases*x] for i in samples]
+ faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples]
+
+ faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels)
+ faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels)
+ return faces_train, faces_test, target_train, target_test
+
+faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed)
# This remove the mean and scales to unit variance
sc = StandardScaler()