From 5b3291144270dc4736ce0c1fd5dda52ba6808d22 Mon Sep 17 00:00:00 2001 From: nunzip Date: Tue, 23 Oct 2018 02:21:17 +0100 Subject: Rewrite split for train.py Changes to default settings Eliminated redundancies Imported random --- train.py | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/train.py b/train.py index b3624e8..cdf3f5e 100755 --- a/train.py +++ b/train.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import sys +import random from random import randint @@ -26,16 +27,17 @@ from numpy import genfromtxt def normalise_faces(average_face, raw_faces): return np.subtract(raw_faces, np.tile(average_face, (raw_faces.shape[1],1)).T) - # usage: train.py [-h] -i DATA -o MODEL [-m M] parser = argparse.ArgumentParser() parser.add_argument("-i", "--data", help="Input CSV file", required=True) -parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 8 ) -parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 6) +parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 140 ) +parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3) parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0) parser.add_argument("-c", "--principal", help="Show principal components", action='store_true') parser.add_argument("-s", "--seed", help="Seed to use", type=int, default=0) -parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.25) +parser.add_argument("-t", "--split", help="Fractoin of data to use for testing", type=float, default=0.22) +### best split for lda = 22 +### best split for pca = 20 parser.add_argument("-2", "--grapheigen", help="Swow 2D graph of targets versus principal components", action='store_true') parser.add_argument("-p", "--pca", help="Use PCA", action='store_true') 
def test_split(n_faces, raw_faces, split, seed, n_cases=52):
    """Split a face matrix into per-identity train and test sets.

    ``raw_faces`` holds one image per row, grouped by identity: rows
    ``x * n_cases`` up to ``(x + 1) * n_cases - 1`` belong to identity ``x``.
    For every identity the same number of images is drawn at random for
    training; the images that were not drawn form the test set, kept in
    their original row order.

    Args:
        n_faces: Number of identities (classes) to take from ``raw_faces``.
        raw_faces: 2-D array-like with at least ``n_faces * n_cases`` rows.
            The number of columns (pixels per image) is read from the data.
        split: Fraction of each identity's images reserved for testing,
            between 0 and 1 inclusive.
        seed: Value passed to ``random.seed``. The module-level generator
            is seeded, so any later ``random`` call is affected as well.
        n_cases: Number of images stored per identity (default 52).

    Returns:
        A tuple ``(faces_train, faces_test, target_train, target_test)``.
        The two face arrays are float arrays with one image per row,
        ordered by identity; the two target arrays hold the identity index
        of the corresponding row.

    Raises:
        ValueError: If ``raw_faces`` is not 2-D, has fewer than
            ``n_faces * n_cases`` rows, or ``split`` is outside [0, 1].
    """
    faces = np.asarray(raw_faces, dtype=float)
    if faces.ndim != 2:
        raise ValueError(
            "raw_faces must be 2-D, got %d dimension(s)" % faces.ndim
        )
    if faces.shape[0] < n_faces * n_cases:
        raise ValueError(
            "raw_faces has %d rows, need at least %d (%d identities x %d images)"
            % (faces.shape[0], n_faces * n_cases, n_faces, n_cases)
        )
    if not 0 <= split <= 1:
        raise ValueError("split must be within [0, 1], got %r" % (split,))

    random.seed(seed)
    n_training_faces = int(round(n_cases * (1 - split)))
    n_test_faces = n_cases - n_training_faces

    train_rows = []
    test_rows = []
    for x in range(n_faces):
        offset = x * n_cases
        # One draw per identity, in identity order, so a given seed always
        # selects the same images.
        samples = random.sample(range(n_cases), n_training_faces)
        chosen = set(samples)  # constant-time membership for the complement
        train_rows.extend(offset + i for i in samples)
        test_rows.extend(offset + i for i in range(n_cases) if i not in chosen)

    # Explicit integer dtype keeps the indexing valid when a set is empty.
    faces_train = faces[np.asarray(train_rows, dtype=np.intp)]
    faces_test = faces[np.asarray(test_rows, dtype=np.intp)]
    target_train = np.repeat(np.arange(n_faces), n_training_faces)
    target_test = np.repeat(np.arange(n_faces), n_test_faces)
    return faces_train, faces_test, target_train, target_test


# The name starts with "test_" but this is a data-splitting helper, not a
# unit test; mark it so pytest does not try to collect it.
test_split.__test__ = False