diff options
-rwxr-xr-x | train.py | 106 |
1 files changed, 57 insertions, 49 deletions
@@ -24,12 +24,37 @@ from numpy import linalg as LA # subtract the normal face from each row of the face matrix def normalise_faces(average_face, faces): - return np.subtract(faces, np.tile(average_face, (faces.shape[0],1))) + faces = np.subtract(faces, np.tile(average_face, (faces.shape[0],1))) + return np.divide(faces.T, np.std(faces.T, axis=0)).T + +# Split data into training and testing sets +def test_split(n_faces, raw_faces, split, seed): + random.seed(seed) + n_cases = 52 + n_pixels = 2576 + + raw_faces_split = np.split(raw_faces,n_cases) + n_training_faces = int(round(n_cases*(1 - split))) + n_test_faces = n_cases - n_training_faces + faces_train = np.zeros((n_faces, n_training_faces, n_pixels)) + faces_test = np.zeros((n_faces, n_test_faces, n_pixels)) + target_train = np.repeat(np.arange(n_faces), n_training_faces) + target_test = np.repeat(np.arange(n_faces), n_test_faces) + + for x in range (n_faces): + samples = random.sample(range(n_cases), n_training_faces) + faces_train[x] = [raw_faces[i+n_cases*x] for i in samples] + faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples] + + faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels) + faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels) + return faces_train, faces_test, target_train, target_test + # usage: train.py [-h] -i DATA -o MODEL [-m M] parser = argparse.ArgumentParser() parser.add_argument("-i", "--data", help="Input CSV file", required=True) -parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 140 ) +parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 ) parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 3) parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0) parser.add_argument("-c", "--principal", help="Show principal components", action='store_true') @@ -50,33 +75,8 @@ M = args.eigen raw_faces = genfromtxt(args.data, delimiter=',') targets = np.repeat(np.arange(10),52) -#faces_train, faces_test, target_train, target_test = train_test_split(raw_faces, targets, test_size=args.split, random_state=args.seed) - -### Splitter n_faces = 10 -def test_split(n_faces, raw_faces, split, seed): - random.seed(seed) - n_cases = 52 - n_pixels = 2576 - - raw_faces_split = np.split(raw_faces,n_cases) - n_training_faces = int(round(n_cases*(1 - split))) - n_test_faces = n_cases - n_training_faces - faces_train = np.zeros((n_faces, n_training_faces, n_pixels)) - faces_test = np.zeros((n_faces, n_test_faces, n_pixels)) - target_train = np.repeat(np.arange(n_faces), n_training_faces) - target_test = np.repeat(np.arange(n_faces), n_test_faces) - - for x in range (n_faces): - samples = random.sample(range(n_cases), n_training_faces) - faces_train[x] = [raw_faces[i+n_cases*x] for i in samples] - faces_test[x] = [raw_faces[i+n_cases*x] for i in range (n_cases) if i not in samples] - - faces_train = faces_train.reshape(n_faces*n_training_faces, n_pixels) - faces_test = faces_test.reshape(n_faces*n_test_faces, n_pixels) - return faces_train, faces_test, target_train, target_test - faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed) # This remove the mean and scales to unit variance @@ -86,42 +86,50 @@ faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_fac explained_variances = () if args.lda: - lda = LinearDiscriminantAnalysis(n_components=M) - faces_train = lda.fit_transform(faces_train, target_train) - faces_test = lda.transform(faces_test) - explained_variances = lda.explained_variance_ratio_ + average_face = np.mean(faces_train, axis=0) + n_cases = 52 + +# lda = LinearDiscriminantAnalysis(n_components=M) +# faces_train = lda.fit_transform(faces_train, target_train) +# faces_test = lda.transform(faces_test) +# explained_variances = lda.explained_variance_ratio_ +### FIND MEAN OF EACH CLASS + n_training_faces = int(round(n_cases*(1 - args.split))) + n_test_faces = n_cases - n_training_faces + mean_vector = np.zeros(10) + for n in range (10): + mean_acc = 0 + for x in range (int(np.divide(n_training_faces,10))): + mean_acc = np.add(mean_acc, np.mean(faces_train[x + n*10], axis=0)) + mean_vector [n] = np.divide(mean_acc, np.divide(n_training_faces,10)) + print (mean_vector) +### SCATTER MATRIX + for n in range (10) + faces_train = normalise_faces(mean_vector[n], faces_train[ else: # faces_pca containcts the principial components or the M most variant eigenvectors -### FROM SKLEARN -# pca = PCA(svd_solver='full', n_components=M) -# faces_train = pca.fit_transform(faces_train) -# faces_test = pca.transform(faces_test) -# explained_variances = pca.explained_variance_ratio_ - -### FROM OLD CODE average_face = np.mean(faces_train, axis=0) - plt.imshow(average_face.reshape(46,56)) - plt.show() faces_train = normalise_faces(average_face, faces_train) faces_test = normalise_faces(average_face, faces_test) - e_vals, e_vecs = LA.eigh(np.dot(faces_train.T, faces_train)) - print(e_vecs.shape) - explained_variances = e_vals[:M] - e_vecs =np.divide(e_vecs, LA.norm(e_vecs)) - faces_train = np.dot(faces_train, e_vecs[:M]) - faces_test = np.dot(faces_test, e_vecs[:M]) + e_vals, e_vecs = LA.eigh(np.cov(faces_train.T)) + e_vals = np.flip(e_vals) + e_vecs = np.fliplr(e_vecs).T + faces_train = np.dot(faces_train, e_vecs[:M].T) + faces_test = np.dot(faces_test, e_vecs[:M].T) + # Plot the variances (eigenvalues) from the pca object if args.faces: if args.lda: sys.exit("Can not plot eigenfaces when using LDA") for i in range(args.faces): ax = plt.subplot(2, args.faces/2, i + 1) - ax.imshow(e_vecs[i].reshape([46, 56]), cmap = 'gist_gray') + ax.imshow(e_vecs[i].reshape([46, 56]).T, cmap = 'gist_gray') plt.show() if args.principal: - plt.bar(np.arange(explained_variances.size), explained_variances) - plt.ylabel('Varaiance ratio');plt.xlabel('Face Number') + e_vals = np.multiply(np.divide(e_vals, np.sum(e_vals)), 100) + plt.bar(np.arange(M), e_vals[:M]) + plt.ylabel('Varaiance ratio (%)');plt.xlabel('Eigenface Number') plt.show() if args.grapheigen: |