From 22bcee7eb7207aeb77c85823d3288a283ac8fc95 Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Mon, 12 Nov 2018 18:58:26 +0000 Subject: Add ensemble bagging --- train.py | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/train.py b/train.py index 7f5b574..349d494 100755 --- a/train.py +++ b/train.py @@ -130,7 +130,7 @@ def test_model(M, faces_train, faces_test, target_train, target_test, args): if args.lda: if args.pca_r or (args.pca and M > n_training_faces - n_faces): - lda = LinearDiscriminantAnalysis(n_components=M, solver='svd') + lda = LinearDiscriminantAnalysis(n_components=M, solver='eigen') else: lda = LinearDiscriminantAnalysis(n_components=M, store_covariance='True') @@ -185,13 +185,15 @@ def test_model(M, faces_train, faces_test, target_train, target_test, args): plt.show() #Better n_neighbors = 2 - return draw_conf_mat(args, target_test, target_pred), distances + accuracy = draw_conf_mat(args, target_test, target_pred) + return target_pred, accuracy, distances def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--data", help="Input CSV file", required=True) parser.add_argument("-m", "--eigen", help="Number of eigenvalues in model", type=int, default = 10 ) parser.add_argument("-M", "--reigen", help="Number of eigenvalues in model", type=int) + parser.add_argument("-e", "--ensemble", help="Number of bagging ensembles to use", type=int) parser.add_argument("-n", "--neighbors", help="How many neighbors to use", type=int, default = 1) ##USING STANDARD 1 FOR NN ACCURACY parser.add_argument("-f", "--faces", help="Show faces", type=int, default = 0) @@ -218,6 +220,15 @@ def main(): faces_train, faces_test, target_train, target_test = test_split(n_faces, raw_faces, args.split, args.seed) + if args.ensemble: + n_training_faces = int(round(n_cases*(1 - args.split))) + faces_train_bagged = np.zeros((args.ensemble, n_faces, n_training_faces, n_pixels)) + for x in range(args.ensemble): + for k in range(n_faces): + samples = random.choices(range(n_training_faces), k=n_training_faces) + faces_train_bagged[x][k] = [faces_train[i+n_training_faces*k] for i in samples] + faces_train_bagged = faces_train_bagged.reshape(args.ensemble, n_faces*n_training_faces, n_pixels) + if args.classifyalt: faces_train = faces_train.reshape(n_faces, int(faces_train.shape[0]/n_faces), n_pixels) target_train = target_train.reshape(n_faces, int(target_train.shape[0]/n_faces)) @@ -225,7 +236,7 @@ def main(): accuracy = np.zeros(n_faces) distances = np.zeros((n_faces, faces_test.shape[0])) for i in range(n_faces): - accuracy[i], distances[i] = test_model(args.eigen, faces_train[i], faces_test, target_train[i], target_test, args) + target_pred, accuracy[i], distances[i] = test_model(args.eigen, faces_train[i], faces_test, target_train[i], target_test, args) target_pred = np.argmin(distances, axis=0) acc_sc = accuracy_score(target_test, target_pred) cm = confusion_matrix(target_test, target_pred) @@ -243,7 +254,7 @@ def main(): rec_error = np.zeros((args.reigen-args.eigen, 2*n_faces)) for M in range(args.eigen, args.reigen): start = timer() - accuracy[M - args.eigen], rec_error[M - args.eigen] = test_model(M, faces_train, faces_test, target_train, target_test, args) + target_pred, accuracy[M - args.eigen], rec_error[M - args.eigen] = test_model(M, faces_train, faces_test, target_train, target_test, args) end = timer() print("Run with", M, "eigenvalues completed in ", end-start, "seconds") print("Memory Used:", psutil.Process(os.getpid()).memory_info().rss) @@ -254,6 +265,30 @@ def main(): plt.ylabel('Recognition Accuracy (%)') plt.grid(True) plt.show() + elif args.ensemble: + accuracy = np.zeros(args.ensemble) + rec_error = np.zeros((args.ensemble, n_faces, faces_test.shape[0])) + target_pred = np.zeros((args.ensemble, target_test.shape[0])) + for i in range(args.ensemble): + target_pred[i], accuracy[i], rec_error[i] = test_model(args.eigen, faces_train_bagged[i], faces_test, target_train, target_test, args) + + target_pred_comb = np.zeros(target_pred.shape[1]) + target_pred = target_pred.astype(int).T + for i in range(target_pred.shape[0]): + target_pred_comb[i] = np.bincount(target_pred[i]).argmax() + print(target_pred_comb) + acc_sc = accuracy_score(target_test, target_pred_comb) + cm = confusion_matrix(target_test, target_pred_comb) + print('Total Accuracy: ', acc_sc) + if (args.conf_mat): + plt.matshow(cm, cmap='Blues') + plt.colorbar() + plt.ylabel('Actual') + plt.xlabel('Predicted') + plt.show() + return + + else: M = args.eigen start = timer() -- cgit v1.2.3-54-g00ecf