#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci

import argparse
import logging
import time

import numpy as np
import matplotlib.pyplot as plt
import scikitplot as skplt
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser()
# NOTE: --data takes a path string; the original action='store_true' would
# have overwritten the path with True whenever the flag was passed.
parser.add_argument("-d", "--data", help="Data path", default='data.npz')
parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true')
parser.add_argument("-k", "--kmean", help="Perform k-means clustering with KMEAN cluster centers", type=int, default=0)
parser.add_argument("-l", "--leaves", help="Maximum leaf nodes for RF classifier", type=int, default=256)
parser.add_argument("-e", "--estimators", help="Number of estimators to be used", type=int, default=100)
parser.add_argument("-D", "--treedepth", help="Depth of trees", type=int, default=5)
parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
parser.add_argument("-t", "--timer", help="Display execution time", action='store_true')
parser.add_argument("-T", "--testmode", help="Sweep tree depth and plot accuracy", action='store_true')
parser.add_argument("-E", "--embest", help="RandomTreesEmbedding estimators", type=int, default=256)
parser.add_argument("-r", "--randomness", help="max_features used at each split (0 uses the default)", type=int, default=0)
parser.add_argument("-s", "--seed", help="Seed to use for random_state when creating trees", type=int, default=0)
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(level=logging.DEBUG)


def make_histogram(data, model, args):
    """Build a bag-of-visual-words histogram for every image.

    `data` is indexed as data[class][image] and holds descriptor matrices;
    `model` is either a fitted KMeans (histogram of cluster assignments) or
    a fitted RandomTreesEmbedding (per-tree leaf occupancy counts,
    concatenated across trees).
    """
    if args.kmean:
        hist_size = args.kmean
    else:
        hist_size = args.embest * args.leaves

    histogram = np.zeros((data.shape[0], data.shape[1], hist_size))
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            if args.kmean:
                histogram[i][j] = np.bincount(model.predict(data[i][j].T), minlength=args.kmean)
            else:
                # apply() returns node indices, not leaf ranks; a binary tree
                # grown with max_leaf_nodes = leaves/2 has at most leaves - 1
                # nodes, so minlength=args.leaves is a safe bincount length.
                leaves = model.apply(data[i][j].T)
                leaves = np.apply_along_axis(np.bincount, axis=0, arr=leaves, minlength=args.leaves)
                histogram[i][j] = leaves.reshape(hist_size)
    return histogram


def run_model(data, train, test, train_part, args):
    if args.timer:
        start = time.time()

    # Build the visual vocabulary, with k-means or a RandomTreesEmbedding,
    # and encode the train and test images as histograms.
    if args.kmean:
        logging.debug("Computing KMeans with %d keywords", train_part.shape[0])
        kmeans = KMeans(n_clusters=args.kmean, n_init=1, random_state=args.seed).fit(train_part)
        hist_train = make_histogram(train, kmeans, args)
        hist_test = make_histogram(test, kmeans, args)
    else:
        trees = RandomTreesEmbedding(max_leaf_nodes=int(args.leaves/2),
                                     n_estimators=args.embest,
                                     random_state=args.seed).fit(train_part)
        hist_train = make_histogram(train, trees, args)
        hist_test = make_histogram(test, trees, args)

    logging.debug("Generating histograms")
    logging.debug("Keywords shape %s", hist_train.shape)
    logging.debug("Planting trees...")
    if args.randomness:
        clf = RandomForestClassifier(max_features=args.randomness, n_estimators=args.estimators,
                                     max_depth=args.treedepth, random_state=args.seed)
    else:
        clf = RandomForestClassifier(n_estimators=args.estimators, max_depth=args.treedepth,
                                     random_state=args.seed)
    clf.fit(
        hist_train.reshape((hist_train.shape[0]*hist_train.shape[1], hist_train.shape[2])),
        np.repeat(np.arange(hist_train.shape[0]), hist_train.shape[1]))

    logging.debug("Random forests created")
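    # The (n_classes, n_images, hist_size) histograms are flattened to
    # (n_classes*n_images, hist_size) samples; class labels are recovered
    # with np.repeat, e.g. np.repeat(np.arange(3), 2) -> [0, 0, 1, 1, 2, 2].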
    test_pred = clf.predict(hist_test.reshape((hist_test.shape[0]*hist_test.shape[1], hist_test.shape[2])))
    test_label = np.repeat(np.arange(hist_test.shape[0]), hist_test.shape[1])
    train_pred = clf.predict(hist_train.reshape((hist_train.shape[0]*hist_train.shape[1], hist_train.shape[2])))
    train_label = np.repeat(np.arange(hist_train.shape[0]), hist_train.shape[1])

    if args.timer:
        end = time.time()
        print("Execution time:", end - start)
    if args.conf_mat:
        # scikit-plot expects (y_true, y_pred)
        skplt.metrics.plot_confusion_matrix(test_label, test_pred, normalize=True)
        plt.show()
    if args.testmode:
        # main() enables the timer in test mode, so `end` and `start` exist.
        return (accuracy_score(test_label, test_pred),
                accuracy_score(train_label, train_pred),
                end - start)
    else:
        return accuracy_score(test_label, test_pred)


def main():
    data = np.load(args.data)
    train = data['desc_tr']
    test = data['desc_te']
    train_part = data['desc_sel'].T
    print(train_part.shape)
    logging.debug("Verbose is on")

    if args.testmode:
        # Sweep tree depth for two seeds and plot test accuracy against depth.
        args.timer = 1
        a = np.zeros(15)
        dummy = np.zeros((2, 15))
        acc = np.zeros((2, 15))
        for i in range(2):
            for j in range(15):
                args.treedepth = j*2 + 1
                a[j] = args.treedepth
                print("Step: i =", i)
                acc[i][j], dummy[0][j], dummy[1][j] = run_model(data, train, test, train_part, args)
                print("Accuracy:", acc[i][j])
            args.seed = 1
        plt.plot(a, acc[0])
        plt.plot(a, acc[1])
        plt.legend(('Axis aligned', 'Two Pixels Test'), loc='best')
        plt.ylabel('Normalized Classification Accuracy')
        plt.xlabel('Tree Depth')
        plt.show()
    else:
        acc = run_model(data, train, test, train_part, args)
        print(acc)


if __name__ == "__main__":
    main()
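# Example invocations (script name is illustrative; data.npz is expected to
# provide the 'desc_tr', 'desc_te' and 'desc_sel' descriptor arrays):
#   python cw2.py -k 256 -t          # k-means codebook, with timing
#   python cw2.py -E 256 -l 256 -c   # RandomTreesEmbedding codebook + confusion matrix
#   python cw2.py -T                 # tree-depth sweep with accuracy plot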