#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci

import argparse
import logging
import time

import numpy as np
import matplotlib.pyplot as plt
import scikitplot as skplt
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, RandomTreesEmbedding
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data", help="Data path", default='data.npz')
parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true')
parser.add_argument("-k", "--kmean", help="Perform k-means clustering with --kmean cluster centres", type=int, default=0)
parser.add_argument("-l", "--leaves", help="Maximum leaf nodes for the RandomTreesEmbedding codebook", type=int, default=256)
parser.add_argument("-e", "--estimators", help="Number of estimators to be used", type=int, default=100)
parser.add_argument("-D", "--treedepth", help="Depth of the random-forest classifier trees", type=int, default=5)
parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
parser.add_argument("-t", "--timer", help="Display execution time", action='store_true')
parser.add_argument("-T", "--testmode", help="Run a hyper-parameter grid search instead of a single model", action='store_true')
parser.add_argument("-s", "--seed", help="Seed to use for random_state when creating trees", type=int, default=0)
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(level=logging.DEBUG)


def make_histogram(data, model, args):
    """Quantise each image's descriptors with `model` and return one
    bag-of-visual-words histogram per image.

    `data` has shape (n_classes, n_images_per_class); each entry is expected
    to be a (descriptor_dim, n_descriptors) array.
    """
    if args.kmean:
        hist_size = args.kmean
    else:
        # One bin per leaf of every tree in the embedding.
        hist_size = args.estimators * args.leaves

    histogram = np.zeros((data.shape[0], data.shape[1], hist_size))
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            if args.kmean:
                # Assign each descriptor to its nearest cluster centre.
                histogram[i][j] = np.bincount(model.predict(data[i][j].T), minlength=args.kmean)
            else:
                # Each descriptor falls into one leaf per tree; count leaf
                # occupancy per tree and concatenate into a single histogram.
                leaves = model.apply(data[i][j].T)
                leaves = np.apply_along_axis(np.bincount, axis=0, arr=leaves, minlength=args.leaves)
                histogram[i][j] = leaves.reshape(hist_size)
    return histogram


def run_model(data, train, test, train_part, args):
    """Build the codebook, compute histograms and classify with a random forest."""
    if args.timer:
        start = time.time()

    if args.kmean:
        logging.debug("Computing KMeans with %d keywords", train_part.shape[0])
        kmeans = KMeans(n_clusters=args.kmean, n_init=args.estimators, random_state=args.seed).fit(train_part)
        hist_train = make_histogram(train, kmeans, args)
        hist_test = make_histogram(test, kmeans, args)
    else:
        trees = RandomTreesEmbedding(max_leaf_nodes=args.leaves, n_estimators=args.estimators,
                                     random_state=args.seed).fit(train_part)
        hist_train = make_histogram(train, trees, args)
        hist_test = make_histogram(test, trees, args)

    logging.debug("Generating histograms")
    logging.debug("Keywords shape %s", hist_train.shape)
    logging.debug("Planting trees...")

    clf = RandomForestClassifier(n_estimators=args.estimators, max_depth=args.treedepth, random_state=args.seed)
    # Flatten (class, image) into a single sample axis; the label of each
    # sample is the index of the class it came from.
    clf.fit(
        hist_train.reshape((hist_train.shape[0] * hist_train.shape[1], hist_train.shape[2])),
        np.repeat(np.arange(hist_train.shape[0]), hist_train.shape[1]))

    logging.debug("Random forests created")

    test_pred = clf.predict(hist_test.reshape((hist_test.shape[0] * hist_test.shape[1], hist_test.shape[2])))
    test_label = np.repeat(np.arange(hist_test.shape[0]), hist_test.shape[1])

    if args.timer:
        end = time.time()
        print("Execution time: ", end - start)
    if args.conf_mat:
        skplt.metrics.plot_confusion_matrix(test_label, test_pred, normalize=True)
        plt.show()

    return accuracy_score(test_label, test_pred)


def main():
    data = np.load(args.data)
    train = data['desc_tr']
    test = data['desc_te']
    train_part = data['desc_sel'].T

    logging.debug("Verbose is on")

    if args.testmode:
        # Grid search over the number of classifier trees and their depth.
        cnt = 0
        acc = np.zeros((5, 5))
        for i in range(5):
            args.estimators = (i + 1) * 200
            for j in range(5):
                args.treedepth = j + 1
                cnt += 1
                print("Step ", cnt)
                acc[i][j] = run_model(data, train, test, train_part, args)
                print("Accuracy ", acc[i][j])

        fig, ax = plt.subplots()
        im = ax.imshow(acc)

        # Rows of `acc` index the number of trees, columns index tree depth.
        ax.set_xticks(np.arange(5))
        ax.set_yticks(np.arange(5))
        ax.set_xticklabels(np.arange(1, 6))
        ax.set_yticklabels((np.arange(5) + 1) * 200)
        ax.set_xlabel('Tree depth')
        ax.set_ylabel('Number of trees')

        # Loop over data dimensions and create text annotations.
        for i in range(5):
            for j in range(5):
                ax.text(j, i, "{:.2f}".format(acc[i, j]), ha="center", va="center", color="w")

        ax.set_title("Accuracy varying hyper-parameters")
        fig.tight_layout()
        plt.show()
    else:
        acc = run_model(data, train, test, train_part, args)
        print(acc)


if __name__ == "__main__":
    main()
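# Example invocations (a sketch; the script filename "main.py" and the flag
# values below are illustrative assumptions, only the defaults come from the
# argparse setup above):
#
#   python main.py                  # RandomTreesEmbedding codebook with the default settings
#   python main.py -k 256 -v -t     # k-means codebook with 256 centres, verbose and timed
#   python main.py -T               # grid over 200..1000 trees and depths 1..5, then plot accuracies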