#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci

import argparse
import logging

import numpy as np
import matplotlib.pyplot as plt
import scikitplot as skplt

from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data", help="Data path", default='data.npz')
parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true')
parser.add_argument("-k", "--kmean", help="Perform k-means clustering with --kmean cluster centres", type=int, default=0)
parser.add_argument("-l", "--leaves", help="Maximum leaf nodes for RF classifier", type=int, default=256)
parser.add_argument("-e", "--estimators", help="Number of estimators to be used", type=int, default=100)
parser.add_argument("-v", "--verbose", help="Use verbose output", action='store_true')
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(level=logging.DEBUG)

data = np.load(args.data)
train = data['desc_tr']
test = data['desc_te']
train_part = data['desc_sel'].T

logging.debug("Verbose is on")

# Build the visual vocabulary: either k-means cluster centres or a random-forest embedding.
if args.kmean:
    logging.debug("Computing KMeans on %d descriptors", train_part.shape[0])
    kmeans = KMeans(n_clusters=args.kmean, n_init=args.estimators, random_state=0).fit(train_part)
else:
    trees = RandomTreesEmbedding(max_leaf_nodes=args.leaves, n_estimators=args.estimators,
                                 random_state=0).fit(train_part)

logging.debug("Generating histograms")


def make_histogram(data):
    """Build a bag-of-visual-words histogram for every image in `data`."""
    if args.kmean:
        # One bin per cluster centre.
        hist_size = args.kmean
    else:
        # `apply` returns tree node indices, which for a binary tree with at most
        # `leaves` leaf nodes can range up to 2*leaves - 2, so allow 2*leaves - 1
        # bins per tree to keep every per-tree histogram the same length.
        hist_size = args.estimators * (2 * args.leaves - 1)

    histogram = np.zeros((data.shape[0], data.shape[1], hist_size))
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            if args.kmean:
                histogram[i][j] = np.bincount(kmeans.predict(data[i][j].T), minlength=args.kmean)
            else:
                # Count how many descriptors fall into each leaf of each tree,
                # then flatten into one long histogram.
                leaves = trees.apply(data[i][j].T)
                leaves = np.apply_along_axis(np.bincount, axis=0, arr=leaves,
                                             minlength=2 * args.leaves - 1)
                histogram[i][j] = leaves.reshape(hist_size)
    return histogram


hist_train = make_histogram(train)
hist_test = make_histogram(test)

logging.debug("Keywords shape %s", hist_train.shape)
logging.debug("Planting trees...")

# Train the random-forest classifier on per-image histograms; class labels are the
# first axis index of the histogram array.
clf = RandomForestClassifier()
clf.fit(
    hist_train.reshape((hist_train.shape[0] * hist_train.shape[1], hist_train.shape[2])),
    np.repeat(np.arange(hist_train.shape[0]), hist_train.shape[1]))

logging.debug("Random forests created")

test_pred = clf.predict(hist_test.reshape((hist_test.shape[0] * hist_test.shape[1], hist_test.shape[2])))
test_label = np.repeat(np.arange(hist_test.shape[0]), hist_test.shape[1])

print(accuracy_score(test_label, test_pred))

if args.conf_mat:
    skplt.metrics.plot_confusion_matrix(test_label, test_pred, normalize=True)
    plt.show()
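
# Example invocations (a sketch only: the script file name is assumed, and data.npz is
# expected to contain the 'desc_tr', 'desc_te' and 'desc_sel' arrays loaded above):
#   python vocabulary.py --kmean 256 --estimators 100 --conf_mat   # k-means codebook
#   python vocabulary.py --leaves 256 --estimators 100 -v          # random-forest codebook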