#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci
"""Cluster descriptors into a codebook, histogram them, and fit a random forest.

Pipeline, as executed top to bottom:

1. Load the ``desc_tr`` and ``desc_sel`` arrays from ``DATA_FILE``.
2. Fit KMeans with ``CLUSTER_CNT`` clusters on the first 1000 rows of
   ``desc_sel`` transposed.
3. For every ``(i, j)`` entry of ``desc_tr``, count how many of its
   descriptors fall into each cluster, giving one histogram per entry.
4. Fit a random forest on those histograms, labelling each one with its
   first-axis index ``i``, and print the score on that same data.
"""

import numpy as np
import matplotlib.pyplot as plt  # not referenced in the visible script; kept

from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier

DATA_FILE = 'data.npz'
CLUSTER_CNT = 256

# NOTE(review): if 'desc_tr' is stored as an object array, recent NumPy
# releases refuse to unpickle it unless allow_pickle=True is passed -- confirm.
data = np.load(DATA_FILE)
train = data['desc_tr']
# Presumably desc_sel is (descriptor_dim, n_descriptors), hence the
# transpose before taking the first 1000 rows -- TODO confirm.
train_part = data['desc_sel'].T[:1000]

print("Computing KMeans with", train_part.shape[0], "keywords")
kmeans = KMeans(n_clusters=CLUSTER_CNT, random_state=0).fit(train_part)

print("Generating histograms")
# Presumably axis 0 indexes classes and axis 1 the images within a class;
# the labels built further down are consistent with that reading.
n_classes, n_images = train.shape[:2]
histogram = np.zeros((n_classes, n_images, CLUSTER_CNT))

# np.ndindex walks (i, j) in row-major order, i.e. the same order as a pair
# of nested range() loops.
for i, j in np.ndindex(n_classes, n_images):
    assignments = kmeans.predict(train[i][j].T)
    # minlength pads the counts so every histogram has CLUSTER_CNT bins even
    # when the highest-numbered clusters receive no descriptors.
    histogram[i, j] = np.bincount(assignments, minlength=CLUSTER_CNT)

print("Keywords shape", histogram.shape, "\n")
print("Planting trees...")

# Flatten to one row per (i, j) entry. Row k comes from first-axis index
# k // n_images, which is exactly what np.repeat produces for the labels.
features = histogram.reshape((n_classes * n_images, CLUSTER_CNT))
labels = np.repeat(np.arange(n_classes), n_images)

clf = RandomForestClassifier()
clf.fit(features, labels)

print("Random forests created")

# Scored on the very samples the forest was fitted on, so this prints the
# training accuracy rather than a held-out estimate.
print(clf.score(features, labels))