1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci
# Path of the NumPy .npz archive read with np.load() further down the script.
DATA_FILE = "data.npz"
# Number of KMeans clusters; also the number of bins in each histogram.
CLUSTER_CNT = 256
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
# --- Load descriptors -------------------------------------------------------
# 'desc_tr' is indexed below as train[i][j]; the first index is also used as
# the class label when the classifier is trained.
# NOTE(review): if these arrays are stored as object arrays, recent NumPy
# versions need np.load(..., allow_pickle=True) -- confirm against data.npz.
data = np.load(DATA_FILE)
train = data['desc_tr']
# Only the first 1000 rows of the transposed selection feed the clustering
# step (presumably to bound its cost -- TODO confirm).
train_part = data['desc_sel'].T[0:1000]

# --- Build the codebook -----------------------------------------------------
print("Computing KMeans with", train_part.shape[0], "keywords")
# random_state is fixed so the codebook is reproducible between runs.
kmeans = KMeans(n_clusters=CLUSTER_CNT, random_state=0).fit(train_part)

# --- Quantise every train[i][j] entry into a codeword histogram --------------
print("Generating histograms")
n_classes, n_per_class = train.shape[0], train.shape[1]
histogram = np.zeros((n_classes, n_per_class, CLUSTER_CNT))
for i in range(n_classes):
    for j in range(n_per_class):
        # Each entry is transposed before prediction so that its rows are the
        # samples handed to KMeans. minlength pads the counts so every
        # histogram has exactly CLUSTER_CNT bins, even when the highest
        # cluster ids never occur for this entry.
        codewords = kmeans.predict(train[i][j].T)
        histogram[i, j] = np.bincount(codewords, minlength=CLUSTER_CNT)
print("Keywords shape", histogram.shape, "\n")

# --- Train and evaluate the random forest -----------------------------------
# Flatten (class, entry, bin) into one row per entry. The label of a row is
# its index along the first axis, repeated once per entry of that class.
# Both arrays are built once and shared by fit() and score().
features = histogram.reshape((n_classes * n_per_class, CLUSTER_CNT))
labels = np.repeat(np.arange(n_classes), n_per_class)

print("Planting trees...")
clf = RandomForestClassifier()
clf.fit(features, labels)
print("Random forests created")
# The score is computed on the same samples used for fitting, so it is the
# training accuracy rather than a held-out estimate.
print(clf.score(features, labels))
|