#!/usr/bin/python
# EE4 Selected Topics From Computer Vision Coursework
# Vasil Zlatanov, Nunzio Pucci

# Path of the NumPy .npz archive that the script loads with np.load.
DATA_FILE = 'data.npz'
# Number of KMeans clusters; also the length of every per-image histogram.
CLUSTER_CNT = 256
# NOTE(review): not referenced anywhere in the visible part of this script --
# confirm whether it is still needed.
KMEAN_PART = 33

import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

# Load the training descriptors. np.load on an .npz archive returns an NpzFile
# that keeps the underlying file open until it is closed, so read the array
# inside a `with` block; the array itself stays valid after the archive closes.
with np.load(DATA_FILE) as data:
    train = data['desc_tr']

# `train` is indexed below as train[i][j]; each entry is transposed before
# use, so the rows of `entry.T` are the individual descriptors.
# NOTE(review): presumably axis 0 is the class and axis 1 the image within
# that class -- confirm against the code that produced DATA_FILE.

# Rows of each transposed entry that are used to fit the codebook (at most
# 1000 descriptors per entry; fewer if an entry has under 1300 descriptors).
DESC_SLICE = slice(300, 1300)

# Pick one random index along axis 1 and gather the selected descriptors of
# that entry from every row of `train` (15 000 descriptors in total if `train`
# has 15 rows and every entry is long enough -- TODO confirm).
# NOTE(review): this draw is unseeded, so the fitted codebook differs between
# runs even though KMeans itself uses random_state=0.
part_idx = np.random.randint(train.shape[1])

train_part = np.vstack(
    [entry.T[DESC_SLICE] for entry in train[:, part_idx]]
)

print(train_part.shape)

# Fit the visual codebook on the sampled descriptors.
kmeans = KMeans(n_clusters=CLUSTER_CNT, random_state=0).fit(train_part)

print("Generating histograms")

# One CLUSTER_CNT-long vector of cluster-assignment counts per train[i][j].
histogram = np.zeros((train.shape[0], train.shape[1], CLUSTER_CNT))

for i in range(train.shape[0]):
    for j in range(train.shape[1]):
        # Assign every descriptor to its nearest cluster centre and count the
        # assignments; minlength pads clusters that received no descriptor.
        histogram[i, j] = np.bincount(
            kmeans.predict(train[i][j].T), minlength=CLUSTER_CNT
        )

print(histogram.shape)

# Show three sample entries, one figure each.
# NOTE(review): plt.hist bins the *count values* of the vector rather than
# drawing one bar per cluster; if a per-cluster bar chart is intended,
# plt.bar(range(CLUSTER_CNT), ...) would be the call -- confirm intent.
# The indices are hard-coded and assume `train` has at least 8 rows and
# 9 columns.
for i, j in ((1, 5), (3, 2), (7, 8)):
    plt.hist(histogram[i, j])
    plt.show()