#!/usr/bin/env python
# Author: Vasil Zlatanov, Nunzio Pucci
# EE4 Pattern Recognition coursework
#
# usage: kmean.py

from logging import debug

import numpy as np
from sklearn.cluster import KMeans


def _majority_labels(assignments, sample_labels, n_classes):
    """Return the most frequent 1-based class label of each cluster.

    ``assignments`` holds the cluster index of every sample and
    ``sample_labels`` the matching integer class labels. One label is
    returned per cluster index in ``range(n_classes)``.
    """
    cluster_labels = []
    for cluster in range(n_classes):
        votes = np.zeros(n_classes)
        members = sample_labels[assignments == cluster]
        # np.add.at accumulates repeated indices (plain fancy-index "+="
        # would count each class at most once).
        np.add.at(votes, members - 1, 1)
        # argmax picks the first maximum, so ties go to the lowest label and
        # a cluster without members is labelled 1.
        cluster_labels.append(np.argmax(votes) + 1)
    return cluster_labels


def create_kmean_clusters(feature_vectors, labels, gallery_idx, camId):
    """Replace each camera's gallery samples by labelled k-means centres.

    The gallery samples are split by camera, k-means is fitted separately on
    each camera's samples with as many clusters as the largest value in
    ``labels``, and every cluster is labelled by a majority vote of the
    samples assigned to it.

    Parameters
    ----------
    feature_vectors
        Indexable collection of feature vectors, addressed by the entries of
        ``gallery_idx``.
    labels
        Class label per sample, addressed like ``feature_vectors``. Labels
        are treated as 1-based; the maximum over *all* labels sets the number
        of clusters.
    gallery_idx
        NumPy array of indices selecting the gallery samples.
    camId
        Camera id per sample, addressed like ``feature_vectors``. Only two
        cameras are supported and ids are treated as 1-based (1 or 2).

    Returns
    -------
    train_data : numpy.ndarray
        Cluster centres, camera 1 first and then camera 2, one row each.
    train_label : numpy.ndarray
        Majority-vote class label of each centre, in the same order.
    train_cam : numpy.ndarray
        Camera id (``1.0`` or ``2.0``) of each centre, in the same order.
    """
    n_cams = 2
    # Cast once: labels may be a float array, which range()/np.zeros() reject.
    n_classes = int(np.max(labels))

    gallery = [[] for _ in range(n_cams)]
    gallery_labels = [[] for _ in range(n_cams)]
    for idx in np.asarray(gallery_idx).ravel():
        cam = int(camId[idx]) - 1
        gallery[cam].append(feature_vectors[idx])
        gallery_labels[cam].append(int(labels[idx]))

    centres = []
    centre_labels = []
    for cam in range(n_cams):
        # Convert per camera: the two cameras generally hold different
        # numbers of samples, so one combined array would be ragged.
        samples = np.asarray(gallery[cam])
        sample_labels = np.asarray(gallery_labels[cam], dtype=int)
        km = KMeans(n_clusters=n_classes, random_state=0).fit(samples)
        centres.append(km.cluster_centers_)
        centre_labels.extend(
            _majority_labels(km.labels_, sample_labels, n_classes)
        )

    # Merge the clusters of both cameras into one training set.
    train_data = np.vstack(centres)
    debug("Kmean data has shape %s", train_data.shape)
    train_label = np.array(centre_labels)
    train_cam = np.repeat(np.arange(1.0, n_cams + 1), n_classes)
    return train_data, train_label, train_cam