From 97acdd6ea9e378c90cf9a199e746ebca59a4d5e6 Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Mon, 11 Feb 2019 17:47:09 +0000 Subject: Add histogram fig --- evaluate.py | 7 ++++++- report/fig/km-histogram.pdf | Bin 0 -> 13076 bytes report/paper.md | 23 ++++++++++++----------- 3 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 report/fig/km-histogram.pdf diff --git a/evaluate.py b/evaluate.py index dff8482..9cb5f78 100755 --- a/evaluate.py +++ b/evaluate.py @@ -19,7 +19,7 @@ import time parser = argparse.ArgumentParser() parser.add_argument("-d", "--data", help="Data path", action='store_true', default='data.npz') parser.add_argument("-c", "--conf_mat", help="Show visual confusion matrix", action='store_true') -parser.add_argument("-k", "--kmean", help="Perform kmean clustering with --kmean cluster centers", type=int, default=0) +parser.add_argument("-k", "--kmean", help="Perform kmean clustering with KMEAN cluster centers", type=int, default=0) parser.add_argument("-l", "--leaves", help="Maximum leaf nodes for RF classifier", type=int, default=256) parser.add_argument("-e", "--estimators", help="number of estimators to be used", type=int, default=100) parser.add_argument("-D", "--treedepth", help="depth of trees", type=int, default=5) @@ -49,6 +49,11 @@ def make_histogram(data, model, args): leaves = model.apply(data[i][j].T) leaves = np.apply_along_axis(np.bincount, axis=0, arr=leaves, minlength=args.leaves) histogram[i][j] = leaves.reshape(hist_size) + + print(histogram[0][0].shape) + plt.bar(np.arange(100), histogram[0][0].flatten()) + plt.show() + return histogram def run_model (data, train, test, train_part, args): diff --git a/report/fig/km-histogram.pdf b/report/fig/km-histogram.pdf new file mode 100644 index 0000000..f459978 Binary files /dev/null and b/report/fig/km-histogram.pdf differ diff --git a/report/paper.md b/report/paper.md index 037d0df..d8e4fca 100644 --- a/report/paper.md +++ b/report/paper.md @@ -1,17 +1,18 @@ -# K-means 
codebook - -We randomly select 100k descriptors for K-means clustering for building the visual vocabulary -(due to memory issue). Open the main_guideline.m and select/load the dataset. -``` -[data_train, data_test] = getData('Caltech'); -``` -Set 'showImg = 0' in getData.m if you want to stop displaying training and testing images. -Complete getData.m by writing your own lines of code to obtain the visual vocabulary and the -bag-of-words histograms for both training and testing data. Show, measure and -discuss the followings: +# Codebooks + +## K-means codebook + +A common technique for codebook generation involves utilising K-means clustering on a sample of the +image descriptors. In this way descriptors may be mapped to *visual* words which lend themselves to +binning and therefore the creation of bag-of-words histograms for the use of classification. + +In this coursework 100-thousand descriptors have been selected to build the visual vocabulary from the +Caltech dataset. ## Vocabulary size +The number of clusters or the number of centroids determine the vocabulary size. + ## Bag-of-words histograms of example training/testing images ## Vector quantisation process -- cgit v1.2.3-54-g00ecf From 1fcc82c6862aae879fffb5bb0981ebee441f47aa Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Mon, 11 Feb 2019 17:52:01 +0000 Subject: Add testing histogram --- report/fig/km-histtest.pdf | Bin 0 -> 13919 bytes report/paper.md | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 report/fig/km-histtest.pdf diff --git a/report/fig/km-histtest.pdf b/report/fig/km-histtest.pdf new file mode 100644 index 0000000..c7da428 Binary files /dev/null and b/report/fig/km-histtest.pdf differ diff --git a/report/paper.md b/report/paper.md index d8e4fca..c0bbd75 100644 --- a/report/paper.md +++ b/report/paper.md @@ -13,6 +13,8 @@ Caltech dataset. The number of clusters or the number of centroids determine the vocabulary size. 
+![Bag-of-words histogram](fig/km-histogram.pdf) + ## Bag-of-words histograms of example training/testing images ## Vector quantisation process -- cgit v1.2.3-54-g00ecf From fd35886dba493f3588b94bf2109877cf512663fa Mon Sep 17 00:00:00 2001 From: Vasil Zlatanov Date: Mon, 11 Feb 2019 17:52:38 +0000 Subject: Add second image --- report/paper.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/report/paper.md b/report/paper.md index c0bbd75..e673adf 100644 --- a/report/paper.md +++ b/report/paper.md @@ -13,7 +13,8 @@ Caltech dataset. The number of clusters or the number of centroids determine the vocabulary size. -![Bag-of-words histogram](fig/km-histogram.pdf) +![Bag-of-words Training histogram](fig/km-histogram.pdf) +![Bag-of-words Testing histogram](fig/km-histtest.pdf) ## Bag-of-words histograms of example training/testing images -- cgit v1.2.3-54-g00ecf