From c7a918279912b273a7c3ea9870cd40d18d8db91f Mon Sep 17 00:00:00 2001 From: nunzip Date: Fri, 15 Feb 2019 17:51:41 +0000 Subject: Add kmean reference --- report/bibliography.bib | 21 +++++++++++++++++++++ report/paper.md | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'report') diff --git a/report/bibliography.bib b/report/bibliography.bib index 5d4e51e..8230369 100644 --- a/report/bibliography.bib +++ b/report/bibliography.bib @@ -32,3 +32,24 @@ bibsource = {dblp computer science bibliography, https://dblp.org} } +@book{kmean, +added-at = {2016-05-28T21:24:35.000+0200}, +address = {New York}, +author = {Duda, Richard O. and Hart, Peter E. and Stork, David G.}, +biburl = {https://www.bibsonomy.org/bibtex/25ef4fe4778daaf4b4e56c0d66161e048/flint63}, +edition = 2, +file = {eBook:2000-04/DudaHartStork01.pdf:PDF;Wiley Product page:http\://eu.wiley.com/WileyCDA/WileyTitle/productCd-0471056693.html:URL;Amazon Search inside:http\://www.amazon.de/gp/reader/0471056693/:URL}, +groups = {public}, +interhash = {5af620770e95f6b9ccffca6b3638a8ae}, +intrahash = {5ef4fe4778daaf4b4e56c0d66161e048}, +isbn = {978-0-471-05669-0}, +keywords = {01801 105 book shelf ai data pattern recognition analysis}, +publisher = {Wiley}, +timestamp = {2018-04-17T11:20:04.000+0200}, +title = {Pattern Classification}, +username = {flint63}, + year = 2001 +} + + + diff --git a/report/paper.md b/report/paper.md index 58ec125..0a56cea 100644 --- a/report/paper.md +++ b/report/paper.md @@ -28,7 +28,7 @@ An example of histograms for training and testing images is shown on figure \ref The time complexity of quantization with a K-means codebooks is $O(DNK)$, where N is the number of entities to be clustered (descriptors), D is the dimension (of the descriptors) and K is the cluster count [@km-complexity]. As the computation time is high, the tests use a subsample of descriptors to compute the centroids (a random selection of 100 thousand descriptors). An alternative method we tried is applying PCA to the descriptors vectors to improve time performance. However, the descriptor dimension of 128 is relatiely small and as such we found PCA to be unnecessary. -K-means is a process that converges to local optima and heavily depends on the initialization values of the centroids. +K-means is a process that converges to local optima and heavily depends on the initialization values of the centroids[@kmean]. Initializing K-means is an expensive process, based on sequential attempts of centroids placement. Running for multiple instances significantly affects the computation process, leading to a linear increase in execution time. We did not observe increase in accuracy with more than one K-means clusters initializations, and therefore present results for accuracy and execution time with a single K-Means initialization. \begin{figure} -- cgit v1.2.3-54-g00ecf