EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
hierarchical_clustering_index.h
1/***********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2008-2011 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2011 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6 *
7 * THE BSD LICENSE
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
30
31#ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
32#define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
33
35
36#include <algorithm>
37#include <map>
38#include <limits>
39#include <cmath>
40
41#include "general.h"
42#include "nn_index.h"
43#include "dist.h"
44#include "matrix.h"
45#include "result_set.h"
46#include "heap.h"
47#include "allocator.h"
48#include "random.h"
49#include "saving.h"
50
51
52namespace cvflann
53{
54
55struct HierarchicalClusteringIndexParams : public IndexParams
56{
57 HierarchicalClusteringIndexParams(int branching = 32,
58 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
59 int trees = 4, int leaf_size = 100)
60 {
61 (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL;
62 // The branching factor used in the hierarchical clustering
63 (*this)["branching"] = branching;
64 // Algorithm used for picking the initial cluster centers
65 (*this)["centers_init"] = centers_init;
66 // number of parallel trees to build
67 (*this)["trees"] = trees;
68 // maximum leaf size
69 (*this)["leaf_size"] = leaf_size;
70 }
71};
72
73
80template <typename Distance>
81class HierarchicalClusteringIndex : public NNIndex<Distance>
82{
83public:
84 typedef typename Distance::ElementType ElementType;
85 typedef typename Distance::ResultType DistanceType;
86
87private:
88
89
90 typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int, int*, int, int*, int&);
91
95 centersAlgFunction chooseCenters;
96
97
98
109 void chooseCentersRandom(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
110 {
111 UniqueRandom r(indices_length);
112
113 int index;
114 for (index=0; index<k; ++index) {
115 bool duplicate = true;
116 int rnd;
117 while (duplicate) {
118 duplicate = false;
119 rnd = r.next();
120 if (rnd<0) {
121 centers_length = index;
122 return;
123 }
124
125 centers[index] = dsindices[rnd];
126
127 for (int j=0; j<index; ++j) {
128 DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols);
129 if (sq<1e-16) {
130 duplicate = true;
131 }
132 }
133 }
134 }
135
136 centers_length = index;
137 }
138
139
150 void chooseCentersGonzales(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
151 {
152 int n = indices_length;
153
154 int rnd = rand_int(n);
155 CV_DbgAssert(rnd >=0 && rnd < n);
156
157 centers[0] = dsindices[rnd];
158
159 int index;
160 for (index=1; index<k; ++index) {
161
162 int best_index = -1;
163 DistanceType best_val = 0;
164 for (int j=0; j<n; ++j) {
165 DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
166 for (int i=1; i<index; ++i) {
167 DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
168 if (tmp_dist<dist) {
169 dist = tmp_dist;
170 }
171 }
172 if (dist>best_val) {
173 best_val = dist;
174 best_index = j;
175 }
176 }
177 if (best_index!=-1) {
178 centers[index] = dsindices[best_index];
179 }
180 else {
181 break;
182 }
183 }
184 centers_length = index;
185 }
186
187
201 void chooseCentersKMeanspp(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
202 {
203 int n = indices_length;
204
205 double currentPot = 0;
206 DistanceType* closestDistSq = new DistanceType[n];
207
208 // Choose one random center and set the closestDistSq values
209 int index = rand_int(n);
210 CV_DbgAssert(index >=0 && index < n);
211 centers[0] = dsindices[index];
212
213 // Computing distance^2 will have the advantage of even higher probability further to pick new centers
214 // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
215 for (int i = 0; i < n; i++) {
216 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
217 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
218 currentPot += closestDistSq[i];
219 }
220
221
222 const int numLocalTries = 1;
223
224 // Choose each center
225 int centerCount;
226 for (centerCount = 1; centerCount < k; centerCount++) {
227
228 // Repeat several trials
229 double bestNewPot = -1;
230 int bestNewIndex = 0;
231 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
232
233 // Choose our center - have to be slightly careful to return a valid answer even accounting
234 // for possible rounding errors
235 double randVal = rand_double(currentPot);
236 for (index = 0; index < n-1; index++) {
237 if (randVal <= closestDistSq[index]) break;
238 else randVal -= closestDistSq[index];
239 }
240
241 // Compute the new potential
242 double newPot = 0;
243 for (int i = 0; i < n; i++) {
244 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
245 newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
246 }
247
248 // Store the best result
249 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
250 bestNewPot = newPot;
251 bestNewIndex = index;
252 }
253 }
254
255 // Add the appropriate center
256 centers[centerCount] = dsindices[bestNewIndex];
257 currentPot = bestNewPot;
258 for (int i = 0; i < n; i++) {
259 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
260 closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
261 }
262 }
263
264 centers_length = centerCount;
265
266 delete[] closestDistSq;
267 }
268
269
287 void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
288 {
289 const float kSpeedUpFactor = 1.3f;
290
291 int n = indices_length;
292
293 DistanceType* closestDistSq = new DistanceType[n];
294
295 // Choose one random center and set the closestDistSq values
296 int index = rand_int(n);
297 CV_DbgAssert(index >=0 && index < n);
298 centers[0] = dsindices[index];
299
300 for (int i = 0; i < n; i++) {
301 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
302 }
303
304
305 // Choose each center
306 int centerCount;
307 for (centerCount = 1; centerCount < k; centerCount++) {
308
309 // Repeat several trials
310 double bestNewPot = -1;
311 int bestNewIndex = 0;
312 DistanceType furthest = 0;
313 for (index = 0; index < n; index++) {
314
315 // We will test only the potential of the points further than current candidate
316 if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
317
318 // Compute the new potential
319 double newPot = 0;
320 for (int i = 0; i < n; i++) {
321 newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
322 , closestDistSq[i] );
323 }
324
325 // Store the best result
326 if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
327 bestNewPot = newPot;
328 bestNewIndex = index;
329 furthest = closestDistSq[index];
330 }
331 }
332 }
333
334 // Add the appropriate center
335 centers[centerCount] = dsindices[bestNewIndex];
336 for (int i = 0; i < n; i++) {
337 closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
338 , closestDistSq[i] );
339 }
340 }
341
342 centers_length = centerCount;
343
344 delete[] closestDistSq;
345 }
346
347
348public:
349
350
358 HierarchicalClusteringIndex(const Matrix<ElementType>& inputData, const IndexParams& index_params = HierarchicalClusteringIndexParams(),
359 Distance d = Distance())
360 : dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d)
361 {
362 memoryCounter = 0;
363
364 size_ = dataset.rows;
365 veclen_ = dataset.cols;
366
367 branching_ = get_param(params,"branching",32);
368 centers_init_ = get_param(params,"centers_init", FLANN_CENTERS_RANDOM);
369 trees_ = get_param(params,"trees",4);
370 leaf_size_ = get_param(params,"leaf_size",100);
371
372 if (centers_init_==FLANN_CENTERS_RANDOM) {
373 chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
374 }
375 else if (centers_init_==FLANN_CENTERS_GONZALES) {
376 chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
377 }
378 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
379 chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
380 }
381 else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
382 chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
383 }
384 else {
385 FLANN_THROW(cv::Error::StsError, "Unknown algorithm for choosing initial centers.");
386 }
387
388 root = new NodePtr[trees_];
389 indices = new int*[trees_];
390
391 for (int i=0; i<trees_; ++i) {
392 root[i] = NULL;
393 indices[i] = NULL;
394 }
395 }
396
397 HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
398 HierarchicalClusteringIndex& operator=(const HierarchicalClusteringIndex&);
399
405 virtual ~HierarchicalClusteringIndex()
406 {
407 if (root!=NULL) {
408 delete[] root;
409 }
410
411 if (indices!=NULL) {
412 free_indices();
413 delete[] indices;
414 }
415 }
416
420 size_t size() const CV_OVERRIDE
421 {
422 return size_;
423 }
424
428 size_t veclen() const CV_OVERRIDE
429 {
430 return veclen_;
431 }
432
433
438 int usedMemory() const CV_OVERRIDE
439 {
440 return pool.usedMemory+pool.wastedMemory+memoryCounter;
441 }
442
446 void buildIndex() CV_OVERRIDE
447 {
448 if (branching_<2) {
449 FLANN_THROW(cv::Error::StsError, "Branching factor must be at least 2");
450 }
451
452 free_indices();
453
454 for (int i=0; i<trees_; ++i) {
455 indices[i] = new int[size_];
456 for (size_t j=0; j<size_; ++j) {
457 indices[i][j] = (int)j;
458 }
459 root[i] = pool.allocate<Node>();
460 computeClustering(root[i], indices[i], (int)size_, branching_,0);
461 }
462 }
463
464
465 flann_algorithm_t getType() const CV_OVERRIDE
466 {
467 return FLANN_INDEX_HIERARCHICAL;
468 }
469
470
471 void saveIndex(FILE* stream) CV_OVERRIDE
472 {
473 save_value(stream, branching_);
474 save_value(stream, trees_);
475 save_value(stream, centers_init_);
476 save_value(stream, leaf_size_);
477 save_value(stream, memoryCounter);
478 for (int i=0; i<trees_; ++i) {
479 save_value(stream, *indices[i], size_);
480 save_tree(stream, root[i], i);
481 }
482
483 }
484
485
486 void loadIndex(FILE* stream) CV_OVERRIDE
487 {
488 if (root!=NULL) {
489 delete[] root;
490 }
491
492 if (indices!=NULL) {
493 free_indices();
494 delete[] indices;
495 }
496
497 load_value(stream, branching_);
498 load_value(stream, trees_);
499 load_value(stream, centers_init_);
500 load_value(stream, leaf_size_);
501 load_value(stream, memoryCounter);
502
503 indices = new int*[trees_];
504 root = new NodePtr[trees_];
505 for (int i=0; i<trees_; ++i) {
506 indices[i] = new int[size_];
507 load_value(stream, *indices[i], size_);
508 load_tree(stream, root[i], i);
509 }
510
511 params["algorithm"] = getType();
512 params["branching"] = branching_;
513 params["trees"] = trees_;
514 params["centers_init"] = centers_init_;
515 params["leaf_size"] = leaf_size_;
516 }
517
518
528 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
529 {
530
531 const int maxChecks = get_param(searchParams,"checks",32);
532 const bool explore_all_trees = get_param(searchParams,"explore_all_trees",false);
533
534 // Priority queue storing intermediate branches in the best-bin-first search
535 const cv::Ptr<Heap<BranchSt>>& heap = Heap<BranchSt>::getPooledInstance(cv::utils::getThreadID(), (int)size_);
536
537 std::vector<bool> checked(size_,false);
538 int checks = 0;
539 for (int i=0; i<trees_; ++i) {
540 findNN(root[i], result, vec, checks, maxChecks, heap, checked, explore_all_trees);
541 if (!explore_all_trees && (checks >= maxChecks) && result.full())
542 break;
543 }
544
545 BranchSt branch;
546 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
547 NodePtr node = branch.node;
548 findNN(node, result, vec, checks, maxChecks, heap, checked, false);
549 }
550
551 CV_Assert(result.full());
552 }
553
554 IndexParams getParameters() const CV_OVERRIDE
555 {
556 return params;
557 }
558
559
560private:
561
565 struct Node
566 {
570 int pivot;
574 int size;
578 Node** childs;
582 int* indices;
586 int level;
587 };
588 typedef Node* NodePtr;
589
590
591
595 typedef BranchStruct<NodePtr, DistanceType> BranchSt;
596
597
598
599 void save_tree(FILE* stream, NodePtr node, int num)
600 {
601 save_value(stream, *node);
602 if (node->childs==NULL) {
603 int indices_offset = (int)(node->indices - indices[num]);
604 save_value(stream, indices_offset);
605 }
606 else {
607 for(int i=0; i<branching_; ++i) {
608 save_tree(stream, node->childs[i], num);
609 }
610 }
611 }
612
613
614 void load_tree(FILE* stream, NodePtr& node, int num)
615 {
616 node = pool.allocate<Node>();
617 load_value(stream, *node);
618 if (node->childs==NULL) {
619 int indices_offset;
620 load_value(stream, indices_offset);
621 node->indices = indices[num] + indices_offset;
622 }
623 else {
624 node->childs = pool.allocate<NodePtr>(branching_);
625 for(int i=0; i<branching_; ++i) {
626 load_tree(stream, node->childs[i], num);
627 }
628 }
629 }
630
631
635 void free_indices()
636 {
637 if (indices!=NULL) {
638 for(int i=0; i<trees_; ++i) {
639 if (indices[i]!=NULL) {
640 delete[] indices[i];
641 indices[i] = NULL;
642 }
643 }
644 }
645 }
646
647
648 void computeLabels(int* dsindices, int indices_length, int* centers, int centers_length, int* labels, DistanceType& cost)
649 {
650 cost = 0;
651 for (int i=0; i<indices_length; ++i) {
652 ElementType* point = dataset[dsindices[i]];
653 DistanceType dist = distance(point, dataset[centers[0]], veclen_);
654 labels[i] = 0;
655 for (int j=1; j<centers_length; ++j) {
656 DistanceType new_dist = distance(point, dataset[centers[j]], veclen_);
657 if (dist>new_dist) {
658 labels[i] = j;
659 dist = new_dist;
660 }
661 }
662 cost += dist;
663 }
664 }
665
677 void computeClustering(NodePtr node, int* dsindices, int indices_length, int branching, int level)
678 {
679 node->size = indices_length;
680 node->level = level;
681
682 if (indices_length < leaf_size_) { // leaf node
683 node->indices = dsindices;
684 std::sort(node->indices,node->indices+indices_length);
685 node->childs = NULL;
686 return;
687 }
688
689 std::vector<int> centers(branching);
690 std::vector<int> labels(indices_length);
691
692 int centers_length;
693 (this->*chooseCenters)(branching, dsindices, indices_length, &centers[0], centers_length);
694
695 if (centers_length<branching) {
696 node->indices = dsindices;
697 std::sort(node->indices,node->indices+indices_length);
698 node->childs = NULL;
699 return;
700 }
701
702
703 // assign points to clusters
704 DistanceType cost;
705 computeLabels(dsindices, indices_length, &centers[0], centers_length, &labels[0], cost);
706
707 node->childs = pool.allocate<NodePtr>(branching);
708 int start = 0;
709 int end = start;
710 for (int i=0; i<branching; ++i) {
711 for (int j=0; j<indices_length; ++j) {
712 if (labels[j]==i) {
713 std::swap(dsindices[j],dsindices[end]);
715 end++;
716 }
717 }
718
719 node->childs[i] = pool.allocate<Node>();
720 node->childs[i]->pivot = centers[i];
721 node->childs[i]->indices = NULL;
722 computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1);
723 start=end;
724 }
725 }
726
727
728
742 void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
743 const cv::Ptr<Heap<BranchSt>>& heap, std::vector<bool>& checked, bool explore_all_trees = false)
744 {
745 if (node->childs==NULL) {
746 if (!explore_all_trees && (checks>=maxChecks) && result.full()) {
747 return;
748 }
749 for (int i=0; i<node->size; ++i) {
750 int index = node->indices[i];
751 if (!checked[index]) {
752 DistanceType dist = distance(dataset[index], vec, veclen_);
753 result.addPoint(dist, index);
754 checked[index] = true;
755 ++checks;
756 }
757 }
758 }
759 else {
760 DistanceType* domain_distances = new DistanceType[branching_];
761 int best_index = 0;
762 domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
763 for (int i=1; i<branching_; ++i) {
764 domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_);
765 if (domain_distances[i]<domain_distances[best_index]) {
766 best_index = i;
767 }
768 }
769 for (int i=0; i<branching_; ++i) {
770 if (i!=best_index) {
771 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
772 }
773 }
774 delete[] domain_distances;
775 findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked, explore_all_trees);
776 }
777 }
778
779private:
780
781
785 const Matrix<ElementType> dataset;
786
790 IndexParams params;
791
792
796 size_t size_;
797
801 size_t veclen_;
802
806 NodePtr* root;
807
811 int** indices;
812
813
817 Distance distance;
818
826 PooledAllocator pool;
827
831 int memoryCounter;
832
834 int branching_;
835 int trees_;
836 flann_centers_init_t centers_init_;
837 int leaf_size_;
838
839
840};
841
842}
843
845
846#endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */
T distance(T... args)
CvMemStorage CvSeq ** labels
Definition core_c.h:1724
double double end
Definition core_c.h:1381
double start
Definition core_c.h:1381
int index
Definition core_c.h:634
CvSize size
Definition core_c.h:112
const CvArr const CvArr CvArr * result
Definition core_c.h:1423
#define CV_OVERRIDE
Definition cvdef.h:792
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition base.hpp:342
#define CV_DbgAssert(expr)
Definition base.hpp:375
CvRect r
Definition imgproc_c.h:984
CV_EXPORTS OutputArray int double double InputArray OutputArray int int bool double k
Definition imgproc.hpp:2133
InputArray int InputArray cost
Definition imgproc.hpp:3387
T min(T... args)
@ StsError
unknown /unspecified error
Definition base.hpp:71
CV_EXPORTS int getThreadID()
Definition flann.hpp:60
T sort(T... args)
Definition cvstd_wrapper.hpp:74
T swap(T... args)