31 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
32 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
45 #include "result_set.h"
47 #include "allocator.h"
55 struct HierarchicalClusteringIndexParams :
public IndexParams
57 HierarchicalClusteringIndexParams(
int branching = 32,
58 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
59 int trees = 4,
int leaf_size = 100)
61 (*this)[
"algorithm"] = FLANN_INDEX_HIERARCHICAL;
63 (*this)[
"branching"] = branching;
65 (*this)[
"centers_init"] = centers_init;
67 (*this)[
"trees"] = trees;
69 (*this)[
"leaf_size"] = leaf_size;
80 template <
typename Distance>
81 class HierarchicalClusteringIndex :
public NNIndex<Distance>
84 typedef typename Distance::ElementType ElementType;
85 typedef typename Distance::ResultType DistanceType;
90 typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int,
int*, int,
int*,
int&);
95 centersAlgFunction chooseCenters;
109 void chooseCentersRandom(
int k,
int* dsindices,
int indices_length,
int* centers,
int& centers_length)
111 UniqueRandom
r(indices_length);
115 bool duplicate =
true;
121 centers_length =
index;
125 centers[
index] = dsindices[rnd];
127 for (
int j=0; j<
index; ++j) {
128 DistanceType sq =
distance(dataset[centers[
index]], dataset[centers[j]], dataset.cols);
136 centers_length =
index;
150 void chooseCentersGonzales(
int k,
int* dsindices,
int indices_length,
int* centers,
int& centers_length)
152 int n = indices_length;
154 int rnd = rand_int(n);
157 centers[0] = dsindices[rnd];
163 DistanceType best_val = 0;
164 for (
int j=0; j<n; ++j) {
165 DistanceType dist =
distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
166 for (
int i=1; i<
index; ++i) {
167 DistanceType tmp_dist =
distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
177 if (best_index!=-1) {
178 centers[
index] = dsindices[best_index];
184 centers_length =
index;
201 void chooseCentersKMeanspp(
int k,
int* dsindices,
int indices_length,
int* centers,
int& centers_length)
203 int n = indices_length;
205 double currentPot = 0;
206 DistanceType* closestDistSq =
new DistanceType[n];
209 int index = rand_int(n);
211 centers[0] = dsindices[
index];
215 for (
int i = 0; i < n; i++) {
216 closestDistSq[i] =
distance(dataset[dsindices[i]], dataset[dsindices[
index]], dataset.cols);
217 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
218 currentPot += closestDistSq[i];
222 const int numLocalTries = 1;
226 for (centerCount = 1; centerCount <
k; centerCount++) {
229 double bestNewPot = -1;
230 int bestNewIndex = 0;
231 for (
int localTrial = 0; localTrial < numLocalTries; localTrial++) {
235 double randVal = rand_double(currentPot);
237 if (randVal <= closestDistSq[
index])
break;
238 else randVal -= closestDistSq[
index];
243 for (
int i = 0; i < n; i++) {
244 DistanceType dist =
distance(dataset[dsindices[i]], dataset[dsindices[
index]], dataset.cols);
245 newPot +=
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
249 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
251 bestNewIndex =
index;
256 centers[centerCount] = dsindices[bestNewIndex];
257 currentPot = bestNewPot;
258 for (
int i = 0; i < n; i++) {
259 DistanceType dist =
distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
260 closestDistSq[i] =
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
264 centers_length = centerCount;
266 delete[] closestDistSq;
287 void GroupWiseCenterChooser(
int k,
int* dsindices,
int indices_length,
int* centers,
int& centers_length)
289 const float kSpeedUpFactor = 1.3f;
291 int n = indices_length;
293 DistanceType* closestDistSq =
new DistanceType[n];
296 int index = rand_int(n);
298 centers[0] = dsindices[
index];
300 for (
int i = 0; i < n; i++) {
301 closestDistSq[i] =
distance(dataset[dsindices[i]], dataset[dsindices[
index]], dataset.cols);
307 for (centerCount = 1; centerCount <
k; centerCount++) {
310 double bestNewPot = -1;
311 int bestNewIndex = 0;
312 DistanceType furthest = 0;
316 if( closestDistSq[
index] > kSpeedUpFactor * (
float)furthest ) {
320 for (
int i = 0; i < n; i++) {
322 , closestDistSq[i] );
326 if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
328 bestNewIndex =
index;
329 furthest = closestDistSq[
index];
335 centers[centerCount] = dsindices[bestNewIndex];
336 for (
int i = 0; i < n; i++) {
337 closestDistSq[i] =
std::min(
distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
338 , closestDistSq[i] );
342 centers_length = centerCount;
344 delete[] closestDistSq;
358 HierarchicalClusteringIndex(
const Matrix<ElementType>& inputData,
const IndexParams& index_params = HierarchicalClusteringIndexParams(),
359 Distance d = Distance())
360 : dataset(inputData), params(index_params), root(NULL), indices(NULL),
distance(d)
364 size_ = dataset.rows;
365 veclen_ = dataset.cols;
367 branching_ = get_param(params,
"branching",32);
368 centers_init_ = get_param(params,
"centers_init", FLANN_CENTERS_RANDOM);
369 trees_ = get_param(params,
"trees",4);
370 leaf_size_ = get_param(params,
"leaf_size",100);
372 if (centers_init_==FLANN_CENTERS_RANDOM) {
373 chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
375 else if (centers_init_==FLANN_CENTERS_GONZALES) {
376 chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
378 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
379 chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
381 else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
382 chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
388 root =
new NodePtr[trees_];
389 indices =
new int*[trees_];
391 for (
int i=0; i<trees_; ++i) {
397 HierarchicalClusteringIndex(
const HierarchicalClusteringIndex&);
398 HierarchicalClusteringIndex& operator=(
const HierarchicalClusteringIndex&);
405 virtual ~HierarchicalClusteringIndex()
440 return pool.usedMemory+pool.wastedMemory+memoryCounter;
454 for (
int i=0; i<trees_; ++i) {
455 indices[i] =
new int[size_];
456 for (
size_t j=0; j<size_; ++j) {
457 indices[i][j] = (int)j;
459 root[i] = pool.allocate<Node>();
460 computeClustering(root[i], indices[i], (
int)size_, branching_,0);
467 return FLANN_INDEX_HIERARCHICAL;
473 save_value(stream, branching_);
474 save_value(stream, trees_);
475 save_value(stream, centers_init_);
476 save_value(stream, leaf_size_);
477 save_value(stream, memoryCounter);
478 for (
int i=0; i<trees_; ++i) {
479 save_value(stream, *indices[i], size_);
480 save_tree(stream, root[i], i);
497 load_value(stream, branching_);
498 load_value(stream, trees_);
499 load_value(stream, centers_init_);
500 load_value(stream, leaf_size_);
501 load_value(stream, memoryCounter);
503 indices =
new int*[trees_];
504 root =
new NodePtr[trees_];
505 for (
int i=0; i<trees_; ++i) {
506 indices[i] =
new int[size_];
507 load_value(stream, *indices[i], size_);
508 load_tree(stream, root[i], i);
511 params[
"algorithm"] = getType();
512 params[
"branching"] = branching_;
513 params[
"trees"] = trees_;
514 params[
"centers_init"] = centers_init_;
515 params[
"leaf_size"] = leaf_size_;
528 void findNeighbors(ResultSet<DistanceType>&
result,
const ElementType* vec,
const SearchParams& searchParams)
CV_OVERRIDE
531 const int maxChecks = get_param(searchParams,
"checks",32);
532 const bool explore_all_trees = get_param(searchParams,
"explore_all_trees",
false);
539 for (
int i=0; i<trees_; ++i) {
540 findNN(root[i],
result, vec, checks, maxChecks, heap, checked, explore_all_trees);
541 if (!explore_all_trees && (checks >= maxChecks) &&
result.full())
546 while (heap->popMin(branch) && (checks<maxChecks || !
result.full())) {
547 NodePtr node = branch.node;
548 findNN(node,
result, vec, checks, maxChecks, heap, checked,
false);
588 typedef Node* NodePtr;
595 typedef BranchStruct<NodePtr, DistanceType> BranchSt;
599 void save_tree(FILE* stream, NodePtr node,
int num)
601 save_value(stream, *node);
602 if (node->childs==NULL) {
603 int indices_offset = (int)(node->indices - indices[num]);
604 save_value(stream, indices_offset);
607 for(
int i=0; i<branching_; ++i) {
608 save_tree(stream, node->childs[i], num);
614 void load_tree(FILE* stream, NodePtr& node,
int num)
616 node = pool.allocate<Node>();
617 load_value(stream, *node);
618 if (node->childs==NULL) {
620 load_value(stream, indices_offset);
621 node->indices = indices[num] + indices_offset;
624 node->childs = pool.allocate<NodePtr>(branching_);
625 for(
int i=0; i<branching_; ++i) {
626 load_tree(stream, node->childs[i], num);
638 for(
int i=0; i<trees_; ++i) {
639 if (indices[i]!=NULL) {
648 void computeLabels(
int* dsindices,
int indices_length,
int* centers,
int centers_length,
int*
labels, DistanceType&
cost)
651 for (
int i=0; i<indices_length; ++i) {
652 ElementType* point = dataset[dsindices[i]];
653 DistanceType dist =
distance(point, dataset[centers[0]], veclen_);
655 for (
int j=1; j<centers_length; ++j) {
656 DistanceType new_dist =
distance(point, dataset[centers[j]], veclen_);
677 void computeClustering(NodePtr node,
int* dsindices,
int indices_length,
int branching,
int level)
679 node->size = indices_length;
682 if (indices_length < leaf_size_) {
683 node->indices = dsindices;
684 std::sort(node->indices,node->indices+indices_length);
693 (this->*chooseCenters)(branching, dsindices, indices_length, ¢ers[0], centers_length);
695 if (centers_length<branching) {
696 node->indices = dsindices;
697 std::sort(node->indices,node->indices+indices_length);
705 computeLabels(dsindices, indices_length, ¢ers[0], centers_length, &
labels[0],
cost);
707 node->childs = pool.allocate<NodePtr>(branching);
710 for (
int i=0; i<branching; ++i) {
711 for (
int j=0; j<indices_length; ++j) {
719 node->childs[i] = pool.allocate<Node>();
720 node->childs[i]->pivot = centers[i];
721 node->childs[i]->indices = NULL;
722 computeClustering(node->childs[i],dsindices+
start,
end-
start, branching, level+1);
742 void findNN(NodePtr node, ResultSet<DistanceType>&
result,
const ElementType* vec,
int& checks,
int maxChecks,
745 if (node->childs==NULL) {
746 if (!explore_all_trees && (checks>=maxChecks) &&
result.full()) {
749 for (
int i=0; i<node->size; ++i) {
750 int index = node->indices[i];
751 if (!checked[
index]) {
754 checked[
index] =
true;
760 DistanceType* domain_distances =
new DistanceType[branching_];
762 domain_distances[best_index] =
distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
763 for (
int i=1; i<branching_; ++i) {
764 domain_distances[i] =
distance(vec, dataset[node->childs[i]->pivot], veclen_);
765 if (domain_distances[i]<domain_distances[best_index]) {
769 for (
int i=0; i<branching_; ++i) {
771 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
774 delete[] domain_distances;
775 findNN(node->childs[best_index],
result,vec, checks, maxChecks, heap, checked, explore_all_trees);
785 const Matrix<ElementType> dataset;
826 PooledAllocator pool;
836 flann_centers_init_t centers_init_;
CvMemStorage CvSeq ** labels
Definition: core_c.h:1724
double double end
Definition: core_c.h:1381
double start
Definition: core_c.h:1381
int index
Definition: core_c.h:634
CvSize size
Definition: core_c.h:112
const CvArr const CvArr CvArr * result
Definition: core_c.h:1423
#define CV_OVERRIDE
Definition: cvdef.h:792
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
#define CV_DbgAssert(expr)
Definition: base.hpp:375
CvRect r
Definition: imgproc_c.h:984
CV_EXPORTS OutputArray int double double InputArray OutputArray int int bool double k
Definition: imgproc.hpp:2133
InputArray int InputArray cost
Definition: imgproc.hpp:3387
@ StsError
unknown /unspecified error
Definition: base.hpp:71
CV_EXPORTS int getThreadID()
Definition: cvstd_wrapper.hpp:74