31 #ifndef OPENCV_FLANN_KMEANS_INDEX_H_
32 #define OPENCV_FLANN_KMEANS_INDEX_H_
45 #include "result_set.h"
47 #include "allocator.h"
52 #define BITS_PER_CHAR 8
53 #define BITS_PER_BASE 2
54 #define BASE_PER_CHAR (BITS_PER_CHAR/BITS_PER_BASE)
55 #define HISTOS_PER_BASE (1<<BITS_PER_BASE)
61 struct KMeansIndexParams :
public IndexParams
63 KMeansIndexParams(
int branching = 32,
int iterations = 11,
64 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
65 float cb_index = 0.2,
int trees = 1 )
67 (*this)[
"algorithm"] = FLANN_INDEX_KMEANS;
69 (*this)[
"branching"] = branching;
71 (*this)[
"iterations"] = iterations;
73 (*this)[
"centers_init"] = centers_init;
75 (*this)[
"cb_index"] = cb_index;
77 (*this)[
"trees"] = trees;
88 template <
typename Distance>
89 class KMeansIndex :
public NNIndex<Distance>
92 typedef typename Distance::ElementType ElementType;
93 typedef typename Distance::ResultType DistanceType;
94 typedef typename Distance::CentersType CentersType;
96 typedef typename Distance::is_kdtree_distance is_kdtree_distance;
97 typedef typename Distance::is_vector_space_distance is_vector_space_distance;
101 typedef void (KMeansIndex::* centersAlgFunction)(int,
int*, int,
int*,
int&);
106 centersAlgFunction chooseCenters;
120 void chooseCentersRandom(
int k,
int* indices,
int indices_length,
int* centers,
int& centers_length)
122 UniqueRandom
r(indices_length);
126 bool duplicate =
true;
132 centers_length =
index;
136 centers[
index] = indices[rnd];
138 for (
int j=0; j<
index; ++j) {
139 DistanceType sq = distance_(dataset_[centers[
index]], dataset_[centers[j]], dataset_.cols);
147 centers_length =
index;
161 void chooseCentersGonzales(
int k,
int* indices,
int indices_length,
int* centers,
int& centers_length)
163 int n = indices_length;
165 int rnd = rand_int(n);
168 centers[0] = indices[rnd];
174 DistanceType best_val = 0;
175 for (
int j=0; j<n; ++j) {
176 DistanceType dist = distance_(dataset_[centers[0]],dataset_[indices[j]],dataset_.cols);
177 for (
int i=1; i<
index; ++i) {
178 DistanceType tmp_dist = distance_(dataset_[centers[i]],dataset_[indices[j]],dataset_.cols);
188 if (best_index!=-1) {
189 centers[
index] = indices[best_index];
195 centers_length =
index;
212 void chooseCentersKMeanspp(
int k,
int* indices,
int indices_length,
int* centers,
int& centers_length)
214 int n = indices_length;
216 double currentPot = 0;
217 DistanceType* closestDistSq =
new DistanceType[n];
220 int index = rand_int(n);
222 centers[0] = indices[
index];
224 for (
int i = 0; i < n; i++) {
225 closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[
index]], dataset_.cols);
226 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
227 currentPot += closestDistSq[i];
231 const int numLocalTries = 1;
235 for (centerCount = 1; centerCount <
k; centerCount++) {
238 double bestNewPot = -1;
239 int bestNewIndex = -1;
240 for (
int localTrial = 0; localTrial < numLocalTries; localTrial++) {
244 double randVal = rand_double(currentPot);
246 if (randVal <= closestDistSq[
index])
break;
247 else randVal -= closestDistSq[
index];
252 for (
int i = 0; i < n; i++) {
253 DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[
index]], dataset_.cols);
254 newPot +=
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
258 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
260 bestNewIndex =
index;
265 centers[centerCount] = indices[bestNewIndex];
266 currentPot = bestNewPot;
267 for (
int i = 0; i < n; i++) {
268 DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
269 closestDistSq[i] =
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
273 centers_length = centerCount;
275 delete[] closestDistSq;
284 return FLANN_INDEX_KMEANS;
287 template<
class CentersContainerType>
291 KMeansDistanceComputer(Distance _distance,
const Matrix<ElementType>& _dataset,
292 const int _branching,
const int* _indices,
const CentersContainerType& _dcenters,
297 , branching(_branching)
299 , dcenters(_dcenters)
301 , new_centroids(_new_centroids)
302 , sq_dists(_sq_dists)
311 for(
int i = begin; i<
end; ++i)
313 DistanceType sq_dist(
distance(dataset[indices[i]], dcenters[0], veclen));
315 for (
int j=1; j<branching; ++j) {
316 DistanceType new_sq_dist =
distance(dataset[indices[i]], dcenters[j], veclen);
317 if (sq_dist>new_sq_dist) {
319 sq_dist = new_sq_dist;
322 sq_dists[i] = sq_dist;
323 new_centroids[i] = new_centroid;
329 const Matrix<ElementType>& dataset;
332 const CentersContainerType& dcenters;
336 KMeansDistanceComputer& operator=(
const KMeansDistanceComputer & ) {
return *
this; }
346 KMeansIndex(
const Matrix<ElementType>& inputData,
const IndexParams& params = KMeansIndexParams(),
347 Distance d = Distance())
348 : dataset_(inputData), index_params_(params), root_(NULL), indices_(NULL), distance_(d)
352 size_ = dataset_.rows;
353 veclen_ = dataset_.cols;
355 branching_ = get_param(params,
"branching",32);
356 trees_ = get_param(params,
"trees",1);
357 iterations_ = get_param(params,
"iterations",11);
361 centers_init_ = get_param(params,
"centers_init",FLANN_CENTERS_RANDOM);
363 if (centers_init_==FLANN_CENTERS_RANDOM) {
364 chooseCenters = &KMeansIndex::chooseCentersRandom;
366 else if (centers_init_==FLANN_CENTERS_GONZALES) {
367 chooseCenters = &KMeansIndex::chooseCentersGonzales;
369 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
370 chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
377 root_ =
new KMeansNodePtr[trees_];
378 indices_ =
new int*[trees_];
380 for (
int i=0; i<trees_; ++i) {
387 KMeansIndex(
const KMeansIndex&);
388 KMeansIndex& operator=(
const KMeansIndex&);
396 virtual ~KMeansIndex()
402 if (indices_!=NULL) {
425 void set_cb_index(
float index)
436 return pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
450 for (
int i=0; i<trees_; ++i) {
451 indices_[i] =
new int[size_];
452 for (
size_t j=0; j<size_; ++j) {
453 indices_[i][j] = int(j);
455 root_[i] = pool_.allocate<KMeansNode>();
458 Distance* dummy = NULL;
459 computeNodeStatistics(root_[i], indices_[i], (
unsigned int)size_, dummy);
461 computeClustering(root_[i], indices_[i], (
int)size_, branching_,0);
468 save_value(stream, branching_);
469 save_value(stream, iterations_);
470 save_value(stream, memoryCounter_);
471 save_value(stream, cb_index_);
472 save_value(stream, trees_);
473 for (
int i=0; i<trees_; ++i) {
474 save_value(stream, *indices_[i], (
int)size_);
475 save_tree(stream, root_[i], i);
482 if (indices_!=NULL) {
490 load_value(stream, branching_);
491 load_value(stream, iterations_);
492 load_value(stream, memoryCounter_);
493 load_value(stream, cb_index_);
494 load_value(stream, trees_);
496 indices_ =
new int*[trees_];
497 for (
int i=0; i<trees_; ++i) {
498 indices_[i] =
new int[size_];
499 load_value(stream, *indices_[i], size_);
500 load_tree(stream, root_[i], i);
503 index_params_[
"algorithm"] = getType();
504 index_params_[
"branching"] = branching_;
505 index_params_[
"trees"] = trees_;
506 index_params_[
"iterations"] = iterations_;
507 index_params_[
"centers_init"] = centers_init_;
508 index_params_[
"cb_index"] = cb_index_;
521 void findNeighbors(ResultSet<DistanceType>&
result,
const ElementType* vec,
const SearchParams& searchParams)
CV_OVERRIDE
524 const int maxChecks = get_param(searchParams,
"checks",32);
526 if (maxChecks==FLANN_CHECKS_UNLIMITED) {
527 findExactNN(root_[0],
result, vec);
534 for (
int i=0; i<trees_; ++i) {
535 findNN(root_[i],
result, vec, checks, maxChecks, heap);
536 if ((checks >= maxChecks) &&
result.full())
541 while (heap->popMin(branch) && (checks<maxChecks || !
result.full())) {
542 KMeansNodePtr node = branch.node;
543 findNN(node,
result, vec, checks, maxChecks, heap);
556 int getClusterCenters(Matrix<CentersType>& centers)
558 int numClusters = centers.rows;
563 DistanceType variance;
564 KMeansNodePtr* clusters =
new KMeansNodePtr[numClusters];
566 int clusterCount = getMinVarianceClusters(root_[0], clusters, numClusters, variance);
568 Logger::info(
"Clusters requested: %d, returning %d\n",numClusters, clusterCount);
570 for (
int i=0; i<clusterCount; ++i) {
571 CentersType*
center = clusters[i]->pivot;
572 for (
size_t j=0; j<veclen_; ++j) {
573 centers[i][j] =
center[j];
583 return index_params_;
604 DistanceType mean_radius;
608 DistanceType variance;
626 typedef KMeansNode* KMeansNodePtr;
631 typedef BranchStruct<KMeansNodePtr, DistanceType> BranchSt;
636 void save_tree(FILE* stream, KMeansNodePtr node,
int num)
638 save_value(stream, *node);
639 save_value(stream, *(node->pivot), (
int)veclen_);
640 if (node->childs==NULL) {
641 int indices_offset = (int)(node->indices - indices_[num]);
642 save_value(stream, indices_offset);
645 for(
int i=0; i<branching_; ++i) {
646 save_tree(stream, node->childs[i], num);
652 void load_tree(FILE* stream, KMeansNodePtr& node,
int num)
654 node = pool_.allocate<KMeansNode>();
655 load_value(stream, *node);
656 node->pivot =
new CentersType[veclen_];
657 load_value(stream, *(node->pivot), (
int)veclen_);
658 if (node->childs==NULL) {
660 load_value(stream, indices_offset);
661 node->indices = indices_[num] + indices_offset;
664 node->childs = pool_.allocate<KMeansNodePtr>(branching_);
665 for(
int i=0; i<branching_; ++i) {
666 load_tree(stream, node->childs[i], num);
675 void free_centers(KMeansNodePtr node)
677 delete[] node->pivot;
678 if (node->childs!=NULL) {
679 for (
int k=0;
k<branching_; ++
k) {
680 free_centers(node->childs[
k]);
688 for(
int i=0; i<trees_; ++i) {
689 if (root_[i] != NULL) {
690 free_centers(root_[i]);
701 if (indices_!=NULL) {
702 for(
int i=0; i<trees_; ++i) {
703 if (indices_[i]!=NULL) {
704 delete[] indices_[i];
719 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
unsigned int indices_length)
721 DistanceType variance = 0;
722 CentersType*
mean =
new CentersType[veclen_];
723 memoryCounter_ += int(veclen_*
sizeof(CentersType));
727 for (
unsigned int i=0; i<indices_length; ++i) {
728 ElementType* vec = dataset_[indices[i]];
729 for (
size_t j=0; j<veclen_; ++j) {
732 variance += distance_(vec, ZeroIterator<ElementType>(), veclen_);
734 float length =
static_cast<float>(indices_length);
735 for (
size_t j=0; j<veclen_; ++j) {
736 mean[j] = cvflann::round<CentersType>(
mean[j] /
static_cast<double>(indices_length) );
738 variance /=
static_cast<DistanceType
>( length );
739 variance -= distance_(
mean, ZeroIterator<ElementType>(), veclen_);
742 for (
unsigned int i=0; i<indices_length; ++i) {
743 DistanceType tmp = distance_(
mean, dataset_[indices[i]], veclen_);
749 node->variance = variance;
755 void computeBitfieldNodeStatistics(KMeansNodePtr node,
int* indices,
756 unsigned int indices_length)
758 const unsigned int accumulator_veclen =
static_cast<unsigned int>(
759 veclen_*
sizeof(CentersType)*BITS_PER_CHAR);
761 unsigned long long variance = 0ull;
762 CentersType*
mean =
new CentersType[veclen_];
763 memoryCounter_ += int(veclen_*
sizeof(CentersType));
764 unsigned int* mean_accumulator =
new unsigned int[accumulator_veclen];
766 memset(mean_accumulator, 0,
sizeof(
unsigned int)*accumulator_veclen);
768 for (
unsigned int i=0; i<indices_length; ++i) {
769 variance +=
static_cast<unsigned long long>( ensureSquareDistance<Distance>(
770 distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_)));
771 unsigned char* vec = (
unsigned char*)dataset_[indices[i]];
772 for (
size_t k=0, l=0;
k<accumulator_veclen;
k+=BITS_PER_CHAR, ++l) {
773 mean_accumulator[
k] += (vec[l]) & 0x01;
774 mean_accumulator[
k+1] += (vec[l]>>1) & 0x01;
775 mean_accumulator[
k+2] += (vec[l]>>2) & 0x01;
776 mean_accumulator[
k+3] += (vec[l]>>3) & 0x01;
777 mean_accumulator[
k+4] += (vec[l]>>4) & 0x01;
778 mean_accumulator[
k+5] += (vec[l]>>5) & 0x01;
779 mean_accumulator[
k+6] += (vec[l]>>6) & 0x01;
780 mean_accumulator[
k+7] += (vec[l]>>7) & 0x01;
783 double cnt =
static_cast<double>(indices_length);
784 unsigned char* char_mean = (
unsigned char*)
mean;
785 for (
size_t k=0, l=0;
k<accumulator_veclen;
k+=BITS_PER_CHAR, ++l) {
786 char_mean[l] =
static_cast<unsigned char>(
787 (((int)(0.5 + (
double)(mean_accumulator[
k]) / cnt)))
788 | (((int)(0.5 + (
double)(mean_accumulator[
k+1]) / cnt))<<1)
789 | (((int)(0.5 + (
double)(mean_accumulator[
k+2]) / cnt))<<2)
790 | (((int)(0.5 + (
double)(mean_accumulator[
k+3]) / cnt))<<3)
791 | (((int)(0.5 + (
double)(mean_accumulator[
k+4]) / cnt))<<4)
792 | (((int)(0.5 + (
double)(mean_accumulator[
k+5]) / cnt))<<5)
793 | (((int)(0.5 + (
double)(mean_accumulator[
k+6]) / cnt))<<6)
794 | (((int)(0.5 + (
double)(mean_accumulator[
k+7]) / cnt))<<7));
796 variance =
static_cast<unsigned long long>(
797 0.5 +
static_cast<double>(variance) /
static_cast<double>(indices_length));
798 variance -=
static_cast<unsigned long long>(
799 ensureSquareDistance<Distance>(
800 distance_(
mean, ZeroIterator<ElementType>(), veclen_)));
803 for (
unsigned int i=0; i<indices_length; ++i) {
804 DistanceType tmp = distance_(
mean, dataset_[indices[i]], veclen_);
810 node->variance =
static_cast<DistanceType
>(variance);
814 delete[] mean_accumulator;
818 void computeDnaNodeStatistics(KMeansNodePtr node,
int* indices,
819 unsigned int indices_length)
821 const unsigned int histos_veclen =
static_cast<unsigned int>(
822 veclen_*
sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR));
824 unsigned long long variance = 0ull;
825 unsigned int* histograms =
new unsigned int[histos_veclen];
826 memset(histograms, 0,
sizeof(
unsigned int)*histos_veclen);
828 for (
unsigned int i=0; i<indices_length; ++i) {
829 variance +=
static_cast<unsigned long long>( ensureSquareDistance<Distance>(
830 distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_)));
832 unsigned char* vec = (
unsigned char*)dataset_[indices[i]];
833 for (
size_t k=0, l=0;
k<histos_veclen;
k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
834 histograms[
k + ((vec[l]) & 0x03)]++;
835 histograms[
k + 4 + ((vec[l]>>2) & 0x03)]++;
836 histograms[
k + 8 + ((vec[l]>>4) & 0x03)]++;
837 histograms[
k +12 + ((vec[l]>>6) & 0x03)]++;
841 CentersType*
mean =
new CentersType[veclen_];
842 memoryCounter_ += int(veclen_*
sizeof(CentersType));
843 unsigned char* char_mean = (
unsigned char*)
mean;
844 unsigned int* h = histograms;
845 for (
size_t k=0, l=0;
k<histos_veclen;
k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
846 char_mean[l] = (h[
k] > h[
k+1] ? h[
k+2] > h[
k+3] ? h[
k] > h[
k+2] ? 0x00 : 0x10
847 : h[
k] > h[
k+3] ? 0x00 : 0x11
848 : h[
k+2] > h[
k+3] ? h[
k+1] > h[
k+2] ? 0x01 : 0x10
849 : h[
k+1] > h[
k+3] ? 0x01 : 0x11)
850 | (h[
k+4]>h[
k+5] ? h[
k+6] > h[
k+7] ? h[
k+4] > h[
k+6] ? 0x00 : 0x1000
851 : h[
k+4] > h[
k+7] ? 0x00 : 0x1100
852 : h[
k+6] > h[
k+7] ? h[
k+5] > h[
k+6] ? 0x0100 : 0x1000
853 : h[
k+5] > h[
k+7] ? 0x0100 : 0x1100)
854 | (h[
k+8]>h[
k+9] ? h[
k+10]>h[
k+11] ? h[
k+8] >h[
k+10] ? 0x00 : 0x100000
855 : h[
k+8] >h[
k+11] ? 0x00 : 0x110000
856 : h[
k+10]>h[
k+11] ? h[
k+9] >h[
k+10] ? 0x010000 : 0x100000
857 : h[
k+9] >h[
k+11] ? 0x010000 : 0x110000)
858 | (h[
k+12]>h[
k+13] ? h[
k+14]>h[
k+15] ? h[
k+12] >h[
k+14] ? 0x00 : 0x10000000
859 : h[
k+12] >h[
k+15] ? 0x00 : 0x11000000
860 : h[
k+14]>h[
k+15] ? h[
k+13] >h[
k+14] ? 0x01000000 : 0x10000000
861 : h[
k+13] >h[
k+15] ? 0x01000000 : 0x11000000);
863 variance =
static_cast<unsigned long long>(
864 0.5 +
static_cast<double>(variance) /
static_cast<double>(indices_length));
865 variance -=
static_cast<unsigned long long>(
866 ensureSquareDistance<Distance>(
867 distance_(
mean, ZeroIterator<ElementType>(), veclen_)));
870 for (
unsigned int i=0; i<indices_length; ++i) {
871 DistanceType tmp = distance_(
mean, dataset_[indices[i]], veclen_);
877 node->variance =
static_cast<DistanceType
>(variance);
885 template<
typename DistType>
886 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
887 unsigned int indices_length,
888 const DistType* identifier)
891 computeNodeStatistics(node, indices, indices_length);
894 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
895 unsigned int indices_length,
899 computeBitfieldNodeStatistics(node, indices, indices_length);
902 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
903 unsigned int indices_length,
904 const cvflann::Hamming<unsigned char>* identifier)
907 computeBitfieldNodeStatistics(node, indices, indices_length);
910 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
911 unsigned int indices_length,
912 const cvflann::Hamming2<unsigned char>* identifier)
915 computeBitfieldNodeStatistics(node, indices, indices_length);
918 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
919 unsigned int indices_length,
920 const cvflann::DNAmmingLUT* identifier)
923 computeDnaNodeStatistics(node, indices, indices_length);
926 void computeNodeStatistics(KMeansNodePtr node,
int* indices,
927 unsigned int indices_length,
928 const cvflann::DNAmming2<unsigned char>* identifier)
931 computeDnaNodeStatistics(node, indices, indices_length);
935 void refineClustering(
int* indices,
int indices_length,
int branching, CentersType** centers,
939 Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
941 bool converged =
false;
943 while (!converged && iteration<iterations_) {
948 for (
int i=0; i<branching; ++i) {
949 memset(dcenters[i],0,
sizeof(
double)*veclen_);
952 for (
int i=0; i<indices_length; ++i) {
953 ElementType* vec = dataset_[indices[i]];
954 double*
center = dcenters[belongs_to[i]];
955 for (
size_t k=0;
k<veclen_; ++
k) {
959 for (
int i=0; i<branching; ++i) {
961 for (
size_t k=0;
k<veclen_; ++
k) {
962 dcenters[i][
k] /= cnt;
970 KMeansDistanceComputer<Matrix<double> > invoker(
971 distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
974 for (
int i=0; i < (int)indices_length; ++i) {
975 DistanceType sq_dist(sq_dists[i]);
976 int new_centroid(new_centroids[i]);
977 if (sq_dist > radiuses[new_centroid]) {
978 radiuses[new_centroid] = sq_dist;
980 if (new_centroid != belongs_to[i]) {
981 count[belongs_to[i]]--;
982 count[new_centroid]++;
983 belongs_to[i] = new_centroid;
988 for (
int i=0; i<branching; ++i) {
992 int j = (i+1)%branching;
993 while (
count[j]<=1) {
997 for (
int k=0;
k<indices_length; ++
k) {
998 if (belongs_to[
k]==j) {
1000 if ( distance_(dataset_[indices[
k]], dcenters[j], veclen_) == radiuses[j] ) {
1013 for (
int i=0; i<branching; ++i) {
1014 centers[i] =
new CentersType[veclen_];
1015 memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1016 for (
size_t k=0;
k<veclen_; ++
k) {
1017 centers[i][
k] = (CentersType)dcenters[i][
k];
1023 void refineBitfieldClustering(
int* indices,
int indices_length,
int branching, CentersType** centers,
1026 for (
int i=0; i<branching; ++i) {
1027 centers[i] =
new CentersType[veclen_];
1028 memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1031 const unsigned int accumulator_veclen =
static_cast<unsigned int>(
1032 veclen_*
sizeof(ElementType)*BITS_PER_CHAR);
1034 Matrix<unsigned int> dcenters(dcenters_buf.data(), branching, accumulator_veclen);
1036 bool converged =
false;
1038 while (!converged && iteration<iterations_) {
1043 for (
int i=0; i<branching; ++i) {
1044 memset(dcenters[i],0,
sizeof(
unsigned int)*accumulator_veclen);
1047 for (
int i=0; i<indices_length; ++i) {
1048 unsigned char* vec = (
unsigned char*)dataset_[indices[i]];
1049 unsigned int* dcenter = dcenters[belongs_to[i]];
1050 for (
size_t k=0, l=0;
k<accumulator_veclen;
k+=BITS_PER_CHAR, ++l) {
1051 dcenter[
k] += (vec[l]) & 0x01;
1052 dcenter[
k+1] += (vec[l]>>1) & 0x01;
1053 dcenter[
k+2] += (vec[l]>>2) & 0x01;
1054 dcenter[
k+3] += (vec[l]>>3) & 0x01;
1055 dcenter[
k+4] += (vec[l]>>4) & 0x01;
1056 dcenter[
k+5] += (vec[l]>>5) & 0x01;
1057 dcenter[
k+6] += (vec[l]>>6) & 0x01;
1058 dcenter[
k+7] += (vec[l]>>7) & 0x01;
1061 for (
int i=0; i<branching; ++i) {
1062 double cnt =
static_cast<double>(
count[i]);
1063 unsigned int* dcenter = dcenters[i];
1064 unsigned char* charCenter = (
unsigned char*)centers[i];
1065 for (
size_t k=0, l=0;
k<accumulator_veclen;
k+=BITS_PER_CHAR, ++l) {
1066 charCenter[l] =
static_cast<unsigned char>(
1067 (((int)(0.5 + (
double)(dcenter[
k]) / cnt)))
1068 | (((int)(0.5 + (
double)(dcenter[
k+1]) / cnt))<<1)
1069 | (((int)(0.5 + (
double)(dcenter[
k+2]) / cnt))<<2)
1070 | (((int)(0.5 + (
double)(dcenter[
k+3]) / cnt))<<3)
1071 | (((int)(0.5 + (
double)(dcenter[
k+4]) / cnt))<<4)
1072 | (((int)(0.5 + (
double)(dcenter[
k+5]) / cnt))<<5)
1073 | (((int)(0.5 + (
double)(dcenter[
k+6]) / cnt))<<6)
1074 | (((int)(0.5 + (
double)(dcenter[
k+7]) / cnt))<<7));
1082 KMeansDistanceComputer<ElementType**> invoker(
1083 distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
1086 for (
int i=0; i < indices_length; ++i) {
1087 DistanceType dist(dists[i]);
1088 int new_centroid(new_centroids[i]);
1089 if (dist > radiuses[new_centroid]) {
1090 radiuses[new_centroid] = dist;
1092 if (new_centroid != belongs_to[i]) {
1093 count[belongs_to[i]]--;
1094 count[new_centroid]++;
1095 belongs_to[i] = new_centroid;
1100 for (
int i=0; i<branching; ++i) {
1104 int j = (i+1)%branching;
1105 while (
count[j]<=1) {
1106 j = (j+1)%branching;
1109 for (
int k=0;
k<indices_length; ++
k) {
1110 if (belongs_to[
k]==j) {
1112 if ( distance_(dataset_[indices[
k]], centers[j], veclen_) == radiuses[j] ) {
1127 void refineDnaClustering(
int* indices,
int indices_length,
int branching, CentersType** centers,
1130 for (
int i=0; i<branching; ++i) {
1131 centers[i] =
new CentersType[veclen_];
1132 memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1135 const unsigned int histos_veclen =
static_cast<unsigned int>(
1136 veclen_*
sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR));
1138 Matrix<unsigned int> histos(histos_buf.data(), branching, histos_veclen);
1140 bool converged =
false;
1142 while (!converged && iteration<iterations_) {
1147 for (
int i=0; i<branching; ++i) {
1148 memset(histos[i],0,
sizeof(
unsigned int)*histos_veclen);
1151 for (
int i=0; i<indices_length; ++i) {
1152 unsigned char* vec = (
unsigned char*)dataset_[indices[i]];
1153 unsigned int* h = histos[belongs_to[i]];
1154 for (
size_t k=0, l=0;
k<histos_veclen;
k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
1155 h[
k + ((vec[l]) & 0x03)]++;
1156 h[
k + 4 + ((vec[l]>>2) & 0x03)]++;
1157 h[
k + 8 + ((vec[l]>>4) & 0x03)]++;
1158 h[
k +12 + ((vec[l]>>6) & 0x03)]++;
1161 for (
int i=0; i<branching; ++i) {
1162 unsigned int* h = histos[i];
1163 unsigned char* charCenter = (
unsigned char*)centers[i];
1164 for (
size_t k=0, l=0;
k<histos_veclen;
k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
1165 charCenter[l]= (h[
k] > h[
k+1] ? h[
k+2] > h[
k+3] ? h[
k] > h[
k+2] ? 0x00 : 0x10
1166 : h[
k] > h[
k+3] ? 0x00 : 0x11
1167 : h[
k+2] > h[
k+3] ? h[
k+1] > h[
k+2] ? 0x01 : 0x10
1168 : h[
k+1] > h[
k+3] ? 0x01 : 0x11)
1169 | (h[
k+4]>h[
k+5] ? h[
k+6] > h[
k+7] ? h[
k+4] > h[
k+6] ? 0x00 : 0x1000
1170 : h[
k+4] > h[
k+7] ? 0x00 : 0x1100
1171 : h[
k+6] > h[
k+7] ? h[
k+5] > h[
k+6] ? 0x0100 : 0x1000
1172 : h[
k+5] > h[
k+7] ? 0x0100 : 0x1100)
1173 | (h[
k+8]>h[
k+9] ? h[
k+10]>h[
k+11] ? h[
k+8] >h[
k+10] ? 0x00 : 0x100000
1174 : h[
k+8] >h[
k+11] ? 0x00 : 0x110000
1175 : h[
k+10]>h[
k+11] ? h[
k+9] >h[
k+10] ? 0x010000 : 0x100000
1176 : h[
k+9] >h[
k+11] ? 0x010000 : 0x110000)
1177 | (h[
k+12]>h[
k+13] ? h[
k+14]>h[
k+15] ? h[
k+12] >h[
k+14] ? 0x00 : 0x10000000
1178 : h[
k+12] >h[
k+15] ? 0x00 : 0x11000000
1179 : h[
k+14]>h[
k+15] ? h[
k+13] >h[
k+14] ? 0x01000000 : 0x10000000
1180 : h[
k+13] >h[
k+15] ? 0x01000000 : 0x11000000);
1188 KMeansDistanceComputer<ElementType**> invoker(
1189 distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
1192 for (
int i=0; i < indices_length; ++i) {
1193 DistanceType dist(dists[i]);
1194 int new_centroid(new_centroids[i]);
1195 if (dist > radiuses[new_centroid]) {
1196 radiuses[new_centroid] = dist;
1198 if (new_centroid != belongs_to[i]) {
1199 count[belongs_to[i]]--;
1200 count[new_centroid]++;
1201 belongs_to[i] = new_centroid;
1206 for (
int i=0; i<branching; ++i) {
1210 int j = (i+1)%branching;
1211 while (
count[j]<=1) {
1212 j = (j+1)%branching;
1215 for (
int k=0;
k<indices_length; ++
k) {
1216 if (belongs_to[
k]==j) {
1218 if ( distance_(dataset_[indices[
k]], centers[j], veclen_) == radiuses[j] ) {
1233 void computeSubClustering(KMeansNodePtr node,
int* indices,
int indices_length,
1234 int branching,
int level, CentersType** centers,
1238 node->childs = pool_.allocate<KMeansNodePtr>(branching);
1241 for (
int c=0; c<branching; ++c) {
1244 DistanceType variance = 0;
1245 DistanceType mean_radius =0;
1246 for (
int i=0; i<indices_length; ++i) {
1247 if (belongs_to[i]==c) {
1248 DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
1250 mean_radius +=
static_cast<DistanceType
>(
sqrt(d) );
1258 variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
1260 node->childs[c] = pool_.allocate<KMeansNode>();
1261 std::memset(node->childs[c], 0,
sizeof(KMeansNode));
1262 node->childs[c]->radius = radiuses[c];
1263 node->childs[c]->pivot = centers[c];
1264 node->childs[c]->variance = variance;
1265 node->childs[c]->mean_radius = mean_radius;
1266 computeClustering(node->childs[c],indices+
start,
end-
start, branching, level+1);
1272 void computeAnyBitfieldSubClustering(KMeansNodePtr node,
int* indices,
int indices_length,
1273 int branching,
int level, CentersType** centers,
1277 node->childs = pool_.allocate<KMeansNodePtr>(branching);
1280 for (
int c=0; c<branching; ++c) {
1283 unsigned long long variance = 0ull;
1284 DistanceType mean_radius =0;
1285 for (
int i=0; i<indices_length; ++i) {
1286 if (belongs_to[i]==c) {
1287 DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
1288 variance +=
static_cast<unsigned long long>( ensureSquareDistance<Distance>(d) );
1289 mean_radius += ensureSimpleDistance<Distance>(d);
1295 mean_radius =
static_cast<DistanceType
>(
1296 0.5f +
static_cast<float>(mean_radius) /
static_cast<float>(s));
1297 variance =
static_cast<unsigned long long>(
1298 0.5 +
static_cast<double>(variance) /
static_cast<double>(s));
1299 variance -=
static_cast<unsigned long long>(
1300 ensureSquareDistance<Distance>(
1301 distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));
1303 node->childs[c] = pool_.allocate<KMeansNode>();
1304 std::memset(node->childs[c], 0,
sizeof(KMeansNode));
1305 node->childs[c]->radius = radiuses[c];
1306 node->childs[c]->pivot = centers[c];
1307 node->childs[c]->variance =
static_cast<DistanceType
>(variance);
1308 node->childs[c]->mean_radius = mean_radius;
1309 computeClustering(node->childs[c],indices+
start,
end-
start, branching, level+1);
1315 template<
typename DistType>
1316 void refineAndSplitClustering(
1317 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1319 int* belongs_to,
int*
count,
const DistType* identifier)
1322 refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1324 computeSubClustering(node, indices, indices_length, branching,
1325 level, centers, radiuses, belongs_to,
count);
1373 void refineAndSplitClustering(
1374 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1379 refineBitfieldClustering(
1380 indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1382 computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1383 level, centers, radiuses, belongs_to,
count);
1387 void refineAndSplitClustering(
1388 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1390 int* belongs_to,
int*
count,
const cvflann::Hamming<unsigned char>* identifier)
1393 refineBitfieldClustering(
1394 indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1396 computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1397 level, centers, radiuses, belongs_to,
count);
1401 void refineAndSplitClustering(
1402 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1404 int* belongs_to,
int*
count,
const cvflann::Hamming2<unsigned char>* identifier)
1407 refineBitfieldClustering(
1408 indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1410 computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1411 level, centers, radiuses, belongs_to,
count);
1415 void refineAndSplitClustering(
1416 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1418 int* belongs_to,
int*
count,
const cvflann::DNAmmingLUT* identifier)
1421 refineDnaClustering(
1422 indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1424 computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1425 level, centers, radiuses, belongs_to,
count);
1429 void refineAndSplitClustering(
1430 KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
1432 int* belongs_to,
int*
count,
const cvflann::DNAmming2<unsigned char>* identifier)
1435 refineDnaClustering(
1436 indices, indices_length, branching, centers, radiuses, belongs_to,
count);
1438 computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1439 level, centers, radiuses, belongs_to,
count);
1454 void computeClustering(KMeansNodePtr node,
int* indices,
int indices_length,
int branching,
int level)
1456 node->size = indices_length;
1457 node->level = level;
1459 if (indices_length < branching) {
1460 node->indices = indices;
1461 std::sort(node->indices,node->indices+indices_length);
1462 node->childs = NULL;
1467 int* centers_idx = centers_idx_buf.data();
1469 (this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
1471 if (centers_length<branching) {
1472 node->indices = indices;
1473 std::sort(node->indices,node->indices+indices_length);
1474 node->childs = NULL;
1481 int*
count = count_buf.data();
1482 for (
int i=0; i<branching; ++i) {
1489 int* belongs_to = belongs_to_buf.data();
1490 for (
int i=0; i<indices_length; ++i) {
1491 DistanceType sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[0]], veclen_);
1493 for (
int j=1; j<branching; ++j) {
1494 DistanceType new_sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[j]], veclen_);
1495 if (sq_dist>new_sq_dist) {
1497 sq_dist = new_sq_dist;
1500 if (sq_dist>radiuses[belongs_to[i]]) {
1501 radiuses[belongs_to[i]] = sq_dist;
1503 count[belongs_to[i]]++;
1506 CentersType** centers =
new CentersType*[branching];
1508 Distance* dummy = NULL;
1509 refineAndSplitClustering(node, indices, indices_length, branching, level,
1510 centers, radiuses, belongs_to,
count, dummy);
1529 void findNN(KMeansNodePtr node, ResultSet<DistanceType>&
result,
const ElementType* vec,
int& checks,
int maxChecks,
1530 const cv::Ptr<Heap<BranchSt>>& heap)
1534 DistanceType bsq = distance_(vec, node->pivot, veclen_);
1535 DistanceType rsq = node->radius;
1536 DistanceType wsq =
result.worstDist();
1538 if (isSquareDistance<Distance>())
1540 DistanceType val = bsq-rsq-wsq;
1541 if ((val>0) && (val*val > 4*rsq*wsq))
1551 if (node->childs==NULL) {
1552 if ((checks>=maxChecks) &&
result.full()) {
1555 checks += node->size;
1556 for (
int i=0; i<node->size; ++i) {
1557 int index = node->indices[i];
1558 DistanceType dist = distance_(dataset_[
index], vec, veclen_);
1563 DistanceType* domain_distances =
new DistanceType[branching_];
1564 int closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
1565 delete[] domain_distances;
1566 findNN(node->childs[closest_center],
result,vec, checks, maxChecks, heap);
1578 int exploreNodeBranches(KMeansNodePtr node,
const ElementType* q, DistanceType* domain_distances,
const cv::Ptr<Heap<BranchSt>>& heap)
1582 domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_);
1583 for (
int i=1; i<branching_; ++i) {
1584 domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_);
1585 if (domain_distances[i]<domain_distances[best_index]) {
1591 for (
int i=0; i<branching_; ++i) {
1592 if (i != best_index) {
1593 domain_distances[i] -= cvflann::round<DistanceType>(
1594 cb_index_*node->childs[i]->variance );
1600 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
1611 void findExactNN(KMeansNodePtr node, ResultSet<DistanceType>&
result,
const ElementType* vec)
1615 DistanceType bsq = distance_(vec, node->pivot, veclen_);
1616 DistanceType rsq = node->radius;
1617 DistanceType wsq =
result.worstDist();
1619 if (isSquareDistance<Distance>())
1621 DistanceType val = bsq-rsq-wsq;
1622 if ((val>0) && (val*val > 4*rsq*wsq))
1633 if (node->childs==NULL) {
1634 for (
int i=0; i<node->size; ++i) {
1635 int index = node->indices[i];
1636 DistanceType dist = distance_(dataset_[
index], vec, veclen_);
1641 int* sort_indices =
new int[branching_];
1643 getCenterOrdering(node, vec, sort_indices);
1645 for (
int i=0; i<branching_; ++i) {
1646 findExactNN(node->childs[sort_indices[i]],
result,vec);
1649 delete[] sort_indices;
1659 void getCenterOrdering(KMeansNodePtr node,
const ElementType* q,
int* sort_indices)
1661 DistanceType* domain_distances =
new DistanceType[branching_];
1662 for (
int i=0; i<branching_; ++i) {
1663 DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_);
1666 while (domain_distances[j]<dist && j<i)
1668 for (
int k=i;
k>j; --
k) {
1669 domain_distances[
k] = domain_distances[
k-1];
1670 sort_indices[
k] = sort_indices[
k-1];
1672 domain_distances[j] = dist;
1673 sort_indices[j] = i;
1675 delete[] domain_distances;
1683 DistanceType getDistanceToBorder(DistanceType* p, DistanceType* c, DistanceType* q)
1685 DistanceType
sum = 0;
1686 DistanceType sum2 = 0;
1688 for (
int i=0; i<veclen_; ++i) {
1689 DistanceType t = c[i]-p[i];
1690 sum += t*(q[i]-(c[i]+p[i])/2);
1707 int getMinVarianceClusters(KMeansNodePtr root, KMeansNodePtr* clusters,
int clusters_length, DistanceType& varianceValue)
1709 int clusterCount = 1;
1712 DistanceType meanVariance = root->variance*root->size;
1714 while (clusterCount<clusters_length) {
1716 int splitIndex = -1;
1718 for (
int i=0; i<clusterCount; ++i) {
1719 if (clusters[i]->childs != NULL) {
1721 DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size;
1723 for (
int j=0; j<branching_; ++j) {
1724 variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
1726 if (variance<minVariance) {
1727 minVariance = variance;
1733 if (splitIndex==-1)
break;
1734 if ( (branching_+clusterCount-1) > clusters_length)
break;
1736 meanVariance = minVariance;
1739 KMeansNodePtr toSplit = clusters[splitIndex];
1740 clusters[splitIndex] = toSplit->childs[0];
1741 for (
int i=1; i<branching_; ++i) {
1742 clusters[clusterCount++] = toSplit->childs[i];
1746 varianceValue = meanVariance/root->size;
1747 return clusterCount;
1761 flann_centers_init_t centers_init_;
1774 const Matrix<ElementType> dataset_;
1777 IndexParams index_params_;
1792 KMeansNodePtr* root_;
1807 PooledAllocator pool_;
Automatically Allocated Buffer Class.
Definition: utility.hpp:102
Base class for parallel data processors.
Definition: utility.hpp:577
Template class specifying a continuous subsequence (slice) of a sequence.
Definition: types.hpp:623
double double end
Definition: core_c.h:1381
double start
Definition: core_c.h:1381
int index
Definition: core_c.h:634
CvSize size
Definition: core_c.h:112
int count
Definition: core_c.h:1413
CvArr * mean
Definition: core_c.h:1419
const CvArr const CvArr CvArr * result
Definition: core_c.h:1423
CV_EXPORTS void parallel_for_(const Range &range, const ParallelLoopBody &body, double nstripes=-1.)
Parallel data processor.
#define CV_OVERRIDE
Definition: cvdef.h:792
Hamming HammingLUT
Definition: base.hpp:393
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
#define CV_DbgAssert(expr)
Definition: base.hpp:375
Quat< S > sqrt(const Quat< S > &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
CvRect r
Definition: imgproc_c.h:984
CvArr * sum
Definition: imgproc_c.h:61
CvPoint2D32f float * radius
Definition: imgproc_c.h:534
CvArr CvSize range
Definition: imgproc_c.h:781
CvArr CvPoint2D32f center
Definition: imgproc_c.h:270
CV_EXPORTS OutputArray int double double InputArray OutputArray int int bool double k
Definition: imgproc.hpp:2133
@ StsError
unknown /unspecified error
Definition: base.hpp:71
@ StsBadArg
function arg/param is bad
Definition: base.hpp:74
CV_EXPORTS int getThreadID()
Definition: cvstd_wrapper.hpp:74