30 #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
31 #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
38 #include "ground_truth.h"
39 #include "index_testing.h"
41 #include "kdtree_index.h"
42 #include "kdtree_single_index.h"
43 #include "kmeans_index.h"
44 #include "composite_index.h"
45 #include "linear_index.h"
51 template<
typename Distance>
52 NNIndex<Distance>* create_index_by_type(
const Matrix<typename Distance::ElementType>& dataset,
const IndexParams& params,
const Distance& distance);
55 struct AutotunedIndexParams :
public IndexParams
57 AutotunedIndexParams(
float target_precision = 0.8,
float build_weight = 0.01,
float memory_weight = 0,
float sample_fraction = 0.1)
59 (*this)[
"algorithm"] = FLANN_INDEX_AUTOTUNED;
61 (*this)[
"target_precision"] = target_precision;
63 (*this)[
"build_weight"] = build_weight;
65 (*this)[
"memory_weight"] = memory_weight;
67 (*this)[
"sample_fraction"] = sample_fraction;
72 template <
typename Distance>
73 class AutotunedIndex :
public NNIndex<Distance>
76 typedef typename Distance::ElementType ElementType;
77 typedef typename Distance::ResultType DistanceType;
79 AutotunedIndex(
const Matrix<ElementType>& inputData,
const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
80 dataset_(inputData), distance_(d)
82 target_precision_ = get_param(params,
"target_precision",0.8f);
83 build_weight_ = get_param(params,
"build_weight", 0.01f);
84 memory_weight_ = get_param(params,
"memory_weight", 0.0f);
85 sample_fraction_ = get_param(params,
"sample_fraction", 0.1f);
90 AutotunedIndex(
const AutotunedIndex&);
91 AutotunedIndex& operator=(
const AutotunedIndex&);
93 virtual ~AutotunedIndex()
95 if (bestIndex_ != NULL) {
107 bestParams_ = estimateBuildParams();
108 print_params(bestParams_, stream);
109 Logger::info(
"----------------------------------------------------\n");
110 Logger::info(
"Autotuned parameters:\n");
111 Logger::info(
"%s", stream.
str().c_str());
112 Logger::info(
"----------------------------------------------------\n");
114 bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
115 bestIndex_->buildIndex();
116 speedup_ = estimateSearchParams(bestSearchParams_);
118 print_params(bestSearchParams_, stream);
119 Logger::info(
"----------------------------------------------------\n");
120 Logger::info(
"Search parameters:\n");
121 Logger::info(
"%s", stream.
str().c_str());
122 Logger::info(
"----------------------------------------------------\n");
130 save_value(stream, (
int)bestIndex_->getType());
131 bestIndex_->saveIndex(stream);
132 save_value(stream, get_param<int>(bestSearchParams_,
"checks"));
142 load_value(stream, index_type);
144 params[
"algorithm"] = (flann_algorithm_t)index_type;
145 bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
146 bestIndex_->loadIndex(stream);
148 load_value(stream, checks);
149 bestSearchParams_[
"checks"] = checks;
155 virtual void findNeighbors(ResultSet<DistanceType>&
result,
const ElementType* vec,
const SearchParams& searchParams)
CV_OVERRIDE
157 int checks = get_param<int>(searchParams,
"checks",FLANN_CHECKS_AUTOTUNED);
158 if (checks == FLANN_CHECKS_AUTOTUNED) {
159 bestIndex_->findNeighbors(
result, vec, bestSearchParams_);
162 bestIndex_->findNeighbors(
result, vec, searchParams);
169 return bestIndex_->getParameters();
172 SearchParams getSearchParameters()
const
174 return bestSearchParams_;
177 float getSpeedup()
const
188 return bestIndex_->size();
196 return bestIndex_->veclen();
204 return bestIndex_->usedMemory();
210 virtual flann_algorithm_t getType() const
CV_OVERRIDE
212 return FLANN_INDEX_AUTOTUNED;
219 float searchTimeCost;
226 void evaluate_kmeans(CostData&
cost)
232 Logger::info(
"KMeansTree using params: max_iterations=%d, branching=%d\n",
233 get_param<int>(
cost.params,
"iterations"),
234 get_param<int>(
cost.params,
"branching"));
235 KMeansIndex<Distance>
kmeans(sampledDataset_,
cost.params, distance_);
240 float buildTime = (float)t.value;
243 float searchTime = test_index_precision(
kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
245 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols *
sizeof(
float));
246 cost.memoryCost = (
kmeans.usedMemory() + datasetMemory) / datasetMemory;
247 cost.searchTimeCost = searchTime;
248 cost.buildTimeCost = buildTime;
249 Logger::info(
"KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
253 void evaluate_kdtree(CostData&
cost)
259 Logger::info(
"KDTree using params: trees=%d\n", get_param<int>(
cost.params,
"trees"));
260 KDTreeIndex<Distance> kdtree(sampledDataset_,
cost.params, distance_);
265 float buildTime = (float)t.value;
268 float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
270 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols *
sizeof(
float));
271 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
272 cost.searchTimeCost = searchTime;
273 cost.buildTimeCost = buildTime;
274 Logger::info(
"KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
328 Logger::info(
"KMEANS, Step 1: Exploring parameter space\n");
331 int maxIterations[] = { 1, 5, 10, 15 };
332 int branchingFactors[] = { 16, 32, 64, 128, 256 };
334 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
335 costs.
reserve(costs.
size() + kmeansParamSpaceSize);
338 for (
size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
339 for (
size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
341 cost.params[
"algorithm"] = FLANN_INDEX_KMEANS;
342 cost.params[
"centers_init"] = FLANN_CENTERS_RANDOM;
343 cost.params[
"iterations"] = maxIterations[i];
344 cost.params[
"branching"] = branchingFactors[j];
346 evaluate_kmeans(
cost);
376 Logger::info(
"KD-TREE, Step 1: Exploring parameter space\n");
379 int testTrees[] = { 1, 4, 8, 16, 32 };
382 for (
size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
384 cost.params[
"algorithm"] = FLANN_INDEX_KDTREE;
385 cost.params[
"trees"] = testTrees[i];
387 evaluate_kdtree(
cost);
416 IndexParams estimateBuildParams()
420 int sampleSize = int(sample_fraction_ * dataset_.rows);
421 int testSampleSize =
std::min(sampleSize / 10, 1000);
423 Logger::info(
"Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
427 if (testSampleSize < 10) {
428 Logger::info(
"Choosing linear, dataset too small\n");
429 return LinearIndexParams();
433 sampledDataset_ = random_sample(dataset_, sampleSize);
435 testDataset_ = random_sample(sampledDataset_, testSampleSize,
true);
438 Logger::info(
"Computing ground truth... \n");
439 gt_matches_ = Matrix<int>(
new int[testDataset_.rows], testDataset_.rows, 1);
442 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
445 CostData linear_cost;
446 linear_cost.searchTimeCost = (float)t.value;
447 linear_cost.buildTimeCost = 0;
448 linear_cost.memoryCost = 0;
449 linear_cost.params[
"algorithm"] = FLANN_INDEX_LINEAR;
454 Logger::info(
"Autotuning parameters...\n");
456 optimizeKMeans(costs);
457 optimizeKDTree(costs);
459 float bestTimeCost = costs[0].searchTimeCost;
460 for (
size_t i = 0; i < costs.
size(); ++i) {
461 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
462 if (timeCost < bestTimeCost) {
463 bestTimeCost = timeCost;
467 float bestCost = costs[0].searchTimeCost / bestTimeCost;
468 IndexParams bestParams = costs[0].params;
469 if (bestTimeCost > 0) {
470 for (
size_t i = 0; i < costs.
size(); ++i) {
471 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
472 memory_weight_ * costs[i].memoryCost;
473 if (crtCost < bestCost) {
475 bestParams = costs[i].params;
480 delete[] gt_matches_.
data;
481 delete[] testDataset_.data;
482 delete[] sampledDataset_.data;
494 float estimateSearchParams(SearchParams& searchParams)
497 const size_t SAMPLE_COUNT = 1000;
499 CV_Assert(bestIndex_ != NULL &&
"Requires a valid index");
503 int samples = (int)
std::min(dataset_.rows / 10, SAMPLE_COUNT);
505 Matrix<ElementType> testDataset = random_sample(dataset_, samples);
507 Logger::info(
"Computing ground truth\n");
510 Matrix<int> gt_matches(
new int[testDataset.rows], testDataset.rows, 1);
513 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
515 float linear = (float)t.value;
518 Logger::info(
"Estimating number of checks\n");
522 if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
523 Logger::info(
"KMeans algorithm, estimating cluster border factor\n");
524 KMeansIndex<Distance>*
kmeans = (KMeansIndex<Distance>*)bestIndex_;
525 float bestSearchTime = -1;
526 float best_cb_index = -1;
527 int best_checks = -1;
528 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
529 kmeans->set_cb_index(cb_index);
530 searchTime = test_index_precision(*
kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
531 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
532 bestSearchTime = searchTime;
533 best_cb_index = cb_index;
534 best_checks = checks;
537 searchTime = bestSearchTime;
538 cb_index = best_cb_index;
539 checks = best_checks;
541 kmeans->set_cb_index(best_cb_index);
542 Logger::info(
"Optimum cb_index: %g\n", cb_index);
543 bestParams_[
"cb_index"] = cb_index;
546 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
549 Logger::info(
"Required number of checks: %d \n", checks);
550 searchParams[
"checks"] = checks;
552 speedup = linear / searchTime;
554 delete[] gt_matches.data;
555 delete[] testDataset.data;
562 NNIndex<Distance>* bestIndex_;
564 IndexParams bestParams_;
565 SearchParams bestSearchParams_;
567 Matrix<ElementType> sampledDataset_;
568 Matrix<ElementType> testDataset_;
569 Matrix<int> gt_matches_;
576 const Matrix<ElementType> dataset_;
581 float target_precision_;
583 float memory_weight_;
584 float sample_fraction_;
CvSize size
Definition: core_c.h:112
const CvArr const CvArr CvArr * result
Definition: core_c.h:1423
CV_EXPORTS_W double kmeans(InputArray data, int K, InputOutputArray bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray centers=noArray())
Finds centers of clusters and groups input samples around the clusters.
#define CV_OVERRIDE
Definition: cvdef.h:792
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
InputArray int InputArray cost
Definition: imgproc.hpp:3387