// autotuned_index.h — OpenCV/FLANN header as bundled with EstervQrCode 2.0.0
// (a library for QR code manipulation). Doxygen page furniture ("Loading...",
// "Searching...", "No Matches") from the documentation export removed.
1/***********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6 *
7 * THE BSD LICENSE
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
30#ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
31#define OPENCV_FLANN_AUTOTUNED_INDEX_H_
32
34
35#include <sstream>
36
37#include "nn_index.h"
38#include "ground_truth.h"
39#include "index_testing.h"
40#include "sampling.h"
41#include "kdtree_index.h"
42#include "kdtree_single_index.h"
43#include "kmeans_index.h"
44#include "composite_index.h"
45#include "linear_index.h"
46#include "logger.h"
47
48namespace cvflann
49{
50
// Factory forward declaration: builds the concrete NNIndex subclass selected
// by params["algorithm"] (linear, kd-tree, k-means, composite, ...).
// NOTE(review): the definition is not visible from this file — presumably it
// lives in FLANN's all_indices.h; confirm against the including project.
template<typename Distance>
NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
53
54
55struct AutotunedIndexParams : public IndexParams
56{
57 AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
58 {
59 (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
60 // precision desired (used for autotuning, -1 otherwise)
61 (*this)["target_precision"] = target_precision;
62 // build tree time weighting factor
63 (*this)["build_weight"] = build_weight;
64 // index memory weighting factor
65 (*this)["memory_weight"] = memory_weight;
66 // what fraction of the dataset to use for autotuning
67 (*this)["sample_fraction"] = sample_fraction;
68 }
69};
70
71
72template <typename Distance>
73class AutotunedIndex : public NNIndex<Distance>
74{
75public:
76 typedef typename Distance::ElementType ElementType;
77 typedef typename Distance::ResultType DistanceType;
78
79 AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
80 dataset_(inputData), distance_(d)
81 {
82 target_precision_ = get_param(params, "target_precision",0.8f);
83 build_weight_ = get_param(params,"build_weight", 0.01f);
84 memory_weight_ = get_param(params, "memory_weight", 0.0f);
85 sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
86 bestIndex_ = NULL;
87 speedup_ = 0;
88 }
89
90 AutotunedIndex(const AutotunedIndex&);
91 AutotunedIndex& operator=(const AutotunedIndex&);
92
93 virtual ~AutotunedIndex()
94 {
95 if (bestIndex_ != NULL) {
96 delete bestIndex_;
97 bestIndex_ = NULL;
98 }
99 }
100
104 virtual void buildIndex() CV_OVERRIDE
105 {
106 std::ostringstream stream;
107 bestParams_ = estimateBuildParams();
108 print_params(bestParams_, stream);
109 Logger::info("----------------------------------------------------\n");
110 Logger::info("Autotuned parameters:\n");
111 Logger::info("%s", stream.str().c_str());
112 Logger::info("----------------------------------------------------\n");
113
114 bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
115 bestIndex_->buildIndex();
116 speedup_ = estimateSearchParams(bestSearchParams_);
117 stream.str(std::string());
118 print_params(bestSearchParams_, stream);
119 Logger::info("----------------------------------------------------\n");
120 Logger::info("Search parameters:\n");
121 Logger::info("%s", stream.str().c_str());
122 Logger::info("----------------------------------------------------\n");
123 }
124
128 virtual void saveIndex(FILE* stream) CV_OVERRIDE
129 {
130 save_value(stream, (int)bestIndex_->getType());
131 bestIndex_->saveIndex(stream);
132 save_value(stream, get_param<int>(bestSearchParams_, "checks"));
133 }
134
138 virtual void loadIndex(FILE* stream) CV_OVERRIDE
139 {
140 int index_type;
141
142 load_value(stream, index_type);
143 IndexParams params;
144 params["algorithm"] = (flann_algorithm_t)index_type;
145 bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
146 bestIndex_->loadIndex(stream);
147 int checks;
148 load_value(stream, checks);
149 bestSearchParams_["checks"] = checks;
150 }
151
155 virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
156 {
157 int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
158 if (checks == FLANN_CHECKS_AUTOTUNED) {
159 bestIndex_->findNeighbors(result, vec, bestSearchParams_);
160 }
161 else {
162 bestIndex_->findNeighbors(result, vec, searchParams);
163 }
164 }
165
166
167 IndexParams getParameters() const CV_OVERRIDE
168 {
169 return bestIndex_->getParameters();
170 }
171
172 SearchParams getSearchParameters() const
173 {
174 return bestSearchParams_;
175 }
176
177 float getSpeedup() const
178 {
179 return speedup_;
180 }
181
182
186 virtual size_t size() const CV_OVERRIDE
187 {
188 return bestIndex_->size();
189 }
190
194 virtual size_t veclen() const CV_OVERRIDE
195 {
196 return bestIndex_->veclen();
197 }
198
202 virtual int usedMemory() const CV_OVERRIDE
203 {
204 return bestIndex_->usedMemory();
205 }
206
210 virtual flann_algorithm_t getType() const CV_OVERRIDE
211 {
212 return FLANN_INDEX_AUTOTUNED;
213 }
214
215private:
216
217 struct CostData
218 {
219 float searchTimeCost;
220 float buildTimeCost;
221 float memoryCost;
222 float totalCost;
223 IndexParams params;
224 };
225
226 void evaluate_kmeans(CostData& cost)
227 {
228 StartStopTimer t;
229 int checks;
230 const int nn = 1;
231
232 Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
233 get_param<int>(cost.params,"iterations"),
234 get_param<int>(cost.params,"branching"));
235 KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
236 // measure index build time
237 t.start();
238 kmeans.buildIndex();
239 t.stop();
240 float buildTime = (float)t.value;
241
242 // measure search time
243 float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
244
245 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
246 cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
247 cost.searchTimeCost = searchTime;
248 cost.buildTimeCost = buildTime;
249 Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
250 }
251
252
253 void evaluate_kdtree(CostData& cost)
254 {
255 StartStopTimer t;
256 int checks;
257 const int nn = 1;
258
259 Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
260 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
261
262 t.start();
263 kdtree.buildIndex();
264 t.stop();
265 float buildTime = (float)t.value;
266
267 //measure search time
268 float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
269
270 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
271 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
272 cost.searchTimeCost = searchTime;
273 cost.buildTimeCost = buildTime;
274 Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
275 }
276
277
278 // struct KMeansSimpleDownhillFunctor {
279 //
280 // Autotune& autotuner;
281 // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
282 //
283 // float operator()(int* params) {
284 //
285 // float maxFloat = numeric_limits<float>::max();
286 //
287 // if (params[0]<2) return maxFloat;
288 // if (params[1]<0) return maxFloat;
289 //
290 // CostData c;
291 // c.params["algorithm"] = KMEANS;
292 // c.params["centers-init"] = CENTERS_RANDOM;
293 // c.params["branching"] = params[0];
294 // c.params["max-iterations"] = params[1];
295 //
296 // autotuner.evaluate_kmeans(c);
297 //
298 // return c.timeCost;
299 //
300 // }
301 // };
302 //
303 // struct KDTreeSimpleDownhillFunctor {
304 //
305 // Autotune& autotuner;
306 // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
307 //
308 // float operator()(int* params) {
309 // float maxFloat = numeric_limits<float>::max();
310 //
311 // if (params[0]<1) return maxFloat;
312 //
313 // CostData c;
314 // c.params["algorithm"] = KDTREE;
315 // c.params["trees"] = params[0];
316 //
317 // autotuner.evaluate_kdtree(c);
318 //
319 // return c.timeCost;
320 //
321 // }
322 // };
323
324
325
326 void optimizeKMeans(std::vector<CostData>& costs)
327 {
328 Logger::info("KMEANS, Step 1: Exploring parameter space\n");
329
330 // explore kmeans parameters space using combinations of the parameters below
331 int maxIterations[] = { 1, 5, 10, 15 };
332 int branchingFactors[] = { 16, 32, 64, 128, 256 };
333
334 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
335 costs.reserve(costs.size() + kmeansParamSpaceSize);
336
337 // evaluate kmeans for all parameter combinations
338 for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
339 for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
340 CostData cost;
341 cost.params["algorithm"] = FLANN_INDEX_KMEANS;
342 cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
343 cost.params["iterations"] = maxIterations[i];
344 cost.params["branching"] = branchingFactors[j];
345
346 evaluate_kmeans(cost);
347 costs.push_back(cost);
348 }
349 }
350
351 // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
352 //
353 // const int n = 2;
354 // // choose initial simplex points as the best parameters so far
355 // int kmeansNMPoints[n*(n+1)];
356 // float kmeansVals[n+1];
357 // for (int i=0;i<n+1;++i) {
358 // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
359 // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
360 // kmeansVals[i] = kmeansCosts[i].timeCost;
361 // }
362 // KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
363 // // run optimization
364 // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
365 // // store results
366 // for (int i=0;i<n+1;++i) {
367 // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
368 // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
369 // kmeansCosts[i].timeCost = kmeansVals[i];
370 // }
371 }
372
373
374 void optimizeKDTree(std::vector<CostData>& costs)
375 {
376 Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
377
378 // explore kd-tree parameters space using the parameters below
379 int testTrees[] = { 1, 4, 8, 16, 32 };
380
381 // evaluate kdtree for all parameter combinations
382 for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
383 CostData cost;
384 cost.params["algorithm"] = FLANN_INDEX_KDTREE;
385 cost.params["trees"] = testTrees[i];
386
387 evaluate_kdtree(cost);
388 costs.push_back(cost);
389 }
390
391 // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
392 //
393 // const int n = 1;
394 // // choose initial simplex points as the best parameters so far
395 // int kdtreeNMPoints[n*(n+1)];
396 // float kdtreeVals[n+1];
397 // for (int i=0;i<n+1;++i) {
398 // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
399 // kdtreeVals[i] = kdtreeCosts[i].timeCost;
400 // }
401 // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
402 // // run optimization
403 // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
404 // // store results
405 // for (int i=0;i<n+1;++i) {
406 // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
407 // kdtreeCosts[i].timeCost = kdtreeVals[i];
408 // }
409 }
410
416 IndexParams estimateBuildParams()
417 {
419
420 int sampleSize = int(sample_fraction_ * dataset_.rows);
421 int testSampleSize = std::min(sampleSize / 10, 1000);
422
423 Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
424
425 // For a very small dataset, it makes no sense to build any fancy index, just
426 // use linear search
427 if (testSampleSize < 10) {
428 Logger::info("Choosing linear, dataset too small\n");
429 return LinearIndexParams();
430 }
431
432 // We use a fraction of the original dataset to speedup the autotune algorithm
433 sampledDataset_ = random_sample(dataset_, sampleSize);
434 // We use a cross-validation approach, first we sample a testset from the dataset
435 testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
436
437 // We compute the ground truth using linear search
438 Logger::info("Computing ground truth... \n");
439 gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
440 StartStopTimer t;
441 t.start();
442 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
443 t.stop();
444
445 CostData linear_cost;
446 linear_cost.searchTimeCost = (float)t.value;
447 linear_cost.buildTimeCost = 0;
448 linear_cost.memoryCost = 0;
449 linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
450
451 costs.push_back(linear_cost);
452
453 // Start parameter autotune process
454 Logger::info("Autotuning parameters...\n");
455
456 optimizeKMeans(costs);
457 optimizeKDTree(costs);
458
459 float bestTimeCost = costs[0].searchTimeCost;
460 for (size_t i = 0; i < costs.size(); ++i) {
461 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
462 if (timeCost < bestTimeCost) {
463 bestTimeCost = timeCost;
464 }
465 }
466
467 float bestCost = costs[0].searchTimeCost / bestTimeCost;
468 IndexParams bestParams = costs[0].params;
469 if (bestTimeCost > 0) {
470 for (size_t i = 0; i < costs.size(); ++i) {
471 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
472 memory_weight_ * costs[i].memoryCost;
473 if (crtCost < bestCost) {
474 bestCost = crtCost;
475 bestParams = costs[i].params;
476 }
477 }
478 }
479
480 delete[] gt_matches_.data;
481 delete[] testDataset_.data;
482 delete[] sampledDataset_.data;
483
484 return bestParams;
485 }
486
487
488
494 float estimateSearchParams(SearchParams& searchParams)
495 {
496 const int nn = 1;
497 const size_t SAMPLE_COUNT = 1000;
498
499 CV_Assert(bestIndex_ != NULL && "Requires a valid index"); // must have a valid index
500
501 float speedup = 0;
502
503 int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
504 if (samples > 0) {
505 Matrix<ElementType> testDataset = random_sample(dataset_, samples);
506
507 Logger::info("Computing ground truth\n");
508
509 // we need to compute the ground truth first
510 Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
511 StartStopTimer t;
512 t.start();
513 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
514 t.stop();
515 float linear = (float)t.value;
516
517 int checks;
518 Logger::info("Estimating number of checks\n");
519
520 float searchTime;
521 float cb_index;
522 if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
523 Logger::info("KMeans algorithm, estimating cluster border factor\n");
524 KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
525 float bestSearchTime = -1;
526 float best_cb_index = -1;
527 int best_checks = -1;
528 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
529 kmeans->set_cb_index(cb_index);
530 searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
531 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
532 bestSearchTime = searchTime;
533 best_cb_index = cb_index;
534 best_checks = checks;
535 }
536 }
537 searchTime = bestSearchTime;
538 cb_index = best_cb_index;
539 checks = best_checks;
540
541 kmeans->set_cb_index(best_cb_index);
542 Logger::info("Optimum cb_index: %g\n", cb_index);
543 bestParams_["cb_index"] = cb_index;
544 }
545 else {
546 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
547 }
548
549 Logger::info("Required number of checks: %d \n", checks);
550 searchParams["checks"] = checks;
551
552 speedup = linear / searchTime;
553
554 delete[] gt_matches.data;
555 delete[] testDataset.data;
556 }
557
558 return speedup;
559 }
560
561private:
562 NNIndex<Distance>* bestIndex_;
563
564 IndexParams bestParams_;
565 SearchParams bestSearchParams_;
566
567 Matrix<ElementType> sampledDataset_;
568 Matrix<ElementType> testDataset_;
569 Matrix<int> gt_matches_;
570
571 float speedup_;
572
576 const Matrix<ElementType> dataset_;
577
581 float target_precision_;
582 float build_weight_;
583 float memory_weight_;
584 float sample_fraction_;
585
586 Distance distance_;
587
588
589};
590}
591
593
594#endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */
// NOTE(review): the text that followed the include guard here was Doxygen
// tooltip residue from the documentation page this file was extracted from
// (stray declarations such as "CvSize size", "#define CV_OVERRIDE",
// "T push_back(T... args)", with "Definition" locations in unrelated OpenCV
// headers). It was never part of the original header and has been reduced to
// this note.