EstervQrCode 1.1.1
Library for qr code manipulation
autotuned_index.h
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 #ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
31 #define OPENCV_FLANN_AUTOTUNED_INDEX_H_
32 
34 
35 #include <sstream>
36 
37 #include "nn_index.h"
38 #include "ground_truth.h"
39 #include "index_testing.h"
40 #include "sampling.h"
41 #include "kdtree_index.h"
42 #include "kdtree_single_index.h"
43 #include "kmeans_index.h"
44 #include "composite_index.h"
45 #include "linear_index.h"
46 #include "logger.h"
47 
48 namespace cvflann
49 {
50 
51 template<typename Distance>
52 NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);
53 
54 
55 struct AutotunedIndexParams : public IndexParams
56 {
57  AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
58  {
59  (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
60  // precision desired (used for autotuning, -1 otherwise)
61  (*this)["target_precision"] = target_precision;
62  // build tree time weighting factor
63  (*this)["build_weight"] = build_weight;
64  // index memory weighting factor
65  (*this)["memory_weight"] = memory_weight;
66  // what fraction of the dataset to use for autotuning
67  (*this)["sample_fraction"] = sample_fraction;
68  }
69 };
70 
71 
72 template <typename Distance>
73 class AutotunedIndex : public NNIndex<Distance>
74 {
75 public:
76  typedef typename Distance::ElementType ElementType;
77  typedef typename Distance::ResultType DistanceType;
78 
79  AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
80  dataset_(inputData), distance_(d)
81  {
82  target_precision_ = get_param(params, "target_precision",0.8f);
83  build_weight_ = get_param(params,"build_weight", 0.01f);
84  memory_weight_ = get_param(params, "memory_weight", 0.0f);
85  sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
86  bestIndex_ = NULL;
87  speedup_ = 0;
88  }
89 
90  AutotunedIndex(const AutotunedIndex&);
91  AutotunedIndex& operator=(const AutotunedIndex&);
92 
93  virtual ~AutotunedIndex()
94  {
95  if (bestIndex_ != NULL) {
96  delete bestIndex_;
97  bestIndex_ = NULL;
98  }
99  }
100 
104  virtual void buildIndex() CV_OVERRIDE
105  {
106  std::ostringstream stream;
107  bestParams_ = estimateBuildParams();
108  print_params(bestParams_, stream);
109  Logger::info("----------------------------------------------------\n");
110  Logger::info("Autotuned parameters:\n");
111  Logger::info("%s", stream.str().c_str());
112  Logger::info("----------------------------------------------------\n");
113 
114  bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
115  bestIndex_->buildIndex();
116  speedup_ = estimateSearchParams(bestSearchParams_);
117  stream.str(std::string());
118  print_params(bestSearchParams_, stream);
119  Logger::info("----------------------------------------------------\n");
120  Logger::info("Search parameters:\n");
121  Logger::info("%s", stream.str().c_str());
122  Logger::info("----------------------------------------------------\n");
123  }
124 
128  virtual void saveIndex(FILE* stream) CV_OVERRIDE
129  {
130  save_value(stream, (int)bestIndex_->getType());
131  bestIndex_->saveIndex(stream);
132  save_value(stream, get_param<int>(bestSearchParams_, "checks"));
133  }
134 
138  virtual void loadIndex(FILE* stream) CV_OVERRIDE
139  {
140  int index_type;
141 
142  load_value(stream, index_type);
143  IndexParams params;
144  params["algorithm"] = (flann_algorithm_t)index_type;
145  bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
146  bestIndex_->loadIndex(stream);
147  int checks;
148  load_value(stream, checks);
149  bestSearchParams_["checks"] = checks;
150  }
151 
155  virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
156  {
157  int checks = get_param<int>(searchParams,"checks",FLANN_CHECKS_AUTOTUNED);
158  if (checks == FLANN_CHECKS_AUTOTUNED) {
159  bestIndex_->findNeighbors(result, vec, bestSearchParams_);
160  }
161  else {
162  bestIndex_->findNeighbors(result, vec, searchParams);
163  }
164  }
165 
166 
167  IndexParams getParameters() const CV_OVERRIDE
168  {
169  return bestIndex_->getParameters();
170  }
171 
172  SearchParams getSearchParameters() const
173  {
174  return bestSearchParams_;
175  }
176 
177  float getSpeedup() const
178  {
179  return speedup_;
180  }
181 
182 
186  virtual size_t size() const CV_OVERRIDE
187  {
188  return bestIndex_->size();
189  }
190 
194  virtual size_t veclen() const CV_OVERRIDE
195  {
196  return bestIndex_->veclen();
197  }
198 
202  virtual int usedMemory() const CV_OVERRIDE
203  {
204  return bestIndex_->usedMemory();
205  }
206 
210  virtual flann_algorithm_t getType() const CV_OVERRIDE
211  {
212  return FLANN_INDEX_AUTOTUNED;
213  }
214 
215 private:
216 
217  struct CostData
218  {
219  float searchTimeCost;
220  float buildTimeCost;
221  float memoryCost;
222  float totalCost;
223  IndexParams params;
224  };
225 
226  void evaluate_kmeans(CostData& cost)
227  {
228  StartStopTimer t;
229  int checks;
230  const int nn = 1;
231 
232  Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
233  get_param<int>(cost.params,"iterations"),
234  get_param<int>(cost.params,"branching"));
235  KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
236  // measure index build time
237  t.start();
238  kmeans.buildIndex();
239  t.stop();
240  float buildTime = (float)t.value;
241 
242  // measure search time
243  float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
244 
245  float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
246  cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
247  cost.searchTimeCost = searchTime;
248  cost.buildTimeCost = buildTime;
249  Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
250  }
251 
252 
253  void evaluate_kdtree(CostData& cost)
254  {
255  StartStopTimer t;
256  int checks;
257  const int nn = 1;
258 
259  Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
260  KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
261 
262  t.start();
263  kdtree.buildIndex();
264  t.stop();
265  float buildTime = (float)t.value;
266 
267  //measure search time
268  float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
269 
270  float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
271  cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
272  cost.searchTimeCost = searchTime;
273  cost.buildTimeCost = buildTime;
274  Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
275  }
276 
277 
278  // struct KMeansSimpleDownhillFunctor {
279  //
280  // Autotune& autotuner;
281  // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
282  //
283  // float operator()(int* params) {
284  //
285  // float maxFloat = numeric_limits<float>::max();
286  //
287  // if (params[0]<2) return maxFloat;
288  // if (params[1]<0) return maxFloat;
289  //
290  // CostData c;
291  // c.params["algorithm"] = KMEANS;
292  // c.params["centers-init"] = CENTERS_RANDOM;
293  // c.params["branching"] = params[0];
294  // c.params["max-iterations"] = params[1];
295  //
296  // autotuner.evaluate_kmeans(c);
297  //
298  // return c.timeCost;
299  //
300  // }
301  // };
302  //
303  // struct KDTreeSimpleDownhillFunctor {
304  //
305  // Autotune& autotuner;
306  // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
307  //
308  // float operator()(int* params) {
309  // float maxFloat = numeric_limits<float>::max();
310  //
311  // if (params[0]<1) return maxFloat;
312  //
313  // CostData c;
314  // c.params["algorithm"] = KDTREE;
315  // c.params["trees"] = params[0];
316  //
317  // autotuner.evaluate_kdtree(c);
318  //
319  // return c.timeCost;
320  //
321  // }
322  // };
323 
324 
325 
326  void optimizeKMeans(std::vector<CostData>& costs)
327  {
328  Logger::info("KMEANS, Step 1: Exploring parameter space\n");
329 
330  // explore kmeans parameters space using combinations of the parameters below
331  int maxIterations[] = { 1, 5, 10, 15 };
332  int branchingFactors[] = { 16, 32, 64, 128, 256 };
333 
334  int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
335  costs.reserve(costs.size() + kmeansParamSpaceSize);
336 
337  // evaluate kmeans for all parameter combinations
338  for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
339  for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
340  CostData cost;
341  cost.params["algorithm"] = FLANN_INDEX_KMEANS;
342  cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
343  cost.params["iterations"] = maxIterations[i];
344  cost.params["branching"] = branchingFactors[j];
345 
346  evaluate_kmeans(cost);
347  costs.push_back(cost);
348  }
349  }
350 
351  // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
352  //
353  // const int n = 2;
354  // // choose initial simplex points as the best parameters so far
355  // int kmeansNMPoints[n*(n+1)];
356  // float kmeansVals[n+1];
357  // for (int i=0;i<n+1;++i) {
358  // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
359  // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
360  // kmeansVals[i] = kmeansCosts[i].timeCost;
361  // }
362  // KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
363  // // run optimization
364  // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
365  // // store results
366  // for (int i=0;i<n+1;++i) {
367  // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
368  // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
369  // kmeansCosts[i].timeCost = kmeansVals[i];
370  // }
371  }
372 
373 
374  void optimizeKDTree(std::vector<CostData>& costs)
375  {
376  Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
377 
378  // explore kd-tree parameters space using the parameters below
379  int testTrees[] = { 1, 4, 8, 16, 32 };
380 
381  // evaluate kdtree for all parameter combinations
382  for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
383  CostData cost;
384  cost.params["algorithm"] = FLANN_INDEX_KDTREE;
385  cost.params["trees"] = testTrees[i];
386 
387  evaluate_kdtree(cost);
388  costs.push_back(cost);
389  }
390 
391  // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
392  //
393  // const int n = 1;
394  // // choose initial simplex points as the best parameters so far
395  // int kdtreeNMPoints[n*(n+1)];
396  // float kdtreeVals[n+1];
397  // for (int i=0;i<n+1;++i) {
398  // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
399  // kdtreeVals[i] = kdtreeCosts[i].timeCost;
400  // }
401  // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
402  // // run optimization
403  // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
404  // // store results
405  // for (int i=0;i<n+1;++i) {
406  // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
407  // kdtreeCosts[i].timeCost = kdtreeVals[i];
408  // }
409  }
410 
416  IndexParams estimateBuildParams()
417  {
418  std::vector<CostData> costs;
419 
420  int sampleSize = int(sample_fraction_ * dataset_.rows);
421  int testSampleSize = std::min(sampleSize / 10, 1000);
422 
423  Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
424 
425  // For a very small dataset, it makes no sense to build any fancy index, just
426  // use linear search
427  if (testSampleSize < 10) {
428  Logger::info("Choosing linear, dataset too small\n");
429  return LinearIndexParams();
430  }
431 
432  // We use a fraction of the original dataset to speedup the autotune algorithm
433  sampledDataset_ = random_sample(dataset_, sampleSize);
434  // We use a cross-validation approach, first we sample a testset from the dataset
435  testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
436 
437  // We compute the ground truth using linear search
438  Logger::info("Computing ground truth... \n");
439  gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
440  StartStopTimer t;
441  t.start();
442  compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
443  t.stop();
444 
445  CostData linear_cost;
446  linear_cost.searchTimeCost = (float)t.value;
447  linear_cost.buildTimeCost = 0;
448  linear_cost.memoryCost = 0;
449  linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
450 
451  costs.push_back(linear_cost);
452 
453  // Start parameter autotune process
454  Logger::info("Autotuning parameters...\n");
455 
456  optimizeKMeans(costs);
457  optimizeKDTree(costs);
458 
459  float bestTimeCost = costs[0].searchTimeCost;
460  for (size_t i = 0; i < costs.size(); ++i) {
461  float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
462  if (timeCost < bestTimeCost) {
463  bestTimeCost = timeCost;
464  }
465  }
466 
467  float bestCost = costs[0].searchTimeCost / bestTimeCost;
468  IndexParams bestParams = costs[0].params;
469  if (bestTimeCost > 0) {
470  for (size_t i = 0; i < costs.size(); ++i) {
471  float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
472  memory_weight_ * costs[i].memoryCost;
473  if (crtCost < bestCost) {
474  bestCost = crtCost;
475  bestParams = costs[i].params;
476  }
477  }
478  }
479 
480  delete[] gt_matches_.data;
481  delete[] testDataset_.data;
482  delete[] sampledDataset_.data;
483 
484  return bestParams;
485  }
486 
487 
488 
494  float estimateSearchParams(SearchParams& searchParams)
495  {
496  const int nn = 1;
497  const size_t SAMPLE_COUNT = 1000;
498 
499  CV_Assert(bestIndex_ != NULL && "Requires a valid index"); // must have a valid index
500 
501  float speedup = 0;
502 
503  int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
504  if (samples > 0) {
505  Matrix<ElementType> testDataset = random_sample(dataset_, samples);
506 
507  Logger::info("Computing ground truth\n");
508 
509  // we need to compute the ground truth first
510  Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
511  StartStopTimer t;
512  t.start();
513  compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
514  t.stop();
515  float linear = (float)t.value;
516 
517  int checks;
518  Logger::info("Estimating number of checks\n");
519 
520  float searchTime;
521  float cb_index;
522  if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
523  Logger::info("KMeans algorithm, estimating cluster border factor\n");
524  KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
525  float bestSearchTime = -1;
526  float best_cb_index = -1;
527  int best_checks = -1;
528  for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
529  kmeans->set_cb_index(cb_index);
530  searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
531  if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
532  bestSearchTime = searchTime;
533  best_cb_index = cb_index;
534  best_checks = checks;
535  }
536  }
537  searchTime = bestSearchTime;
538  cb_index = best_cb_index;
539  checks = best_checks;
540 
541  kmeans->set_cb_index(best_cb_index);
542  Logger::info("Optimum cb_index: %g\n", cb_index);
543  bestParams_["cb_index"] = cb_index;
544  }
545  else {
546  searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
547  }
548 
549  Logger::info("Required number of checks: %d \n", checks);
550  searchParams["checks"] = checks;
551 
552  speedup = linear / searchTime;
553 
554  delete[] gt_matches.data;
555  delete[] testDataset.data;
556  }
557 
558  return speedup;
559  }
560 
561 private:
562  NNIndex<Distance>* bestIndex_;
563 
564  IndexParams bestParams_;
565  SearchParams bestSearchParams_;
566 
567  Matrix<ElementType> sampledDataset_;
568  Matrix<ElementType> testDataset_;
569  Matrix<int> gt_matches_;
570 
571  float speedup_;
572 
576  const Matrix<ElementType> dataset_;
577 
581  float target_precision_;
582  float build_weight_;
583  float memory_weight_;
584  float sample_fraction_;
585 
586  Distance distance_;
587 
588 
589 };
590 }
591 
593 
594 #endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */
T data(T... args)
CvSize size
Definition: core_c.h:112
const CvArr const CvArr CvArr * result
Definition: core_c.h:1423
CV_EXPORTS_W double kmeans(InputArray data, int K, InputOutputArray bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray centers=noArray())
Finds centers of clusters and groups input samples around the clusters.
#define CV_OVERRIDE
Definition: cvdef.h:792
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
InputArray int InputArray cost
Definition: imgproc.hpp:3387
T min(T... args)
Definition: flann.hpp:60
T push_back(T... args)
T reserve(T... args)
T size(T... args)
T str(T... args)