EstervQrCode 1.1.1
Library for qr code manipulation
lsh_table.h
1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 /***********************************************************************
32  * Author: Vincent Rabaud
33  *************************************************************************/
34 
35 #ifndef OPENCV_FLANN_LSH_TABLE_H_
36 #define OPENCV_FLANN_LSH_TABLE_H_
37 
39 
40 #include <algorithm>
41 #include <iostream>
42 #include <iomanip>
43 #include <limits.h>
44 // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
45 #ifdef __GXX_EXPERIMENTAL_CXX0X__
46 # define USE_UNORDERED_MAP 1
47 #else
48 # define USE_UNORDERED_MAP 0
49 #endif
50 #if USE_UNORDERED_MAP
51 #include <unordered_map>
52 #else
53 #include <map>
54 #endif
55 #include <math.h>
56 #include <stddef.h>
57 
58 #include "dynamic_bitset.h"
59 #include "matrix.h"
60 
61 #ifdef _MSC_VER
62 #pragma warning(push)
63 #pragma warning(disable: 4702) //disable unreachable code
64 #endif
65 
66 
67 namespace cvflann
68 {
69 
70 namespace lsh
71 {
72 
74 
77 typedef uint32_t FeatureIndex;
80 typedef unsigned int BucketKey;
81 
84 typedef std::vector<FeatureIndex> Bucket;
85 
87 
90 struct LshStats
91 {
92  std::vector<unsigned int> bucket_sizes_;
93  size_t n_buckets_;
94  size_t bucket_size_mean_;
95  size_t bucket_size_median_;
96  size_t bucket_size_min_;
97  size_t bucket_size_max_;
98  size_t bucket_size_std_dev;
101  std::vector<std::vector<unsigned int> > size_histogram_;
102 };
103 
109 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
110 {
111  int w = 20;
112  out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
113  << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
114  << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
115  << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
116  << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
117  << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
118  << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
119 
120  // Display the histogram
121  out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
122  << std::setiosflags(std::ios::left);
123  for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
124  stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
125 
126  return out;
127 }
128 
129 
131 
137 template<typename ElementType>
138 class LshTable
139 {
140 public:
143 #if USE_UNORDERED_MAP
144  typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
145 #else
146  typedef std::map<BucketKey, Bucket> BucketsSpace;
147 #endif
148 
151  typedef std::vector<Bucket> BucketsSpeed;
152 
155  LshTable()
156  {
157  key_size_ = 0;
158  feature_size_ = 0;
159  speed_level_ = kArray;
160  }
161 
167  LshTable(unsigned int feature_size, unsigned int key_size)
168  {
169  feature_size_ = feature_size;
170  CV_UNUSED(key_size);
171  CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
172  }
173 
178  void add(unsigned int value, const ElementType* feature)
179  {
180  // Add the value to the corresponding bucket
181  BucketKey key = (lsh::BucketKey)getKey(feature);
182 
183  switch (speed_level_) {
184  case kArray:
185  // That means we get the buckets from an array
186  buckets_speed_[key].push_back(value);
187  break;
188  case kBitsetHash:
189  // That means we can check the bitset for the presence of a key
190  key_bitset_.set(key);
191  buckets_space_[key].push_back(value);
192  break;
193  case kHash:
194  {
195  // That means we have to check for the hash table for the presence of a key
196  buckets_space_[key].push_back(value);
197  break;
198  }
199  }
200  }
201 
205  void add(Matrix<ElementType> dataset)
206  {
207 #if USE_UNORDERED_MAP
208  buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);
209 #endif
210  // Add the features to the table
211  for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);
212  // Now that the table is full, optimize it for speed/space
213  optimize();
214  }
215 
218  inline const Bucket* getBucketFromKey(BucketKey key) const
219  {
220  // Generate other buckets
221  switch (speed_level_) {
222  case kArray:
223  // That means we get the buckets from an array
224  return &buckets_speed_[key];
225  break;
226  case kBitsetHash:
227  // That means we can check the bitset for the presence of a key
228  if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
229  else return 0;
230  break;
231  case kHash:
232  {
233  // That means we have to check for the hash table for the presence of a key
234  BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
235  bucket_it = buckets_space_.find(key);
236  // Stop here if that bucket does not exist
237  if (bucket_it == bucket_end) return 0;
238  else return &bucket_it->second;
239  break;
240  }
241  }
242  return 0;
243  }
244 
247  size_t getKey(const ElementType* /*feature*/) const
248  {
249  CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
250  return 0;
251  }
252 
255  LshStats getStats() const;
256 
257 private:
263  enum SpeedLevel
264  {
265  kArray, kBitsetHash, kHash
266  };
267 
270  void initialize(size_t key_size)
271  {
272  const size_t key_size_lower_bound = 1;
273  //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t
274  const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
275  if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)
276  {
277  CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));
278  }
279 
280  speed_level_ = kHash;
281  key_size_ = (unsigned)key_size;
282  }
283 
286  void optimize()
287  {
288  // If we are already using the fast storage, no need to do anything
289  if (speed_level_ == kArray) return;
290 
291  // Use an array if it will be more than half full
292  if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
293  speed_level_ = kArray;
294  // Fill the array version of it
295  buckets_speed_.resize(size_t(1) << key_size_);
296  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
297 
298  // Empty the hash table
299  buckets_space_.clear();
300  return;
301  }
302 
303  // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
304  // for the vector) or less than 512MB (key_size_ <= 30)
305  if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
306  >= (size_t(1) << key_size_)) || (key_size_ <= 32)) {
307  speed_level_ = kBitsetHash;
308  key_bitset_.resize(size_t(1) << key_size_);
309  key_bitset_.reset();
310  // Try with the BucketsSpace
311  for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
312  }
313  else {
314  speed_level_ = kHash;
315  key_bitset_.clear();
316  }
317  }
318 
321  BucketsSpeed buckets_speed_;
322 
325  BucketsSpace buckets_space_;
326 
328  SpeedLevel speed_level_;
329 
333  DynamicBitset key_bitset_;
334 
337  unsigned int key_size_;
338 
339  unsigned int feature_size_;
340 
341  // Members only used for the unsigned char specialization
345  std::vector<size_t> mask_;
346 };
347 
349 // Specialization for unsigned char
350 
351 template<>
352 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
353 {
354  feature_size_ = feature_size;
355  initialize(subsignature_size);
356  // Allocate the mask
357  mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
358 
359  // A bit brutal but fast to code
360  std::vector<int> indices(feature_size * CHAR_BIT);
361  for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i;
362 #ifndef OPENCV_FLANN_USE_STD_RAND
363  cv::randShuffle(indices);
364 #else
365  std::random_shuffle(indices.begin(), indices.end());
366 #endif
367 
368  // Generate a random set of order of subsignature_size_ bits
369  for (unsigned int i = 0; i < key_size_; ++i) {
370  size_t index = indices[i];
371 
372  // Set that bit in the mask
373  size_t divisor = CHAR_BIT * sizeof(size_t);
374  size_t idx = index / divisor; //pick the right size_t index
375  mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
376  }
377 
378  // Set to 1 if you want to display the mask for debug
379 #if 0
380  {
381  size_t bcount = 0;
382  BOOST_FOREACH(size_t mask_block, mask_){
383  out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
384  << std::endl;
385  bcount += __builtin_popcountll(mask_block);
386  }
387  out << "bit count : " << std::dec << bcount << std::endl;
388  out << "mask size : " << mask_.size() << std::endl;
389  return out;
390  }
391 #endif
392 }
393 
397 template<>
398 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
399 {
400  // no need to check if T is dividable by sizeof(size_t) like in the Hamming
401  // distance computation as we have a mask
402  // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer
403  const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
404 
405  // Figure out the subsignature of the feature
406  // Given the feature ABCDEF, and the mask 001011, the output will be
407  // 000CEF
408  size_t subsignature = 0;
409  size_t bit_index = 1;
410 
411  for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
412  // get the mask and signature blocks
413  size_t feature_block;
414  if (i <= feature_size_ - sizeof(size_t))
415  {
416  feature_block = *feature_block_ptr;
417  }
418  else
419  {
420  size_t tmp = 0;
421  memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
422  feature_block = tmp;
423  }
424  size_t mask_block = mask_[i / sizeof(size_t)];
425  while (mask_block) {
426  // Get the lowest set bit in the mask block
427  size_t lowest_bit = mask_block & ~(mask_block - 1);
428  // Add it to the current subsignature if necessary
429  subsignature += (feature_block & lowest_bit) ? bit_index : 0;
430  // Reset the bit in the mask block
431  mask_block ^= lowest_bit;
432  // increment the bit index for the subsignature
433  bit_index <<= 1;
434  }
435  // Check the next feature block
436  ++feature_block_ptr;
437  }
438  return subsignature;
439 }
440 
441 template<>
442 inline LshStats LshTable<unsigned char>::getStats() const
443 {
444  LshStats stats;
445  stats.bucket_size_mean_ = 0;
446  if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
447  stats.n_buckets_ = 0;
448  stats.bucket_size_median_ = 0;
449  stats.bucket_size_min_ = 0;
450  stats.bucket_size_max_ = 0;
451  return stats;
452  }
453 
454  if (!buckets_speed_.empty()) {
455  for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
456  stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size());
457  stats.bucket_size_mean_ += pbucket->size();
458  }
459  stats.bucket_size_mean_ /= buckets_speed_.size();
460  stats.n_buckets_ = buckets_speed_.size();
461  }
462  else {
463  for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
464  stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size());
465  stats.bucket_size_mean_ += x->second.size();
466  }
467  stats.bucket_size_mean_ /= buckets_space_.size();
468  stats.n_buckets_ = buckets_space_.size();
469  }
470 
471  std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
472 
473  // BOOST_FOREACH(int size, stats.bucket_sizes_)
474  // std::cout << size << " ";
475  // std::cout << std::endl;
476  stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
477  stats.bucket_size_min_ = stats.bucket_sizes_.front();
478  stats.bucket_size_max_ = stats.bucket_sizes_.back();
479 
480  // TODO compute mean and std
481  /*float mean, stddev;
482  stats.bucket_size_mean_ = mean;
483  stats.bucket_size_std_dev = stddev;*/
484 
485  // Include a histogram of the buckets
486  unsigned int bin_start = 0;
487  unsigned int bin_end = 20;
488  bool is_new_bin = true;
489  for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
490  != end; )
491  if (*iterator < bin_end) {
492  if (is_new_bin) {
493  stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
494  stats.size_histogram_.back()[0] = bin_start;
495  stats.size_histogram_.back()[1] = bin_end - 1;
496  is_new_bin = false;
497  }
498  ++stats.size_histogram_.back()[2];
499  ++iterator;
500  }
501  else {
502  bin_start += 20;
503  bin_end += 20;
504  is_new_bin = true;
505  }
506 
507  return stats;
508 }
509 
510 // End the two namespaces
511 }
512 }
513 
514 #ifdef _MSC_VER
515 #pragma warning(pop)
516 #endif
517 
519 
521 
522 #endif /* OPENCV_FLANN_LSH_TABLE_H_ */
Size size(int i=-1) const
T endl(T... args)
CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask=noArray(), int dtype=-1)
Calculates the per-element sum of two arrays or an array and a scalar.
CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor=1., RNG *rng=0)
Shuffles the array elements randomly.
int CvScalar value
Definition: core_c.h:720
double double end
Definition: core_c.h:1381
const int * idx
Definition: core_c.h:668
int index
Definition: core_c.h:634
const CvArr CvArr * x
Definition: core_c.h:1195
#define CV_Error(code, msg)
Call the error handler.
Definition: base.hpp:320
CV_EXPORTS String format(const char *fmt,...) CV_FORMAT_PRINTF(1
Returns a text string formatted using the printf-like expression.
std::ostream & operator<<(std::ostream &, const DualQuat< _Tp > &)
OutputArray OutputArray stats
Definition: imgproc.hpp:3975
T hex(T... args)
T max(T... args)
T memcpy(T... args)
T min(T... args)
@ StsUnsupportedFormat
the data format/type is not supported by the function
Definition: base.hpp:110
@ StsBadArg
function arg/param is bad
Definition: base.hpp:74
Definition: flann.hpp:60
T random_shuffle(T... args)
T setfill(T... args)
T setiosflags(T... args)
T setw(T... args)
T sort(T... args)