EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
lsh_table.h
1/***********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6 *
7 * THE BSD LICENSE
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
30
31/***********************************************************************
32 * Author: Vincent Rabaud
33 *************************************************************************/
34
35#ifndef OPENCV_FLANN_LSH_TABLE_H_
36#define OPENCV_FLANN_LSH_TABLE_H_
37
39
40#include <algorithm>
41#include <iostream>
42#include <iomanip>
43#include <limits.h>
44// TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
45#ifdef __GXX_EXPERIMENTAL_CXX0X__
46# define USE_UNORDERED_MAP 1
47#else
48# define USE_UNORDERED_MAP 0
49#endif
50#if USE_UNORDERED_MAP
51#include <unordered_map>
52#else
53#include <map>
54#endif
55#include <math.h>
56#include <stddef.h>
57
58#include "dynamic_bitset.h"
59#include "matrix.h"
60
61#ifdef _MSC_VER
62#pragma warning(push)
63#pragma warning(disable: 4702) //disable unreachable code
64#endif
65
66
67namespace cvflann
68{
69
70namespace lsh
71{
72
74
77typedef uint32_t FeatureIndex;
80typedef unsigned int BucketKey;
81
84typedef std::vector<FeatureIndex> Bucket;
85
87
90struct LshStats
91{
92 std::vector<unsigned int> bucket_sizes_;
93 size_t n_buckets_;
94 size_t bucket_size_mean_;
95 size_t bucket_size_median_;
96 size_t bucket_size_min_;
97 size_t bucket_size_max_;
98 size_t bucket_size_std_dev;
102};
103
109inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
110{
111 int w = 20;
112 out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
113 << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
114 << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
115 << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
116 << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
117 << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
118 << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
119
120 // Display the histogram
121 out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
122 << std::setiosflags(std::ios::left);
123 for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
124 stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", ";
125
126 return out;
127}
128
129
131
137template<typename ElementType>
138class LshTable
139{
140public:
143#if USE_UNORDERED_MAP
144 typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
145#else
146 typedef std::map<BucketKey, Bucket> BucketsSpace;
147#endif
148
151 typedef std::vector<Bucket> BucketsSpeed;
152
155 LshTable()
156 {
157 key_size_ = 0;
158 feature_size_ = 0;
159 speed_level_ = kArray;
160 }
161
167 LshTable(unsigned int feature_size, unsigned int key_size)
168 {
169 feature_size_ = feature_size;
170 CV_UNUSED(key_size);
171 CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
172 }
173
178 void add(unsigned int value, const ElementType* feature)
179 {
180 // Add the value to the corresponding bucket
181 BucketKey key = (lsh::BucketKey)getKey(feature);
182
183 switch (speed_level_) {
184 case kArray:
185 // That means we get the buckets from an array
186 buckets_speed_[key].push_back(value);
187 break;
188 case kBitsetHash:
189 // That means we can check the bitset for the presence of a key
190 key_bitset_.set(key);
191 buckets_space_[key].push_back(value);
192 break;
193 case kHash:
194 {
195 // That means we have to check for the hash table for the presence of a key
196 buckets_space_[key].push_back(value);
197 break;
198 }
199 }
200 }
201
205 void add(Matrix<ElementType> dataset)
206 {
207#if USE_UNORDERED_MAP
208 buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);
209#endif
210 // Add the features to the table
211 for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);
212 // Now that the table is full, optimize it for speed/space
213 optimize();
214 }
215
218 inline const Bucket* getBucketFromKey(BucketKey key) const
219 {
220 // Generate other buckets
221 switch (speed_level_) {
222 case kArray:
223 // That means we get the buckets from an array
224 return &buckets_speed_[key];
225 break;
226 case kBitsetHash:
227 // That means we can check the bitset for the presence of a key
228 if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
229 else return 0;
230 break;
231 case kHash:
232 {
233 // That means we have to check for the hash table for the presence of a key
234 BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
235 bucket_it = buckets_space_.find(key);
236 // Stop here if that bucket does not exist
237 if (bucket_it == bucket_end) return 0;
238 else return &bucket_it->second;
239 break;
240 }
241 }
242 return 0;
243 }
244
247 size_t getKey(const ElementType* /*feature*/) const
248 {
249 CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
250 return 0;
251 }
252
255 LshStats getStats() const;
256
257private:
263 enum SpeedLevel
264 {
265 kArray, kBitsetHash, kHash
266 };
267
270 void initialize(size_t key_size)
271 {
272 const size_t key_size_lower_bound = 1;
273 //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t
274 const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
275 if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)
276 {
277 CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));
278 }
279
280 speed_level_ = kHash;
281 key_size_ = (unsigned)key_size;
282 }
283
286 void optimize()
287 {
288 // If we are already using the fast storage, no need to do anything
289 if (speed_level_ == kArray) return;
290
291 // Use an array if it will be more than half full
292 if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
293 speed_level_ = kArray;
294 // Fill the array version of it
295 buckets_speed_.resize(size_t(1) << key_size_);
296 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
297
298 // Empty the hash table
299 buckets_space_.clear();
300 return;
301 }
302
303 // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
304 // for the vector) or less than 512MB (key_size_ <= 30)
305 if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
306 >= (size_t(1) << key_size_)) || (key_size_ <= 32)) {
307 speed_level_ = kBitsetHash;
308 key_bitset_.resize(size_t(1) << key_size_);
309 key_bitset_.reset();
310 // Try with the BucketsSpace
311 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
312 }
313 else {
314 speed_level_ = kHash;
315 key_bitset_.clear();
316 }
317 }
318
321 BucketsSpeed buckets_speed_;
322
325 BucketsSpace buckets_space_;
326
328 SpeedLevel speed_level_;
329
333 DynamicBitset key_bitset_;
334
337 unsigned int key_size_;
338
339 unsigned int feature_size_;
340
341 // Members only used for the unsigned char specialization
346};
347
349// Specialization for unsigned char
350
351template<>
352inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
353{
354 feature_size_ = feature_size;
355 initialize(subsignature_size);
356 // Allocate the mask
357 mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
358
359 // A bit brutal but fast to code
360 std::vector<int> indices(feature_size * CHAR_BIT);
361 for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i;
362#ifndef OPENCV_FLANN_USE_STD_RAND
363 cv::randShuffle(indices);
364#else
365 std::random_shuffle(indices.begin(), indices.end());
366#endif
367
368 // Generate a random set of order of subsignature_size_ bits
369 for (unsigned int i = 0; i < key_size_; ++i) {
370 size_t index = indices[i];
371
372 // Set that bit in the mask
373 size_t divisor = CHAR_BIT * sizeof(size_t);
374 size_t idx = index / divisor; //pick the right size_t index
375 mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
376 }
377
378 // Set to 1 if you want to display the mask for debug
379#if 0
380 {
381 size_t bcount = 0;
382 BOOST_FOREACH(size_t mask_block, mask_){
383 out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
384 << std::endl;
385 bcount += __builtin_popcountll(mask_block);
386 }
387 out << "bit count : " << std::dec << bcount << std::endl;
388 out << "mask size : " << mask_.size() << std::endl;
389 return out;
390 }
391#endif
392}
393
397template<>
398inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
399{
400 // no need to check if T is dividable by sizeof(size_t) like in the Hamming
401 // distance computation as we have a mask
402 // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer
403 const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
404
405 // Figure out the subsignature of the feature
406 // Given the feature ABCDEF, and the mask 001011, the output will be
407 // 000CEF
408 size_t subsignature = 0;
409 size_t bit_index = 1;
410
411 for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
412 // get the mask and signature blocks
413 size_t feature_block;
414 if (i <= feature_size_ - sizeof(size_t))
415 {
416 feature_block = *feature_block_ptr;
417 }
418 else
419 {
420 size_t tmp = 0;
421 memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
422 feature_block = tmp;
423 }
424 size_t mask_block = mask_[i / sizeof(size_t)];
425 while (mask_block) {
426 // Get the lowest set bit in the mask block
427 size_t lowest_bit = mask_block & ~(mask_block - 1);
428 // Add it to the current subsignature if necessary
429 subsignature += (feature_block & lowest_bit) ? bit_index : 0;
430 // Reset the bit in the mask block
431 mask_block ^= lowest_bit;
432 // increment the bit index for the subsignature
433 bit_index <<= 1;
434 }
435 // Check the next feature block
436 ++feature_block_ptr;
437 }
438 return subsignature;
439}
440
441template<>
442inline LshStats LshTable<unsigned char>::getStats() const
443{
444 LshStats stats;
445 stats.bucket_size_mean_ = 0;
446 if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
447 stats.n_buckets_ = 0;
448 stats.bucket_size_median_ = 0;
449 stats.bucket_size_min_ = 0;
450 stats.bucket_size_max_ = 0;
451 return stats;
452 }
453
454 if (!buckets_speed_.empty()) {
455 for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
456 stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size());
457 stats.bucket_size_mean_ += pbucket->size();
458 }
459 stats.bucket_size_mean_ /= buckets_speed_.size();
460 stats.n_buckets_ = buckets_speed_.size();
461 }
462 else {
463 for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
464 stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size());
465 stats.bucket_size_mean_ += x->second.size();
466 }
467 stats.bucket_size_mean_ /= buckets_space_.size();
468 stats.n_buckets_ = buckets_space_.size();
469 }
470
471 std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
472
473 // BOOST_FOREACH(int size, stats.bucket_sizes_)
474 // std::cout << size << " ";
475 // std::cout << std::endl;
476 stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
477 stats.bucket_size_min_ = stats.bucket_sizes_.front();
478 stats.bucket_size_max_ = stats.bucket_sizes_.back();
479
480 // TODO compute mean and std
481 /*float mean, stddev;
482 stats.bucket_size_mean_ = mean;
483 stats.bucket_size_std_dev = stddev;*/
484
485 // Include a histogram of the buckets
486 unsigned int bin_start = 0;
487 unsigned int bin_end = 20;
488 bool is_new_bin = true;
489 for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
490 != end; )
491 if (*iterator < bin_end) {
492 if (is_new_bin) {
493 stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
494 stats.size_histogram_.back()[0] = bin_start;
495 stats.size_histogram_.back()[1] = bin_end - 1;
496 is_new_bin = false;
497 }
498 ++stats.size_histogram_.back()[2];
499 ++iterator;
500 }
501 else {
502 bin_start += 20;
503 bin_end += 20;
504 is_new_bin = true;
505 }
506
507 return stats;
508}
509
510// End the two namespaces
511}
512}
513
514#ifdef _MSC_VER
515#pragma warning(pop)
516#endif
517
519
521
522#endif /* OPENCV_FLANN_LSH_TABLE_H_ */
Size size(int i=-1) const
T endl(T... args)
CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask=noArray(), int dtype=-1)
Calculates the per-element sum of two arrays or an array and a scalar.
CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor=1., RNG *rng=0)
Shuffles the array elements randomly.
int CvScalar value
Definition core_c.h:720
double double end
Definition core_c.h:1381
const int * idx
Definition core_c.h:668
int index
Definition core_c.h:634
const CvArr CvArr * x
Definition core_c.h:1195
#define CV_Error(code, msg)
Call the error handler.
Definition base.hpp:320
CV_EXPORTS String format(const char *fmt,...) CV_FORMAT_PRINTF(1
Returns a text string formatted using the printf-like expression.
OutputArray OutputArray stats
Definition imgproc.hpp:3975
T hex(T... args)
T max(T... args)
T memcpy(T... args)
T min(T... args)
@ StsUnsupportedFormat
the data format/type is not supported by the function
Definition base.hpp:110
@ StsBadArg
function arg/param is bad
Definition base.hpp:74
Definition flann.hpp:60
T random_shuffle(T... args)
T setfill(T... args)
T setiosflags(T... args)
T setw(T... args)
T sort(T... args)