Esterv.Utils.QrCode/lsh__table_8h_source.html

/***********************************************************************

 * Software License Agreement (BSD License)

 *

 * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.

 * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.

 *

 * THE BSD LICENSE

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions

 * are met:

 *

 * 1. Redistributions of source code must retain the above copyright

 *    notice, this list of conditions and the following disclaimer.

 * 2. Redistributions in binary form must reproduce the above copyright

 *    notice, this list of conditions and the following disclaimer in the

 *    documentation and/or other materials provided with the distribution.

 *

 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 *************************************************************************/


/***********************************************************************

 * Author: Vincent Rabaud

 *************************************************************************/


#ifndef OPENCV_FLANN_LSH_TABLE_H_

#define OPENCV_FLANN_LSH_TABLE_H_


#include <algorithm>
#include <iostream>
#include <iomanip>
#include <limits.h>
// TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
#ifdef __GXX_EXPERIMENTAL_CXX0X__
#  define USE_UNORDERED_MAP 1
#else
#  define USE_UNORDERED_MAP 0
#endif
#if USE_UNORDERED_MAP
#include <unordered_map>
#else
#include <map>
#endif
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vector>


#include "dynamic_bitset.h"

#include "matrix.h"


#ifdef _MSC_VER

#pragma warning(push)

#pragma warning(disable: 4702) //disable unreachable code

#endif


namespace cvflann

{


namespace lsh

{


// Value stored inside a bucket. LshTable::add(Matrix) stores the row index of
// each dataset feature under this type.
typedef uint32_t FeatureIndex;

// Key selecting a bucket. LshTable::add obtains it by casting the size_t
// returned by LshTable::getKey().
typedef unsigned int BucketKey;


// A bucket: the list of feature indices that were added under the same key.
typedef std::vector<FeatureIndex> Bucket;


/**
 * Summary statistics about the buckets of an LSH table, as filled in by
 * LshTable::getStats() and printed by operator<<.
 *
 * A default-constructed object has every scalar statistic set to 0 and both
 * containers empty, so a partially filled object never exposes indeterminate
 * values.
 */
struct LshStats
{
    /** Zero-initialises every scalar statistic. */
    LshStats()
        : n_buckets_(0),
          bucket_size_mean_(0),
          bucket_size_median_(0),
          bucket_size_min_(0),
          bucket_size_max_(0),
          bucket_size_std_dev(0)
    {
    }

    /** Size of every bucket (getStats() sorts this in ascending order). */
    std::vector<unsigned int> bucket_sizes_;
    /** Number of buckets the statistics were computed over. */
    size_t n_buckets_;
    /** Mean bucket size (integer division of the total size by the bucket count). */
    size_t bucket_size_mean_;
    /** Median bucket size. */
    size_t bucket_size_median_;
    /** Smallest bucket size. */
    size_t bucket_size_min_;
    /** Largest bucket size. */
    size_t bucket_size_max_;
    /** Standard deviation of the bucket sizes. */
    size_t bucket_size_std_dev;
    /**
     * Histogram of the bucket sizes. Each entry holds exactly three values:
     * [0] first size of the bin, [1] last size of the bin (inclusive),
     * [2] number of buckets whose size falls in the bin.
     */
    std::vector<std::vector<unsigned int> > size_histogram_;
};


inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)

{

    int w = 20;

    out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "

    << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "

    << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)

    << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)

    << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)

    << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "

    << std::setiosflags(std::ios::left) << stats.bucket_size_max_;


    // Display the histogram

    out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "

    << std::setiosflags(std::ios::left);

    for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =

             stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ",  ";


    return out;

}


template<typename ElementType>

class LshTable

{

public:

#if USE_UNORDERED_MAP

    typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;

#else

    typedef std::map<BucketKey, Bucket> BucketsSpace;

#endif


    typedef std::vector<Bucket> BucketsSpeed;


    LshTable()

    {

        key_size_ = 0;

        feature_size_ = 0;

        speed_level_ = kArray;

    }


    LshTable(unsigned int feature_size, unsigned int key_size)

    {

        feature_size_ = feature_size;

        CV_UNUSED(key_size);

        CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );

    }


    void add(unsigned int value, const ElementType* feature)

    {

        // Add the value to the corresponding bucket

        BucketKey key = (lsh::BucketKey)getKey(feature);


        switch (speed_level_) {

        case kArray:

            // That means we get the buckets from an array

            buckets_speed_[key].push_back(value);

            break;

        case kBitsetHash:

            // That means we can check the bitset for the presence of a key

            key_bitset_.set(key);

            buckets_space_[key].push_back(value);

            break;

        case kHash:

        {

            // That means we have to check for the hash table for the presence of a key

            buckets_space_[key].push_back(value);

            break;

        }

        }

    }


    void add(Matrix<ElementType> dataset)

    {

#if USE_UNORDERED_MAP

        buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);

#endif

        // Add the features to the table

        for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);

        // Now that the table is full, optimize it for speed/space

        optimize();

    }


    inline const Bucket* getBucketFromKey(BucketKey key) const

    {

        // Generate other buckets

        switch (speed_level_) {

        case kArray:

            // That means we get the buckets from an array

            return &buckets_speed_[key];

            break;

        case kBitsetHash:

            // That means we can check the bitset for the presence of a key

            if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;

            else return 0;

            break;

        case kHash:

        {

            // That means we have to check for the hash table for the presence of a key

            BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();

            bucket_it = buckets_space_.find(key);

            // Stop here if that bucket does not exist

            if (bucket_it == bucket_end) return 0;

            else return &bucket_it->second;

            break;

        }

        }

        return 0;

    }


    size_t getKey(const ElementType* /*feature*/) const

    {

        CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );

        return 0;

    }


    LshStats getStats() const;


private:

    enum SpeedLevel

    {

        kArray, kBitsetHash, kHash

    };


    void initialize(size_t key_size)

    {

        const size_t key_size_lower_bound = 1;

        //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t

        const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);

        if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)

        {

            CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));

        }


        speed_level_ = kHash;

        key_size_ = (unsigned)key_size;

    }


    void optimize()

    {

        // If we are already using the fast storage, no need to do anything

        if (speed_level_ == kArray) return;


        // Use an array if it will be more than half full

        if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {

            speed_level_ = kArray;

            // Fill the array version of it

            buckets_speed_.resize(size_t(1) << key_size_);

            for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;


            // Empty the hash table

            buckets_space_.clear();

            return;

        }


        // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two

        // for the vector) or less than 512MB (key_size_ <= 30)

        if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10

             >= (size_t(1) << key_size_)) || (key_size_ <= 32)) {

            speed_level_ = kBitsetHash;

            key_bitset_.resize(size_t(1) << key_size_);

            key_bitset_.reset();

            // Try with the BucketsSpace

            for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);

        }

        else {

            speed_level_ = kHash;

            key_bitset_.clear();

        }

    }


    BucketsSpeed buckets_speed_;


    BucketsSpace buckets_space_;


    SpeedLevel speed_level_;


    DynamicBitset key_bitset_;


    unsigned int key_size_;


    unsigned int feature_size_;


    // Members only used for the unsigned char specialization

    std::vector<size_t> mask_;

};


// Specialization for unsigned char


/**
 * Constructor for tables of unsigned char features.
 *
 * Validates the key size through initialize(), then builds mask_ by picking
 * bit positions of the feature at random (one shuffle of all bit positions,
 * of which the first ones are kept). getKey() reads the selected bits.
 *
 * A feature only has feature_size * CHAR_BIT bits. When subsignature_size
 * asks for more than that, every feature bit is selected instead of reading
 * past the end of the shuffled index list.
 *
 * @param feature_size      size of a feature in bytes
 * @param subsignature_size number of bits of the key (checked by initialize())
 */
template<>
inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
{
    feature_size_ = feature_size;
    initialize(subsignature_size);

    // Allocate the mask: one size_t word per sizeof(size_t) feature bytes, rounded up
    mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);

    // A bit brutal but fast to code: shuffle the positions of all the feature bits
    const size_t feature_bit_count = static_cast<size_t>(feature_size) * CHAR_BIT;
    std::vector<int> indices(feature_bit_count);
    for (size_t i = 0; i < feature_bit_count; ++i) indices[i] = (int)i;
#ifndef OPENCV_FLANN_USE_STD_RAND
    cv::randShuffle(indices);
#else
    std::random_shuffle(indices.begin(), indices.end());
#endif

    // Keep the first key_size_ shuffled positions, but never more than the
    // feature provides: indexing past indices.size() would be out of bounds.
    const size_t selected_bit_count = (std::min)(static_cast<size_t>(key_size_), feature_bit_count);
    const size_t bits_per_word = CHAR_BIT * sizeof(size_t);
    for (size_t i = 0; i < selected_bit_count; ++i) {
        const size_t index = indices[i];

        // Set that bit in the mask: the quotient picks the word, the remainder the bit offset
        mask_[index / bits_per_word] |= size_t(1) << (index % bits_per_word);
    }

    // Set to 1 if you want to display the mask for debug
#if 0
    {
        size_t bcount = 0;
        for (std::vector<size_t>::const_iterator mask_block = mask_.begin(); mask_block != mask_.end(); ++mask_block) {
            std::cout << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << *mask_block
                      << std::endl;
            // Count the set bits by clearing the lowest one until none is left
            for (size_t bits = *mask_block; bits; bits &= bits - 1) ++bcount;
        }
        std::cout << "bit count : " << std::dec << bcount << std::endl;
        std::cout << "mask size : " << mask_.size() << std::endl;
    }
#endif
}


/**
 * Computes the key (sub-signature) of an unsigned char feature.
 *
 * The feature is processed in size_t-sized blocks. For every bit set in the
 * matching mask_ block, from the lowest to the highest, the corresponding
 * feature bit is appended to the key: given the feature ABCDEF and the mask
 * 001011, the output is 000CEF.
 *
 * Each block is copied with memcpy, and the copy never covers more than the
 * bytes that remain in the feature. This places no alignment requirement on
 * @p feature and never reads beyond feature_size_ bytes, including when the
 * feature is shorter than a size_t. Bytes missing from the last block are
 * treated as zero.
 *
 * @param feature pointer to feature_size_ bytes
 * @return the key built from the masked bits
 */
template<>
inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
{
    size_t subsignature = 0;
    size_t bit_index = 1;

    for (size_t offset = 0; offset < feature_size_; offset += sizeof(size_t)) {
        // Bytes left from this block on; non-zero thanks to the loop condition
        const size_t remaining = feature_size_ - offset;
        const size_t block_bytes = remaining < sizeof(size_t) ? remaining : sizeof(size_t);

        // Load the feature block, preserving the byte order of the buffer
        size_t feature_block = 0;
        memcpy(&feature_block, feature + offset, block_bytes);

        size_t mask_block = mask_[offset / sizeof(size_t)];
        while (mask_block) {
            // Get the lowest set bit in the mask block
            const size_t lowest_bit = mask_block & ~(mask_block - 1);
            // Add it to the current subsignature if necessary
            subsignature += (feature_block & lowest_bit) ? bit_index : 0;
            // Reset the bit in the mask block
            mask_block ^= lowest_bit;
            // Move on to the next bit of the subsignature
            bit_index <<= 1;
        }
    }
    return subsignature;
}


/**
 * Computes statistics about the sizes of the buckets.
 *
 * The buckets are read from buckets_speed_ when it is not empty, otherwise
 * from buckets_space_. When both are empty, every statistic is 0 and the
 * containers of the result are empty.
 *
 * Otherwise the result holds:
 *  - bucket_sizes_: every bucket size, sorted in ascending order;
 *  - n_buckets_: the number of buckets;
 *  - bucket_size_mean_: total size divided by the bucket count (integer division);
 *  - bucket_size_median_: the element at position size / 2 of the sorted sizes;
 *  - bucket_size_min_ / bucket_size_max_: smallest and largest size;
 *  - bucket_size_std_dev: population standard deviation, truncated to an integer;
 *  - size_histogram_: one {first size, last size, count} entry per non-empty
 *    bin of width 20, in ascending order.
 *
 * @return the statistics
 */
template<>
inline LshStats LshTable<unsigned char>::getStats() const
{
    LshStats stats;
    // Give every scalar a defined value, whatever path is taken below
    stats.n_buckets_ = 0;
    stats.bucket_size_mean_ = 0;
    stats.bucket_size_median_ = 0;
    stats.bucket_size_min_ = 0;
    stats.bucket_size_max_ = 0;
    stats.bucket_size_std_dev = 0;

    if (buckets_speed_.empty() && buckets_space_.empty()) return stats;

    // Collect the size of every bucket from whichever container is in use
    size_t total_size = 0;
    if (!buckets_speed_.empty()) {
        stats.bucket_sizes_.reserve(buckets_speed_.size());
        for (BucketsSpeed::const_iterator bucket = buckets_speed_.begin(); bucket != buckets_speed_.end(); ++bucket) {
            stats.bucket_sizes_.push_back((lsh::FeatureIndex)bucket->size());
            total_size += bucket->size();
        }
    }
    else {
        stats.bucket_sizes_.reserve(buckets_space_.size());
        for (BucketsSpace::const_iterator entry = buckets_space_.begin(); entry != buckets_space_.end(); ++entry) {
            stats.bucket_sizes_.push_back((lsh::FeatureIndex)entry->second.size());
            total_size += entry->second.size();
        }
    }

    // At least one container is non-empty, so the count below is never zero
    const size_t bucket_count = stats.bucket_sizes_.size();
    stats.n_buckets_ = bucket_count;
    stats.bucket_size_mean_ = total_size / bucket_count;

    std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());

    stats.bucket_size_median_ = stats.bucket_sizes_[bucket_count / 2];
    stats.bucket_size_min_ = stats.bucket_sizes_.front();
    stats.bucket_size_max_ = stats.bucket_sizes_.back();

    // Population standard deviation around the exact (non-truncated) mean
    const double exact_mean = static_cast<double>(total_size) / static_cast<double>(bucket_count);
    double squared_deviation_sum = 0.0;
    for (std::vector<unsigned int>::const_iterator size = stats.bucket_sizes_.begin(); size != stats.bucket_sizes_.end(); ++size) {
        const double deviation = static_cast<double>(*size) - exact_mean;
        squared_deviation_sum += deviation * deviation;
    }
    stats.bucket_size_std_dev = static_cast<size_t>(sqrt(squared_deviation_sum / static_cast<double>(bucket_count)));

    // Include a histogram of the buckets. The sizes are sorted, so all the
    // members of a bin are adjacent: a new entry is opened whenever the bin
    // of the current size differs from the bin of the last entry.
    const unsigned int bin_width = 20;
    for (std::vector<unsigned int>::const_iterator size = stats.bucket_sizes_.begin(); size != stats.bucket_sizes_.end(); ++size) {
        const unsigned int bin_start = (*size / bin_width) * bin_width;
        if (stats.size_histogram_.empty() || stats.size_histogram_.back()[0] != bin_start) {
            std::vector<unsigned int> bin(3, 0);
            bin[0] = bin_start;
            bin[1] = bin_start + bin_width - 1;
            stats.size_histogram_.push_back(bin);
        }
        ++stats.size_histogram_.back()[2];
    }

    return stats;
}


// End the two namespaces

}

}


#ifdef _MSC_VER

#pragma warning(pop)

#endif


#endif /* OPENCV_FLANN_LSH_TABLE_H_ */

std::ostream

cv::_InputArray::size
Size size(int i=-1) const

std::endl
T endl(T... args)

cv::add
CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask=noArray(), int dtype=-1)
Calculates the per-element sum of two arrays or an array and a scalar.

cv::randShuffle
CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor=1., RNG *rng=0)
Shuffles the array elements randomly.

value
int CvScalar value
Definition core_c.h:720

end
double double end
Definition core_c.h:1381

idx
const int * idx
Definition core_c.h:668

index
int index
Definition core_c.h:634

x
const CvArr CvArr * x
Definition core_c.h:1195

CV_Error
#define CV_Error(code, msg)
Call the error handler.
Definition base.hpp:320

cv::format
CV_EXPORTS String format(const char *fmt,...) CV_FORMAT_PRINTF(1
Returns a text string formatted using the printf-like expression.

cv::stats
OutputArray OutputArray stats
Definition imgproc.hpp:3975

std::hex
T hex(T... args)

std::map

std::max
T max(T... args)

std::memcpy
T memcpy(T... args)

std::min
T min(T... args)

cv::Error::StsUnsupportedFormat
@ StsUnsupportedFormat
the data format/type is not supported by the function
Definition base.hpp:110

cv::Error::StsBadArg
@ StsBadArg
function arg/param is bad
Definition base.hpp:74

cvflann
Definition flann.hpp:60

std::random_shuffle
T random_shuffle(T... args)

std::setfill
T setfill(T... args)

std::setiosflags
T setiosflags(T... args)

std::setw
T setw(T... args)

std::sort
T sort(T... args)

std::unordered_map

std::vector