EstervQrCode 1.1.1
Library for qr code manipulation
scan.hpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #ifndef OPENCV_CUDA_SCAN_HPP
44 #define OPENCV_CUDA_SCAN_HPP
45 
46 #include "opencv2/core/cuda/common.hpp"
47 #include "opencv2/core/cuda/utility.hpp"
48 #include "opencv2/core/cuda/warp.hpp"
49 #include "opencv2/core/cuda/warp_shuffle.hpp"
50 
56 
57 namespace cv { namespace cuda { namespace device
58 {
59  enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
60 
61  template <ScanKind Kind, typename T, typename F> struct WarpScan
62  {
63  __device__ __forceinline__ WarpScan() {}
64  __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
65 
66  __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
67  {
68  const unsigned int lane = idx & 31;
69  F op;
70 
71  if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
72  if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
73  if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
74  if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
75  if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
76 
77  if( Kind == INCLUSIVE )
78  return ptr [idx];
79  else
80  return (lane > 0) ? ptr [idx - 1] : 0;
81  }
82 
83  __device__ __forceinline__ unsigned int index(const unsigned int tid)
84  {
85  return tid;
86  }
87 
88  __device__ __forceinline__ void init(volatile T *ptr){}
89 
90  static const int warp_offset = 0;
91 
92  typedef WarpScan<INCLUSIVE, T, F> merge;
93  };
94 
95  template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
96  {
97  __device__ __forceinline__ WarpScanNoComp() {}
98  __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
99 
100  __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
101  {
102  const unsigned int lane = threadIdx.x & 31;
103  F op;
104 
105  ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
106  ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
107  ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
108  ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
109  ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
110 
111  if( Kind == INCLUSIVE )
112  return ptr [idx];
113  else
114  return (lane > 0) ? ptr [idx - 1] : 0;
115  }
116 
117  __device__ __forceinline__ unsigned int index(const unsigned int tid)
118  {
119  return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
120  }
121 
122  __device__ __forceinline__ void init(volatile T *ptr)
123  {
124  ptr[threadIdx.x] = 0;
125  }
126 
127  static const int warp_smem_stride = 32 + 16 + 1;
128  static const int warp_offset = 16;
129  static const int warp_log = 5;
130  static const int warp_mask = 31;
131 
132  typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
133  };
134 
135  template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
136  {
137  __device__ __forceinline__ BlockScan() {}
138  __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
139 
140  __device__ __forceinline__ T operator()(volatile T *ptr)
141  {
142  const unsigned int tid = threadIdx.x;
143  const unsigned int lane = tid & warp_mask;
144  const unsigned int warp = tid >> warp_log;
145 
146  Sc scan;
147  typename Sc::merge merge_scan;
148  const unsigned int idx = scan.index(tid);
149 
150  T val = scan(ptr, idx);
151  __syncthreads ();
152 
153  if( warp == 0)
154  scan.init(ptr);
155  __syncthreads ();
156 
157  if( lane == 31 )
158  ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
159  __syncthreads ();
160 
161  if( warp == 0 )
162  merge_scan(ptr, idx);
163  __syncthreads();
164 
165  if ( warp > 0)
166  val = ptr [scan.warp_offset + warp - 1] + val;
167  __syncthreads ();
168 
169  ptr[idx] = val;
170  __syncthreads ();
171 
172  return val ;
173  }
174 
175  static const int warp_log = 5;
176  static const int warp_mask = 31;
177  };
178 
179  template <typename T>
180  __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
181  {
182  #if __CUDA_ARCH__ >= 300
183  const unsigned int laneId = cv::cuda::device::Warp::laneId();
184 
185  // scan on shuffl functions
186  #pragma unroll
187  for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
188  {
189  const T n = cv::cuda::device::shfl_up(idata, i);
190  if (laneId >= i)
191  idata += n;
192  }
193 
194  return idata;
195  #else
196  unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
197  s_Data[pos] = 0;
198  pos += OPENCV_CUDA_WARP_SIZE;
199  s_Data[pos] = idata;
200 
201  s_Data[pos] += s_Data[pos - 1];
202  s_Data[pos] += s_Data[pos - 2];
203  s_Data[pos] += s_Data[pos - 4];
204  s_Data[pos] += s_Data[pos - 8];
205  s_Data[pos] += s_Data[pos - 16];
206 
207  return s_Data[pos];
208  #endif
209  }
210 
211  template <typename T>
212  __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
213  {
214  return warpScanInclusive(idata, s_Data, tid) - idata;
215  }
216 
217  template <int tiNumScanThreads, typename T>
218  __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
219  {
220  if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
221  {
222  //Bottom-level inclusive warp scan
223  T warpResult = warpScanInclusive(idata, s_Data, tid);
224 
225  //Save top elements of each warp for exclusive warp scan
226  //sync to wait for warp scans to complete (because s_Data is being overwritten)
227  __syncthreads();
228  if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
229  {
230  s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
231  }
232 
233  //wait for warp scans to complete
234  __syncthreads();
235 
236  if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
237  {
238  //grab top warp elements
239  T val = s_Data[tid];
240  //calculate exclusive scan and write back to shared memory
241  s_Data[tid] = warpScanExclusive(val, s_Data, tid);
242  }
243 
244  //return updated warp scans with exclusive scan results
245  __syncthreads();
246 
247  return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
248  }
249  else
250  {
251  return warpScanInclusive(idata, s_Data, tid);
252  }
253  }
254 }}}
255 
257 
258 #endif // OPENCV_CUDA_SCAN_HPP
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray OutputArray OutputArray F
Definition: calib3d.hpp:1867
CV_EXPORTS void merge(const Mat *mv, size_t count, OutputArray dst)
Creates one multi-channel array out of several single-channel ones.
const int * idx
Definition: core_c.h:668
CvMemStoragePos * pos
Definition: core_c.h:1573
int index
Definition: core_c.h:634
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441