EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
scan.hpp
1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// Redistribution and use in source and binary forms, with or without modification,
18// are permitted provided that the following conditions are met:
19//
20// * Redistribution's of source code must retain the above copyright notice,
21// this list of conditions and the following disclaimer.
22//
23// * Redistribution's in binary form must reproduce the above copyright notice,
24// this list of conditions and the following disclaimer in the documentation
25// and/or other materials provided with the distribution.
26//
27// * The name of the copyright holders may not be used to endorse or promote products
28// derived from this software without specific prior written permission.
29//
30// This software is provided by the copyright holders and contributors "as is" and
31// any express or implied warranties, including, but not limited to, the implied
32// warranties of merchantability and fitness for a particular purpose are disclaimed.
33// In no event shall the Intel Corporation or contributors be liable for any direct,
34// indirect, incidental, special, exemplary, or consequential damages
35// (including, but not limited to, procurement of substitute goods or services;
36// loss of use, data, or profits; or business interruption) however caused
37// and on any theory of liability, whether in contract, strict liability,
38// or tort (including negligence or otherwise) arising in any way out of
39// the use of this software, even if advised of the possibility of such damage.
40//
41//M*/
42
43#ifndef OPENCV_CUDA_SCAN_HPP
44#define OPENCV_CUDA_SCAN_HPP
45
46#include "opencv2/core/cuda/common.hpp"
47#include "opencv2/core/cuda/utility.hpp"
48#include "opencv2/core/cuda/warp.hpp"
49#include "opencv2/core/cuda/warp_shuffle.hpp"
50
56
57namespace cv { namespace cuda { namespace device
58{
59 enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
60
61 template <ScanKind Kind, typename T, typename F> struct WarpScan
62 {
63 __device__ __forceinline__ WarpScan() {}
64 __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
65
66 __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
67 {
68 const unsigned int lane = idx & 31;
69 F op;
70
71 if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
72 if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
73 if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
74 if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
75 if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
76
77 if( Kind == INCLUSIVE )
78 return ptr [idx];
79 else
80 return (lane > 0) ? ptr [idx - 1] : 0;
81 }
82
83 __device__ __forceinline__ unsigned int index(const unsigned int tid)
84 {
85 return tid;
86 }
87
88 __device__ __forceinline__ void init(volatile T *ptr){}
89
90 static const int warp_offset = 0;
91
92 typedef WarpScan<INCLUSIVE, T, F> merge;
93 };
94
95 template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
96 {
97 __device__ __forceinline__ WarpScanNoComp() {}
98 __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
99
100 __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
101 {
102 const unsigned int lane = threadIdx.x & 31;
103 F op;
104
105 ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
106 ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
107 ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
108 ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
109 ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
110
111 if( Kind == INCLUSIVE )
112 return ptr [idx];
113 else
114 return (lane > 0) ? ptr [idx - 1] : 0;
115 }
116
117 __device__ __forceinline__ unsigned int index(const unsigned int tid)
118 {
119 return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
120 }
121
122 __device__ __forceinline__ void init(volatile T *ptr)
123 {
124 ptr[threadIdx.x] = 0;
125 }
126
127 static const int warp_smem_stride = 32 + 16 + 1;
128 static const int warp_offset = 16;
129 static const int warp_log = 5;
130 static const int warp_mask = 31;
131
132 typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
133 };
134
135 template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
136 {
137 __device__ __forceinline__ BlockScan() {}
138 __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
139
140 __device__ __forceinline__ T operator()(volatile T *ptr)
141 {
142 const unsigned int tid = threadIdx.x;
143 const unsigned int lane = tid & warp_mask;
144 const unsigned int warp = tid >> warp_log;
145
146 Sc scan;
147 typename Sc::merge merge_scan;
148 const unsigned int idx = scan.index(tid);
149
150 T val = scan(ptr, idx);
151 __syncthreads ();
152
153 if( warp == 0)
154 scan.init(ptr);
155 __syncthreads ();
156
157 if( lane == 31 )
158 ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
159 __syncthreads ();
160
161 if( warp == 0 )
162 merge_scan(ptr, idx);
163 __syncthreads();
164
165 if ( warp > 0)
166 val = ptr [scan.warp_offset + warp - 1] + val;
167 __syncthreads ();
168
169 ptr[idx] = val;
170 __syncthreads ();
171
172 return val ;
173 }
174
175 static const int warp_log = 5;
176 static const int warp_mask = 31;
177 };
178
179 template <typename T>
180 __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
181 {
182 #if __CUDA_ARCH__ >= 300
183 const unsigned int laneId = cv::cuda::device::Warp::laneId();
184
185 // scan on shuffl functions
186 #pragma unroll
187 for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
188 {
189 const T n = cv::cuda::device::shfl_up(idata, i);
190 if (laneId >= i)
191 idata += n;
192 }
193
194 return idata;
195 #else
196 unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
197 s_Data[pos] = 0;
198 pos += OPENCV_CUDA_WARP_SIZE;
199 s_Data[pos] = idata;
200
201 s_Data[pos] += s_Data[pos - 1];
202 s_Data[pos] += s_Data[pos - 2];
203 s_Data[pos] += s_Data[pos - 4];
204 s_Data[pos] += s_Data[pos - 8];
205 s_Data[pos] += s_Data[pos - 16];
206
207 return s_Data[pos];
208 #endif
209 }
210
211 template <typename T>
212 __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
213 {
214 return warpScanInclusive(idata, s_Data, tid) - idata;
215 }
216
217 template <int tiNumScanThreads, typename T>
218 __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
219 {
220 if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
221 {
222 //Bottom-level inclusive warp scan
223 T warpResult = warpScanInclusive(idata, s_Data, tid);
224
225 //Save top elements of each warp for exclusive warp scan
226 //sync to wait for warp scans to complete (because s_Data is being overwritten)
227 __syncthreads();
228 if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
229 {
230 s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
231 }
232
233 //wait for warp scans to complete
234 __syncthreads();
235
236 if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
237 {
238 //grab top warp elements
239 T val = s_Data[tid];
240 //calculate exclusive scan and write back to shared memory
241 s_Data[tid] = warpScanExclusive(val, s_Data, tid);
242 }
243
244 //return updated warp scans with exclusive scan results
245 __syncthreads();
246
247 return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
248 }
249 else
250 {
251 return warpScanInclusive(idata, s_Data, tid);
252 }
253 }
254}}}
255
257
258#endif // OPENCV_CUDA_SCAN_HPP
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition calib3d.hpp:1867
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray OutputArray OutputArray F
Definition calib3d.hpp:1867
const int * idx
Definition core_c.h:668
CvMemStoragePos * pos
Definition core_c.h:1573
int index
Definition core_c.h:634
T merge(T... args)
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441