EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
transform_detail.hpp
1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// Redistribution and use in source and binary forms, with or without modification,
18// are permitted provided that the following conditions are met:
19//
20// * Redistribution's of source code must retain the above copyright notice,
21// this list of conditions and the following disclaimer.
22//
23// * Redistribution's in binary form must reproduce the above copyright notice,
24// this list of conditions and the following disclaimer in the documentation
25// and/or other materials provided with the distribution.
26//
27// * The name of the copyright holders may not be used to endorse or promote products
28// derived from this software without specific prior written permission.
29//
30// This software is provided by the copyright holders and contributors "as is" and
31// any express or implied warranties, including, but not limited to, the implied
32// warranties of merchantability and fitness for a particular purpose are disclaimed.
33// In no event shall the Intel Corporation or contributors be liable for any direct,
34// indirect, incidental, special, exemplary, or consequential damages
35// (including, but not limited to, procurement of substitute goods or services;
36// loss of use, data, or profits; or business interruption) however caused
37// and on any theory of liability, whether in contract, strict liability,
38// or tort (including negligence or otherwise) arising in any way out of
39// the use of this software, even if advised of the possibility of such damage.
40//
41//M*/
42
43#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
44#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
45
46#include "../common.hpp"
47#include "../vec_traits.hpp"
48#include "../functional.hpp"
49
51
52namespace cv { namespace cuda { namespace device
53{
54 namespace transform_detail
55 {
57
// Vector types used by the unary transform kernels to load and store
// `shift` elements in one access.
//   T     - source element type
//   D     - destination element type
//   shift - number of elements grouped into one vector access
// Both types are looked up through TypeVec, which is declared outside this
// file (presumably in vec_traits.hpp -- confirm).
template <typename T, typename D, int shift>
struct UnaryReadWriteTraits
{
    // Vector type loaded from the source row.
    typedef typename TypeVec<T, shift>::vec_type read_type;

    // Vector type stored to the destination row.
    typedef typename TypeVec<D, shift>::vec_type write_type;
};
63
// Vector types used by the binary transform kernels to load from two sources
// and store to one destination, `shift` elements per access.
//   T1, T2 - element types of the first and second source
//   D      - destination element type
//   shift  - number of elements grouped into one vector access
// All three types are looked up through TypeVec, which is declared outside
// this file (presumably in vec_traits.hpp -- confirm).
template <typename T1, typename T2, typename D, int shift>
struct BinaryReadWriteTraits
{
    // Vector type loaded from the first source row.
    typedef typename TypeVec<T1, shift>::vec_type read_type1;

    // Vector type loaded from the second source row.
    typedef typename TypeVec<T2, shift>::vec_type read_type2;

    // Vector type stored to the destination row.
    typedef typename TypeVec<D, shift>::vec_type write_type;
};
70
72
// Applies a functor component by component to small vector values, writing a
// destination component only where the mask allows it. The primary template
// is declared but not defined; only explicit specializations can be used.
template <int shift> struct OpUnroller;

// One-component case: only the `.x` member is processed.
template <> struct OpUnroller<1>
{
    // Unary form.
    //   src       - source vector (its .x is read)
    //   dst       - destination vector (its .x is written when the mask passes)
    //   mask      - callable as mask(y, x); a true result enables the write
    //   op        - unary functor; taken by const reference, like the 3-, 4- and
    //               8-component specializations, because it is only invoked here
    //   x_shifted - column of the first (here: only) component
    //   y         - row index passed to the mask
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
    }

    // Binary form: same as above, but op receives the .x of both sources.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
    }
};
// Two-component case: processes `.x` and `.y`, which correspond to columns
// x_shifted and x_shifted + 1 of row y. Each component is written only when
// the mask returns true for its own column.
template <> struct OpUnroller<2>
{
    // Unary form.
    //   src       - source vector (.x, .y are read)
    //   dst       - destination vector (.x, .y are written where the mask passes)
    //   mask      - callable as mask(y, x)
    //   op        - unary functor; taken by const reference, like the 3-, 4- and
    //               8-component specializations, because it is only invoked here
    //   x_shifted - column of the first component
    //   y         - row index passed to the mask
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src.y);
    }

    // Binary form: op receives the matching components of both sources.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
            dst.x = op(src1.x, src2.x);
        if (mask(y, x_shifted + 1))
            dst.y = op(src1.y, src2.y);
    }
};
// Three-component case: `.x`, `.y` and `.z` map to columns x_shifted,
// x_shifted + 1 and x_shifted + 2 of row y. A component of dst is assigned
// only when the mask returns true for that component's column.
template <> struct OpUnroller<3>
{
    // Unary form: dst.c = op(src.c) for every unmasked component c.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src.z);
        }
    }

    // Binary form: dst.c = op(src1.c, src2.c) for every unmasked component c.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src1.y, src2.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src1.z, src2.z);
        }
    }
};
// Four-component case: `.x`, `.y`, `.z` and `.w` map to columns x_shifted
// through x_shifted + 3 of row y. A component of dst is assigned only when
// the mask returns true for that component's column.
template <> struct OpUnroller<4>
{
    // Unary form: dst.c = op(src.c) for every unmasked component c.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src.z);
        }
        if (mask(y, x_shifted + 3))
        {
            dst.w = op(src.w);
        }
    }

    // Binary form: dst.c = op(src1.c, src2.c) for every unmasked component c.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))
        {
            dst.x = op(src1.x, src2.x);
        }
        if (mask(y, x_shifted + 1))
        {
            dst.y = op(src1.y, src2.y);
        }
        if (mask(y, x_shifted + 2))
        {
            dst.z = op(src1.z, src2.z);
        }
        if (mask(y, x_shifted + 3))
        {
            dst.w = op(src1.w, src2.w);
        }
    }
};
// Eight-component case: members `.a0` .. `.a7` map to columns x_shifted
// through x_shifted + 7 of row y. Member aK of dst is assigned only when
// mask(y, x_shifted + K) is true.
template <> struct OpUnroller<8>
{
    // Unary form: dst.aK = op(src.aK) for every unmasked K.
    template <typename T, typename D, typename UnOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))     dst.a0 = op(src.a0);
        if (mask(y, x_shifted + 1)) dst.a1 = op(src.a1);
        if (mask(y, x_shifted + 2)) dst.a2 = op(src.a2);
        if (mask(y, x_shifted + 3)) dst.a3 = op(src.a3);
        if (mask(y, x_shifted + 4)) dst.a4 = op(src.a4);
        if (mask(y, x_shifted + 5)) dst.a5 = op(src.a5);
        if (mask(y, x_shifted + 6)) dst.a6 = op(src.a6);
        if (mask(y, x_shifted + 7)) dst.a7 = op(src.a7);
    }

    // Binary form: dst.aK = op(src1.aK, src2.aK) for every unmasked K.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
    {
        if (mask(y, x_shifted))     dst.a0 = op(src1.a0, src2.a0);
        if (mask(y, x_shifted + 1)) dst.a1 = op(src1.a1, src2.a1);
        if (mask(y, x_shifted + 2)) dst.a2 = op(src1.a2, src2.a2);
        if (mask(y, x_shifted + 3)) dst.a3 = op(src1.a3, src2.a3);
        if (mask(y, x_shifted + 4)) dst.a4 = op(src1.a4, src2.a4);
        if (mask(y, x_shifted + 5)) dst.a5 = op(src1.a5, src2.a5);
        if (mask(y, x_shifted + 6)) dst.a6 = op(src1.a6, src2.a6);
        if (mask(y, x_shifted + 7)) dst.a7 = op(src1.a7, src2.a7);
    }
};
206
// Unary element-wise kernel in which each thread handles
// TransformFunctorTraits<UnOp>::smart_shift consecutive elements of one row.
//
// Launch layout: 2D; the thread's global y index selects the row and its
// global x index selects a group of smart_shift columns.
// Preconditions: the row pointers are reinterpreted as pointers to vector
// types, so data and step must be suitably aligned; within this file
// TransformDispatcher<true>::call checks that with isAligned before launching.
//
//   src_ - source image (rows/cols are taken from here)
//   dst_ - destination image
//   mask - callable as mask(y, x); elements whose mask is false are not written
//   op   - unary functor applied to every unmasked element
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
    typedef TransformFunctorTraits<UnOp> ft;
    typedef UnaryReadWriteTraits<T, D, ft::smart_shift> rw;
    typedef typename rw::read_type read_type;
    typedef typename rw::write_type write_type;

    const int group = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int first_col = group * ft::smart_shift;

    // Threads below the last row have nothing to do.
    if (row >= src_.rows)
        return;

    const T* src_row = src_.ptr(row);
    D* dst_row = dst_.ptr(row);

    // The group does not fit entirely inside the row: process whatever
    // columns remain one by one (possibly none).
    if (first_col + ft::smart_shift - 1 >= src_.cols)
    {
        for (int col = first_col; col < src_.cols; ++col)
        {
            if (mask(row, col))
                dst_row[col] = op(src_row[col]);
        }
        return;
    }

    // Full group: one vector load, then per-component masked stores directly
    // into the destination vector.
    const read_type packed_src = reinterpret_cast<const read_type*>(src_row)[group];
    write_type& packed_dst = reinterpret_cast<write_type*>(dst_row)[group];

    OpUnroller<ft::smart_shift>::unroll(packed_src, packed_dst, mask, op, first_col, row);
}
238
// Unary element-wise kernel handling exactly one element per thread.
//
// Launch layout: 2D; the thread's global (x, y) index is the (column, row)
// of the element it processes.
//
//   src  - source image (rows/cols are taken from here)
//   dst  - destination image
//   mask - callable as mask(y, x); evaluated only for in-range coordinates
//   op   - unary functor applied to the element
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image do nothing.
    if (col >= src.cols || row >= src.rows)
        return;

    // Masked-out elements are left untouched.
    if (!mask(row, col))
        return;

    dst.ptr(row)[col] = op(src.ptr(row)[col]);
}
250
// Binary element-wise kernel in which each thread handles
// TransformFunctorTraits<BinOp>::smart_shift consecutive elements of one row.
//
// Launch layout: 2D; the thread's global y index selects the row and its
// global x index selects a group of smart_shift columns.
// Preconditions: the row pointers are reinterpreted as pointers to vector
// types, so data and step of all three images must be suitably aligned;
// within this file TransformDispatcher<true>::call checks that with isAligned
// before launching.
//
//   src1_ - first source image (rows/cols are taken from here)
//   src2_ - second source image
//   dst_  - destination image
//   mask  - callable as mask(y, x); elements whose mask is false are not written
//   op    - binary functor applied to every unmasked pair of elements
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
                                      const Mask mask, const BinOp op)
{
    typedef TransformFunctorTraits<BinOp> ft;
    typedef BinaryReadWriteTraits<T1, T2, D, ft::smart_shift> rw;
    typedef typename rw::read_type1 read_type1;
    typedef typename rw::read_type2 read_type2;
    typedef typename rw::write_type write_type;

    const int group = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int first_col = group * ft::smart_shift;

    // Threads below the last row have nothing to do.
    if (row >= src1_.rows)
        return;

    const T1* src1_row = src1_.ptr(row);
    const T2* src2_row = src2_.ptr(row);
    D* dst_row = dst_.ptr(row);

    // The group does not fit entirely inside the row: process whatever
    // columns remain one by one (possibly none).
    if (first_col + ft::smart_shift - 1 >= src1_.cols)
    {
        for (int col = first_col; col < src1_.cols; ++col)
        {
            if (mask(row, col))
                dst_row[col] = op(src1_row[col], src2_row[col]);
        }
        return;
    }

    // Full group: one vector load per source, then per-component masked
    // stores directly into the destination vector.
    const read_type1 packed_src1 = reinterpret_cast<const read_type1*>(src1_row)[group];
    const read_type2 packed_src2 = reinterpret_cast<const read_type2*>(src2_row)[group];
    write_type& packed_dst = reinterpret_cast<write_type*>(dst_row)[group];

    OpUnroller<ft::smart_shift>::unroll(packed_src1, packed_src2, packed_dst, mask, op, first_col, row);
}
287
// Binary element-wise kernel handling exactly one element per thread.
//
// Launch layout: 2D; the thread's global (x, y) index is the (column, row)
// of the element it processes.
//
//   src1 - first source image (rows/cols are taken from here)
//   src2 - second source image
//   dst  - destination image
//   mask - callable as mask(y, x); evaluated only for in-range coordinates
//   op   - binary functor applied to the pair of source elements
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
                                       const Mask mask, const BinOp op)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image do nothing.
    if (col >= src1.cols || row >= src1.rows)
        return;

    // Masked-out elements are left untouched.
    if (!mask(row, col))
        return;

    // Load both operands before computing and storing the result.
    const T1 lhs = src1.ptr(row)[col];
    const T2 rhs = src2.ptr(row)[col];
    dst.ptr(row)[col] = op(lhs, rhs);
}
302
// Host-side launcher for the transform kernels. The boolean selects between
// the one-element-per-thread kernels (false) and the vectorized kernels
// (true). The primary template is declared only; both values are specialized.
template <bool UseSmart> struct TransformDispatcher;

// Launches the transformSimple kernels: one thread per element.
template <> struct TransformDispatcher<false>
{
    // Unary transform: dst(y, x) = op(src(y, x)) wherever mask(y, x) is true.
    // The launch is checked with cudaGetLastError; when `stream` is the
    // default stream (0) the call also waits for the device to finish.
    template <typename T, typename D, typename UnOp, typename Mask>
    static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<UnOp> ft;

        // Block shape comes from the functor traits; the grid is sized so that
        // the blocks cover every column and row of the source.
        const dim3 block(ft::simple_block_dim_x, ft::simple_block_dim_y);
        const dim3 blocks(divUp(src.cols, block.x), divUp(src.rows, block.y));

        transformSimple<T, D><<<blocks, block, 0, stream>>>(src, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
        {
            cudaSafeCall( cudaDeviceSynchronize() );
        }
    }

    // Binary transform: dst(y, x) = op(src1(y, x), src2(y, x)) wherever
    // mask(y, x) is true. Grid size is derived from src1. Error checking and
    // default-stream synchronization are the same as in the unary overload.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<BinOp> ft;

        const dim3 block(ft::simple_block_dim_x, ft::simple_block_dim_y);
        const dim3 blocks(divUp(src1.cols, block.x), divUp(src1.rows, block.y));

        transformSimple<T1, T2, D><<<blocks, block, 0, stream>>>(src1, src2, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
        {
            cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
};
// Launches the vectorized transformSmart kernels, in which each thread
// processes ft::smart_shift elements. If any data pointer or step is not
// aligned to smart_shift elements, the call is forwarded to
// TransformDispatcher<false> instead.
template <> struct TransformDispatcher<true>
{
    // Unary transform: dst(y, x) = op(src(y, x)) wherever mask(y, x) is true.
    // The launch is checked with cudaGetLastError; when `stream` is the
    // default stream (0) the call also waits for the device to finish.
    template <typename T, typename D, typename UnOp, typename Mask>
    static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<UnOp> ft;

        // The vectorized path must not be selected for a shift of 1.
        CV_StaticAssert(ft::smart_shift != 1, "");

        // Vector loads/stores need both the base pointer and the row step of
        // every image to be a multiple of the vector size.
        const bool vectorizable =
            isAligned(src.data, ft::smart_shift * sizeof(T)) && isAligned(src.step, ft::smart_shift * sizeof(T)) &&
            isAligned(dst.data, ft::smart_shift * sizeof(D)) && isAligned(dst.step, ft::smart_shift * sizeof(D));

        if (!vectorizable)
        {
            TransformDispatcher<false>::call(src, dst, op, mask, stream);
            return;
        }

        // Each thread covers smart_shift columns, so a block covers
        // block.x * smart_shift columns.
        const dim3 block(ft::smart_block_dim_x, ft::smart_block_dim_y);
        const dim3 blocks(divUp(src.cols, block.x * ft::smart_shift), divUp(src.rows, block.y));

        transformSmart<T, D><<<blocks, block, 0, stream>>>(src, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
        {
            cudaSafeCall( cudaDeviceSynchronize() );
        }
    }

    // Binary transform: dst(y, x) = op(src1(y, x), src2(y, x)) wherever
    // mask(y, x) is true. Grid size is derived from src1. Alignment fallback,
    // error checking and default-stream synchronization are the same as in the
    // unary overload.
    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<BinOp> ft;

        // The vectorized path must not be selected for a shift of 1.
        CV_StaticAssert(ft::smart_shift != 1, "");

        const bool vectorizable =
            isAligned(src1.data, ft::smart_shift * sizeof(T1)) && isAligned(src1.step, ft::smart_shift * sizeof(T1)) &&
            isAligned(src2.data, ft::smart_shift * sizeof(T2)) && isAligned(src2.step, ft::smart_shift * sizeof(T2)) &&
            isAligned(dst.data, ft::smart_shift * sizeof(D)) && isAligned(dst.step, ft::smart_shift * sizeof(D));

        if (!vectorizable)
        {
            TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
            return;
        }

        const dim3 block(ft::smart_block_dim_x, ft::smart_block_dim_y);
        const dim3 blocks(divUp(src1.cols, block.x * ft::smart_shift), divUp(src1.rows, block.y));

        transformSmart<T1, T2, D><<<blocks, block, 0, stream>>>(src1, src2, dst, mask, op);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
        {
            cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
};
387 } // namespace transform_detail
388}}} // namespace cv { namespace cuda { namespace device
389
391
392#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition calib3d.hpp:1867
CvArr * dst
Definition core_c.h:875
const CvArr const CvArr * src2
Definition core_c.h:994
const CvArr * src1
Definition core_c.h:993
CvArr const CvArr * mask
Definition core_c.h:589
const CvArr CvArr * x
Definition core_c.h:1195
const CvArr * y
Definition core_c.h:1187
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441