EstervQrCode 1.1.1
Library for qr code manipulation
transform_detail.hpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
44 #define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
45 
46 #include "../common.hpp"
47 #include "../vec_traits.hpp"
48 #include "../functional.hpp"
49 
51 
52 namespace cv { namespace cuda { namespace device
53 {
54  namespace transform_detail
55  {
57 
58  template <typename T, typename D, int shift> struct UnaryReadWriteTraits
59  {
60  typedef typename TypeVec<T, shift>::vec_type read_type;
61  typedef typename TypeVec<D, shift>::vec_type write_type;
62  };
63 
64  template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
65  {
66  typedef typename TypeVec<T1, shift>::vec_type read_type1;
67  typedef typename TypeVec<T2, shift>::vec_type read_type2;
68  typedef typename TypeVec<D, shift>::vec_type write_type;
69  };
70 
72 
73  template <int shift> struct OpUnroller;
74  template <> struct OpUnroller<1>
75  {
76  template <typename T, typename D, typename UnOp, typename Mask>
77  static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
78  {
79  if (mask(y, x_shifted))
80  dst.x = op(src.x);
81  }
82 
83  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
84  static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
85  {
86  if (mask(y, x_shifted))
87  dst.x = op(src1.x, src2.x);
88  }
89  };
90  template <> struct OpUnroller<2>
91  {
92  template <typename T, typename D, typename UnOp, typename Mask>
93  static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
94  {
95  if (mask(y, x_shifted))
96  dst.x = op(src.x);
97  if (mask(y, x_shifted + 1))
98  dst.y = op(src.y);
99  }
100 
101  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
102  static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
103  {
104  if (mask(y, x_shifted))
105  dst.x = op(src1.x, src2.x);
106  if (mask(y, x_shifted + 1))
107  dst.y = op(src1.y, src2.y);
108  }
109  };
110  template <> struct OpUnroller<3>
111  {
112  template <typename T, typename D, typename UnOp, typename Mask>
113  static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
114  {
115  if (mask(y, x_shifted))
116  dst.x = op(src.x);
117  if (mask(y, x_shifted + 1))
118  dst.y = op(src.y);
119  if (mask(y, x_shifted + 2))
120  dst.z = op(src.z);
121  }
122 
123  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
124  static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
125  {
126  if (mask(y, x_shifted))
127  dst.x = op(src1.x, src2.x);
128  if (mask(y, x_shifted + 1))
129  dst.y = op(src1.y, src2.y);
130  if (mask(y, x_shifted + 2))
131  dst.z = op(src1.z, src2.z);
132  }
133  };
134  template <> struct OpUnroller<4>
135  {
136  template <typename T, typename D, typename UnOp, typename Mask>
137  static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
138  {
139  if (mask(y, x_shifted))
140  dst.x = op(src.x);
141  if (mask(y, x_shifted + 1))
142  dst.y = op(src.y);
143  if (mask(y, x_shifted + 2))
144  dst.z = op(src.z);
145  if (mask(y, x_shifted + 3))
146  dst.w = op(src.w);
147  }
148 
149  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
150  static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
151  {
152  if (mask(y, x_shifted))
153  dst.x = op(src1.x, src2.x);
154  if (mask(y, x_shifted + 1))
155  dst.y = op(src1.y, src2.y);
156  if (mask(y, x_shifted + 2))
157  dst.z = op(src1.z, src2.z);
158  if (mask(y, x_shifted + 3))
159  dst.w = op(src1.w, src2.w);
160  }
161  };
162  template <> struct OpUnroller<8>
163  {
164  template <typename T, typename D, typename UnOp, typename Mask>
165  static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
166  {
167  if (mask(y, x_shifted))
168  dst.a0 = op(src.a0);
169  if (mask(y, x_shifted + 1))
170  dst.a1 = op(src.a1);
171  if (mask(y, x_shifted + 2))
172  dst.a2 = op(src.a2);
173  if (mask(y, x_shifted + 3))
174  dst.a3 = op(src.a3);
175  if (mask(y, x_shifted + 4))
176  dst.a4 = op(src.a4);
177  if (mask(y, x_shifted + 5))
178  dst.a5 = op(src.a5);
179  if (mask(y, x_shifted + 6))
180  dst.a6 = op(src.a6);
181  if (mask(y, x_shifted + 7))
182  dst.a7 = op(src.a7);
183  }
184 
185  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
186  static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
187  {
188  if (mask(y, x_shifted))
189  dst.a0 = op(src1.a0, src2.a0);
190  if (mask(y, x_shifted + 1))
191  dst.a1 = op(src1.a1, src2.a1);
192  if (mask(y, x_shifted + 2))
193  dst.a2 = op(src1.a2, src2.a2);
194  if (mask(y, x_shifted + 3))
195  dst.a3 = op(src1.a3, src2.a3);
196  if (mask(y, x_shifted + 4))
197  dst.a4 = op(src1.a4, src2.a4);
198  if (mask(y, x_shifted + 5))
199  dst.a5 = op(src1.a5, src2.a5);
200  if (mask(y, x_shifted + 6))
201  dst.a6 = op(src1.a6, src2.a6);
202  if (mask(y, x_shifted + 7))
203  dst.a7 = op(src1.a7, src2.a7);
204  }
205  };
206 
207  template <typename T, typename D, typename UnOp, typename Mask>
208  static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
209  {
210  typedef TransformFunctorTraits<UnOp> ft;
211  typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
212  typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
213 
214  const int x = threadIdx.x + blockIdx.x * blockDim.x;
215  const int y = threadIdx.y + blockIdx.y * blockDim.y;
216  const int x_shifted = x * ft::smart_shift;
217 
218  if (y < src_.rows)
219  {
220  const T* src = src_.ptr(y);
221  D* dst = dst_.ptr(y);
222 
223  if (x_shifted + ft::smart_shift - 1 < src_.cols)
224  {
225  const read_type src_n_el = ((const read_type*)src)[x];
226  OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
227  }
228  else
229  {
230  for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
231  {
232  if (mask(y, real_x))
233  dst[real_x] = op(src[real_x]);
234  }
235  }
236  }
237  }
238 
239  template <typename T, typename D, typename UnOp, typename Mask>
240  __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
241  {
242  const int x = blockDim.x * blockIdx.x + threadIdx.x;
243  const int y = blockDim.y * blockIdx.y + threadIdx.y;
244 
245  if (x < src.cols && y < src.rows && mask(y, x))
246  {
247  dst.ptr(y)[x] = op(src.ptr(y)[x]);
248  }
249  }
250 
251  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
252  static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
253  const Mask mask, const BinOp op)
254  {
255  typedef TransformFunctorTraits<BinOp> ft;
256  typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
257  typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
258  typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
259 
260  const int x = threadIdx.x + blockIdx.x * blockDim.x;
261  const int y = threadIdx.y + blockIdx.y * blockDim.y;
262  const int x_shifted = x * ft::smart_shift;
263 
264  if (y < src1_.rows)
265  {
266  const T1* src1 = src1_.ptr(y);
267  const T2* src2 = src2_.ptr(y);
268  D* dst = dst_.ptr(y);
269 
270  if (x_shifted + ft::smart_shift - 1 < src1_.cols)
271  {
272  const read_type1 src1_n_el = ((const read_type1*)src1)[x];
273  const read_type2 src2_n_el = ((const read_type2*)src2)[x];
274 
275  OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
276  }
277  else
278  {
279  for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
280  {
281  if (mask(y, real_x))
282  dst[real_x] = op(src1[real_x], src2[real_x]);
283  }
284  }
285  }
286  }
287 
288  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
289  static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
290  const Mask mask, const BinOp op)
291  {
292  const int x = blockDim.x * blockIdx.x + threadIdx.x;
293  const int y = blockDim.y * blockIdx.y + threadIdx.y;
294 
295  if (x < src1.cols && y < src1.rows && mask(y, x))
296  {
297  const T1 src1_data = src1.ptr(y)[x];
298  const T2 src2_data = src2.ptr(y)[x];
299  dst.ptr(y)[x] = op(src1_data, src2_data);
300  }
301  }
302 
303  template <bool UseSmart> struct TransformDispatcher;
304  template<> struct TransformDispatcher<false>
305  {
306  template <typename T, typename D, typename UnOp, typename Mask>
307  static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
308  {
309  typedef TransformFunctorTraits<UnOp> ft;
310 
311  const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
312  const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
313 
314  transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
315  cudaSafeCall( cudaGetLastError() );
316 
317  if (stream == 0)
318  cudaSafeCall( cudaDeviceSynchronize() );
319  }
320 
321  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
322  static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
323  {
324  typedef TransformFunctorTraits<BinOp> ft;
325 
326  const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
327  const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
328 
329  transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
330  cudaSafeCall( cudaGetLastError() );
331 
332  if (stream == 0)
333  cudaSafeCall( cudaDeviceSynchronize() );
334  }
335  };
336  template<> struct TransformDispatcher<true>
337  {
338  template <typename T, typename D, typename UnOp, typename Mask>
339  static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
340  {
341  typedef TransformFunctorTraits<UnOp> ft;
342 
343  CV_StaticAssert(ft::smart_shift != 1, "");
344 
345  if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
346  !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
347  {
348  TransformDispatcher<false>::call(src, dst, op, mask, stream);
349  return;
350  }
351 
352  const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
353  const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
354 
355  transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
356  cudaSafeCall( cudaGetLastError() );
357 
358  if (stream == 0)
359  cudaSafeCall( cudaDeviceSynchronize() );
360  }
361 
362  template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
363  static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
364  {
365  typedef TransformFunctorTraits<BinOp> ft;
366 
367  CV_StaticAssert(ft::smart_shift != 1, "");
368 
369  if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
370  !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
371  !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
372  {
373  TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
374  return;
375  }
376 
377  const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
378  const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
379 
380  transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
381  cudaSafeCall( cudaGetLastError() );
382 
383  if (stream == 0)
384  cudaSafeCall( cudaDeviceSynchronize() );
385  }
386  };
387  } // namespace transform_detail
388 }}} // namespace cv { namespace cuda { namespace device
389 
391 
392 #endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
size_t step(int i=-1) const
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
const CvArr const CvArr * src2
Definition: core_c.h:994
const CvArr * src1
Definition: core_c.h:993
const CvArr CvArr * x
Definition: core_c.h:1195
const CvArr * y
Definition: core_c.h:1187
static int divUp(int a, unsigned int b)
Integer division with result round up.
Definition: utility.hpp:482
static bool isAligned(const T &data)
Alignment check of passed values.
Definition: utility.hpp:517
CV_EXPORTS OutputArray int double double InputArray mask
Definition: imgproc.hpp:2132
OutputArray dst
Definition: imgproc.hpp:3564
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441