43 #ifndef OPENCV_CUDA_DEVICE_WARP_HPP
44 #define OPENCV_CUDA_DEVICE_WARP_HPP
52 namespace cv {
namespace cuda {
namespace device
59 WARP_SIZE = 1 << LOG_WARP_SIZE,
64 static __device__ __forceinline__
unsigned int laneId()
67 asm(
"mov.u32 %0, %%laneid;" :
"=r"(ret) );
71 template<
typename It,
typename T>
72 static __device__ __forceinline__
void fill(It beg, It
end,
const T&
value)
74 for(It t = beg + laneId(); t <
end; t += STRIDE)
78 template<
typename InIt,
typename OutIt>
79 static __device__ __forceinline__ OutIt
copy(InIt beg, InIt
end, OutIt out)
81 for(InIt t = beg + laneId(); t <
end; t += STRIDE, out += STRIDE)
86 template<
typename InIt,
typename OutIt,
class UnOp>
87 static __device__ __forceinline__ OutIt
transform(InIt beg, InIt
end, OutIt out, UnOp op)
89 for(InIt t = beg + laneId(); t <
end; t += STRIDE, out += STRIDE)
94 template<
typename InIt1,
typename InIt2,
typename OutIt,
class BinOp>
95 static __device__ __forceinline__ OutIt
transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
97 unsigned int lane = laneId();
99 InIt1 t1 = beg1 + lane;
100 InIt2 t2 = beg2 + lane;
101 for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
106 template <
class T,
class BinOp>
107 static __device__ __forceinline__
T reduce(
volatile T *ptr, BinOp op)
109 const unsigned int lane = laneId();
113 T partial = ptr[lane];
115 ptr[lane] = partial = op(partial, ptr[lane + 16]);
116 ptr[lane] = partial = op(partial, ptr[lane + 8]);
117 ptr[lane] = partial = op(partial, ptr[lane + 4]);
118 ptr[lane] = partial = op(partial, ptr[lane + 2]);
119 ptr[lane] = partial = op(partial, ptr[lane + 1]);
125 template<
typename OutIt,
typename T>
126 static __device__ __forceinline__
void yota(OutIt beg, OutIt
end,
T value)
128 unsigned int lane = laneId();
131 for(OutIt t = beg + lane; t <
end; t += STRIDE,
value += STRIDE)
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype=-1)
Reduces a matrix to a vector.
CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m)
Performs the matrix transformation of every array element.
int CvScalar value
Definition: core_c.h:720
double double end
Definition: core_c.h:1381
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441