43 #ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
44 #define OPENCV_CUDA_WARP_SHUFFLE_HPP
52 namespace cv {
namespace cuda {
namespace device
// Compatibility macros: when compiling with a CUDA compiler whose major
// version is 9 or newer, the legacy mask-less names __shfl, __shfl_up and
// __shfl_down are redefined to call the corresponding *_sync intrinsics.
// The participant mask is hard-coded to 0xFFFFFFFFU.
// NOTE(review): that mask presumably names all 32 lanes of the warp, so the
// wrappers below would have to be called with the whole warp participating -
// confirm against callers.
// NOTE(review): the closing #endif of this guard is not visible in this listing.
54 #if __CUDACC_VER_MAJOR__ >= 9
55 # define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
56 # define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
57 # define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
// Generic warp shuffle wrapper: forwards val, srcLane and width unchanged to
// __shfl and returns its result.
//   val     - value passed to the shuffle.
//   srcLane - lane index handed to __shfl (presumably the lane whose val is
//             read - confirm against the CUDA shuffle documentation).
//   width   - third __shfl argument; defaults to warpSize.
// NOTE(review): the `template <typename T>` header that declares T is not
// visible in this listing (compare shfl_up, where it is).
60 __device__ __forceinline__
T shfl(
T val,
int srcLane,
int width = warpSize)
// The shuffle is only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
62 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
63 return __shfl(val, srcLane, width);
// unsigned int overload of shfl: casts val to int, passes it through __shfl
// with the given srcLane and width, and casts the result back to unsigned int.
//   width defaults to warpSize.
68 __device__ __forceinline__
unsigned int shfl(
unsigned int val,
int srcLane,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
70 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
71 return (
unsigned int) __shfl((
int) val, srcLane, width);
// double overload of shfl: the value is shuffled as two separate int halves.
//   val     - double to exchange.
//   srcLane - lane index handed to each __shfl call.
//   width   - third __shfl argument; defaults to warpSize.
// Returns the double rebuilt from the two shuffled halves.
76 __device__ __forceinline__
double shfl(
double val,
int srcLane,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
78 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
// Split the double into its low and high int parts.
79 int lo = __double2loint(val);
80 int hi = __double2hiint(val);
// Shuffle each half independently with identical srcLane/width.
82 lo = __shfl(lo, srcLane, width);
83 hi = __shfl(hi, srcLane, width);
// Reassemble; note the argument order is (hi, lo).
85 return __hiloint2double(hi, lo);
// Generic shuffle-down wrapper: forwards val, delta and width unchanged to
// __shfl_down and returns its result.
//   val   - value passed to the shuffle.
//   delta - lane offset handed to __shfl_down.
//   width - third __shfl_down argument; defaults to warpSize.
// NOTE(review): the `template <typename T>` header that declares T is not
// visible in this listing (compare shfl_up, where it is).
92 __device__ __forceinline__
T shfl_down(
T val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
94 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
95 return __shfl_down(val,
delta, width);
// unsigned int overload of shfl_down: casts val to int, passes it through
// __shfl_down with the given delta and width, and casts the result back to
// unsigned int.
//   width defaults to warpSize.
100 __device__ __forceinline__
unsigned int shfl_down(
unsigned int val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
102 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
103 return (
unsigned int) __shfl_down((
int) val,
delta, width);
// double overload of shfl_down: the value is shuffled as two separate int
// halves.
//   val   - double to exchange.
//   delta - lane offset handed to each __shfl_down call.
//   width - third __shfl_down argument; defaults to warpSize.
// Returns the double rebuilt from the two shuffled halves.
108 __device__ __forceinline__
double shfl_down(
double val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
110 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
// Split the double into its low and high int parts.
111 int lo = __double2loint(val);
112 int hi = __double2hiint(val);
// Shuffle each half independently with identical delta/width.
114 lo = __shfl_down(lo,
delta, width);
115 hi = __shfl_down(hi,
delta, width);
// Reassemble; note the argument order is (hi, lo).
117 return __hiloint2double(hi, lo);
// Generic shuffle-up wrapper: forwards val, delta and width unchanged to
// __shfl_up and returns its result.
//   val   - value passed to the shuffle.
//   delta - lane offset handed to __shfl_up.
//   width - third __shfl_up argument; defaults to warpSize.
123 template <
typename T>
124 __device__ __forceinline__
T shfl_up(
T val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
126 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
127 return __shfl_up(val,
delta, width);
// unsigned int overload of shfl_up: casts val to int, passes it through
// __shfl_up with the given delta and width, and casts the result back to
// unsigned int.
//   width defaults to warpSize.
132 __device__ __forceinline__
unsigned int shfl_up(
unsigned int val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
134 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
135 return (
unsigned int) __shfl_up((
int) val,
delta, width);
// double overload of shfl_up: the value is shuffled as two separate int
// halves.
//   val   - double to exchange.
//   delta - lane offset handed to each __shfl_up call.
//   width - third __shfl_up argument; defaults to warpSize.
// Returns the double rebuilt from the two shuffled halves.
140 __device__ __forceinline__
double shfl_up(
double val,
unsigned int delta,
int width = warpSize)
// Only compiled when __CUDA_ARCH__ is defined and >= 300.
// NOTE(review): the fallback branch and #endif are not visible in this listing.
142 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
// Split the double into its low and high int parts.
143 int lo = __double2loint(val);
144 int hi = __double2hiint(val);
// Shuffle each half independently with identical delta/width.
146 lo = __shfl_up(lo,
delta, width);
147 hi = __shfl_up(hi,
delta, width);
// Reassemble; note the argument order is (hi, lo).
149 return __hiloint2double(hi, lo);
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
CvSize int int int CvPoint int delta
Definition: imgproc_c.h:1168
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441