43 #ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
44 #define OPENCV_CUDA_DATAMOV_UTILS_HPP
54 namespace cv {
namespace cuda {
namespace device
56 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// Primary ForceGlob template: reads a single element with a plain indexed
// access and leaves address handling to the compiler.
// It follows the `#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200` test.
template <typename T>
struct ForceGlob
{
    // Copies ptr[offset] into val.
    //   ptr    - base address of the elements to read
    //   offset - element index relative to ptr (counted in elements, not bytes)
    //   val    - receives the loaded element
    __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)
    {
        val = ptr[offset];
    }
};
// Inline-asm constraint letter used for pointer operands in the PTX loads:
// "l" (64-bit register) when _WIN64 or __LP64__ is defined, otherwise
// "r" (32-bit register). The two definitions must be mutually exclusive;
// without the #else the "r" form would always override the "l" form.
#if defined(_WIN64) || defined(__LP64__)
    #define OPENCV_CUDA_ASM_PTR "l"
#else
    #define OPENCV_CUDA_ASM_PTR "r"
#endif
// Declaration only: the OPENCV_CUDA_DEFINE_FORCE_GLOB* macros provide the
// explicit per-type specializations of this template.
template <class T> struct ForceGlob;
// Emits an explicit specialization ForceGlob<base_type> whose Load() performs
// the read with an inline PTX `ld.global.<ptx_type>` instruction.
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix appended to "ld.global."
//   reg_mod   - inline-asm constraint letter for the output operand; it is
//               stringized and pasted after "=" so that val is bound directly
// The address operand is (ptr + offset), bound with OPENCV_CUDA_ASM_PTR.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
        } \
    };
// Variant of OPENCV_CUDA_DEFINE_FORCE_GLOB for element types that have no
// matching register constraint of their own: the PTX load always targets a
// 32-bit "r" register.
//   base_type - C++ element type being specialized
//   ptx_type  - PTX type suffix appended to "ld.global."
// The loaded value is captured in a local uint and then narrowed to base_type,
// so only sizeof(base_type) bytes of val are written. Binding the output to
// *reinterpret_cast<uint*>(&val) instead would store a full 32-bit word
// through a pointer to a smaller object.
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
    template <> struct ForceGlob<base_type> \
    { \
        __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
        { \
            uint loaded; \
            asm("ld.global."#ptx_type" %0, [%1];" : "=r"(loaded) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
            val = static_cast<base_type>(loaded); \
        } \
    };
// Explicit ForceGlob specializations, one per supported element type.
// The char-sized types go through the _B variant (32-bit "r" output operand);
// the remaining types bind val directly with the listed constraint letter.
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
// The helper macros exist only to generate the ForceGlob specializations;
// remove them so they are not visible to code that includes this header.
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
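// ---------------------------------------------------------------------------
// Cross-reference notes carried over from the generated documentation listing.
// They are not part of the header; they record where the listing resolved each
// identifier.
//
//   T      : InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray
//            InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
//            Definition: calib3d.hpp:1867
//   schar  : signed char schar           Definition: interface.h:48
//   uint   : uint32_t uint               Definition: interface.h:42
//   uchar  : unsigned char uchar         Definition: interface.h:51
//   ushort : unsigned short ushort       Definition: interface.h:52
//   r      : CvRect r                    Definition: imgproc_c.h:984
//   offset : CvArr CvPoint offset        Definition: imgproc_c.h:88
//   "black box" representation of the file storage associated with a file on disk.
//            Definition: calib3d.hpp:441
// ---------------------------------------------------------------------------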