#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_

#include "warp_reduce.hpp"

namespace cv { namespace cuda { namespace device
{
    struct Emulation
    {
        // __syncthreads_or() requires Fermi (sm_20) or newer; on older
        // architectures this is a compilation stub that always returns 0.
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
            (void) pred;
            return 0; // stub for pre-Fermi devices
#else
            return __syncthreads_or(pred);
#endif
        }
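        // Illustrative sketch (not part of the original header): how a kernel
        // might use syncthreadsOr() for a block-wide early exit. The kernel
        // name, data layout, and flags array are hypothetical.
        //
        //   __global__ void process(const int* flags, int n)
        //   {
        //       int idx = blockIdx.x * blockDim.x + threadIdx.x;
        //       int hasWork = (idx < n) ? flags[idx] : 0;
        //       // Non-zero iff any thread in the block still has work to do.
        //       if (!Emulation::syncthreadsOr(hasWork))
        //           return; // the whole block exits together, keeping later __syncthreads valid
        //       // ... continue processing ...
        //   }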
        // Emulates __ballot() on pre-Fermi devices by staging one-hot thread
        // masks in shared memory and OR-reducing them with warp_reduce().
        template<int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }
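        // Illustrative sketch: counting the threads that satisfy a predicate
        // via Ballot + __popc. Note the fallback packs bits as (1 << (tid & 31)),
        // so CTA_SIZE should not exceed the warp size there. The kernel below
        // is hypothetical and only shows the call pattern.
        //
        //   __global__ void countPositive(const float* data, int* count)
        //   {
        //       int pred = data[threadIdx.x] > 0.0f;
        //       unsigned int mask = Emulation::Ballot<32>(pred);
        //       if (threadIdx.x == 0)
        //           *count = __popc(mask); // population count of the ballot mask
        //   }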
        // Shared-memory atomics. Devices older than sm_12 have no shared-memory
        // atomics, so the upper 5 bits of each word are used as a thread tag:
        // a thread keeps rewriting the word until its own tagged value sticks.
        struct smem
        {
            enum { TAG_MASK = (1U << ((sizeof(unsigned int) << 3) - 5U)) - 1U };

            template <typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ((sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                }
                while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }
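            // Worked example of the tagging scheme for 32-bit words:
            // (sizeof(unsigned int) << 3) - 5 = 27, so TAG_MASK = (1 << 27) - 1.
            // The low 27 bits hold the counter and the high 5 bits hold the
            // thread id (valid for tids 0..31, i.e. one warp). A thread keeps
            // rewriting the word with its own tag until the value it reads back
            // is the one it wrote, which means its update won the race.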
            template <typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ((sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                }
                while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }
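            // Note: the emulated path returns (count & TAG_MASK) - val, i.e. the
            // value the counter held before this thread's addition, matching the
            // "return the old value" contract of the native ::atomicAdd.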
            template <typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                }
                while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem
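        // Illustrative sketch: a shared-memory histogram built with
        // smem::atomicAdd, then flushed with glob::atomicAdd. The kernel name
        // and 64-bin layout are hypothetical; this only shows the call pattern.
        //
        //   __global__ void hist64(const unsigned char* src, int n, unsigned int* bins)
        //   {
        //       __shared__ unsigned int smemBins[64];
        //       if (threadIdx.x < 64) smemBins[threadIdx.x] = 0;
        //       __syncthreads();
        //       int idx = blockIdx.x * blockDim.x + threadIdx.x;
        //       if (idx < n)
        //           Emulation::smem::atomicAdd(&smemBins[src[idx] >> 2], 1u);
        //       __syncthreads();
        //       if (threadIdx.x < 64)
        //           Emulation::glob::atomicAdd(&bins[threadIdx.x], smemBins[threadIdx.x]);
        //   }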
        // Global-memory atomics: thin wrappers over the native intrinsics,
        // plus compare-and-swap (CAS) emulation for float/double on
        // architectures that lack native support.
        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }

            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            // Native float atomicAdd exists only from sm_20; otherwise emulate
            // it with a CAS loop on the int bit pattern.
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
#if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
#else
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                                      __float_as_int(val + __int_as_float(assumed)));
                }
                while (assumed != old);

                return __int_as_float(old);
#endif
            }
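            // The CAS loop above is the standard emulation pattern: read the
            // current value, compute the desired result, and try to swap it in;
            // if another thread changed the word in the meantime, atomicCAS
            // returns the fresh value and the loop retries. Illustrative call
            // (hypothetical kernel fragment):
            //
            //   float partial = ...; // per-thread partial sum
            //   Emulation::glob::atomicAdd(globalSum, partial);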
            // 64-bit atomicCAS requires sm_13; reinterpret the double's bits
            // as unsigned long long for the exchange.
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                                      __double_as_longlong(val + __longlong_as_double(assumed)));
                }
                while (assumed != old);

                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0; // compilation stub for architectures without 64-bit CAS
#endif
            }
            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }

            // float has no native atomicMin; emulate it via CAS on the bit pattern.
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                                      __float_as_int(::fminf(val, __int_as_float(assumed))));
                }
                while (assumed != old);

                return __int_as_float(old);
#else
                (void) address;
                (void) val;
                return 0.0f; // compilation stub for architectures without atomicCAS
#endif
            }
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                                      __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                }
                while (assumed != old);

                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0; // compilation stub for architectures without 64-bit CAS
#endif
            }
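            // Illustrative sketch: a grid-wide minimum reduction. Each thread
            // folds its value into a single global cell; *minOut is assumed to
            // be initialized to FLT_MAX by the host before launch. The kernel
            // name is hypothetical.
            //
            //   __global__ void minReduce(const float* data, int n, float* minOut)
            //   {
            //       int idx = blockIdx.x * blockDim.x + threadIdx.x;
            //       if (idx < n)
            //           Emulation::glob::atomicMin(minOut, data[idx]);
            //   }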
            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }

            // float has no native atomicMax; emulate it via CAS on the bit pattern.
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                                      __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                }
                while (assumed != old);

                return __int_as_float(old);
#else
                (void) address;
                (void) val;
                return 0.0f; // compilation stub for architectures without atomicCAS
#endif
            }
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do
                {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                                      __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                }
                while (assumed != old);

                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0; // compilation stub for architectures without 64-bit CAS
#endif
            }
        }; // struct glob
    }; // struct Emulation
}}} // namespace cv { namespace cuda { namespace device

#endif /* OPENCV_CUDA_EMULATION_HPP_ */