43 #ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
44 #define OPENCV_CUDA_VEC_DISTANCE_HPP
47 #include "functional.hpp"
48 #include "detail/vec_distance_detail.hpp"
56 namespace cv {
namespace cuda {
namespace device
// Per-thread accumulator for an L1 distance (sum of absolute differences)
// over integer operands.
//
// The template parameter T is not referenced by this primary template; both
// the element type (value_type) and the result type (result_type) are int.
// A float specialization is provided separately.
template <typename T>
struct L1Dist
{
    typedef int value_type;
    typedef int result_type;

    // Starts from an empty (zero) partial sum.
    __device__ __forceinline__ L1Dist() : mySum(0) {}

    // Folds one pair of elements into this thread's partial sum.
    // __sad(a, b, c) evaluates |a - b| + c, i.e. mySum += |val1 - val2|.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum = __sad(val1, val2, mySum);
    }

    // Hands this thread's partial sum to reduce<THREAD_DIM>() together with
    // the scratch buffer `smem`, the thread index `tid` and plus<int> as the
    // combining operator.
    // NOTE(review): reduce() is declared outside this file; presumably it
    // leaves the combined sum of all THREAD_DIM threads in mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }

    // Reads the accumulated distance.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};
// Float specialization of L1Dist: accumulates the sum of absolute
// differences in single precision.
template <>
struct L1Dist<float>
{
    typedef float value_type;
    typedef float result_type;

    // Starts from an empty (zero) partial sum.
    __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

    // Folds one pair of elements into this thread's partial sum:
    // mySum += |val1 - val2|.
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        mySum += ::fabs(val1 - val2);
    }

    // Hands this thread's partial sum to reduce<THREAD_DIM>() together with
    // the scratch buffer `smem`, the thread index `tid` and plus<float> as
    // the combining operator.
    // NOTE(review): reduce() is declared outside this file; presumably it
    // leaves the combined sum of all THREAD_DIM threads in mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }

    // Reads the accumulated distance.
    __device__ __forceinline__ operator float() const
    {
        return mySum;
    }

    float mySum;
};
// Per-thread accumulator for an L2 (Euclidean) distance over float operands.
// Squared differences are accumulated in mySum; the square root is taken
// only when the value is read through operator float().
struct L2Dist
{
    typedef float value_type;
    typedef float result_type;

    // Starts from an empty (zero) sum of squares.
    __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

    // Folds one pair of elements into this thread's partial sum:
    // mySum += (val1 - val2)^2.
    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        float reg = val1 - val2;
        mySum += reg * reg;
    }

    // Hands this thread's partial sum of squares to reduce<THREAD_DIM>()
    // together with the scratch buffer `smem`, the thread index `tid` and
    // plus<float> as the combining operator.
    // NOTE(review): reduce() is declared outside this file; presumably it
    // leaves the combined sum of all THREAD_DIM threads in mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(float* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
    }

    // Reads the distance: the square root of the accumulated sum of squares.
    __device__ __forceinline__ operator float() const
    {
        return sqrtf(mySum);
    }

    float mySum;
};
// Per-thread accumulator for a Hamming distance: counts the bit positions
// in which the two int operands differ.
struct HammingDist
{
    typedef int value_type;
    typedef int result_type;

    // Starts from an empty (zero) bit count.
    __device__ __forceinline__ HammingDist() : mySum(0) {}

    // Folds one pair of elements into this thread's partial count.
    // val1 ^ val2 has a 1 in every differing bit position and __popc()
    // counts the set bits of that word.
    __device__ __forceinline__ void reduceIter(int val1, int val2)
    {
        mySum += __popc(val1 ^ val2);
    }

    // Hands this thread's partial count to reduce<THREAD_DIM>() together
    // with the scratch buffer `smem`, the thread index `tid` and plus<int>
    // as the combining operator.
    // NOTE(review): reduce() is declared outside this file; presumably it
    // leaves the combined sum of all THREAD_DIM threads in mySum - confirm.
    template <int THREAD_DIM>
    __device__ __forceinline__ void reduceAll(int* smem, int tid)
    {
        reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
    }

    // Reads the accumulated distance.
    __device__ __forceinline__ operator int() const
    {
        return mySum;
    }

    int mySum;
};
// Accumulates the distance between vec1 and vec2 into `dist`, then asks
// `dist` to combine the per-thread partial results.
//
// THREAD_DIM - number of threads cooperating on one pair of vectors; also
//              the stride between the elements handled by one thread.
// vec1, vec2 - input vectors; elements 0 .. len-1 are read through
//              ForceGlob<T>::Load.
// len        - number of elements to compare.
// dist       - accumulator providing reduceIter() and reduceAll<THREAD_DIM>().
// smem       - scratch buffer forwarded unchanged to dist.reduceAll().
// tid        - index of the calling thread, expected in [0, THREAD_DIM)
//              (assumption - confirm against callers).
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist,
                                  typename Dist::result_type* smem, int tid)
{
    // Thread `tid` visits elements tid, tid + THREAD_DIM, tid + 2 * THREAD_DIM, ...
    for (int i = tid; i < len; i += THREAD_DIM)
    {
        // NOTE(review): ForceGlob is declared outside this file; by its name
        // it presumably forces a global-memory load of element i - confirm.
        T1 val1;
        ForceGlob<T1>::Load(vec1, i, val1);

        T2 val2;
        ForceGlob<T2>::Load(vec2, i, val2);

        dist.reduceIter(val1, val2);
    }

    // `dist` has a dependent type, so the member template call needs the
    // `template` disambiguator to be parsed as a template-id by conforming
    // compilers.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
// Accumulates the distance between a cached vector and a second vector into
// `dist`, then asks `dist` to combine the per-thread partial results.
//
// The element-wise accumulation is delegated to
// vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN,
// LEN_EQ_MAX_LEN>::calc(), which is declared in the detail header; this
// function only adds the final reduceAll step.
//
// vecCached - values cached by the calling thread.
// vecGlob   - the vector compared against the cached one.
// len       - number of elements to compare.
// dist      - accumulator providing reduceAll<THREAD_DIM>().
// smem      - scratch buffer forwarded unchanged to dist.reduceAll().
// tid       - index of the calling thread.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist,
                                                  typename Dist::result_type* smem, int tid)
{
    vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

    // `dist` has a dependent type, so the member template call needs the
    // `template` disambiguator to be parsed as a template-id by conforming
    // compilers.
    dist.template reduceAll<THREAD_DIM>(smem, tid);
}
// Distance calculator that keeps only a pointer to the first vector and
// re-reads it on every calc() call via calcVecDiffGlobal().
template <int THREAD_DIM, typename T1>
struct VecDiffGlobal
{
    // Stores the pointer to the first vector. The four trailing unnamed
    // parameters are accepted and ignored; they give this constructor the
    // same five-argument shape as VecDiffCachedRegister's constructor
    // (presumably so callers can construct either type with one argument
    // list - confirm against callers).
    explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        : vec1(vec1_)
    {
    }

    // Accumulates the distance between the stored vector and vec2 into
    // `dist`; all arguments are forwarded to calcVecDiffGlobal<THREAD_DIM>.
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
    }

    const T1* vec1;
};
// Distance calculator that copies this thread's share of the first vector
// into a per-thread array (vec1Vals) once, at construction, and reuses it in
// every calc() call.
//
// NOTE(review): vec1Vals holds MAX_LEN / THREAD_DIM values while the copy
// loop below runs for i = tid, tid + THREAD_DIM, ... < MAX_LEN; the two agree
// when MAX_LEN is a multiple of THREAD_DIM - confirm that callers guarantee it.
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U>
struct VecDiffCachedRegister
{
    // vec1     - first vector; element glob_tid is read when glob_tid < len.
    // len      - number of valid elements in vec1.
    // smem     - staging buffer indexed by glob_tid on write and by
    //            tid + k * THREAD_DIM on read (presumably block-shared
    //            memory of at least MAX_LEN elements - confirm).
    // glob_tid - index of the element this thread stages.
    // tid      - index of this thread among the THREAD_DIM cooperating threads.
    //
    // Contains __syncthreads(), so every thread of the block must construct
    // the object.
    template <typename T1>
    __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
    {
        // Stage one element per thread; the guard keeps the read inside vec1.
        // Entries with index >= len are not written here.
        if (glob_tid < len)
            smem[glob_tid] = vec1[glob_tid];
        // Each thread reads entries staged by other threads below, so all
        // writes must be complete first.
        __syncthreads();

        U* vec1ValsPtr = vec1Vals;

        #pragma unroll
        for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
            *vec1ValsPtr++ = smem[i];

        // Keep any later writer of `smem` from overtaking threads that are
        // still copying out of it.
        __syncthreads();
    }

    // Accumulates the distance between the cached values and vec2 into
    // `dist`; all arguments are forwarded to
    // calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>.
    template <typename T2, typename Dist>
    __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist,
                                         typename Dist::result_type* smem, int tid) const
    {
        calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
    }

    U vec1Vals[MAX_LEN / THREAD_DIM];
};
const CvArr * vec2
Definition: core_c.h:1429
const CvArr * U
Definition: core_c.h:1340
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441