#ifndef OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP
#define OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP

#include <opencv2/core/check.hpp>

#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif

#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>11999
#include "intrin_rvv_011_compat.hpp"
#endif

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif

#ifndef CV_RVV_MAX_VLEN
#define CV_RVV_MAX_VLEN 1024
#endif

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN

#define CV_SIMD_SCALABLE 1
#define CV_SIMD_SCALABLE_64F 1
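// With the scalable backend the lane count is only known at run time, so code
// built on these intrinsics queries VTraits<T>::vlanes() instead of assuming a
// fixed register width. Illustrative loop sketch (not part of this header):
//
//   int step = VTraits<v_float32>::vlanes();
//   int i = 0;
//   for (; i <= n - step; i += step)
//       v_store(dst + i, v_add(v_load(a + i), v_load(b + i)));
//   for (; i < n; i++)  // scalar tail
//       dst[i] = a[i] + b[i];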
using v_uint8 = vuint8m1_t;
using v_int8 = vint8m1_t;
using v_uint16 = vuint16m1_t;
using v_int16 = vint16m1_t;
using v_uint32 = vuint32m1_t;
using v_int32 = vint32m1_t;
using v_uint64 = vuint64m1_t;
using v_int64 = vint64m1_t;

using v_float32 = vfloat32m1_t;
#if CV_SIMD_SCALABLE_64F
using v_float64 = vfloat64m1_t;
#endif
using uchar = unsigned char;
using schar = signed char;
using ushort = unsigned short;
using uint = unsigned int;
using uint64 = unsigned long int;
using int64 = long int;
static const int __cv_rvv_e8m1_nlanes = vsetvlmax_e8m1();
static const int __cv_rvv_e16m1_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32m1_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64m1_nlanes = vsetvlmax_e64m1();
static const int __cv_rvv_e8m2_nlanes = vsetvlmax_e8m2();
static const int __cv_rvv_e16m2_nlanes = vsetvlmax_e16m2();
static const int __cv_rvv_e32m2_nlanes = vsetvlmax_e32m2();
static const int __cv_rvv_e64m2_nlanes = vsetvlmax_e64m2();
static const int __cv_rvv_e8m4_nlanes = vsetvlmax_e8m4();
static const int __cv_rvv_e16m4_nlanes = vsetvlmax_e16m4();
static const int __cv_rvv_e32m4_nlanes = vsetvlmax_e32m4();
static const int __cv_rvv_e64m4_nlanes = vsetvlmax_e64m4();
static const int __cv_rvv_e8m8_nlanes = vsetvlmax_e8m8();
static const int __cv_rvv_e16m8_nlanes = vsetvlmax_e16m8();
static const int __cv_rvv_e32m8_nlanes = vsetvlmax_e32m8();
static const int __cv_rvv_e64m8_nlanes = vsetvlmax_e64m8();
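// The vsetvlmax_* results are cached in the globals above because the maximum
// vector length is fixed for a given machine; this avoids re-executing a
// vsetvl instruction on every VTraits<...>::vlanes() call.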
#define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
template <> \
struct VTraits<REG> \
{ \
    static inline int vlanes() { return __cv_rvv_##SUF##_nlanes; } \
    using lane_type = TYP; \
    static const int max_nlanes = CV_RVV_MAX_VLEN/SZ; \
};
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)

OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)

OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)

OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t, float, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t, float, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t, float, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t, float, e32m8, 32)

#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t, double, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t, double, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t, double, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t, double, e64m8, 64)
#endif
#ifndef __riscv_v_intrinsic_overloading
#include "intrin_rvv_compat_overloaded.hpp"
#endif
#define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
inline _Tp v_get0(const v_##_Tpvec& v) \
{ \
    return vmv_x(v); \
}

OPENCV_HAL_IMPL_RVV_GRT0_INT(uint8, uchar)
OPENCV_HAL_IMPL_RVV_GRT0_INT(int8, schar)
OPENCV_HAL_IMPL_RVV_GRT0_INT(uint16, ushort)
OPENCV_HAL_IMPL_RVV_GRT0_INT(int16, short)
OPENCV_HAL_IMPL_RVV_GRT0_INT(uint32, unsigned)
OPENCV_HAL_IMPL_RVV_GRT0_INT(int32, int)
OPENCV_HAL_IMPL_RVV_GRT0_INT(uint64, uint64)
OPENCV_HAL_IMPL_RVV_GRT0_INT(int64, int64)

inline float v_get0(const v_float32& v)
{
    return vfmv_f(v);
}
#if CV_SIMD_SCALABLE_64F
inline double v_get0(const v_float64& v)
{
    return vfmv_f(v);
}
#endif
#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, suffix1, suffix2, vl) \
inline v_##_Tpvec v_setzero_##suffix1() \
{ \
    return vmv_v_x_##suffix2##m1(0, vl); \
} \
inline v_##_Tpvec v_setall_##suffix1(_Tp v) \
{ \
    return vmv_v_x_##suffix2##m1(v, vl); \
}

OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8, uchar, u8, u8, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8, schar, s8, i8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16, ushort, u16, u16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16, short, s16, i16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32, uint, u32, u32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32, int, s32, i32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64, uint64, u64, u64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64, int64, s64, i64, VTraits<v_int64>::vlanes())
#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, suffix, vl) \
inline v_##_Tpv v_setzero_##suffix() \
{ \
    return vfmv_v_f_##suffix##m1(0, vl); \
} \
inline v_##_Tpv v_setall_##suffix(_Tp v) \
{ \
    return vfmv_v_f_##suffix##m1(v, vl); \
}

OPENCV_HAL_IMPL_RVV_INIT_FP(float32, float, f32, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_INIT_FP(float64, double, f64, VTraits<v_float64>::vlanes())
#endif
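// Usage sketch (illustrative): v_setall_* / v_setzero_* splat a scalar across
// all lanes of a full-length vector, e.g.
//   v_float32 half = v_setall_f32(0.5f);  // [0.5, 0.5, ...]
//   v_int32 zero = v_setzero_s32();       // [0, 0, ...]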
#define OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(_Tpvec1, suffix1) \
inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec1& v) \
{ \
    return v; \
}

OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint8, u8)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint16, u16)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint32, u32)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint64, u64)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int8, s8)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int16, s16)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int32, s32)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int64, s64)
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float64, f64)
#endif
#define OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2) \
inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \
{ \
    return v_##_Tpvec1(vreinterpret_v_##nsuffix2##m1_##nsuffix1##m1(v)); \
} \
inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \
{ \
    return v_##_Tpvec2(vreinterpret_v_##nsuffix1##m1_##nsuffix2##m1(v)); \
}

OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, int8, u8, s8, u8, i8)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, int16, u16, s16, u16, i16)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, int32, u32, s32, u32, i32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, float32, u32, f32, u32, f32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32, float32, s32, f32, i32, f32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64, int64, u64, s64, u64, i64)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64, float64, u64, f64, u64, f64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int64, float64, s64, f64, i64, f64)
#endif
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint16, u8, u16, u8, u16)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint32, u8, u32, u8, u32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint64, u8, u64, u8, u64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, uint32, u16, u32, u16, u32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, uint64, u16, u64, u16, u64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, uint64, u32, u64, u32, u64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int16, s8, s16, i8, i16)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int32, s8, s32, i8, i32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int64, s8, s64, i8, i64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16, int32, s16, s32, i16, i32)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16, int64, s16, s64, i16, i64)
OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32, int64, s32, s64, i32, i64)
#define OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \
inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \
{ \
    return vreinterpret_v_##nsuffix1##width2##m1_##nsuffix1##width1##m1(vreinterpret_v_##nsuffix2##width2##m1_##nsuffix1##width2##m1(v)); \
} \
inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \
{ \
    return vreinterpret_v_##nsuffix1##width2##m1_##nsuffix2##width2##m1(vreinterpret_v_##nsuffix1##width1##m1_##nsuffix1##width2##m1(v)); \
}
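// Reinterpreting between types that differ in both signedness and element
// width has no single RVV intrinsic, so the macro above chains two
// vreinterpret casts through the intermediate type that shares the destination
// signedness and the source width; e.g. v_reinterpret_as_u8(v_int64) goes
// i64m1 -> u64m1 (sign cast) -> u8m1 (width cast).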
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int16, u8, s16, u, i, 8, 16)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int32, u8, s32, u, i, 8, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int64, u8, s64, u, i, 8, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int8, u16, s8, u, i, 16, 8)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int32, u16, s32, u, i, 16, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int64, u16, s64, u, i, 16, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int8, u32, s8, u, i, 32, 8)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int16, u32, s16, u, i, 32, 16)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int64, u32, s64, u, i, 32, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int8, u64, s8, u, i, 64, 8)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int16, u64, s16, u, i, 64, 16)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int32, u64, s32, u, i, 64, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float32, u8, f32, u, f, 8, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float32, u16, f32, u, f, 16, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, float32, u64, f32, u, f, 64, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float32, s8, f32, i, f, 8, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float32, s16, f32, i, f, 16, 32)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int64, float32, s64, f32, i, f, 64, 32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float64, u8, f64, u, f, 8, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float64, u16, f64, u, f, 16, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, float64, u32, f64, u, f, 32, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float64, s8, f64, i, f, 8, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float64, s16, f64, i, f, 16, 64)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int32, float64, s32, f64, i, f, 32, 64)
inline v_float32 v_reinterpret_as_f32(const v_float64& v)
{
    return vreinterpret_v_u32m1_f32m1(vreinterpret_v_u64m1_u32m1(vreinterpret_v_f64m1_u64m1(v)));
}
inline v_float64 v_reinterpret_as_f64(const v_float32& v)
{
    return vreinterpret_v_u64m1_f64m1(vreinterpret_v_u32m1_u64m1(vreinterpret_v_f32m1_u32m1(v)));
}
#endif
#define OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(_Tpvec, _Tp, suffix, vl) \
template <int s = 0> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b, int i = s) \
{ \
    return vslideup(vslidedown(v_setzero_##suffix(), a, i, vl), b, VTraits<_Tpvec>::vlanes() - i, vl); \
} \
template<int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) \
{ \
    return vmv_x(vslidedown(v_setzero_##suffix(), v, i, vl)); \
}

OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint8, uchar, u8, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int8, schar, s8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint16, ushort, u16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int16, short, s16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint32, unsigned int, u32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int32, int, s32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint64, uint64, u64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int64, int64, s64, VTraits<v_int64>::vlanes())
#define OPENCV_HAL_IMPL_RVV_EXTRACT_FP(_Tpvec, _Tp, suffix, vl) \
template <int s = 0> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b, int i = s) \
{ \
    return vslideup(vslidedown(v_setzero_##suffix(), a, i, vl), b, VTraits<_Tpvec>::vlanes() - i, vl); \
} \
template<int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) \
{ \
    return vfmv_f(vslidedown(v_setzero_##suffix(), v, i, vl)); \
}

OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float32, float, f32, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float64, double, f64, VTraits<v_float64>::vlanes())
#endif
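// Note: v_extract_n compiles to a vslidedown plus a scalar move, so reading a
// single lane is not free; prefer whole-vector operations in hot loops.
// Sketch (illustrative): float last = v_extract_n(v, VTraits<v_float32>::vlanes() - 1);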
#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, vl) \
inline _Tp v_extract_highest(_Tpvec v) \
{ \
    return v_extract_n(v, vl-1); \
}

OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8, uchar, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8, schar, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16, ushort, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16, short, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32, unsigned int, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32, int, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64, uint64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64, int64, VTraits<v_int64>::vlanes())
OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32, float, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64, double, VTraits<v_float64>::vlanes())
#endif
#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, vl, width, suffix, vmv) \
inline _Tpvec v_load(const _Tp* ptr) \
{ \
    return vle##width##_v_##suffix##m1(ptr, vl); \
} \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ \
    return vle##width##_v_##suffix##m1(ptr, vl); \
} \
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
{ \
    vse##width##_v_##suffix##m1(ptr, a, vl); \
} \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
    return vle##width##_v_##suffix##m1(ptr, hvl); \
} \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ \
    return vslideup(vle##width##_v_##suffix##m1(ptr0, hvl), vle##width##_v_##suffix##m1(ptr1, hvl), hvl, vl); \
} \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ \
    vse##width(ptr, a, vl); \
} \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ \
    vse##width(ptr, a, vl); \
} \
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
{ \
    vse##width(ptr, a, vl); \
} \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ \
    vse##width(ptr, a, hvl); \
} \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ \
    vse##width(ptr, vslidedown_vx_##suffix##m1(vmv(0, vl), a, hvl, vl), hvl); \
} \
template<typename... Targs> \
_Tpvec v_load_##suffix(Targs... nScalars) \
{ \
    return v_load({nScalars...}); \
}
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8, vuint8m1_t, uchar, VTraits<v_uint8>::vlanes() / 2, VTraits<v_uint8>::vlanes(), 8, u8, vmv_v_x_u8m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8, vint8m1_t, schar, VTraits<v_int8>::vlanes() / 2, VTraits<v_int8>::vlanes(), 8, i8, vmv_v_x_i8m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16, vuint16m1_t, ushort, VTraits<v_uint16>::vlanes() / 2, VTraits<v_uint16>::vlanes(), 16, u16, vmv_v_x_u16m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16, vint16m1_t, short, VTraits<v_int16>::vlanes() / 2, VTraits<v_int16>::vlanes(), 16, i16, vmv_v_x_i16m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32, vuint32m1_t, unsigned int, VTraits<v_uint32>::vlanes() / 2, VTraits<v_uint32>::vlanes(), 32, u32, vmv_v_x_u32m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32, vint32m1_t, int, VTraits<v_int32>::vlanes() / 2, VTraits<v_int32>::vlanes(), 32, i32, vmv_v_x_i32m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64, vuint64m1_t, uint64, VTraits<v_uint64>::vlanes() / 2, VTraits<v_uint64>::vlanes(), 64, u64, vmv_v_x_u64m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64, vint64m1_t, int64, VTraits<v_int64>::vlanes() / 2, VTraits<v_int64>::vlanes(), 64, i64, vmv_v_x_i64m1)
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32, vfloat32m1_t, float, VTraits<v_float32>::vlanes() / 2, VTraits<v_float32>::vlanes(), 32, f32, vfmv_v_f_f32m1)

#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64, vfloat64m1_t, double, VTraits<v_float64>::vlanes() / 2, VTraits<v_float64>::vlanes(), 64, f64, vfmv_v_f_f64m1)
#endif
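// Load/store usage sketch (illustrative, assumes in-bounds pointers):
//   v_uint8 pix = v_load(src);      // full vector, vlanes() elements
//   v_uint8 lo = v_load_low(src);   // low half only
//   v_store(dst, pix);              // full store
//   v_store_high(dst2, pix);        // upper half to dst2
// The "aligned" variants are aliases of the plain ones here, since RVV
// vle/vse instructions handle unaligned accesses.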
#define OPENCV_HAL_IMPL_RVV_LUT(_Tpvec, _Tp, suffix) \
inline _Tpvec v_lut(const _Tp* tab, const int* idx) \
{ \
    auto vidx = vmul(vreinterpret_u32##suffix(vle32_v_i32##suffix(idx, VTraits<_Tpvec>::vlanes())), sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
    return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_LUT(v_int8, schar, m4)
OPENCV_HAL_IMPL_RVV_LUT(v_int16, short, m2)
OPENCV_HAL_IMPL_RVV_LUT(v_int32, int, m1)
OPENCV_HAL_IMPL_RVV_LUT(v_int64, int64_t, mf2)
OPENCV_HAL_IMPL_RVV_LUT(v_float32, float, m1)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_LUT(v_float64, double, mf2)
#endif
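// vloxei32 gathers using *byte* offsets, hence the vmul by sizeof(_Tp) above;
// the LMUL suffix (m4/m2/m1/mf2) keeps the 32-bit index vector the same
// overall lane count as the destination element vector.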
#define OPENCV_HAL_IMPL_RVV_LUT_PAIRS(_Tpvec, _Tp, suffix1, suffix2, v_trunc) \
inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
{ \
    auto v0 = vle32_v_u32##suffix1((unsigned*)idx, VTraits<_Tpvec>::vlanes()/2); \
    auto v1 = vadd(v0, 1, VTraits<_Tpvec>::vlanes()/2); \
    auto w0 = vwcvtu_x(v0, VTraits<_Tpvec>::vlanes()/2); \
    auto w1 = vwcvtu_x(v1, VTraits<_Tpvec>::vlanes()/2); \
    auto sh1 = vslide1up(v_trunc(vreinterpret_u32##suffix2(w1)), 0, VTraits<_Tpvec>::vlanes()); \
    auto vid = vor(sh1, v_trunc(vreinterpret_u32##suffix2(w0)), VTraits<_Tpvec>::vlanes()); \
    auto vidx = vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
    return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int8, schar, m2, m4, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int16, short, m1, m2, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int32, int, mf2, m1, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float32, float, mf2, m1, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int64, int64_t, mf2, m1, vlmul_trunc_u32mf2)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float64, double, mf2, m1, vlmul_trunc_u32mf2)
#endif
#define OPENCV_HAL_IMPL_RVV_LUT_QUADS(_Tpvec, _Tp, suffix0, suffix1, suffix2, v_trunc) \
inline _Tpvec v_lut_quads(const _Tp* tab, const int* idx) \
{ \
    auto v0 = vle32_v_u32##suffix0((unsigned*)idx, VTraits<_Tpvec>::vlanes()/4); \
    auto v1 = vadd(v0, 1, VTraits<_Tpvec>::vlanes()/4); \
    auto v2 = vadd(v0, 2, VTraits<_Tpvec>::vlanes()/4); \
    auto v3 = vadd(v0, 3, VTraits<_Tpvec>::vlanes()/4); \
    auto w0 = vwcvtu_x(v0, VTraits<_Tpvec>::vlanes()/4); \
    auto w1 = vwcvtu_x(v1, VTraits<_Tpvec>::vlanes()/4); \
    auto w2 = vwcvtu_x(v2, VTraits<_Tpvec>::vlanes()/4); \
    auto w3 = vwcvtu_x(v3, VTraits<_Tpvec>::vlanes()/4); \
    auto sh2 = vslide1up(vreinterpret_u32##suffix1(w2), 0, VTraits<_Tpvec>::vlanes()/2); \
    auto sh3 = vslide1up(vreinterpret_u32##suffix1(w3), 0, VTraits<_Tpvec>::vlanes()/2); \
    auto vid0 = vor(sh2, vreinterpret_u32##suffix1(w0), VTraits<_Tpvec>::vlanes()/2); \
    auto vid1 = vor(sh3, vreinterpret_u32##suffix1(w1), VTraits<_Tpvec>::vlanes()/2); \
    auto wid0 = vwcvtu_x(v_trunc(vid0), VTraits<_Tpvec>::vlanes()/2); \
    auto wid1 = vwcvtu_x(v_trunc(vid1), VTraits<_Tpvec>::vlanes()/2); \
    auto shwid1 = vslide1up(vreinterpret_u32##suffix2(wid1), 0, VTraits<_Tpvec>::vlanes()); \
    auto vid = vor(shwid1, vreinterpret_u32##suffix2(wid0), VTraits<_Tpvec>::vlanes()); \
    auto vidx = vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
    return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int8, schar, m1, m2, m4, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int16, short, mf2, m1, m2, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int32, int, mf2, m1, m1, vlmul_trunc_u32mf2)
OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_float32, float, mf2, m1, m1, vlmul_trunc_u32mf2)
#define OPENCV_HAL_IMPL_RVV_LUT_VEC(_Tpvec, _Tp) \
inline _Tpvec v_lut(const _Tp* tab, const v_int32& vidx) \
{ \
    v_uint32 vidx_ = vmul(vreinterpret_u32m1(vidx), sizeof(_Tp), VTraits<v_int32>::vlanes()); \
    return vloxei32(tab, vidx_, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_LUT_VEC(v_float32, float)
OPENCV_HAL_IMPL_RVV_LUT_VEC(v_int32, int)
OPENCV_HAL_IMPL_RVV_LUT_VEC(v_uint32, unsigned)
#if CV_SIMD_SCALABLE_64F
inline v_float64 v_lut(const double* tab, const v_int32& vidx)
{
    vuint32mf2_t vidx_ = vmul(vlmul_trunc_u32mf2(vreinterpret_u32m1(vidx)), sizeof(double), VTraits<v_float64>::vlanes());
    return vloxei32(tab, vidx_, VTraits<v_float64>::vlanes());
}
#endif
inline v_uint16 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); }
inline v_uint32 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); }
inline v_uint64 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
inline v_uint8 v_pack_b(const v_uint16& a, const v_uint16& b)
{
    return vnsrl(vset(vlmul_ext_v_u16m1_u16m2(a), 1, b), 0, VTraits<v_uint8>::vlanes());
}

inline v_uint8 v_pack_b(const v_uint32& a, const v_uint32& b,
                        const v_uint32& c, const v_uint32& d)
{
    return vnsrl(vnsrl(vset(vset(vset(vlmul_ext_u32m4(a), 1, b), 2, c), 3, d), 0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes());
}

inline v_uint8 v_pack_b(const v_uint64& a, const v_uint64& b, const v_uint64& c,
                        const v_uint64& d, const v_uint64& e, const v_uint64& f,
                        const v_uint64& g, const v_uint64& h)
{
    return vnsrl(vnsrl(vnsrl(
        vset(vset(vset(vset(vset(vset(vset(vlmul_ext_u64m8(a),
        1, b), 2, c), 3, d), 4, e), 5, f), 6, g), 7, h),
        0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes());
}
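// Each vnsrl with shift 0 narrows the element width by half, so the 16->8,
// 32->8 and 64->8 packs above apply it once, twice and three times
// respectively, after concatenating the inputs into one wider-LMUL register
// group with vset.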
#define OPENCV_HAL_IMPL_RVV_BIN_OP(_Tpvec, ocv_intrin, rvv_intrin) \
inline _Tpvec v_##ocv_intrin(const _Tpvec& a, const _Tpvec& b) \
{ \
    return rvv_intrin(a, b, VTraits<_Tpvec>::vlanes()); \
}
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, add, vsaddu)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub, vssubu)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, add, vsadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub, vssub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, add, vsaddu)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub, vssubu)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, add, vsadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub, vssub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, add, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, sub, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, mul, vmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, add, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, sub, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, mul, vmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, add, vfadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, sub, vfsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, mul, vfmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, div, vfdiv)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64, add, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64, sub, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64, add, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64, sub, vsub)

#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, add, vfadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, sub, vfsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, mul, vfmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, div, vfdiv)
#endif
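// Per the universal-intrinsics contract, 8/16-bit v_add/v_sub saturate (hence
// vsaddu/vssub above) while 32/64-bit ops wrap. Illustrative example:
//   v_uint8 a = v_setall_u8(200), b = v_setall_u8(100);
//   v_add(a, b);   // every lane is 255 (saturated), not 44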
#define OPENCV_HAL_IMPL_RVV_BIN_MADD(_Tpvec, rvv_add) \
template<typename... Args> \
inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
    return v_add(rvv_add(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
}

#define OPENCV_HAL_IMPL_RVV_BIN_MMUL(_Tpvec, rvv_mul) \
template<typename... Args> \
inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
    return v_mul(rvv_mul(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
}

OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint8, vsaddu)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int8, vsadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint16, vsaddu)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int16, vsadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint32, vadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int32, vadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float32, vfadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint64, vadd)
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int64, vadd)

OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_uint32, vmul)
OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_int32, vmul)
OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float32, vfmul)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float64, vfadd)
OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float64, vfmul)
#endif
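// The variadic overloads fold left, so e.g. v_add(a, b, c, d) expands to
// v_add(v_add(v_add(a, b), c), d), terminating in the binary version above.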
#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _TpwvecM2, suffix, wmul) \
inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \
{ \
    _TpwvecM2 temp = wmul(a, b, VTraits<_Tpvec>::vlanes()); \
    c = vget_##suffix##m1(temp, 0); \
    d = vget_##suffix##m1(temp, 1); \
}

OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8, v_uint16, vuint16m2_t, u16, vwmulu)
OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8, v_int16, vint16m2_t, i16, vwmul)
OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16, v_uint32, vuint32m2_t, u32, vwmulu)
OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16, v_int32, vint32m2_t, i32, vwmul)
OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32, v_uint64, vuint64m2_t, u64, vwmulu)
inline v_int16 v_mul_hi(const v_int16& a, const v_int16& b)
{
    return vmulh(a, b, VTraits<v_int16>::vlanes());
}
inline v_uint16 v_mul_hi(const v_uint16& a, const v_uint16& b)
{
    return vmulhu(a, b, VTraits<v_uint16>::vlanes());
}
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, add_wrap, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, add_wrap, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, add_wrap, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, add_wrap, vadd)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub_wrap, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub_wrap, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub_wrap, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub_wrap, vsub)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, mul_wrap, vmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, mul_wrap, vmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, mul_wrap, vmul)
OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, mul_wrap, vmul)
#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _clip, _wmul) \
inline _Tpvec v_mul(const _Tpvec& a, const _Tpvec& b) \
{ \
    return _clip(_wmul(a, b, VTraits<_Tpvec>::vlanes()), 0, VTraits<_Tpvec>::vlanes()); \
} \
template<typename... Args> \
inline _Tpvec v_mul(const _Tpvec& a1, const _Tpvec& a2, const Args&... va) { \
    return v_mul(_clip(_wmul(a1, a2, VTraits<_Tpvec>::vlanes()), 0, VTraits<_Tpvec>::vlanes()), va...); \
}

OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8, vnclipu, vwmulu)
OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8, vnclip, vwmul)
OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16, vnclipu, vwmulu)
OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16, vnclip, vwmul)
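// 8/16-bit v_mul widens with vwmul(u) and then narrows back with saturation
// via vnclip(u), matching the saturating multiply semantics of the universal
// intrinsics; e.g. for v_uint8, 100 * 3 yields 255 in every lane, not 44.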
#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, vl) \
inline _Tpvec v_and(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vand(a, b, vl); \
} \
inline _Tpvec v_or(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vor(a, b, vl); \
} \
inline _Tpvec v_xor(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vxor(a, b, vl); \
} \
inline _Tpvec v_not(const _Tpvec& a) \
{ \
    return vnot(a, vl); \
}

OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64, VTraits<v_int64>::vlanes())
#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(intrin) \
inline v_float32 intrin(const v_float32& a, const v_float32& b) \
{ \
    return vreinterpret_f32m1(intrin(vreinterpret_i32m1(a), vreinterpret_i32m1(b))); \
}
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_and)
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_or)
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_xor)

inline v_float32 v_not(const v_float32& a)
{
    return vreinterpret_f32m1(v_not(vreinterpret_i32m1(a)));
}
#if CV_SIMD_SCALABLE_64F
#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(intrin) \
inline v_float64 intrin(const v_float64& a, const v_float64& b) \
{ \
    return vreinterpret_f64m1(intrin(vreinterpret_i64m1(a), vreinterpret_i64m1(b))); \
}
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_and)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_or)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_xor)

inline v_float64 v_not(const v_float64& a)
{
    return vreinterpret_f64m1(v_not(vreinterpret_i64m1(a)));
}
#endif
#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, vl) \
template<int s = 0> inline _Tpvec v_shl(const _Tpvec& a, int n = s) \
{ \
    return _Tpvec(vsll(a, uint8_t(n), vl)); \
} \
template<int s = 0> inline _Tpvec v_shr(const _Tpvec& a, int n = s) \
{ \
    return _Tpvec(vsrl(a, uint8_t(n), vl)); \
}

#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, vl) \
template<int s = 0> inline _Tpvec v_shl(const _Tpvec& a, int n = s) \
{ \
    return _Tpvec(vsll(a, uint8_t(n), vl)); \
} \
template<int s = 0> inline _Tpvec v_shr(const _Tpvec& a, int n = s) \
{ \
    return _Tpvec(vsra(a, uint8_t(n), vl)); \
}

OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64, VTraits<v_int64>::vlanes())
#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix) \
inline _Tpvec v_##op(const _Tpvec& a, const _Tpvec& b) \
{ \
    size_t VLEN = VTraits<_Tpvec>::vlanes(); \
    uint64_t ones = -1; \
    return vmerge(intrin(a, b, VLEN), vmv_v_x_##suffix##m1(0, VLEN), ones, VLEN); \
}
#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix) \
inline _Tpvec v_##op(const _Tpvec& a, const _Tpvec& b) \
{ \
    size_t VLEN = VTraits<_Tpvec>::vlanes(); \
    union { uint64_t u; VTraits<_Tpvec>::lane_type d; } ones; \
    ones.u = -1; \
    auto diff = intrin(a, b, VLEN); \
    auto z = vfmv_v_f_##suffix##m1(0, VLEN); \
    auto res = vfmerge(diff, z, ones.d, VLEN); \
    return _Tpvec(res); \
}
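// RVV comparisons produce a mask register, while the universal intrinsics
// expect a vector with all bits set in matching lanes. The macros above merge
// a zero vector with an all-ones scalar under the mask; for floats the
// uint64_t(-1) bit pattern is smuggled in through a union (it reads as a NaN,
// but only its bits matter).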
#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, vmseq, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, vmsne, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, vmsltu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, vmsgtu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, vmsleu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, vmsgeu, suffix)

#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, vmseq, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, vmsne, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, vmslt, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, vmsgt, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, vmsle, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, vmsge, suffix)

#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, eq, vmfeq, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ne, vmfne, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, lt, vmflt, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, gt, vmfgt, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, le, vmfle, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ge, vmfge, suffix)

OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8, i8)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16, i16)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32, i32)
OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64, i64)
OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64, f64)
#endif
inline v_float32 v_not_nan(const v_float32& a)
{ return v_eq(a, a); }

#if CV_SIMD_SCALABLE_64F
inline v_float64 v_not_nan(const v_float64& a)
{ return v_eq(a, a); }
#endif
#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, vl) \
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ \
    return intrin(a, b, vl); \
}

OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_min, vminu, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_max, vmaxu, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_min, vmin, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_max, vmax, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_min, vminu, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_max, vmaxu, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_min, vmin, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_max, vmax, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_min, vminu, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_max, vmaxu, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_min, vmin, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_max, vmax, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_min, vfmin, VTraits<v_float32>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_max, vfmax, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_min, vfmin, VTraits<v_float64>::vlanes())
OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_max, vfmax, VTraits<v_float64>::vlanes())
#endif
#define OPENCV_HAL_IMPL_RVV_ZIP4(_Tpvec, _wTpvec, suffix, convert2u, convert) \
inline void v_zip4(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) { \
    int vl = 4; \
    _wTpvec temp = vreinterpret_##suffix##m2(convert2u( \
        vor(vzext_vf2(convert(a0), vl), \
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(convert(a1), vl)), 0, vl*2)), \
            vl))); \
    b0 = vget_##suffix##m1(temp, 0); \
    b1 = vget_##suffix##m1(vrgather(temp, vadd(vid_v_u32m2(vl), 4, vl), vl), 0); \
}

OPENCV_HAL_IMPL_RVV_ZIP4(v_uint32, vuint32m2_t, u32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_ZIP4(v_int32, vint32m2_t, i32, vreinterpret_u32m2, vreinterpret_u32m1)
OPENCV_HAL_IMPL_RVV_ZIP4(v_float32, vfloat32m2_t, f32, vreinterpret_u32m2, vreinterpret_u32m1)
#if 0
// v_zip4 / v_transpose4x4 for fully scalable VLEN; disabled in favor of the
// 4-lane versions, which cost fewer instructions.
inline void v_zip4(const v_float32& a0, const v_float32& a1, v_float32& b0, v_float32& b1) {
    vuint64m1_t vid1 = vid_v_u64m1(VTraits<vuint64m1_t>::vlanes());
    vuint16m1_t t1 = vreinterpret_u16m1(vid1);
    vuint16m1_t t2 = vslide1up(t1, 0, VTraits<vuint16m1_t>::vlanes());
    vuint16m1_t t3 = vslide1up(t2, 0, VTraits<vuint16m1_t>::vlanes());
    vuint16m1_t t4 = vslide1up(t3, 0, VTraits<vuint16m1_t>::vlanes());
    t1 = vor(
        vor(t1, t2, VTraits<vuint16m1_t>::vlanes()),
        vor(t3, t4, VTraits<vuint16m1_t>::vlanes()),
        VTraits<vuint16m1_t>::vlanes()
    );
    vuint32m2_t vidx0 = vwmulu(t1, 4, VTraits<vuint32m1_t>::vlanes());
    vidx0 = vadd(vidx0, vid_v_u32m2(VTraits<vuint32m1_t>::vlanes()), VTraits<vuint32m1_t>::vlanes());
    vuint32m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint32m1_t>::vlanes());
    vfloat32m2_t temp = vreinterpret_f32m2(vreinterpret_u32m2(
        vor(vzext_vf2(vreinterpret_u32m1(a0), VTraits<vuint16m1_t>::vlanes()),
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a1), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vfloat32m1_t>::vlanes()*2)),
            VTraits<vfloat32m1_t>::vlanes())));
    b0 = vlmul_trunc_f32m1(vrgather(temp, vidx0, VTraits<vuint16m1_t>::vlanes()));
    b1 = vlmul_trunc_f32m1(vrgather(temp, vidx1, VTraits<vuint16m1_t>::vlanes()));
}
inline void v_transpose4x4(const v_float32& a0, const v_float32& a1, const v_float32& a2, const v_float32& a3,
                           v_float32& b0, v_float32& b1, v_float32& b2, v_float32& b3) {
    vuint64m2_t vid1 = vid_v_u64m2(VTraits<vuint32m1_t>::vlanes());
    vuint16m2_t t1 = vreinterpret_u16m2(vid1);
    vuint16m2_t t2 = vslide1up(t1, 0, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t t3 = vslide1up(t2, 0, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t t4 = vslide1up(t3, 0, VTraits<vuint8m1_t>::vlanes());
    t1 = vor(
        vor(t1, t2, VTraits<vuint8m1_t>::vlanes()),
        vor(t3, t4, VTraits<vuint8m1_t>::vlanes()),
        VTraits<vuint8m1_t>::vlanes()
    );
    vuint16m2_t vidx0 = vmul(t1, 12, VTraits<vuint8m1_t>::vlanes());
    vidx0 = vadd(vidx0, vid_v_u16m2(VTraits<vuint8m1_t>::vlanes()), VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx2 = vadd(vidx0, 8, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx3 = vadd(vidx0, 12, VTraits<vuint8m1_t>::vlanes());
    vuint32m2_t tempA = vreinterpret_u32m2(
        vor(vzext_vf2(vreinterpret_u32m1(a0), VTraits<vuint16m1_t>::vlanes()),
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a2), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())),
            VTraits<vuint32m1_t>::vlanes()));
    vuint32m2_t tempB = vreinterpret_u32m2(
        vor(vzext_vf2(vreinterpret_u32m1(a1), VTraits<vuint16m1_t>::vlanes()),
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a3), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())),
            VTraits<vuint32m1_t>::vlanes()));
    vfloat32m4_t temp = vreinterpret_f32m4(vreinterpret_u32m4(
        vor(vzext_vf2(tempA, VTraits<vuint8m1_t>::vlanes()),
            vreinterpret_u64m4(vslide1up(vreinterpret_u32m4(vzext_vf2(tempB, VTraits<vuint8m1_t>::vlanes())), 0, VTraits<vuint8m1_t>::vlanes())),
            VTraits<vuint16m1_t>::vlanes())));
    b0 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx0, VTraits<vuint8m1_t>::vlanes()));
    b1 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx1, VTraits<vuint8m1_t>::vlanes()));
    b2 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx2, VTraits<vuint8m1_t>::vlanes()));
    b3 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx3, VTraits<vuint8m1_t>::vlanes()));
}
#endif
#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, const _Tpvec& a2, const _Tpvec& a3, _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) { \
    _Tpvec t0,t1,t2,t3; \
    v_zip4(a0, a2, t0, t2); \
    v_zip4(a1, a3, t1, t3); \
    v_zip4(t0, t1, b0, b1); \
    v_zip4(t2, t3, b2, b3); \
}

OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_int32, i32)
OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_float32, f32)
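// The 4x4 transpose is two rounds of zips: zipping (a0,a2) and (a1,a3) puts
// rows two apart into pairs, and zipping the intermediates yields the four
// columns.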
#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, wsuffix, vl, red) \
inline scalartype v_reduce_sum(const _Tpvec& a) \
{ \
    _nwTpvec zero = vmv_v_x_##wsuffix##m1(0, vl); \
    _nwTpvec res = vmv_v_x_##wsuffix##m1(0, vl); \
    res = v##red(res, a, zero, vl); \
    return (scalartype)v_get0(res); \
}

OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8, v_uint16, vuint16m1_t, unsigned, u16, VTraits<v_uint8>::vlanes(), wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8, v_int16, vint16m1_t, int, i16, VTraits<v_int8>::vlanes(), wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16, v_uint32, vuint32m1_t, unsigned, u32, VTraits<v_uint16>::vlanes(), wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16, v_int32, vint32m1_t, int, i32, VTraits<v_int16>::vlanes(), wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32, v_uint64, vuint64m1_t, unsigned, u64, VTraits<v_uint32>::vlanes(), wredsumu)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32, v_int64, vint64m1_t, int, i64, VTraits<v_int32>::vlanes(), wredsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64, v_uint64, vuint64m1_t, uint64, u64, VTraits<v_uint64>::vlanes(), redsum)
OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64, v_int64, vint64m1_t, int64, i64, VTraits<v_int64>::vlanes(), redsum)
#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(_Tpvec, _wTpvec, _nwTpvec, scalartype, wsuffix, vl) \
inline scalartype v_reduce_sum(const _Tpvec& a) \
{ \
    _nwTpvec zero = vfmv_v_f_##wsuffix##m1(0, vl); \
    _nwTpvec res = vfmv_v_f_##wsuffix##m1(0, vl); \
    res = vfredosum(res, a, zero, vl); \
    return (scalartype)v_get0(res); \
}

OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32, v_float32, vfloat32m1_t, float, f32, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64, v_float64, vfloat64m1_t, double, f64, VTraits<v_float64>::vlanes())
#endif
#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
    _Tpvec res = _Tpvec(v##red(a, a, a, vl)); \
    return (scalartype)v_get0(res); \
}

OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8, min, uchar, u8, VTraits<v_uint8>::vlanes(), redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int8, min, schar, i8, VTraits<v_int8>::vlanes(), redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16, min, ushort, u16, VTraits<v_uint16>::vlanes(), redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int16, min, short, i16, VTraits<v_int16>::vlanes(), redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32, min, unsigned, u32, VTraits<v_uint32>::vlanes(), redminu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int32, min, int, i32, VTraits<v_int32>::vlanes(), redmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_float32, min, float, f32, VTraits<v_float32>::vlanes(), fredmin)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8, max, uchar, u8, VTraits<v_uint8>::vlanes(), redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int8, max, schar, i8, VTraits<v_int8>::vlanes(), redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16, max, ushort, u16, VTraits<v_uint16>::vlanes(), redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int16, max, short, i16, VTraits<v_int16>::vlanes(), redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32, max, unsigned, u32, VTraits<v_uint32>::vlanes(), redmaxu)
OPENCV_HAL_IMPL_RVV_REDUCE(v_int32, max, int, i32, VTraits<v_int32>::vlanes(), redmax)
OPENCV_HAL_IMPL_RVV_REDUCE(v_float32, max, float, f32, VTraits<v_float32>::vlanes(), fredmax)
inline v_float32 v_reduce_sum4(const v_float32& a, const v_float32& b,
                               const v_float32& c, const v_float32& d)
{
    vuint64m2_t vid1 = vid_v_u64m2(VTraits<vuint32m1_t>::vlanes());
    vuint16m2_t t1 = vreinterpret_u16m2(vid1);
    vuint16m2_t t2 = vslide1up(t1, 0, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t t3 = vslide1up(t2, 0, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t t4 = vslide1up(t3, 0, VTraits<vuint8m1_t>::vlanes());
    t1 = vor(
        vor(t1, t2, VTraits<vuint8m1_t>::vlanes()),
        vor(t3, t4, VTraits<vuint8m1_t>::vlanes()),
        VTraits<vuint8m1_t>::vlanes()
    );
    vuint16m2_t vidx0 = vmul(t1, 12, VTraits<vuint8m1_t>::vlanes());
    vidx0 = vadd(vidx0, vid_v_u16m2(VTraits<vuint8m1_t>::vlanes()), VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx2 = vadd(vidx0, 8, VTraits<vuint8m1_t>::vlanes());
    vuint16m2_t vidx3 = vadd(vidx0, 12, VTraits<vuint8m1_t>::vlanes());
    vuint32m2_t tempA = vreinterpret_u32m2(
        vor(vzext_vf2(vreinterpret_u32m1(a), VTraits<vuint16m1_t>::vlanes()),
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(c), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())),
            VTraits<vuint32m1_t>::vlanes()));
    vuint32m2_t tempB = vreinterpret_u32m2(
        vor(vzext_vf2(vreinterpret_u32m1(b), VTraits<vuint16m1_t>::vlanes()),
            vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(d), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())),
            VTraits<vuint32m1_t>::vlanes()));
    vfloat32m4_t temp = vreinterpret_f32m4(vreinterpret_u32m4(
        vor(vzext_vf2(tempA, VTraits<vuint8m1_t>::vlanes()),
            vreinterpret_u64m4(vslide1up(vreinterpret_u32m4(vzext_vf2(tempB, VTraits<vuint8m1_t>::vlanes())), 0, VTraits<vuint8m1_t>::vlanes())),
            VTraits<vuint16m1_t>::vlanes())));
    vfloat32m1_t b0 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx0, VTraits<vuint8m1_t>::vlanes()));
    vfloat32m1_t b1 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx1, VTraits<vuint8m1_t>::vlanes()));
    vfloat32m1_t b2 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx2, VTraits<vuint8m1_t>::vlanes()));
    vfloat32m1_t b3 = vlmul_trunc_f32m1(vrgatherei16(temp, vidx3, VTraits<vuint8m1_t>::vlanes()));
    v_float32 res = vfadd(
        vfadd(b0, b1, VTraits<vfloat32m1_t>::vlanes()),
        vfadd(b2, b3, VTraits<vfloat32m1_t>::vlanes()),
        VTraits<vfloat32m1_t>::vlanes()
    );
    return res;
}
inline v_float32 v_sqrt(const v_float32& x)
{
    return vfsqrt(x, VTraits<v_float32>::vlanes());
}

inline v_float32 v_invsqrt(const v_float32& x)
{
    v_float32 one = v_setall_f32(1.0f);
    return v_div(one, v_sqrt(x));
}

#if CV_SIMD_SCALABLE_64F
inline v_float64 v_sqrt(const v_float64& x)
{
    return vfsqrt(x, VTraits<v_float64>::vlanes());
}

inline v_float64 v_invsqrt(const v_float64& x)
{
    v_float64 one = v_setall_f64(1.0);
    return v_div(one, v_sqrt(x));
}
#endif
inline v_float32 v_magnitude(const v_float32& a, const v_float32& b)
{
    v_float32 x = vfmacc(vfmul(a, a, VTraits<v_float32>::vlanes()), b, b, VTraits<v_float32>::vlanes());
    return v_sqrt(x);
}

inline v_float32 v_sqr_magnitude(const v_float32& a, const v_float32& b)
{
    return v_float32(vfmacc(vfmul(a, a, VTraits<v_float32>::vlanes()), b, b, VTraits<v_float32>::vlanes()));
}

#if CV_SIMD_SCALABLE_64F
inline v_float64 v_magnitude(const v_float64& a, const v_float64& b)
{
    v_float64 x = vfmacc(vfmul(a, a, VTraits<v_float64>::vlanes()), b, b, VTraits<v_float64>::vlanes());
    return v_sqrt(x);
}

inline v_float64 v_sqr_magnitude(const v_float64& a, const v_float64& b)
{
    return vfmacc(vfmul(a, a, VTraits<v_float64>::vlanes()), b, b, VTraits<v_float64>::vlanes());
}
#endif
inline v_float32 v_fma(const v_float32& a, const v_float32& b, const v_float32& c)
{
    return vfmacc(c, a, b, VTraits<v_float32>::vlanes());
}
inline v_int32 v_fma(const v_int32& a, const v_int32& b, const v_int32& c)
{
    return vmacc(c, a, b, VTraits<v_int32>::vlanes());
}

inline v_float32 v_muladd(const v_float32& a, const v_float32& b, const v_float32& c)
{
    return v_fma(a, b, c);
}

inline v_int32 v_muladd(const v_int32& a, const v_int32& b, const v_int32& c)
{
    return v_fma(a, b, c);
}

#if CV_SIMD_SCALABLE_64F
inline v_float64 v_fma(const v_float64& a, const v_float64& b, const v_float64& c)
{
    return vfmacc_vv_f64m1(c, a, b, VTraits<v_float64>::vlanes());
}

inline v_float64 v_muladd(const v_float64& a, const v_float64& b, const v_float64& c)
{
    return v_fma(a, b, c);
}
#endif
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    return (int)vcpop(vmslt(a, 0, vl), vl) == vl; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    return (int)vcpop(vmslt(a, 0, vl), vl) != 0; \
}

OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int64, VTraits<v_int64>::vlanes())
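// vmslt(a, 0) extracts the sign bits as a mask and vcpop counts the set mask
// bits, so v_check_all/v_check_any test whether all/any lanes have their most
// significant bit set, the universal-intrinsics convention for "true".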
#if CV_SIMD_SCALABLE_64F
inline bool v_check_all(const v_float64& a)
{ return v_check_all(v_reinterpret_as_s64(a)); }
inline bool v_check_any(const v_float64& a)
{ return v_check_any(v_reinterpret_as_s64(a)); }
#endif
#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \
inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \
{ \
    return v_sub(v_max(a, b), v_min(a, b)); \
}

OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8, absdiff)
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16, absdiff)
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32, absdiff)
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32, absdiff)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64, absdiff)
#endif
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8, absdiffs)
OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16, absdiffs)
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, width) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vnclipu(vreinterpret_u##width##m2(vwsub_vv(v_max(a, b), v_min(a, b), VTraits<_Tpvec>::vlanes())), 0, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8, v_uint8, 16)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16, v_uint16, 32)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32, v_uint32, 64)
#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \
inline _Tprvec v_abs(const _Tpvec& a) \
{ \
    return v_absdiff(a, v_setzero_##suffix()); \
}

OPENCV_HAL_IMPL_RVV_ABS(v_uint8, v_int8, s8)
OPENCV_HAL_IMPL_RVV_ABS(v_uint16, v_int16, s16)
OPENCV_HAL_IMPL_RVV_ABS(v_uint32, v_int32, s32)
OPENCV_HAL_IMPL_RVV_ABS(v_float32, v_float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_ABS(v_float64, v_float64, f64)
#endif
#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \
inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \
{ \
    return v_reduce_sum(v_absdiff(a, b)); \
}

OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32, unsigned)
OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32, float)
#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, vl) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
    return vmerge(vmsne(mask, 0, vl), b, a, vl); \
}

OPENCV_HAL_IMPL_RVV_SELECT(v_uint8, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_SELECT(v_uint16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_SELECT(v_uint32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_SELECT(v_int8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_SELECT(v_int16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_SELECT(v_int32, VTraits<v_int32>::vlanes())

inline v_float32 v_select(const v_float32& mask, const v_float32& a, const v_float32& b)
{
    return vmerge(vmfne(mask, 0, VTraits<v_float32>::vlanes()), b, a, VTraits<v_float32>::vlanes());
}

#if CV_SIMD_SCALABLE_64F
inline v_float64 v_select(const v_float64& mask, const v_float64& a, const v_float64& b)
{
    return vmerge(vmfne(mask, 0, VTraits<v_float64>::vlanes()), b, a, VTraits<v_float64>::vlanes());
}
#endif
#define OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(_Tpvec, suffix, vl) \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
{ \
    return vslidedown(vmv_v_x_##suffix##m1(0, vl), a, n, vl); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
{ \
    return vslideup(vmv_v_x_##suffix##m1(0, vl), a, n, vl); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
{ return a; } \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup(vslidedown(vmv_v_x_##suffix##m1(0, vl), a, n, vl), b, VTraits<_Tpvec>::vlanes() - n, vl); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup(vslidedown(vmv_v_x_##suffix##m1(0, vl), b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
{ CV_UNUSED(b); return a; }
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint8, u8, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int8, i8, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint16, u16, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int16, i16, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint32, u32, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int32, i32, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint64, u64, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int64, i64, VTraits<v_int64>::vlanes())
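// Rotates are built from register slides: v_rotate_right<n>(a) slides a down
// by n into a zero vector; the two-operand forms slide the second vector in
// to fill the vacated lanes, e.g. v_rotate_right<2>([a0..a7], [b0..b7]) ==
// [a2..a7, b0, b1].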
#define OPENCV_HAL_IMPL_RVV_ROTATE_FP(_Tpvec, suffix, vl) \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
{ \
    return vslidedown(vfmv_v_f_##suffix##m1(0, vl), a, n, vl); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
{ \
    return vslideup(vfmv_v_f_##suffix##m1(0, vl), a, n, vl); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
{ return a; } \
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup(vslidedown(vfmv_v_f_##suffix##m1(0, vl), a, n, vl), b, VTraits<_Tpvec>::vlanes() - n, vl); \
} \
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup(vslidedown(vfmv_v_f_##suffix##m1(0, vl), b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
} \
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
{ CV_UNUSED(b); return a; }
OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float32, f32, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float64, f64, VTraits<v_float64>::vlanes())
#endif
inline v_float32 v_cvt_f32(const v_int32& a)
{
    return vfcvt_f_x_v_f32m1(a, VTraits<v_float32>::vlanes());
}

#if CV_SIMD_SCALABLE_64F
inline v_float32 v_cvt_f32(const v_float64& a)
{
    return vfncvt_f(vlmul_ext_f64m2(a), VTraits<v_float64>::vlanes());
}

inline v_float32 v_cvt_f32(const v_float64& a, const v_float64& b)
{
    return vfncvt_f(vset(vlmul_ext_f64m2(a), 1, b), VTraits<v_float32>::vlanes());
}

inline v_float64 v_cvt_f64(const v_int32& a)
{
    return vget_f64m1(vfwcvt_f(a, VTraits<v_int32>::vlanes()), 0);
}

inline v_float64 v_cvt_f64_high(const v_int32& a)
{
    return vget_f64m1(vfwcvt_f(a, VTraits<v_int32>::vlanes()), 1);
}

inline v_float64 v_cvt_f64(const v_float32& a)
{
    return vget_f64m1(vfwcvt_f(a, VTraits<v_float32>::vlanes()), 0);
}

inline v_float64 v_cvt_f64_high(const v_float32& a)
{
    return vget_f64m1(vfwcvt_f(a, VTraits<v_float32>::vlanes()), 1);
}

inline v_float64 v_cvt_f64(const v_int64& a)
{
    return vfcvt_f(a, VTraits<v_int64>::vlanes());
}
#endif
#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \
template<int s = 0> inline _Tpvec v_broadcast_element(_Tpvec v, int i = s) \
{ \
    return v_setall_##suffix(v_extract_n(v, i)); \
} \
inline _Tpvec v_broadcast_highest(_Tpvec v) \
{ \
    return v_setall_##suffix(v_extract_n(v, VTraits<_Tpvec>::vlanes()-1)); \
}

OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32, s32)
OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32, f32)
#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, width) \
inline _Tpvec v_reverse(const _Tpvec& a) \
{ \
    vuint##width##m1_t vidx = vrsub(vid_v_u##width##m1(VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()-1, VTraits<_Tpvec>::vlanes()); \
    return vrgather(a, vidx, VTraits<_Tpvec>::vlanes()); \
}
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8, 8)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int8, 8)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16, 16)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int16, 16)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32, 32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int32, 32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_float32, 32)
OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64, 64)
OPENCV_HAL_IMPL_RVV_REVERSE(v_int64, 64)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_REVERSE(v_float64, 64)
#endif
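// Lane reversal builds the index vector {vl-1, ..., 1, 0} by subtracting the
// lane id (vid) from vl-1, then permutes with vrgather. With 4 lanes:
// vid = {0,1,2,3}, vrsub(vid, 3) = {3,2,1,0}.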
#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tp, _Tpwvec, _Tpwvec_m2, _Tpvec, width, suffix, suffix2, cvt) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
    _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
    b0 = vget_##suffix##m1(temp, 0); \
    b1 = vget_##suffix##m1(temp, 1); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ \
    _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
    return vget_##suffix##m1(temp, 0); \
} \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ \
    _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
    return vget_##suffix##m1(temp, 1); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
    return cvt(vle##width##_v_##suffix2##mf2(ptr, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_EXPAND(uchar, v_uint16, vuint16m2_t, v_uint8, 8, u16, u8, vwcvtu_x)
OPENCV_HAL_IMPL_RVV_EXPAND(schar, v_int16, vint16m2_t, v_int8, 8, i16, i8, vwcvt_x)
OPENCV_HAL_IMPL_RVV_EXPAND(ushort, v_uint32, vuint32m2_t, v_uint16, 16, u32, u16, vwcvtu_x)
OPENCV_HAL_IMPL_RVV_EXPAND(short, v_int32, vint32m2_t, v_int16, 16, i32, i16, vwcvt_x)
OPENCV_HAL_IMPL_RVV_EXPAND(uint, v_uint64, vuint64m2_t, v_uint32, 32, u64, u32, vwcvtu_x)
OPENCV_HAL_IMPL_RVV_EXPAND(int, v_int64, vint64m2_t, v_int32, 32, i64, i32, vwcvt_x)

inline v_uint32 v_load_expand_q(const uchar* ptr)
{
    return vwcvtu_x(vwcvtu_x(vle8_v_u8mf4(ptr, VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes());
}

inline v_int32 v_load_expand_q(const schar* ptr)
{
    return vwcvt_x(vwcvt_x(vle8_v_i8mf4(ptr, VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes());
}
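// v_expand widens every lane to twice the width; the result needs an LMUL=2
// register, whose m1 halves become the low/high output vectors.
// v_load_expand_q applies two widening steps (e.g. u8 -> u16 -> u32), loading
// only vlanes(u32) bytes through a fractional-LMUL (mf4) register.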
#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, hwidth, hsuffix, suffix, rshr, shr) \
inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \
{ \
    return shr(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, VTraits<_Tpvec>::vlanes()); \
} \
inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \
{ \
    vse##hwidth##_v_##hsuffix##mf2(ptr, shr(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_wTpvec>::vlanes()); \
} \
template<int n = 0> inline \
_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b, int N = n) \
{ \
    return rshr(vset(vlmul_ext_##suffix##m2(a), 1, b), N, VTraits<_Tpvec>::vlanes()); \
} \
template<int n = 0> inline \
void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a, int N = n) \
{ \
    vse##hwidth##_v_##hsuffix##mf2(ptr, rshr(a, N, VTraits<_Tpvec>::vlanes()), VTraits<_wTpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_PACK(v_uint8, uchar, v_uint16, 8, u8, u16, vnclipu, vnclipu)
OPENCV_HAL_IMPL_RVV_PACK(v_int8, schar, v_int16, 8, i8, i16, vnclip, vnclip)
OPENCV_HAL_IMPL_RVV_PACK(v_uint16, ushort, v_uint32, 16, u16, u32, vnclipu, vnclipu)
OPENCV_HAL_IMPL_RVV_PACK(v_int16, short, v_int32, 16, i16, i32, vnclip, vnclip)
OPENCV_HAL_IMPL_RVV_PACK(v_uint32, unsigned, v_uint64, 32, u32, u64, vnclipu, vnsrl)
OPENCV_HAL_IMPL_RVV_PACK(v_int32, int, v_int64, 32, i32, i64, vnclip, vnsra)
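// Packing joins a and b in an LMUL=2 register and narrows it back to m1.
// vnclipu/vnclip are saturating narrowing shifts, which the 8/16-bit packs
// need; the 64->32 v_pack passes plain truncating shifts (vnsrl/vnsra)
// instead, matching the universal-intrinsics convention that the 64-bit pack
// truncates rather than saturates.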
#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, cast, hvl, vl) \
inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
{ \
    return vnclipu(cast(vmax(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, vl)), 0, vl); \
} \
inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \
{ \
    vse##hwidth##_v_##hsuffix##mf2(ptr, vnclipu(vreinterpret_u##width##m1(vmax(a, 0, vl)), 0, vl), hvl); \
} \
template<int N = 0> inline \
_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b, int n = N) \
{ \
    return vnclipu(cast(vmax(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, vl)), n, vl); \
} \
template<int N = 0> inline \
void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a, int n = N) \
{ \
    vse##hwidth##_v_##hsuffix##mf2(ptr, vnclipu(vreinterpret_u##width##m1(vmax(a, 0, vl)), n, vl), hvl); \
}

OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8, uchar, v_int16, short, 8, 16, u8, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, VTraits<v_int16>::vlanes(), VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16, ushort, v_int32, int, 16, 32, u16, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, VTraits<v_int32>::vlanes(), VTraits<v_uint16>::vlanes())
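// v_pack_u narrows signed sources to unsigned: vmax(..., 0, vl) clamps
// negatives to zero first, the reinterpret cast makes the data unsigned, and
// vnclipu saturates anything above the unsigned maximum while narrowing.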
#define OPENCV_HAL_IMPL_RVV_ZIP(_Tpvec, _wTpvec, suffix, width, width2, convert2um2, convert2um1) \
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) { \
    _wTpvec temp = vreinterpret_##suffix##m2(convert2um2( \
        vor(vzext_vf2(convert2um1(a0), VTraits<_Tpvec>::vlanes()*2), \
            vreinterpret_u##width2##m2(vslide1up(vreinterpret_u##width##m2(vzext_vf2(convert2um1(a1), VTraits<_Tpvec>::vlanes()*2)), 0, VTraits<_Tpvec>::vlanes()*2)), \
            VTraits<_Tpvec>::vlanes()))); \
    b0 = vget_##suffix##m1(temp, 0); \
    b1 = vget_##suffix##m1(temp, 1); \
}
OPENCV_HAL_IMPL_RVV_ZIP(v_uint8, vuint8m2_t, u8, 8, 16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_ZIP(v_int8, vint8m2_t, i8, 8, 16, vreinterpret_u8m2, vreinterpret_u8m1)
OPENCV_HAL_IMPL_RVV_ZIP(v_uint16, vuint16m2_t, u16, 16, 32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_ZIP(v_int16, vint16m2_t, i16, 16, 32, vreinterpret_u16m2, vreinterpret_u16m1)
OPENCV_HAL_IMPL_RVV_ZIP(v_uint32, vuint32m2_t, u32, 32, 64, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_ZIP(v_int32, vint32m2_t, i32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
OPENCV_HAL_IMPL_RVV_ZIP(v_float32, vfloat32m2_t, f32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
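// The zip trick works on bit patterns: zero-extending each lane of a0 to
// double width leaves its values in the even (little-endian low) half-lanes,
// i.e. {a0[0], 0, a0[1], 0, ...} when viewed at the original width. a1 is
// widened the same way and slid up by one lane to occupy the odd positions,
// then vor merges the two, giving {a0[0], a1[0], a0[1], a1[1], ...} in an m2
// register whose halves are the two interleaved outputs.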
#if CV_SIMD_SCALABLE_64F
inline void v_zip(const v_float64& a0, const v_float64& a1, v_float64& b0, v_float64& b1) {
    vuint16mf4_t idx0 = vid_v_u16mf4(VTraits<v_float64>::vlanes());
    vuint16mf4_t idx1 = vadd(idx0, VTraits<v_float64>::vlanes(), VTraits<v_float64>::vlanes());
    vuint16mf2_t idx = vreinterpret_u16mf2(
        vor(vzext_vf2(idx0, VTraits<v_float64>::vlanes()),
            vreinterpret_u32mf2(vslide1up(vreinterpret_u16mf2(vzext_vf2(idx1, VTraits<v_float64>::vlanes())), 0, VTraits<v_uint32>::vlanes())),
            VTraits<v_uint32>::vlanes()));
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>11999
    vfloat64m2_t temp = __riscv_vcreate_v_f64m1_f64m2(a0, a1);
#else
    vfloat64m2_t temp = vlmul_ext_f64m2(a0);
    temp = vset(temp, 1, a1);
#endif
    temp = vrgatherei16(temp, idx, VTraits<v_float64>::vlanes()*2);
    b0 = vget_f64m1(temp, 0);
    b1 = vget_f64m1(temp, 1);
}
#endif
#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, width) \
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup(a, b, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes()); \
} \
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
{ \
    return vslideup( \
        vslidedown(a, a, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes()), \
        vslidedown(b, b, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes()), \
        VTraits<_Tpvec>::vlanes()/2, \
        VTraits<_Tpvec>::vlanes()); \
} \
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
{ \
    c = v_combine_low(a, b); \
    d = v_combine_high(a, b); \
}

OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint8, 8)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_int8, 8)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint16, 16)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_int16, 16)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint32, 32)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_int32, 32)
OPENCV_HAL_IMPL_RVV_UNPACKS(v_float32, 32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_UNPACKS(v_float64, 64)
#endif
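// v_combine_low concatenates the low halves by sliding b up over a's upper
// half; v_combine_high first slides each vector's high half down to lane 0,
// then combines the two the same way.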
#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width, hwidth, vl) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
{ \
    a = vlse##width##_v_##suffix##m1(ptr  , sizeof(_Tp)*2, VTraits<v_##_Tpvec>::vlanes()); \
    b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*2, VTraits<v_##_Tpvec>::vlanes()); \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
{ \
    a = vlse##width##_v_##suffix##m1(ptr  , sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
    b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
    c = vlse##width##_v_##suffix##m1(ptr+2, sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
                                v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    a = vlse##width##_v_##suffix##m1(ptr  , sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
    b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
    c = vlse##width##_v_##suffix##m1(ptr+2, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
    d = vlse##width##_v_##suffix##m1(ptr+3, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                hal::StoreMode = hal::STORE_UNALIGNED) \
{ \
    vsse##width(ptr, sizeof(_Tp)*2, a, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+1, sizeof(_Tp)*2, b, VTraits<v_##_Tpvec>::vlanes()); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                const v_##_Tpvec& c, hal::StoreMode = hal::STORE_UNALIGNED) \
{ \
    vsse##width(ptr, sizeof(_Tp)*3, a, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+1, sizeof(_Tp)*3, b, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+2, sizeof(_Tp)*3, c, VTraits<v_##_Tpvec>::vlanes()); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                                const v_##_Tpvec& c, const v_##_Tpvec& d, \
                                hal::StoreMode = hal::STORE_UNALIGNED ) \
{ \
    vsse##width(ptr, sizeof(_Tp)*4, a, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+1, sizeof(_Tp)*4, b, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+2, sizeof(_Tp)*4, c, VTraits<v_##_Tpvec>::vlanes()); \
    vsse##width(ptr+3, sizeof(_Tp)*4, d, VTraits<v_##_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8, uchar, u8, 8, 4, VTraits<v_uint8>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8, schar, i8, 8, 4, VTraits<v_int8>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16, ushort, u16, 16, 8, VTraits<v_uint16>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16, short, i16, 16, 8, VTraits<v_int16>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32, unsigned, u32, 32, 16, VTraits<v_uint32>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32, int, i32, 32, 16, VTraits<v_int32>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32, float, f32, 32, 16, VTraits<v_float32>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64, uint64, u64, 64, 32, VTraits<v_uint64>::vlanes())
OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64, int64, i64, 64, 32, VTraits<v_int64>::vlanes())
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64, double, f64, 64, 32, VTraits<v_float64>::vlanes())
#endif
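// De/interleaving uses strided memory accesses: vlse/vsse with a stride of
// sizeof(_Tp)*cn walks one channel of cn-channel packed data. E.g. for
// 3-channel BGR pixels, v_load_deinterleave(ptr, b, g, r) issues three
// strided loads starting at ptr, ptr+1 and ptr+2 with a 3-element stride.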
static uint64_t idx_interleave_pairs[] = {
    0x0705060403010200, 0x0f0d0e0c0b090a08, 0x1715161413111210, 0x1f1d1e1c1b191a18,
    0x2725262423212220, 0x2f2d2e2c2b292a28, 0x3735363433313230, 0x3f3d3e3c3b393a38,
    0x4745464443414240, 0x4f4d4e4c4b494a48, 0x5755565453515250, 0x5f5d5e5c5b595a58,
    0x6765666463616260, 0x6f6d6e6c6b696a68, 0x7775767473717270, 0x7f7d7e7c7b797a78};

static uint64_t idx_interleave_quads[] = {
    0x0703060205010400, 0x0f0b0e0a0d090c08, 0x1713161215111410, 0x1f1b1e1a1d191c18,
    0x2723262225212420, 0x2f2b2e2a2d292c28, 0x3733363235313430, 0x3f3b3e3a3d393c38,
    0x4743464245414440, 0x4f4b4e4a4d494c48, 0x5753565255515450, 0x5f5b5e5a5d595c58,
    0x6763666265616460, 0x6f6b6e6a6d696c68, 0x7773767275717470, 0x7f7b7e7a7d797c78};
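// Each u64 constant encodes eight gather indices, one per byte
// (little-endian). The first pairs entry 0x0705060403010200 reads as bytes
// {00,02,01,03,04,06,05,07}, i.e. it swaps the two middle elements of every
// 4-element group; the quads table interleaves the two 4-element halves of
// every 8-element group. 16 entries (128 index bytes) cover the maximum
// supported VLEN of 1024 bits.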
#define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(_Tpvec, func) \
inline _Tpvec v_interleave_##func(const _Tpvec& vec) { \
    CV_CheckLE(VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, "RVV implementation only supports VLEN in the range [128, 1024]"); \
    vuint8m1_t vidx = vundefined_u8m1(); \
    vidx = vreinterpret_u8m1(vle64_v_u64m1(idx_interleave_##func, 16)); \
    return vrgather(vec, vidx, VTraits<v_uint8>::vlanes()); \
}
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, quads)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, quads)

#define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(_Tpvec, width, vzext_vfx, func) \
inline _Tpvec v_interleave_##func(const _Tpvec& vec) { \
    CV_CheckLE(VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, "RVV implementation only supports VLEN in the range [128, 1024]"); \
    vuint##width##m1_t vidx = vundefined_u##width##m1(); \
    vidx = vget_u##width##m1(vzext_vfx(vreinterpret_u8m1(vle64_v_u64m1(idx_interleave_##func, 16)), VTraits<v_uint8>::vlanes()), 0); \
    return vrgather(vec, vidx, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, vzext_vf2, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, vzext_vf2, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, vzext_vf4, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, vzext_vf4, pairs)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, vzext_vf4, pairs)

OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, vzext_vf2, quads)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, vzext_vf2, quads)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, vzext_vf4, quads)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, vzext_vf4, quads)
OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, vzext_vf4, quads)
static const unsigned char popCountTable[256] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
#define OPENCV_HAL_IMPL_RVV_HADD(_Tpvec, _Tpvec2, _Tm2, width, width2, suffix, add) \
static inline _Tpvec2 v_hadd(_Tpvec a) { \
    vuint##width2##m1_t oneX2 = vmv_v_x_u##width2##m1(1, VTraits<v_uint##width2>::vlanes()); \
    vuint##width##m1_t one = vreinterpret_u##width##m1(oneX2); \
    _Tm2 res = add(a, vslide1down(a, 0, VTraits<v_uint##width>::vlanes()), VTraits<v_uint##width>::vlanes()); \
    return vget_##suffix##m1(vcompress(vmseq(one, 1, VTraits<v_uint##width>::vlanes()), res, res, VTraits<v_uint##width>::vlanes()), 0); \
}
OPENCV_HAL_IMPL_RVV_HADD(v_uint8, v_uint16, vuint16m2_t, 8, 16, u16, vwaddu_vv)
OPENCV_HAL_IMPL_RVV_HADD(v_uint16, v_uint32, vuint32m2_t, 16, 32, u32, vwaddu_vv)
OPENCV_HAL_IMPL_RVV_HADD(v_uint32, v_uint64, vuint64m2_t, 32, 64, u64, vwaddu_vv)
OPENCV_HAL_IMPL_RVV_HADD(v_int8, v_int16, vint16m2_t, 8, 16, i16, vwadd_vv)
OPENCV_HAL_IMPL_RVV_HADD(v_int16, v_int32, vint32m2_t, 16, 32, i32, vwadd_vv)
OPENCV_HAL_IMPL_RVV_HADD(v_int32, v_int64, vint64m2_t, 32, 64, i64, vwadd_vv)

OPENCV_HAL_IMPL_RVV_HADD(vint32m2_t, v_int32, vint32m2_t, 16, 32, i32, vadd)
OPENCV_HAL_IMPL_RVV_HADD(vint64m2_t, v_int64, vint64m2_t, 32, 64, i64, vadd)
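// v_hadd folds neighbouring lanes: after the slide1down, res[i] = a[i] +
// a[i+1] (widened), and the vcompress with the {1,0,1,0,...} element mask
// keeps only the even-indexed sums, i.e. the pairwise sums a[0]+a[1],
// a[2]+a[3], ... packed into the low m1 half at twice the element width.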
inline v_uint8 v_popcount(const v_uint8& a)
{
    return vloxei8(popCountTable, a, VTraits<v_uint8>::vlanes());
}
inline v_uint16 v_popcount(const v_uint16& a)
{
    return v_hadd(v_popcount(vreinterpret_u8m1(a)));
}
inline v_uint32 v_popcount(const v_uint32& a)
{
    return v_hadd(v_hadd(v_popcount(vreinterpret_u8m1(a))));
}
inline v_uint64 v_popcount(const v_uint64& a)
{
    return v_hadd(v_hadd(v_hadd(v_popcount(vreinterpret_u8m1(a)))));
}

inline v_uint64 v_popcount(const v_int64& a)
{
    return v_popcount(v_reinterpret_as_u64(vmax(a, v_sub(v_setzero_s64(), a), VTraits<v_int64>::vlanes())));
}
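// Byte popcount is a table gather: the input bytes themselves serve as
// unsigned offsets into popCountTable via the indexed load vloxei8. Wider
// element counts are then obtained by summing adjacent byte counts with one,
// two or three v_hadd folds; the signed 64-bit overload first takes the
// absolute value as max(a, -a).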
#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec) \
inline int v_signmask(const _Tpvec& a) \
{ \
    uint8_t ans[4] = {0}; \
    vsm(ans, vmslt(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
    return *(reinterpret_cast<int*>(ans)) & (((__int128_t)1 << VTraits<_Tpvec>::vlanes()) - 1); \
} \
inline int v_scan_forward(const _Tpvec& a) \
{ \
    return (int)vfirst(vmslt(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64)

inline int v_signmask(const v_uint8& a)
{ return v_signmask(v_reinterpret_as_s8(a)); }
inline int v_signmask(const v_uint16& a)
{ return v_signmask(v_reinterpret_as_s16(a)); }
inline int v_signmask(const v_uint32& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_float32& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_uint64& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
#if CV_SIMD_SCALABLE_64F
inline int v_signmask(const v_float64& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
#endif
#if CV_SIMD_SCALABLE_64F
inline int v_scan_forward(const v_float64& a)
{ return v_scan_forward(v_reinterpret_as_s64(a)); }
#endif
#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, v_trunc) \
inline _Tpvec v_pack_triplets(const _Tpvec& vec) { \
    size_t vl = __cv_rvv_e8m1_nlanes; \
    vuint32m1_t one = vmv_v_x_u32m1(1, __cv_rvv_e32m1_nlanes); \
    vuint8m1_t zero = vmv_v_x_u8m1(0, vl); \
    vuint8m1_t mask = vreinterpret_u8m1(one); \
    return vcompress(vmseq(v_trunc(vslideup(zero, mask, 3, vl)), 0, vl), vec, vec, VTraits<_Tpvec>::vlanes()); \
}

OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16, vlmul_trunc_u8mf2)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16, vlmul_trunc_u8mf2)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32, vlmul_trunc_u8mf4)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32, vlmul_trunc_u8mf4)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32, vlmul_trunc_u8mf4)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint64, vlmul_trunc_u8mf8)
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int64, vlmul_trunc_u8mf8)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, vlmul_trunc_u8mf8)
#endif
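// The mask ends up as {0,0,0,1, 0,0,0,1, ...}: splatting 1 across u32 lanes
// gives the byte pattern {1,0,0,0,...}, and sliding it up by 3 moves the set
// byte to every fourth position. vcompress keeps the lanes where the pattern
// is 0, dropping lane 3 of every 4-lane group: {a0,a1,a2, a4,a5,a6, ...}.
// For wider lane types, v_trunc LMUL-truncates the byte mask so the mask
// register matches the element count.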
#if defined(__riscv_zfh) && __riscv_zfh
inline v_float32 v_load_expand(const hfloat* ptr)
{
    return vfwcvt_f(vle16_v_f16mf2((_Float16*)ptr, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
}

inline void v_pack_store(hfloat* ptr, const v_float32& v)
{
    vse16_v_f16mf2((_Float16*)ptr, vfncvt_f_f_w_f16mf2(v, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
}
#else
inline v_float32 v_load_expand(const hfloat* ptr)
{
    float buf[32];
    for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) buf[i] = (float)ptr[i];
    return v_load(buf);
}

inline void v_pack_store(hfloat* ptr, const v_float32& v)
{
    float buf[32];
    v_store(buf, v);
    for( int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = hfloat(buf[i]);
}
#endif
inline v_int32 v_round(const v_float32& a)
{
    return vfcvt_x(a, VTraits<v_float32>::vlanes());
}

inline v_int32 v_floor(const v_float32& a)
{
    return vfcvt_x(vfsub(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
}

inline v_int32 v_ceil(const v_float32& a)
{
    return vfcvt_x(vfadd(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
}

inline v_int32 v_trunc(const v_float32& a)
{
    return vfcvt_rtz_x(a, VTraits<v_float32>::vlanes());
}
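// vfcvt_x rounds to nearest under the default rounding mode, so floor/ceil
// bias the input by just under one half before converting; the small epsilon
// keeps exact integers (e.g. 1.0) from being pushed into the neighbouring
// interval. v_trunc uses the dedicated round-towards-zero conversion
// (vfcvt_rtz_x) instead.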
#if CV_SIMD_SCALABLE_64F
inline v_int32 v_round(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(a), VTraits<v_float32>::vlanes());
}

inline v_int32 v_round(const v_float64& a, const v_float64& b)
{
    return vfncvt_x(vset(vlmul_ext_f64m2(a), 1, b), VTraits<v_float32>::vlanes());
}

inline v_int32 v_floor(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(vfsub(a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())), VTraits<v_float32>::vlanes());
}

inline v_int32 v_ceil(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(vfadd(a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())), VTraits<v_float32>::vlanes());
}

inline v_int32 v_trunc(const v_float64& a)
{
    return vfncvt_rtz_x(vlmul_ext_f64m2(a), VTraits<v_float32>::vlanes());
}
#endif
inline v_int32 v_dotprod(const v_int16& a, const v_int16& b)
{
    vint32m2_t temp1 = vwmul(a, b, VTraits<v_int16>::vlanes());
    return v_hadd(temp1);
}

inline v_int32 v_dotprod(const v_int16& a, const v_int16& b, const v_int32& c)
{
    vint32m2_t temp1 = vwmul(a, b, VTraits<v_int16>::vlanes());
    return vadd(v_hadd(temp1), c, VTraits<v_int32>::vlanes());
}
inline v_int64 v_dotprod(const v_int32& a, const v_int32& b)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint32m1_t one32 = vreinterpret_u32m1(one64);
    vbool32_t mask = vmseq(one32, 1, VTraits<v_uint32>::vlanes());
    vint64m2_t temp1 = vwmul(a, b, VTraits<v_int32>::vlanes());
    vint64m2_t temp2 = vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
    vint64m2_t res = vadd(temp1, temp2, VTraits<v_int32>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int32>::vlanes());
    return vlmul_trunc_i64m1(res);
}

inline v_int64 v_dotprod(const v_int32& a, const v_int32& b, const v_int64& c)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint32m1_t one32 = vreinterpret_u32m1(one64);
    vbool32_t mask = vmseq(one32, 1, VTraits<v_uint32>::vlanes());
    vint64m2_t temp1 = vwmul(a, b, VTraits<v_int32>::vlanes());
    vint64m2_t temp2 = vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
    vint64m2_t res = vadd(temp1, temp2, VTraits<v_int32>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int32>::vlanes());
    return vadd(vlmul_trunc_i64m1(res), c, VTraits<v_int64>::vlanes());
}
inline v_uint32 v_dotprod_expand(const v_uint8& a, const v_uint8& b)
{
    vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes());
    vuint8m1_t one8 = vreinterpret_u8m1(one32);
    vbool8_t mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes());
    vuint16m2_t t0 = vwmulu(a, b, VTraits<v_uint8>::vlanes());
    vuint16m2_t t1 = vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
    vuint16m2_t t2 = vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
    vuint16m2_t t3 = vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
    vuint32m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()), VTraits<v_uint8>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_uint8>::vlanes());
    return vlmul_trunc_u32m1(res);
}

inline v_uint32 v_dotprod_expand(const v_uint8& a, const v_uint8& b, const v_uint32& c)
{
    vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes());
    vuint8m1_t one8 = vreinterpret_u8m1(one32);
    vbool8_t mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes());
    vuint16m2_t t0 = vwmulu(a, b, VTraits<v_uint8>::vlanes());
    vuint16m2_t t1 = vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
    vuint16m2_t t2 = vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
    vuint16m2_t t3 = vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
    vuint32m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()), VTraits<v_uint8>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_uint8>::vlanes());
    return vadd(vlmul_trunc_u32m1(res), c, VTraits<v_uint8>::vlanes());
}

inline v_int32 v_dotprod_expand(const v_int8& a, const v_int8& b)
{
    vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes());
    vuint8m1_t one8 = vreinterpret_u8m1(one32);
    vbool8_t mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes());
    vint16m2_t t0 = vwmul(a, b, VTraits<v_int8>::vlanes());
    vint16m2_t t1 = vslide1down(t0, 0, VTraits<v_int8>::vlanes());
    vint16m2_t t2 = vslide1down(t1, 0, VTraits<v_int8>::vlanes());
    vint16m2_t t3 = vslide1down(t2, 0, VTraits<v_int8>::vlanes());
    vint32m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()), VTraits<v_int8>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int8>::vlanes());
    return vlmul_trunc_i32m1(res);
}

inline v_int32 v_dotprod_expand(const v_int8& a, const v_int8& b, const v_int32& c)
{
    vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes());
    vuint8m1_t one8 = vreinterpret_u8m1(one32);
    vbool8_t mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes());
    vint16m2_t t0 = vwmul(a, b, VTraits<v_int8>::vlanes());
    vint16m2_t t1 = vslide1down(t0, 0, VTraits<v_int8>::vlanes());
    vint16m2_t t2 = vslide1down(t1, 0, VTraits<v_int8>::vlanes());
    vint16m2_t t3 = vslide1down(t2, 0, VTraits<v_int8>::vlanes());
    vint32m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()), VTraits<v_int8>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int8>::vlanes());
    return vadd(vlmul_trunc_i32m1(res), c, VTraits<v_int8>::vlanes());
}
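// The expanding dot products reduce four neighbours at once: t0..t3 are the
// widened products slid down by 0..3 lanes, so their sum at lane 4k equals
// a[4k]*b[4k] + ... + a[4k+3]*b[4k+3]. The {1,0,0,0,...} element mask then
// compresses every fourth lane to the front, and the result is truncated
// back to an m1 register.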
inline v_uint64 v_dotprod_expand(const v_uint16& a, const v_uint16& b)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint16m1_t one16 = vreinterpret_u16m1(one64);
    vbool16_t mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes());
    vuint32m2_t t0 = vwmulu(a, b, VTraits<v_uint16>::vlanes());
    vuint32m2_t t1 = vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
    vuint32m2_t t2 = vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
    vuint32m2_t t3 = vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
    vuint64m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()), VTraits<v_uint16>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_uint16>::vlanes());
    return vlmul_trunc_u64m1(res);
}

inline v_uint64 v_dotprod_expand(const v_uint16& a, const v_uint16& b, const v_uint64& c)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint16m1_t one16 = vreinterpret_u16m1(one64);
    vbool16_t mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes());
    vuint32m2_t t0 = vwmulu(a, b, VTraits<v_uint16>::vlanes());
    vuint32m2_t t1 = vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
    vuint32m2_t t2 = vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
    vuint32m2_t t3 = vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
    vuint64m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()), VTraits<v_uint16>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_uint16>::vlanes());
    return vadd(vlmul_trunc_u64m1(res), c, VTraits<v_uint16>::vlanes());
}

inline v_int64 v_dotprod_expand(const v_int16& a, const v_int16& b)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint16m1_t one16 = vreinterpret_u16m1(one64);
    vbool16_t mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes());
    vint32m2_t t0 = vwmul(a, b, VTraits<v_int16>::vlanes());
    vint32m2_t t1 = vslide1down(t0, 0, VTraits<v_int16>::vlanes());
    vint32m2_t t2 = vslide1down(t1, 0, VTraits<v_int16>::vlanes());
    vint32m2_t t3 = vslide1down(t2, 0, VTraits<v_int16>::vlanes());
    vint64m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()), VTraits<v_int16>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int16>::vlanes());
    return vlmul_trunc_i64m1(res);
}

inline v_int64 v_dotprod_expand(const v_int16& a, const v_int16& b, const v_int64& c)
{
    vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes());
    vuint16m1_t one16 = vreinterpret_u16m1(one64);
    vbool16_t mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes());
    vint32m2_t t0 = vwmul(a, b, VTraits<v_int16>::vlanes());
    vint32m2_t t1 = vslide1down(t0, 0, VTraits<v_int16>::vlanes());
    vint32m2_t t2 = vslide1down(t1, 0, VTraits<v_int16>::vlanes());
    vint32m2_t t3 = vslide1down(t2, 0, VTraits<v_int16>::vlanes());
    vint64m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()), VTraits<v_int16>::vlanes());
    res = vcompress(mask, res, res, VTraits<v_int16>::vlanes());
    return vadd(vlmul_trunc_i64m1(res), c, VTraits<v_int16>::vlanes());
}
#if CV_SIMD_SCALABLE_64F
inline v_float64 v_dotprod_expand(const v_int32& a, const v_int32& b)
{ return v_cvt_f64(v_dotprod(a, b)); }
inline v_float64 v_dotprod_expand(const v_int32& a, const v_int32& b, const v_float64& c)
{ return v_add(v_dotprod_expand(a, b), c); }
#endif
inline v_int32 v_dotprod_fast(const v_int16& a, const v_int16& b)
{
    v_int32 zero = v_setzero_s32();
    return vredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes());
}
inline v_int32 v_dotprod_fast(const v_int16& a, const v_int16& b, const v_int32& c)
{
    v_int32 zero = v_setzero_s32();
    return vredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), vredsum(zero, c, zero, VTraits<v_int32>::vlanes()), VTraits<v_int16>::vlanes());
}

inline v_int64 v_dotprod_fast(const v_int32& a, const v_int32& b)
{
    v_int64 zero = v_setzero_s64();
    return vredsum(zero, vwmul(a, b, VTraits<v_int32>::vlanes()), zero, VTraits<v_int32>::vlanes());
}
inline v_int64 v_dotprod_fast(const v_int32& a, const v_int32& b, const v_int64& c)
{
    v_int64 zero = v_setzero_s64();
    return vadd(vredsum(zero, vwmul(a, b, VTraits<v_int32>::vlanes()), zero, VTraits<v_int32>::vlanes()), vredsum(zero, c, zero, VTraits<v_int64>::vlanes()), VTraits<v_int64>::vlanes());
}

inline v_uint32 v_dotprod_expand_fast(const v_uint8& a, const v_uint8& b)
{
    v_uint32 zero = v_setzero_u32();
    return vwredsumu(zero, vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero, VTraits<v_uint8>::vlanes());
}
inline v_uint32 v_dotprod_expand_fast(const v_uint8& a, const v_uint8& b, const v_uint32& c)
{
    v_uint32 zero = v_setzero_u32();
    return vadd(vwredsumu(zero, vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero, VTraits<v_uint8>::vlanes()), vredsum(zero, c, zero, VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes());
}
inline v_int32 v_dotprod_expand_fast(const v_int8& a, const v_int8& b)
{
    v_int32 zero = v_setzero_s32();
    return vwredsum(zero, vwmul(a, b, VTraits<v_int8>::vlanes()), zero, VTraits<v_int8>::vlanes());
}
inline v_int32 v_dotprod_expand_fast(const v_int8& a, const v_int8& b, const v_int32& c)
{
    v_int32 zero = v_setzero_s32();
    return vadd(vwredsum(zero, vwmul(a, b, VTraits<v_int8>::vlanes()), zero, VTraits<v_int8>::vlanes()), vredsum(zero, c, zero, VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes());
}

inline v_uint64 v_dotprod_expand_fast(const v_uint16& a, const v_uint16& b)
{
    v_uint64 zero = v_setzero_u64();
    return vwredsumu(zero, vwmulu(a, b, VTraits<v_uint16>::vlanes()), zero, VTraits<v_uint16>::vlanes());
}
inline v_uint64 v_dotprod_expand_fast(const v_uint16& a, const v_uint16& b, const v_uint64& c)
{
    v_uint64 zero = v_setzero_u64();
    return vadd(vwredsumu(zero, vwmulu(a, b, VTraits<v_uint16>::vlanes()), zero, VTraits<v_uint16>::vlanes()), vredsum(zero, c, zero, VTraits<v_uint64>::vlanes()), VTraits<v_uint64>::vlanes());
}
inline v_int64 v_dotprod_expand_fast(const v_int16& a, const v_int16& b)
{
    v_int64 zero = v_setzero_s64();
    return vwredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes());
}
inline v_int64 v_dotprod_expand_fast(const v_int16& a, const v_int16& b, const v_int64& c)
{
    v_int64 zero = v_setzero_s64();
    return vadd(vwredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes()), vredsum(zero, c, zero, VTraits<v_int64>::vlanes()), VTraits<v_int64>::vlanes());
}
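// The _fast variants do not preserve per-lane layout: they use full
// reductions (vredsum/vwredsum), leaving the total in lane 0. This appears
// consistent with the universal-intrinsics documentation, which only
// guarantees v_dotprod_fast results after a final v_reduce_sum.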
#if CV_SIMD_SCALABLE_64F
inline v_float64 v_dotprod_expand_fast(const v_int32& a, const v_int32& b)
{ return v_cvt_f64(v_dotprod_fast(a, b)); }
inline v_float64 v_dotprod_expand_fast(const v_int32& a, const v_int32& b, const v_float64& c)
{ return v_add(v_dotprod_expand_fast(a, b), c); }
#endif
inline v_float32 v_matmul(const v_float32& v, const v_float32& m0,
                          const v_float32& m1, const v_float32& m2,
                          const v_float32& m3)
{
    vfloat32m1_t res;
    res = vfmul_vf_f32m1(m0, v_extract_n(v, 0), VTraits<v_float32>::vlanes());
    res = vfmacc_vf_f32m1(res, v_extract_n(v, 1), m1, VTraits<v_float32>::vlanes());
    res = vfmacc_vf_f32m1(res, v_extract_n(v, 2), m2, VTraits<v_float32>::vlanes());
    res = vfmacc_vf_f32m1(res, v_extract_n(v, 3), m3, VTraits<v_float32>::vlanes());
    return res;
}
inline v_float32 v_matmuladd(const v_float32& v, const v_float32& m0,
                             const v_float32& m1, const v_float32& m2,
                             const v_float32& a)
{
    vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n(v, 0), VTraits<v_float32>::vlanes());
    res = vfmacc_vf_f32m1(res, v_extract_n(v, 1), m1, VTraits<v_float32>::vlanes());
    res = vfmacc_vf_f32m1(res, v_extract_n(v, 2), m2, VTraits<v_float32>::vlanes());
    return vfadd(res, a, VTraits<v_float32>::vlanes());
}
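// v_matmul forms the linear combination m0*v[0] + m1*v[1] + m2*v[2] + m3*v[3]
// with one vfmul and three vfmacc; v_matmuladd replaces the last term with a
// plain vector add of `a`. Note that only lanes 0..3 of v are used, so this
// implicitly assumes the 4-element (128-bit) layout of the universal
// intrinsics matrix operations.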
inline void v_cleanup() {}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

} // namespace cv

#endif // OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP