8#ifndef OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP
9#define OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP
11#include <opencv2/core/check.hpp>
16#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
17#include "intrin_rvv_010_compat_non-policy.hpp"
18#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
21#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>11999
22#include "intrin_rvv_011_compat.hpp"
25#if defined(__GNUC__) && !defined(__clang__)
29#pragma GCC diagnostic ignored "-Wignored-attributes"
32#ifndef CV_RVV_MAX_VLEN
33#define CV_RVV_MAX_VLEN 1024
41CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
43#define CV_SIMD_SCALABLE 1
44#define CV_SIMD_SCALABLE_64F 1
// Universal-intrinsic vector aliases: each OpenCV v_* type maps to one RVV
// register group with LMUL = 1, so the lane count scales with hardware VLEN.
46using v_uint8 = vuint8m1_t;
47using v_int8 = vint8m1_t;
48using v_uint16 = vuint16m1_t;
49using v_int16 = vint16m1_t;
50using v_uint32 = vuint32m1_t;
51using v_int32 = vint32m1_t;
52using v_uint64 = vuint64m1_t;
53using v_int64 = vint64m1_t;
55using v_float32 = vfloat32m1_t;
56#if CV_SIMD_SCALABLE_64F
57using v_float64 = vfloat64m1_t;
// Shorthand scalar aliases used throughout this header.
// NOTE(review): uint64/int64 alias `long int` (LP64 assumption) -- presumably
// this backend only targets rv64; confirm before reusing on other ABIs.
60using uchar =
unsigned char;
61using schar =
signed char;
62using ushort =
unsigned short;
63using uint =
unsigned int;
64using uint64 =
unsigned long int;
65using int64 =
long int;
// Lane counts for every SEW (8/16/32/64-bit) x LMUL (m1..m8) combination,
// queried once from the hardware via vsetvlmax_* at static-init time and
// cached so VTraits::vlanes() is a cheap load rather than a CSR query.
67static const int __cv_rvv_e8m1_nlanes = vsetvlmax_e8m1();
68static const int __cv_rvv_e16m1_nlanes = vsetvlmax_e16m1();
69static const int __cv_rvv_e32m1_nlanes = vsetvlmax_e32m1();
70static const int __cv_rvv_e64m1_nlanes = vsetvlmax_e64m1();
71static const int __cv_rvv_e8m2_nlanes = vsetvlmax_e8m2();
72static const int __cv_rvv_e16m2_nlanes = vsetvlmax_e16m2();
73static const int __cv_rvv_e32m2_nlanes = vsetvlmax_e32m2();
74static const int __cv_rvv_e64m2_nlanes = vsetvlmax_e64m2();
75static const int __cv_rvv_e8m4_nlanes = vsetvlmax_e8m4();
76static const int __cv_rvv_e16m4_nlanes = vsetvlmax_e16m4();
77static const int __cv_rvv_e32m4_nlanes = vsetvlmax_e32m4();
78static const int __cv_rvv_e64m4_nlanes = vsetvlmax_e64m4();
79static const int __cv_rvv_e8m8_nlanes = vsetvlmax_e8m8();
80static const int __cv_rvv_e16m8_nlanes = vsetvlmax_e16m8();
81static const int __cv_rvv_e32m8_nlanes = vsetvlmax_e32m8();
82static const int __cv_rvv_e64m8_nlanes = vsetvlmax_e64m8();
// VTraits<REG>: per-vector-type traits. vlanes() is the runtime lane count
// (from the cached vsetvlmax value above); max_nlanes bounds it assuming
// VLEN <= CV_RVV_MAX_VLEN. Instantiated for every int/float SEW and LMUL.
// NOTE(review): the surrounding `struct VTraits` wrapper lines appear to be
// elided in this extraction (gap in the embedded numbering).
87#define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
91 static inline int vlanes() { return __cv_rvv_##SUF##_nlanes; } \
92 using lane_type = TYP; \
93 static const int max_nlanes = CV_RVV_MAX_VLEN/SZ; \
96OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
97OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
98OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
99OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
100OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
101OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
102OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
103OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)
105OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
106OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
107OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
108OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
109OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
110OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
111OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
112OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)
114OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
115OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
116OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
117OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
118OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
119OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
120OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
121OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)
123OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
124OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
125OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
126OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
127OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
128OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
129OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
130OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)
132OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t,
float, e32m1, 32)
133OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t,
float, e32m2, 32)
134OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t,
float, e32m4, 32)
135OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t,
float, e32m8, 32)
// double-precision traits only when the D extension path is enabled
137#if CV_SIMD_SCALABLE_64F
138OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t,
double, e64m1, 64)
139OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t,
double, e64m2, 64)
140OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t,
double, e64m4, 64)
141OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t,
double, e64m8, 64)
149#ifndef __riscv_v_intrinsic_overloading
150#include "intrin_rvv_compat_overloaded.hpp"
// v_get0: extract lane 0 of a vector as a scalar.
// NOTE(review): the macro/function body lines (presumably vmv_x / vfmv_f on
// the source vector) appear to be elided in this extraction.
155#define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
156inline _Tp v_get0(const v_##_Tpvec& v) \
161OPENCV_HAL_IMPL_RVV_GRT0_INT(uint8,
uchar)
162OPENCV_HAL_IMPL_RVV_GRT0_INT(int8,
schar)
163OPENCV_HAL_IMPL_RVV_GRT0_INT(uint16,
ushort)
164OPENCV_HAL_IMPL_RVV_GRT0_INT(int16,
short)
165OPENCV_HAL_IMPL_RVV_GRT0_INT(uint32,
unsigned)
166OPENCV_HAL_IMPL_RVV_GRT0_INT(int32,
int)
// Floating-point v_get0 overloads (f64 only with double support).
170inline float v_get0(
const v_float32& v) \
174#if CV_SIMD_SCALABLE_64F
175inline double v_get0(
const v_float64& v) \
// v_setzero_* / v_setall_*: broadcast-construct a full vector. Integer
// variants splat via vmv.v.x; `vl` is the full lane count for the type.
183#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, suffix1, suffix2, vl) \
184inline v_##_Tpvec v_setzero_##suffix1() \
186 return vmv_v_x_##suffix2##m1(0, vl); \
188inline v_##_Tpvec v_setall_##suffix1(_Tp v) \
190 return vmv_v_x_##suffix2##m1(v, vl); \
193OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8,
uchar, u8, u8, VTraits<v_uint8>::vlanes())
194OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8,
schar, s8, i8, VTraits<v_int8>::vlanes())
195OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16,
ushort, u16, u16, VTraits<v_uint16>::vlanes())
196OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16,
short, s16, i16, VTraits<v_int16>::vlanes())
197OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32,
uint, u32, u32, VTraits<v_uint32>::vlanes())
198OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32,
int, s32, i32, VTraits<v_int32>::vlanes())
199OPENCV_HAL_IMPL_RVV_INIT_INTEGER(
uint64,
uint64, u64, u64, VTraits<v_uint64>::vlanes())
200OPENCV_HAL_IMPL_RVV_INIT_INTEGER(
int64,
int64, s64, i64, VTraits<v_int64>::vlanes())
// Floating-point variants splat via vfmv.v.f instead.
202#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, suffix, vl) \
203inline v_##_Tpv v_setzero_##suffix() \
205 return vfmv_v_f_##suffix##m1(0, vl); \
207inline v_##_Tpv v_setall_##suffix(_Tp v) \
209 return vfmv_v_f_##suffix##m1(v, vl); \
212OPENCV_HAL_IMPL_RVV_INIT_FP(float32,
float, f32, VTraits<v_float32>::vlanes())
213#if CV_SIMD_SCALABLE_64F
214OPENCV_HAL_IMPL_RVV_INIT_FP(float64,
double, f64, VTraits<v_float64>::vlanes())
// Same-type "reinterpret": identity pass-through (no instruction emitted).
218#define OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(_Tpvec1, suffix1) \
219inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec1& v) \
223OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint8, u8)
224OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint16, u16)
225OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint32, u32)
226OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(
uint64, u64)
227OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int8, s8)
228OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int16, s16)
229OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int32, s32)
230OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(
int64, s64)
231OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float32, f32)
232#if CV_SIMD_SCALABLE_64F
233OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float64, f64)
// Bit-cast between two vector types that a single RVV vreinterpret covers:
// same element width with different signedness/float-ness, or same
// signedness with different element width. Defines both directions.
236#define OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2) \
237inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \
239 return v_##_Tpvec1(vreinterpret_v_##nsuffix2##m1_##nsuffix1##m1(v));\
241inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \
243 return v_##_Tpvec2(vreinterpret_v_##nsuffix1##m1_##nsuffix2##m1(v));\
246OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, int8, u8, s8, u8, i8)
247OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, int16, u16, s16, u16, i16)
248OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, int32, u32, s32, u32, i32)
249OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, float32, u32, f32, u32, f32)
250OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32, float32, s32, f32, i32, f32)
251OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(
uint64,
int64, u64, s64, u64, i64)
252#if CV_SIMD_SCALABLE_64F
253OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(
uint64, float64, u64, f64, u64, f64)
254OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(
int64, float64, s64, f64, i64, f64)
256OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint16, u8, u16, u8, u16)
257OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint32, u8, u32, u8, u32)
258OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8,
uint64, u8, u64, u8, u64)
259OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, uint32, u16, u32, u16, u32)
260OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16,
uint64, u16, u64, u16, u64)
261OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32,
uint64, u32, u64, u32, u64)
262OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int16, s8, s16, i8, i16)
263OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int32, s8, s32, i8, i32)
264OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8,
int64, s8, s64, i8, i64)
265OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16, int32, s16, s32, i16, i32)
266OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16,
int64, s16, s64, i16, i64)
267OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32,
int64, s32, s64, i32, i64)
// Bit-cast between types that differ in BOTH signedness/float-ness and
// element width: no single RVV vreinterpret exists, so chain two of them
// (change the "kind" at the source width, then change the width).
270#define OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \
271inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \
273 return vreinterpret_v_##nsuffix1##width2##m1_##nsuffix1##width1##m1(vreinterpret_v_##nsuffix2##width2##m1_##nsuffix1##width2##m1(v));\
275inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \
277 return vreinterpret_v_##nsuffix1##width2##m1_##nsuffix2##width2##m1(vreinterpret_v_##nsuffix1##width1##m1_##nsuffix1##width2##m1(v));\
280OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int16, u8, s16, u, i, 8, 16)
281OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int32, u8, s32, u, i, 8, 32)
282OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8,
int64, u8, s64, u, i, 8, 64)
283OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int8, u16, s8, u, i, 16, 8)
284OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int32, u16, s32, u, i, 16, 32)
285OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16,
int64, u16, s64, u, i, 16, 64)
286OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int8, u32, s8, u, i, 32, 8)
287OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int16, u32, s16, u, i, 32, 16)
288OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32,
int64, u32, s64, u, i, 32, 64)
289OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(
uint64, int8, u64, s8, u, i, 64, 8)
290OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(
uint64, int16, u64, s16, u, i, 64, 16)
291OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(
uint64, int32, u64, s32, u, i, 64, 32)
292OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float32, u8, f32, u, f, 8, 32)
293OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float32, u16, f32, u, f, 16, 32)
294OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(
uint64, float32, u64, f32, u, f, 64, 32)
295OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float32, s8, f32, i, f, 8, 32)
296OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float32, s16, f32, i, f, 16, 32)
297OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(
int64, float32, s64, f32, i, f, 64, 32)
298#if CV_SIMD_SCALABLE_64F
299OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float64, u8, f64, u, f, 8, 64)
300OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float64, u16, f64, u, f, 16, 64)
301OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, float64, u32, f64, u, f, 32, 64)
302OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float64, s8, f64, i, f, 8, 64)
303OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float64, s16, f64, i, f, 16, 64)
304OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int32, float64, s32, f64, i, f, 32, 64)
// f32 <-> f64 needs THREE reinterprets: float -> same-width uint,
// uint width change, uint -> float at the target width.
306inline v_float32 v_reinterpret_as_f32(const v_float64& v) \
308 return vreinterpret_v_u32m1_f32m1(vreinterpret_v_u64m1_u32m1(vreinterpret_v_f64m1_u64m1(v)));\
311inline v_float64 v_reinterpret_as_f64(
const v_float32& v) \
313 return vreinterpret_v_u64m1_f64m1(vreinterpret_v_u32m1_u64m1(vreinterpret_v_f32m1_u32m1(v)));\
// v_extract<i>(a, b): concatenate (a, b) and take vlanes() lanes starting at
// lane i -- implemented as slide-down of a by i, then slide-up of b into the
// vacated tail. v_extract_n<i>(v): scalar lane i via slide-down + vmv_x.
// Note: the index is a runtime `int i` defaulted from the template arg `s`.
319#define OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(_Tpvec, _Tp, suffix, vl) \
320template <int s = 0> \
321inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b, int i = s) \
323 return vslideup(vslidedown(v_setzero_##suffix(), a, i, vl), b, VTraits<_Tpvec>::vlanes() - i, vl); \
325template<int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) \
327 return vmv_x(vslidedown(v_setzero_##suffix(), v, i, vl)); \
331OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint8,
uchar, u8, VTraits<v_uint8>::vlanes())
332OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int8,
schar, s8, VTraits<v_int8>::vlanes())
333OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint16,
ushort, u16, VTraits<v_uint16>::vlanes())
334OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int16,
short, s16, VTraits<v_int16>::vlanes())
335OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint32,
unsigned int, u32, VTraits<v_uint32>::vlanes())
336OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int32,
int, s32, VTraits<v_int32>::vlanes())
337OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint64,
uint64, u64, VTraits<v_uint64>::vlanes())
338OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int64,
int64, s64, VTraits<v_int64>::vlanes())
// Floating-point version: identical shape, scalar read uses vfmv_f.
340#define OPENCV_HAL_IMPL_RVV_EXTRACT_FP(_Tpvec, _Tp, suffix, vl) \
341template <int s = 0> \
342inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b, int i = s) \
344 return vslideup(vslidedown(v_setzero_##suffix(), a, i, vl), b, VTraits<_Tpvec>::vlanes() - i, vl); \
346template<int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) \
348 return vfmv_f(vslidedown(v_setzero_##suffix(), v, i, vl)); \
351OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float32,
float, f32, VTraits<v_float32>::vlanes())
352#if CV_SIMD_SCALABLE_64F
353OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float64,
double, f64, VTraits<v_float64>::vlanes())
// v_extract_highest: last lane (index vlanes()-1) as a scalar.
356#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, vl) \
357inline _Tp v_extract_highest(_Tpvec v) \
359 return v_extract_n(v, vl-1); \
362OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8,
uchar, VTraits<v_uint8>::vlanes())
363OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8,
schar, VTraits<v_int8>::vlanes())
364OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16,
ushort, VTraits<v_uint16>::vlanes())
365OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16,
short, VTraits<v_int16>::vlanes())
366OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32,
unsigned int, VTraits<v_uint32>::vlanes())
367OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32,
int, VTraits<v_int32>::vlanes())
368OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64,
uint64, VTraits<v_uint64>::vlanes())
369OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64,
int64, VTraits<v_int64>::vlanes())
370OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32,
float, VTraits<v_float32>::vlanes())
371#if CV_SIMD_SCALABLE_64F
372OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64,
double, VTraits<v_float64>::vlanes())
// Load/store family: full-vector unit-stride loads/stores (vle/vse), plus
// half-vector variants. `vl` is the full lane count, `hvl` is half of it.
// Notes grounded in the code below:
//  - v_load_aligned is the same vle as v_load (RVV has no alignment variant).
//  - the v_store overload taking hal::StoreMode ignores the mode argument.
//  - v_load_halves: load hvl lanes from each pointer, slide ptr1's half up.
//  - v_store_high: slide the upper half down over a zero vector, store hvl.
//  - NOTE(review): v_load_##suffix passes a braced pack `{nScalars...}` to
//    v_load(const _Tp*) -- looks suspect; confirm against the upstream file.
377#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, vl, width, suffix, vmv) \
378inline _Tpvec v_load(const _Tp* ptr) \
380 return vle##width##_v_##suffix##m1(ptr, vl); \
382inline _Tpvec v_load_aligned(const _Tp* ptr) \
384 return vle##width##_v_##suffix##m1(ptr, vl); \
386inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode ) \
388 vse##width##_v_##suffix##m1(ptr, a, vl); \
390inline _Tpvec v_load_low(const _Tp* ptr) \
392 return vle##width##_v_##suffix##m1(ptr, hvl); \
394inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
396 return vslideup(vle##width##_v_##suffix##m1(ptr0, hvl), vle##width##_v_##suffix##m1(ptr1, hvl), hvl, vl); \
398inline void v_store(_Tp* ptr, const _Tpvec& a) \
400 vse##width(ptr, a, vl); \
402inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
404 vse##width(ptr, a, vl); \
406inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
408 vse##width(ptr, a, vl); \
410inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
412 vse##width(ptr, a, hvl); \
414inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
416 vse##width(ptr, vslidedown_vx_##suffix##m1(vmv(0, vl), a, hvl, vl), hvl); \
418template<typename... Targs> \
419_Tpvec v_load_##suffix(Targs... nScalars) \
421 return v_load({nScalars...}); \
425OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8, vuint8m1_t,
uchar, VTraits<v_uint8>::vlanes() / 2, VTraits<v_uint8>::vlanes(), 8, u8, vmv_v_x_u8m1)
426OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8, vint8m1_t,
schar, VTraits<v_int8>::vlanes() / 2, VTraits<v_int8>::vlanes(), 8, i8, vmv_v_x_i8m1)
427OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16, vuint16m1_t,
ushort, VTraits<v_uint16>::vlanes() / 2, VTraits<v_uint16>::vlanes(), 16, u16, vmv_v_x_u16m1)
428OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16, vint16m1_t,
short, VTraits<v_int16>::vlanes() / 2, VTraits<v_int16>::vlanes(), 16, i16, vmv_v_x_i16m1)
429OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32, vuint32m1_t,
unsigned int, VTraits<v_uint32>::vlanes() / 2, VTraits<v_uint32>::vlanes(), 32, u32, vmv_v_x_u32m1)
430OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32, vint32m1_t,
int, VTraits<v_int32>::vlanes() / 2, VTraits<v_int32>::vlanes(), 32, i32, vmv_v_x_i32m1)
431OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64, vuint64m1_t,
uint64, VTraits<v_uint64>::vlanes() / 2, VTraits<v_uint64>::vlanes(), 64, u64, vmv_v_x_u64m1)
432OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64, vint64m1_t,
int64, VTraits<v_int64>::vlanes() / 2, VTraits<v_int64>::vlanes(), 64, i64, vmv_v_x_i64m1)
433OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32, vfloat32m1_t,
float, VTraits<v_float32>::vlanes() /2 , VTraits<v_float32>::vlanes(), 32, f32, vfmv_v_f_f32m1)
435#if CV_SIMD_SCALABLE_64F
436OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64, vfloat64m1_t,
double, VTraits<v_float64>::vlanes() / 2, VTraits<v_float64>::vlanes(), 64, f64, vfmv_v_f_f64m1)
// v_lut: gather vlanes() elements of `tab` at positions `idx`. The int
// indices are loaded (at LMUL `suffix`, widened so one u32 index exists per
// output lane), scaled to byte offsets, then gathered with vloxei32.
440#define OPENCV_HAL_IMPL_RVV_LUT(_Tpvec, _Tp, suffix) \
441inline _Tpvec v_lut(const _Tp* tab, const int* idx) \
443 auto vidx = vmul(vreinterpret_u32##suffix(vle32_v_i32##suffix(idx, VTraits<_Tpvec>::vlanes())), sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
444 return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
446OPENCV_HAL_IMPL_RVV_LUT(v_int8,
schar, m4)
447OPENCV_HAL_IMPL_RVV_LUT(v_int16,
short, m2)
448OPENCV_HAL_IMPL_RVV_LUT(v_int32,
int, m1)
449OPENCV_HAL_IMPL_RVV_LUT(v_int64, int64_t, mf2)
450OPENCV_HAL_IMPL_RVV_LUT(v_float32,
float, m1)
451#if CV_SIMD_SCALABLE_64F
452OPENCV_HAL_IMPL_RVV_LUT(v_float64,
double, mf2)
// v_lut_pairs: gather consecutive element pairs tab[idx[k]], tab[idx[k]+1].
// Builds the interleaved index vector {i0, i0+1, i1, i1+1, ...} by widening
// idx and idx+1 to 64-bit lanes, slide-1-up of the "+1" stream, and OR-ing
// the two streams together; then scales to bytes and gathers via vloxei32.
455#define OPENCV_HAL_IMPL_RVV_LUT_PAIRS(_Tpvec, _Tp, suffix1, suffix2, v_trunc) \
456inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
458 auto v0 = vle32_v_u32##suffix1((unsigned*)idx, VTraits<_Tpvec>::vlanes()/2); \
459 auto v1 = vadd(v0, 1, VTraits<_Tpvec>::vlanes()/2); \
460 auto w0 = vwcvtu_x(v0, VTraits<_Tpvec>::vlanes()/2); \
461 auto w1 = vwcvtu_x(v1, VTraits<_Tpvec>::vlanes()/2); \
462 auto sh1 = vslide1up(v_trunc(vreinterpret_u32##suffix2(w1)),0, VTraits<_Tpvec>::vlanes()); \
463 auto vid = vor(sh1, v_trunc(vreinterpret_u32##suffix2(w0)), VTraits<_Tpvec>::vlanes()); \
464 auto vidx = vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
465 return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
467OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int8,
schar, m2, m4, OPENCV_HAL_NOP)
468OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int16,
short, m1, m2, OPENCV_HAL_NOP)
469OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int32,
int, mf2, m1, OPENCV_HAL_NOP)
470OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float32,
float, mf2, m1, OPENCV_HAL_NOP)
471OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int64, int64_t, mf2, m1, vlmul_trunc_u32mf2)
472#if CV_SIMD_SCALABLE_64F
473OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float64,
double, mf2, m1, vlmul_trunc_u32mf2)
// v_lut_quads: gather consecutive quadruples tab[i..i+3] for each index.
// Two interleave rounds: first interleave the (+0,+2) and (+1,+3) streams,
// then interleave those results, yielding {i, i+1, i+2, i+3, ...} before the
// byte-offset scale and the vloxei32 gather.
477#define OPENCV_HAL_IMPL_RVV_LUT_QUADS(_Tpvec, _Tp, suffix0, suffix1, suffix2, v_trunc) \
478inline _Tpvec v_lut_quads(const _Tp* tab, const int* idx) \
480 auto v0 = vle32_v_u32##suffix0((unsigned*)idx, VTraits<_Tpvec>::vlanes()/4); \
481 auto v1 = vadd(v0, 1, VTraits<_Tpvec>::vlanes()/4); \
482 auto v2 = vadd(v0, 2, VTraits<_Tpvec>::vlanes()/4); \
483 auto v3 = vadd(v0, 3, VTraits<_Tpvec>::vlanes()/4); \
484 auto w0 = vwcvtu_x(v0, VTraits<_Tpvec>::vlanes()/4); \
485 auto w1 = vwcvtu_x(v1, VTraits<_Tpvec>::vlanes()/4); \
486 auto w2 = vwcvtu_x(v2, VTraits<_Tpvec>::vlanes()/4); \
487 auto w3 = vwcvtu_x(v3, VTraits<_Tpvec>::vlanes()/4); \
488 auto sh2 = vslide1up(vreinterpret_u32##suffix1(w2),0, VTraits<_Tpvec>::vlanes()/2); \
489 auto sh3 = vslide1up(vreinterpret_u32##suffix1(w3),0, VTraits<_Tpvec>::vlanes()/2); \
490 auto vid0 = vor(sh2, vreinterpret_u32##suffix1(w0), VTraits<_Tpvec>::vlanes()/2); \
491 auto vid1 = vor(sh3, vreinterpret_u32##suffix1(w1), VTraits<_Tpvec>::vlanes()/2); \
492 auto wid0 = vwcvtu_x(v_trunc(vid0), VTraits<_Tpvec>::vlanes()/2); \
493 auto wid1 = vwcvtu_x(v_trunc(vid1), VTraits<_Tpvec>::vlanes()/2); \
494 auto shwid1 = vslide1up(vreinterpret_u32##suffix2(wid1),0, VTraits<_Tpvec>::vlanes()); \
495 auto vid = vor(shwid1, vreinterpret_u32##suffix2(wid0), VTraits<_Tpvec>::vlanes()); \
496 auto vidx = vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
497 return vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
499OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int8,
schar, m1, m2, m4, OPENCV_HAL_NOP)
500OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int16,
short, mf2 , m1, m2, OPENCV_HAL_NOP)
501OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int32,
int, mf2, m1, m1, vlmul_trunc_u32mf2)
502OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_float32,
float, mf2, m1, m1, vlmul_trunc_u32mf2)
// v_lut with a vector of indices (v_int32): reinterpret the indices as u32,
// scale to byte offsets, and gather with vloxei32. The f64 overload narrows
// the index vector to u32mf2 so one 32-bit offset exists per 64-bit lane.
504#define OPENCV_HAL_IMPL_RVV_LUT_VEC(_Tpvec, _Tp) \
505inline _Tpvec v_lut(const _Tp* tab, const v_int32& vidx) \
507 v_uint32 vidx_ = vmul(vreinterpret_u32m1(vidx), sizeof(_Tp), VTraits<v_int32>::vlanes()); \
508 return vloxei32(tab, vidx_, VTraits<_Tpvec>::vlanes()); \
510OPENCV_HAL_IMPL_RVV_LUT_VEC(v_float32,
float)
511OPENCV_HAL_IMPL_RVV_LUT_VEC(v_int32,
int)
512OPENCV_HAL_IMPL_RVV_LUT_VEC(v_uint32,
unsigned)
514#if CV_SIMD_SCALABLE_64F
515inline v_float64
v_lut(
const double* tab,
const v_int32& vidx) \
517 vuint32mf2_t vidx_ = vmul(vlmul_trunc_u32mf2(vreinterpret_u32m1(vidx)),
sizeof(
double), VTraits<v_float64>::vlanes()); \
518 return vloxei32(tab, vidx_, VTraits<v_float64>::vlanes()); \
526inline v_uint16
v_lut(
const ushort* tab,
const int*
idx) {
return v_reinterpret_as_u16(
v_lut((
short*)tab,
idx)); }
529inline v_uint32
v_lut(
const unsigned* tab,
const int*
idx) {
return v_reinterpret_as_u32(
v_lut((
int*)tab,
idx)); }
532inline v_uint64
v_lut(
const uint64* tab,
const int*
idx) {
return v_reinterpret_as_u64(
v_lut((
const int64_t *)tab,
idx)); }
// v_pack_b: pack 2/4/8 wider unsigned vectors into one u8 vector by
// concatenating them into a larger LMUL group (vset/vlmul_ext) and then
// narrowing with vnsrl (shift 0) once per halving step.
536inline v_uint8
v_pack_b(
const v_uint16& a,
const v_uint16& b)
538 return vnsrl(vset(vlmul_ext_v_u16m1_u16m2(a),1,b), 0, VTraits<v_uint8>::vlanes());
// Four u32 inputs: two narrowing rounds (32 -> 16 -> 8).
541inline v_uint8
v_pack_b(
const v_uint32& a,
const v_uint32& b,
542 const v_uint32& c,
const v_uint32& d)
545 return vnsrl(vnsrl(vset(vset(vset(vlmul_ext_u32m4(a),1,b),2,c),3,d), 0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes());
// Eight u64 inputs: three narrowing rounds (64 -> 32 -> 16 -> 8).
548inline v_uint8
v_pack_b(
const v_uint64& a,
const v_uint64& b,
const v_uint64& c,
549 const v_uint64& d,
const v_uint64& e,
const v_uint64& f,
550 const v_uint64& g,
const v_uint64& h)
552 return vnsrl(vnsrl(vnsrl(
553 vset(vset(vset(vset(vset(vset(vset(vlmul_ext_u64m8(a),
554 1,b),2,c),3,d),4,e),5,f),6,g),7,h),
555 0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes()), 0, VTraits<v_uint8>::vlanes());
// Element-wise binary operations. 8/16-bit add/sub use the SATURATING RVV
// forms (vsadd/vssub/vsaddu/vssubu) per OpenCV universal-intrinsic
// semantics; 32/64-bit use modular vadd/vsub; floats use vfadd/vfsub/etc.
559#define OPENCV_HAL_IMPL_RVV_BIN_OP(_Tpvec, ocv_intrin, rvv_intrin) \
560inline _Tpvec v_##ocv_intrin(const _Tpvec& a, const _Tpvec& b) \
562 return rvv_intrin(a, b, VTraits<_Tpvec>::vlanes()); \
565OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8,
add, vsaddu)
566OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub, vssubu)
567OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8,
add, vsadd)
568OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub, vssub)
569OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16,
add, vsaddu)
570OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub, vssubu)
571OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16,
add, vsadd)
572OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub, vssub)
573OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32,
add, vadd)
574OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, sub, vsub)
575OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, mul, vmul)
576OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32,
add, vadd)
577OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, sub, vsub)
578OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, mul, vmul)
579OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32,
add, vfadd)
580OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, sub, vfsub)
581OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, mul, vfmul)
582OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, div, vfdiv)
583OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64,
add, vadd)
584OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64, sub, vsub)
585OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64,
add, vadd)
586OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64, sub, vsub)
588#if CV_SIMD_SCALABLE_64F
589OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64,
add, vfadd)
590OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, sub, vfsub)
591OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, mul, vfmul)
592OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, div, vfdiv)
// Variadic v_add / v_mul: fold left over 3+ operands by combining the first
// two and recursing into the binary overloads defined above.
595#define OPENCV_HAL_IMPL_RVV_BIN_MADD(_Tpvec, rvv_add) \
596template<typename... Args> \
597inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
598 return v_add(rvv_add(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
600#define OPENCV_HAL_IMPL_RVV_BIN_MMUL(_Tpvec, rvv_mul) \
601template<typename... Args> \
602inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
603 return v_mul(rvv_mul(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
605OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint8, vsaddu)
606OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int8, vsadd)
607OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint16, vsaddu)
608OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int16, vsadd)
609OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint32, vadd)
610OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int32, vadd)
611OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float32, vfadd)
612OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint64, vadd)
613OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int64, vadd)
615OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_uint32, vmul)
616OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_int32, vmul)
617OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float32, vfmul)
618#if CV_SIMD_SCALABLE_64F
619OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float64, vfadd)
620OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float64, vfmul)
// v_mul_expand: full-precision widening multiply. The m2-width product is
// split into low-index (c) and high-index (d) m1 halves via vget.
623#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _TpwvecM2, suffix, wmul) \
624inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \
626 _TpwvecM2 temp = wmul(a, b, VTraits<_Tpvec>::vlanes()); \
627 c = vget_##suffix##m1(temp, 0); \
628 d = vget_##suffix##m1(temp, 1); \
631OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8, v_uint16, vuint16m2_t, u16, vwmulu)
632OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8, v_int16, vint16m2_t, i16, vwmul)
633OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16, v_uint32, vuint32m2_t, u32, vwmulu)
634OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16, v_int32, vint32m2_t, i32, vwmul)
635OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32, v_uint64, vuint64m2_t, u64, vwmulu)
// v_mul_hi: upper 16 bits of the 16x16 product (vmulh / vmulhu).
637inline v_int16
v_mul_hi(
const v_int16& a,
const v_int16& b)
639 return vmulh(a, b, VTraits<v_int16>::vlanes());
641inline v_uint16
v_mul_hi(
const v_uint16& a,
const v_uint16& b)
643 return vmulhu(a, b, VTraits<v_uint16>::vlanes());
// *_wrap variants: modular (wrap-around) arithmetic for 8/16-bit lanes,
// in contrast to the saturating v_add/v_sub defined earlier.
647OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, add_wrap, vadd)
648OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, add_wrap, vadd)
649OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, add_wrap, vadd)
650OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, add_wrap, vadd)
651OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub_wrap, vsub)
652OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub_wrap, vsub)
653OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub_wrap, vsub)
654OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub_wrap, vsub)
655OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, mul_wrap, vmul)
656OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, mul_wrap, vmul)
657OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, mul_wrap, vmul)
658OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, mul_wrap, vmul)
// Saturating v_mul for 8/16-bit: widening multiply, then narrow back with
// the clipping vnclip/vnclipu (shift 0). Variadic form folds extra operands.
661#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _clip, _wmul) \
662inline _Tpvec v_mul(const _Tpvec& a, const _Tpvec& b) \
664 return _clip(_wmul(a, b, VTraits<_Tpvec>::vlanes()), 0, VTraits<_Tpvec>::vlanes()); \
666template<typename... Args> \
667inline _Tpvec v_mul(const _Tpvec& a1, const _Tpvec& a2, const Args&... va) { \
668 return v_mul(_clip(_wmul(a1, a2, VTraits<_Tpvec>::vlanes()), 0, VTraits<_Tpvec>::vlanes()), va...); \
671OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8, vnclipu, vwmulu)
672OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8, vnclip, vwmul)
673OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16, vnclipu, vwmulu)
674OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16, vnclip, vwmul)
// Bitwise and/or/xor/not for every integer vector type.
678#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, vl) \
679inline _Tpvec v_and(const _Tpvec& a, const _Tpvec& b) \
681 return vand(a, b, vl); \
683inline _Tpvec v_or(const _Tpvec& a, const _Tpvec& b) \
685 return vor(a, b, vl); \
687inline _Tpvec v_xor(const _Tpvec& a, const _Tpvec& b) \
689 return vxor(a, b, vl); \
691inline _Tpvec v_not (const _Tpvec& a) \
693 return vnot(a, vl); \
696OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8, VTraits<v_uint8>::vlanes())
697OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8, VTraits<v_int8>::vlanes())
698OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16, VTraits<v_uint16>::vlanes())
699OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16, VTraits<v_int16>::vlanes())
700OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32, VTraits<v_uint32>::vlanes())
701OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32, VTraits<v_int32>::vlanes())
702OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64, VTraits<v_uint64>::vlanes())
703OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64, VTraits<v_int64>::vlanes())
// Float bit ops: no float bitwise instructions exist, so bit-cast to the
// same-width signed integer type, operate, and bit-cast back.
705#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(intrin) \
706inline v_float32 intrin (const v_float32& a, const v_float32& b) \
708 return vreinterpret_f32m1(intrin(vreinterpret_i32m1(a), vreinterpret_i32m1(b))); \
710OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_and)
711OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_or)
712OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_xor)
714inline v_float32 v_not (
const v_float32& a) \
716 return vreinterpret_f32m1(v_not(vreinterpret_i32m1(a))); \
719#if CV_SIMD_SCALABLE_64F
720#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(intrin) \
721inline v_float64 intrin (const v_float64& a, const v_float64& b) \
723 return vreinterpret_f64m1(intrin(vreinterpret_i64m1(a), vreinterpret_i64m1(b))); \
725OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_and)
726OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_or)
727OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_xor)
729inline v_float64 v_not (
const v_float64& a) \
731 return vreinterpret_f64m1(v_not(vreinterpret_i64m1(a))); \
742#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, vl) \
743template<int s = 0> inline _Tpvec v_shl(const _Tpvec& a, int n = s) \
745 return _Tpvec(vsll(a, uint8_t(n), vl)); \
747template<int s = 0> inline _Tpvec v_shr(const _Tpvec& a, int n = s) \
749 return _Tpvec(vsrl(a, uint8_t(n), vl)); \
752#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, vl) \
753template<int s = 0> inline _Tpvec v_shl(const _Tpvec& a, int n = s) \
755 return _Tpvec(vsll(a, uint8_t(n), vl)); \
757template<int s = 0> inline _Tpvec v_shr(const _Tpvec& a, int n = s) \
759 return _Tpvec(vsra(a, uint8_t(n), vl)); \
762OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16, VTraits<v_uint16>::vlanes())
763OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32, VTraits<v_uint32>::vlanes())
764OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64, VTraits<v_uint64>::vlanes())
765OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16, VTraits<v_int16>::vlanes())
766OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32, VTraits<v_int32>::vlanes())
767OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64, VTraits<v_int64>::vlanes())
////////////// Comparison //////////////

// Integer compare: the mask from the compare intrinsic selects between an
// all-zero vector and the all-ones scalar (truncated to lane width), so
// "true" lanes come out with every bit set, per the HAL mask convention.
#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix) \
inline _Tpvec v_##op(const _Tpvec& a, const _Tpvec& b) \
{ \
    size_t VLEN = VTraits<_Tpvec>::vlanes(); \
    uint64_t ones = (uint64_t)-1; \
    return vmerge(intrin(a, b, VLEN), vmv_v_x_##suffix##m1(0, VLEN), ones, VLEN); \
}
// Float compare: "true" lanes become the all-ones bit pattern (a NaN when
// read as float), "false" lanes zero. The union punning produces the
// all-bits-set value in the lane type; ones.u MUST be set before ones.d
// is read (the pasted text lost that line, leaving ones uninitialized).
#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix) \
inline _Tpvec v_##op(const _Tpvec& a, const _Tpvec& b) \
{ \
    size_t VLEN = VTraits<_Tpvec>::vlanes(); \
    union { uint64_t u; VTraits<_Tpvec>::lane_type d; } ones; \
    ones.u = (uint64_t)-1; \
    auto diff = intrin(a, b, VLEN); \
    auto z = vfmv_v_f_##suffix##m1(0, VLEN); \
    auto res = vfmerge(diff, z, ones.d, VLEN); \
    return _Tpvec(res); \
}
// Full eq/ne/lt/gt/le/ge suites per element category; unsigned compares
// use the *u intrinsics, floats use the vmf* intrinsics.
#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, vmseq, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, vmsne, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, vmsltu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, vmsgtu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, vmsleu, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, vmsgeu, suffix)

#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, vmseq, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, vmsne, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, vmslt, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, vmsgt, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, vmsle, suffix) \
OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, vmsge, suffix)

#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, eq, vmfeq, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ne, vmfne, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, lt, vmflt, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, gt, vmfgt, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, le, vmfle, suffix) \
OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ge, vmfge, suffix)
815OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8, u8)
816OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16, u16)
817OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32, u32)
818OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64, u64)
819OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8, i8)
820OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16, i16)
821OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32, i32)
822OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64, i64)
823OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32, f32)
824#if CV_SIMD_SCALABLE_64F
825OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64, f64)
828inline v_float32
v_not_nan(
const v_float32& a)
829{
return v_eq(a, a); }
831#if CV_SIMD_SCALABLE_64F
832inline v_float64
v_not_nan(
const v_float64& a)
833{
return v_eq(a, a); }
838#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, vl) \
839inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
841 return intrin(a, b, vl); \
844OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_min, vminu, VTraits<v_uint8>::vlanes())
845OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_max, vmaxu, VTraits<v_uint8>::vlanes())
846OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_min, vmin, VTraits<v_int8>::vlanes())
847OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_max, vmax, VTraits<v_int8>::vlanes())
848OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_min, vminu, VTraits<v_uint16>::vlanes())
849OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_max, vmaxu, VTraits<v_uint16>::vlanes())
850OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_min, vmin, VTraits<v_int16>::vlanes())
851OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_max, vmax, VTraits<v_int16>::vlanes())
852OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_min, vminu, VTraits<v_uint32>::vlanes())
853OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_max, vmaxu, VTraits<v_uint32>::vlanes())
854OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_min, vmin, VTraits<v_int32>::vlanes())
855OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_max, vmax, VTraits<v_int32>::vlanes())
856OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_min, vfmin, VTraits<v_float32>::vlanes())
857OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_max, vfmax, VTraits<v_float32>::vlanes())
858#if CV_SIMD_SCALABLE_64F
859OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_min, vfmin, VTraits<v_float64>::vlanes())
860OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_max, vfmax, VTraits<v_float64>::vlanes())
864#define OPENCV_HAL_IMPL_RVV_ZIP4(_Tpvec, _wTpvec, suffix, convert2u, convert) \
865inline void v_zip4(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) { \
867 _wTpvec temp = vreinterpret_##suffix##m2(convert2u( \
868 vor(vzext_vf2(convert(a0), vl), \
869 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(convert(a1), vl)), 0, vl*2)), \
871 b0 = vget_##suffix##m1(temp, 0); \
872 b1 = vget_##suffix##m1(vrgather(temp, vadd(vid_v_u32m2(vl), 4, vl), vl) ,0); \
875OPENCV_HAL_IMPL_RVV_ZIP4(v_uint32, vuint32m2_t, u32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
876OPENCV_HAL_IMPL_RVV_ZIP4(v_int32, vint32m2_t, i32, vreinterpret_u32m2, vreinterpret_u32m1)
877OPENCV_HAL_IMPL_RVV_ZIP4(v_float32, vfloat32m2_t, f32, vreinterpret_u32m2, vreinterpret_u32m1)
881inline void v_zip4(
const v_float32& a0,
const v_float32& a1, v_float32& b0, v_float32& b1) {
882 vuint64m1_t vid1 = vid_v_u64m1(VTraits<vuint64m1_t>::vlanes());
883 vuint16m1_t t1 = vreinterpret_u16m1(vid1);
884 vuint16m1_t t2 = vslide1up(t1, 0, VTraits<vuint16m1_t>::vlanes());
885 vuint16m1_t t3 = vslide1up(t2, 0, VTraits<vuint16m1_t>::vlanes());
886 vuint16m1_t t4 = vslide1up(t3, 0, VTraits<vuint16m1_t>::vlanes());
888 vor(t1, t2, VTraits<vuint16m1_t>::vlanes()),
889 vor(t3, t4, VTraits<vuint16m1_t>::vlanes()),
890 VTraits<vuint16m1_t>::vlanes()
892 vuint32m2_t vidx0 = vwmulu(t1, 4, VTraits<vuint32m1_t>::vlanes());
893 vidx0 = vadd(vidx0, vid_v_u32m2(VTraits<vuint32m1_t>::vlanes()), VTraits<vuint32m1_t>::vlanes());
894 vuint32m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint32m1_t>::vlanes());
895 vfloat32m2_t
temp = vreinterpret_f32m2(vreinterpret_u32m2(
896 vor(vzext_vf2(vreinterpret_u32m1(a0), VTraits<vuint16m1_t>::vlanes()),
897 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a1), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vfloat32m1_t>::vlanes()*2)),
898 VTraits<vfloat32m1_t>::vlanes())));
899 b0 = vlmul_trunc_f32m1(vrgather(
temp, vidx0, VTraits<vuint16m1_t>::vlanes()));
900 b1 = vlmul_trunc_f32m1(vrgather(
temp, vidx1, VTraits<vuint16m1_t>::vlanes()));
903inline void v_transpose4x4(
const v_float32& a0,
const v_float32& a1,
const v_float32& a2,
const v_float32& a3,\
904 v_float32& b0, v_float32& b1, v_float32& b2, v_float32& b3) { \
905 vuint64m2_t vid1 = vid_v_u64m2(VTraits<vuint32m1_t>::vlanes());
906 vuint16m2_t t1 = vreinterpret_u16m2(vid1);
907 vuint16m2_t t2 = vslide1up(t1, 0, VTraits<vuint8m1_t>::vlanes());
908 vuint16m2_t t3 = vslide1up(t2, 0, VTraits<vuint8m1_t>::vlanes());
909 vuint16m2_t t4 = vslide1up(t3, 0, VTraits<vuint8m1_t>::vlanes());
911 vor(t1, t2, VTraits<vuint8m1_t>::vlanes()),
912 vor(t3, t4, VTraits<vuint8m1_t>::vlanes()),
913 VTraits<vuint8m1_t>::vlanes()
915 vuint16m2_t vidx0 = vmul(t1, 12, VTraits<vuint8m1_t>::vlanes());
916 vidx0 = vadd(vidx0, vid_v_u16m2(VTraits<vuint8m1_t>::vlanes()), VTraits<vuint8m1_t>::vlanes());
917 vuint16m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint8m1_t>::vlanes());
918 vuint16m2_t vidx2 = vadd(vidx0, 8, VTraits<vuint8m1_t>::vlanes());
919 vuint16m2_t vidx3 = vadd(vidx0, 12, VTraits<vuint8m1_t>::vlanes());
920 vuint32m2_t tempA = vreinterpret_u32m2( \
921 vor(vzext_vf2(vreinterpret_u32m1(a0), VTraits<vuint16m1_t>::vlanes()), \
922 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a2), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())), \
923 VTraits<vuint32m1_t>::vlanes())); \
924 vuint32m2_t tempB = vreinterpret_u32m2( \
925 vor(vzext_vf2(vreinterpret_u32m1(a1), VTraits<vuint16m1_t>::vlanes()), \
926 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(a3), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())), \
927 VTraits<vuint32m1_t>::vlanes())); \
928 vfloat32m4_t
temp = vreinterpret_f32m4(vreinterpret_u32m4( \
929 vor(vzext_vf2(tempA, VTraits<vuint8m1_t>::vlanes()), \
930 vreinterpret_u64m4(vslide1up(vreinterpret_u32m4(vzext_vf2(tempB, VTraits<vuint8m1_t>::vlanes())), 0, VTraits<vuint8m1_t>::vlanes())), \
931 VTraits<vuint16m1_t>::vlanes()))); \
932 b0 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx0, VTraits<vuint8m1_t>::vlanes()));
933 b1 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx1, VTraits<vuint8m1_t>::vlanes()));
934 b2 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx2, VTraits<vuint8m1_t>::vlanes()));
935 b3 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx3, VTraits<vuint8m1_t>::vlanes()));
939#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, suffix) \
940inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, const _Tpvec& a2, const _Tpvec& a3, _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) { \
941 _Tpvec t0,t1,t2,t3; \
942 v_zip4(a0, a2, t0, t2); \
943 v_zip4(a1, a3, t1, t3); \
944 v_zip4(t0, t1, b0, b1); \
945 v_zip4(t2, t3, b2, b3); \
948OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_uint32, u32)
949OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_int32, i32)
950OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_float32, f32)
954#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, wsuffix, vl, red) \
955inline scalartype v_reduce_sum(const _Tpvec& a) \
957 _nwTpvec zero = vmv_v_x_##wsuffix##m1(0, vl); \
958 _nwTpvec res = vmv_v_x_##wsuffix##m1(0, vl); \
959 res = v##red(res, a, zero, vl); \
960 return (scalartype)v_get0(res); \
962OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8, v_uint16, vuint16m1_t,
unsigned, u16, VTraits<v_uint8>::vlanes(), wredsumu)
963OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8, v_int16, vint16m1_t,
int, i16, VTraits<v_int8>::vlanes(), wredsum)
964OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16, v_uint32, vuint32m1_t,
unsigned, u32, VTraits<v_uint16>::vlanes(), wredsumu)
965OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16, v_int32, vint32m1_t,
int, i32, VTraits<v_int16>::vlanes(), wredsum)
966OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32, v_uint64, vuint64m1_t,
unsigned, u64, VTraits<v_uint32>::vlanes(), wredsumu)
967OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32, v_int64, vint64m1_t,
int, i64, VTraits<v_int32>::vlanes(), wredsum)
968OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64, v_uint64, vuint64m1_t,
uint64, u64, VTraits<v_uint64>::vlanes(), redsum)
969OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64, v_int64, vint64m1_t,
int64, i64, VTraits<v_int64>::vlanes(), redsum)
972#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(_Tpvec, _wTpvec, _nwTpvec, scalartype, wsuffix, vl) \
973inline scalartype v_reduce_sum(const _Tpvec& a) \
975 _nwTpvec zero = vfmv_v_f_##wsuffix##m1(0, vl); \
976 _nwTpvec res = vfmv_v_f_##wsuffix##m1(0, vl); \
977 res = vfredosum(res, a, zero, vl); \
978 return (scalartype)v_get0(res); \
980OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32, v_float32, vfloat32m1_t,
float, f32, VTraits<v_float32>::vlanes())
#if CV_SIMD_SCALABLE_64F
// Return type must be double: the universal-intrinsics contract for
// v_reduce_sum(v_float64) is a double, and casting through float would
// silently lose precision.
OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64, v_float64, vfloat64m1_t, double, f64, VTraits<v_float64>::vlanes())
#endif
985#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \
986inline scalartype v_reduce_##func(const _Tpvec& a) \
988 _Tpvec res = _Tpvec(v##red(a, a, a, vl)); \
989 return (scalartype)v_get0(res); \
992OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8, min,
uchar, u8, VTraits<v_uint8>::vlanes(), redminu)
993OPENCV_HAL_IMPL_RVV_REDUCE(v_int8, min,
schar, i8, VTraits<v_int8>::vlanes(), redmin)
994OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16, min,
ushort, u16, VTraits<v_uint16>::vlanes(), redminu)
995OPENCV_HAL_IMPL_RVV_REDUCE(v_int16, min,
short, i16, VTraits<v_int16>::vlanes(), redmin)
996OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32, min,
unsigned, u32, VTraits<v_uint32>::vlanes(), redminu)
997OPENCV_HAL_IMPL_RVV_REDUCE(v_int32, min,
int, i32, VTraits<v_int32>::vlanes(), redmin)
998OPENCV_HAL_IMPL_RVV_REDUCE(v_float32, min,
float, f32, VTraits<v_float32>::vlanes(), fredmin)
999OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8, max,
uchar, u8, VTraits<v_uint8>::vlanes(), redmaxu)
1000OPENCV_HAL_IMPL_RVV_REDUCE(v_int8, max,
schar, i8, VTraits<v_int8>::vlanes(), redmax)
1001OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16, max,
ushort, u16, VTraits<v_uint16>::vlanes(), redmaxu)
1002OPENCV_HAL_IMPL_RVV_REDUCE(v_int16, max,
short, i16, VTraits<v_int16>::vlanes(), redmax)
1003OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32, max,
unsigned, u32, VTraits<v_uint32>::vlanes(), redmaxu)
1004OPENCV_HAL_IMPL_RVV_REDUCE(v_int32, max,
int, i32, VTraits<v_int32>::vlanes(), redmax)
1005OPENCV_HAL_IMPL_RVV_REDUCE(v_float32, max,
float, f32, VTraits<v_float32>::vlanes(), fredmax)
1007inline v_float32
v_reduce_sum4(
const v_float32& a,
const v_float32& b,
1008 const v_float32& c,
const v_float32& d)
1011 vuint64m2_t vid1 = vid_v_u64m2(VTraits<vuint32m1_t>::vlanes());
1012 vuint16m2_t t1 = vreinterpret_u16m2(vid1);
1013 vuint16m2_t t2 = vslide1up(t1, 0, VTraits<vuint8m1_t>::vlanes());
1014 vuint16m2_t t3 = vslide1up(t2, 0, VTraits<vuint8m1_t>::vlanes());
1015 vuint16m2_t t4 = vslide1up(t3, 0, VTraits<vuint8m1_t>::vlanes());
1017 vor(t1, t2, VTraits<vuint8m1_t>::vlanes()),
1018 vor(t3, t4, VTraits<vuint8m1_t>::vlanes()),
1019 VTraits<vuint8m1_t>::vlanes()
1023 vuint16m2_t vidx0 = vmul(t1, 12, VTraits<vuint8m1_t>::vlanes());
1024 vidx0 = vadd(vidx0, vid_v_u16m2(VTraits<vuint8m1_t>::vlanes()), VTraits<vuint8m1_t>::vlanes());
1025 vuint16m2_t vidx1 = vadd(vidx0, 4, VTraits<vuint8m1_t>::vlanes());
1026 vuint16m2_t vidx2 = vadd(vidx0, 8, VTraits<vuint8m1_t>::vlanes());
1027 vuint16m2_t vidx3 = vadd(vidx0, 12, VTraits<vuint8m1_t>::vlanes());
1030 vuint32m2_t tempA = vreinterpret_u32m2( \
1031 vor(vzext_vf2(vreinterpret_u32m1(a), VTraits<vuint16m1_t>::vlanes()), \
1032 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(c), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())), \
1033 VTraits<vuint32m1_t>::vlanes())); \
1034 vuint32m2_t tempB = vreinterpret_u32m2( \
1035 vor(vzext_vf2(vreinterpret_u32m1(b), VTraits<vuint16m1_t>::vlanes()), \
1036 vreinterpret_u64m2(vslide1up(vreinterpret_u32m2(vzext_vf2(vreinterpret_u32m1(d), VTraits<vuint16m1_t>::vlanes())), 0, VTraits<vuint16m1_t>::vlanes())), \
1037 VTraits<vuint32m1_t>::vlanes())); \
1038 vfloat32m4_t
temp = vreinterpret_f32m4(vreinterpret_u32m4( \
1039 vor(vzext_vf2(tempA, VTraits<vuint8m1_t>::vlanes()), \
1040 vreinterpret_u64m4(vslide1up(vreinterpret_u32m4(vzext_vf2(tempB, VTraits<vuint8m1_t>::vlanes())), 0, VTraits<vuint8m1_t>::vlanes())), \
1041 VTraits<vuint16m1_t>::vlanes())));
1044 vfloat32m1_t b0 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx0, VTraits<vuint8m1_t>::vlanes()));
1045 vfloat32m1_t b1 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx1, VTraits<vuint8m1_t>::vlanes()));
1046 vfloat32m1_t b2 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx2, VTraits<vuint8m1_t>::vlanes()));
1047 vfloat32m1_t b3 = vlmul_trunc_f32m1(vrgatherei16(
temp, vidx3, VTraits<vuint8m1_t>::vlanes()));
1050 v_float32 res = vfadd(
1051 vfadd(b0, b1, VTraits<vfloat32m1_t>::vlanes()),
1052 vfadd(b2, b3, VTraits<vfloat32m1_t>::vlanes()),
1053 VTraits<vfloat32m1_t>::vlanes()
1060inline v_float32 v_sqrt(
const v_float32&
x)
1062 return vfsqrt(
x, VTraits<v_float32>::vlanes());
1065inline v_float32
v_invsqrt(
const v_float32&
x)
1067 v_float32 one = v_setall_f32(1.0f);
1068 return v_div(one, v_sqrt(
x));
#if CV_SIMD_SCALABLE_64F
inline v_float64 v_sqrt(const v_float64& x)
{
    return vfsqrt(x, VTraits<v_float64>::vlanes());
}

inline v_float64 v_invsqrt(const v_float64& x)
{
    v_float64 one = v_setall_f64(1.0); // double literal (was 1.0f)
    return v_div(one, v_sqrt(x));
}
#endif
1084inline v_float32
v_magnitude(
const v_float32& a,
const v_float32& b)
1086 v_float32
x = vfmacc(vfmul(a, a, VTraits<v_float32>::vlanes()), b, b, VTraits<v_float32>::vlanes());
1090inline v_float32
v_sqr_magnitude(
const v_float32& a,
const v_float32& b)
1092 return v_float32(vfmacc(vfmul(a, a, VTraits<v_float32>::vlanes()), b, b, VTraits<v_float32>::vlanes()));
#if CV_SIMD_SCALABLE_64F
// sqrt(a*a + b*b); return restored (lost in the pasted text).
inline v_float64 v_magnitude(const v_float64& a, const v_float64& b)
{
    v_float64 x = vfmacc(vfmul(a, a, VTraits<v_float64>::vlanes()), b, b, VTraits<v_float64>::vlanes());
    return v_sqrt(x);
}

inline v_float64 v_sqr_magnitude(const v_float64& a, const v_float64& b)
{
    return vfmacc(vfmul(a, a, VTraits<v_float64>::vlanes()), b, b, VTraits<v_float64>::vlanes());
}
#endif
1110inline v_float32
v_fma(
const v_float32& a,
const v_float32& b,
const v_float32& c)
1112 return vfmacc(c, a, b, VTraits<v_float32>::vlanes());
1114inline v_int32
v_fma(
const v_int32& a,
const v_int32& b,
const v_int32& c)
1116 return vmacc(c, a, b, VTraits<v_float32>::vlanes());
1119inline v_float32
v_muladd(
const v_float32& a,
const v_float32& b,
const v_float32& c)
1121 return v_fma(a, b, c);
1124inline v_int32
v_muladd(
const v_int32& a,
const v_int32& b,
const v_int32& c)
1126 return v_fma(a, b, c);
#if CV_SIMD_SCALABLE_64F
inline v_float64 v_fma(const v_float64& a, const v_float64& b, const v_float64& c)
{
    return vfmacc_vv_f64m1(c, a, b, VTraits<v_float64>::vlanes());
}

inline v_float64 v_muladd(const v_float64& a, const v_float64& b, const v_float64& c)
{
    return v_fma(a, b, c);
}
#endif
1143#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
1144inline bool v_check_all(const _Tpvec& a) \
1146 return (int)vcpop(vmslt(a, 0, vl), vl) == vl; \
1148inline bool v_check_any(const _Tpvec& a) \
1150 return (int)vcpop(vmslt(a, 0, vl), vl) != 0; \
1153OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8, VTraits<v_int8>::vlanes())
1154OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int16, VTraits<v_int16>::vlanes())
1155OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int32, VTraits<v_int32>::vlanes())
1156OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int64, VTraits<v_int64>::vlanes())
1184#if CV_SIMD_SCALABLE_64F
1193#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \
1194inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \
1196 return v_sub(v_max(a, b), v_min(a, b)); \
1199OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8,
absdiff)
1200OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16,
absdiff)
1201OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32,
absdiff)
1202OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32,
absdiff)
1203#if CV_SIMD_SCALABLE_64F
1204OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64,
absdiff)
1206OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8, absdiffs)
1207OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16, absdiffs)
1209#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, width) \
1210inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
1212 return vnclipu(vreinterpret_u##width##m2(vwsub_vv(v_max(a, b), v_min(a, b), VTraits<_Tpvec>::vlanes())), 0, VTraits<_Tpvec>::vlanes()); \
1215OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8, v_uint8, 16)
1216OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16, v_uint16, 32)
1217OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32, v_uint32, 64)
1219#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \
1220inline _Tprvec v_abs(const _Tpvec& a) \
1222 return v_absdiff(a, v_setzero_##suffix()); \
1225OPENCV_HAL_IMPL_RVV_ABS(v_uint8, v_int8, s8)
1226OPENCV_HAL_IMPL_RVV_ABS(v_uint16, v_int16, s16)
1227OPENCV_HAL_IMPL_RVV_ABS(v_uint32, v_int32, s32)
1228OPENCV_HAL_IMPL_RVV_ABS(v_float32, v_float32, f32)
1229#if CV_SIMD_SCALABLE_64F
1230OPENCV_HAL_IMPL_RVV_ABS(v_float64, v_float64, f64)
1234#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \
1235inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \
1237 return v_reduce_sum(v_absdiff(a, b)); \
1240OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8,
unsigned)
1241OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8,
unsigned)
1242OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16,
unsigned)
1243OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16,
unsigned)
1244OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32,
unsigned)
1245OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32,
unsigned)
1246OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32,
float)
1250#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, vl) \
1251inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
1253 return vmerge(vmsne(mask, 0, vl), b, a, vl); \
1256OPENCV_HAL_IMPL_RVV_SELECT(v_uint8, VTraits<v_uint8>::vlanes())
1257OPENCV_HAL_IMPL_RVV_SELECT(v_uint16, VTraits<v_uint16>::vlanes())
1258OPENCV_HAL_IMPL_RVV_SELECT(v_uint32, VTraits<v_uint32>::vlanes())
1259OPENCV_HAL_IMPL_RVV_SELECT(v_int8, VTraits<v_int8>::vlanes())
1260OPENCV_HAL_IMPL_RVV_SELECT(v_int16, VTraits<v_int16>::vlanes())
1261OPENCV_HAL_IMPL_RVV_SELECT(v_int32, VTraits<v_int32>::vlanes())
1263inline v_float32
v_select(const v_float32&
mask, const v_float32& a, const v_float32& b) \
1265 return vmerge(vmfne(
mask, 0, VTraits<v_float32>::vlanes()), b, a, VTraits<v_float32>::vlanes()); \
1268#if CV_SIMD_SCALABLE_64F
1269inline v_float64
v_select(
const v_float64&
mask,
const v_float64& a,
const v_float64& b) \
1271 return vmerge(vmfne(
mask, 0, VTraits<v_float64>::vlanes()), b, a, VTraits<v_float64>::vlanes()); \
1277#define OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(_Tpvec, suffix, vl) \
1278template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
1280 return vslidedown(vmv_v_x_##suffix##m1(0, vl), a, n, vl); \
1282template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
1284 return vslideup(vmv_v_x_##suffix##m1(0, vl), a, n, vl); \
1286template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
1288template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
1290 return vslideup(vslidedown(vmv_v_x_##suffix##m1(0, vl), a, n, vl), b, VTraits<_Tpvec>::vlanes() - n, vl); \
1292template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
1294 return vslideup(vslidedown(vmv_v_x_##suffix##m1(0, vl), b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
1296template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
1297{ CV_UNUSED(b); return a; }
1299OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint8, u8, VTraits<v_uint8>::vlanes())
1300OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int8, i8, VTraits<v_int8>::vlanes())
1301OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint16, u16, VTraits<v_uint16>::vlanes())
1302OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int16, i16, VTraits<v_int16>::vlanes())
1303OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint32, u32, VTraits<v_uint32>::vlanes())
1304OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int32, i32, VTraits<v_int32>::vlanes())
1305OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint64, u64, VTraits<v_uint64>::vlanes())
1306OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int64, i64, VTraits<v_int64>::vlanes())
1308#define OPENCV_HAL_IMPL_RVV_ROTATE_FP(_Tpvec, suffix, vl) \
1309template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
1311 return vslidedown(vfmv_v_f_##suffix##m1(0, vl), a, n, vl); \
1313template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
1315 return vslideup(vfmv_v_f_##suffix##m1(0, vl), a, n, vl); \
1317template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
1319template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
1321 return vslideup(vslidedown(vfmv_v_f_##suffix##m1(0, vl), a, n, vl), b, VTraits<_Tpvec>::vlanes() - n, vl); \
1323template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
1325 return vslideup(vslidedown(vfmv_v_f_##suffix##m1(0, vl), b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
1327template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
1328{ CV_UNUSED(b); return a; }
1330OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float32, f32, VTraits<v_float32>::vlanes())
1331#if CV_SIMD_SCALABLE_64F
1332OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float64, f64, VTraits<v_float64>::vlanes())
1336inline v_float32
v_cvt_f32(
const v_int32& a)
1338 return vfcvt_f_x_v_f32m1(a, VTraits<v_float32>::vlanes());
1341#if CV_SIMD_SCALABLE_64F
1342inline v_float32
v_cvt_f32(
const v_float64& a)
1344 return vfncvt_f(vlmul_ext_f64m2(a), VTraits<v_float64>::vlanes());
1347inline v_float32
v_cvt_f32(
const v_float64& a,
const v_float64& b)
1349 return vfncvt_f(vset(vlmul_ext_f64m2(a),1,b), VTraits<v_float32>::vlanes());
1352inline v_float64
v_cvt_f64(
const v_int32& a)
1354 return vget_f64m1(vfwcvt_f(a, VTraits<v_int32>::vlanes()), 0);
1359 return vget_f64m1(vfwcvt_f(a, VTraits<v_int32>::vlanes()), 1);
1362inline v_float64
v_cvt_f64(
const v_float32& a)
1364 return vget_f64m1(vfwcvt_f(a, VTraits<v_float32>::vlanes()), 0);
1369 return vget_f64m1(vfwcvt_f(a, VTraits<v_float32>::vlanes()), 1);
1372inline v_float64
v_cvt_f64(
const v_int64& a)
1374 return vfcvt_f(a, VTraits<v_int64>::vlanes());
1380#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \
1381template<int s = 0> inline _Tpvec v_broadcast_element(_Tpvec v, int i = s) \
1383 return v_setall_##suffix(v_extract_n(v, i)); \
1385inline _Tpvec v_broadcast_highest(_Tpvec v) \
1387 return v_setall_##suffix(v_extract_n(v, VTraits<_Tpvec>::vlanes()-1)); \
1390OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32, u32)
1391OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32, s32)
1392OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32, f32)
1396#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, width) \
1397inline _Tpvec v_reverse(const _Tpvec& a) \
1399 vuint##width##m1_t vidx = vrsub(vid_v_u##width##m1(VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()-1, VTraits<_Tpvec>::vlanes()); \
1400 return vrgather(a, vidx, VTraits<_Tpvec>::vlanes()); \
1402OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8, 8)
1403OPENCV_HAL_IMPL_RVV_REVERSE(v_int8, 8)
1404OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16, 16)
1405OPENCV_HAL_IMPL_RVV_REVERSE(v_int16, 16)
1406OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32, 32)
1407OPENCV_HAL_IMPL_RVV_REVERSE(v_int32, 32)
1408OPENCV_HAL_IMPL_RVV_REVERSE(v_float32, 32)
1409OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64, 64)
1410OPENCV_HAL_IMPL_RVV_REVERSE(v_int64, 64)
1411#if CV_SIMD_SCALABLE_64F
1412OPENCV_HAL_IMPL_RVV_REVERSE(v_float64, 64)
1417#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tp, _Tpwvec, _Tpwvec_m2, _Tpvec, width, suffix, suffix2, cvt) \
1418inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
1420 _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1421 b0 = vget_##suffix##m1(temp, 0); \
1422 b1 = vget_##suffix##m1(temp, 1); \
1424inline _Tpwvec v_expand_low(const _Tpvec& a) \
1426 _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1427 return vget_##suffix##m1(temp, 0); \
1429inline _Tpwvec v_expand_high(const _Tpvec& a) \
1431 _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1432 return vget_##suffix##m1(temp, 1); \
1434inline _Tpwvec v_load_expand(const _Tp* ptr) \
1436 return cvt(vle##width##_v_##suffix2##mf2(ptr, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
1439OPENCV_HAL_IMPL_RVV_EXPAND(
uchar, v_uint16, vuint16m2_t, v_uint8, 8, u16, u8, vwcvtu_x)
1440OPENCV_HAL_IMPL_RVV_EXPAND(
schar, v_int16, vint16m2_t, v_int8, 8, i16, i8, vwcvt_x)
1441OPENCV_HAL_IMPL_RVV_EXPAND(
ushort, v_uint32, vuint32m2_t, v_uint16, 16, u32, u16, vwcvtu_x)
1442OPENCV_HAL_IMPL_RVV_EXPAND(
short, v_int32, vint32m2_t, v_int16, 16, i32, i16, vwcvt_x)
1443OPENCV_HAL_IMPL_RVV_EXPAND(
uint, v_uint64, vuint64m2_t, v_uint32, 32, u64, u32, vwcvtu_x)
1444OPENCV_HAL_IMPL_RVV_EXPAND(
int, v_int64, vint64m2_t, v_int32, 32, i64, i32, vwcvt_x)
1448 return vwcvtu_x(vwcvtu_x(vle8_v_u8mf4(ptr, VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes());
1453 return vwcvt_x(vwcvt_x(vle8_v_i8mf4(ptr, VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes());
1456#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, hwidth, hsuffix, suffix, rshr, shr) \
1457inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \
1459 return shr(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, VTraits<_Tpvec>::vlanes()); \
1461inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \
1463 vse##hwidth##_v_##hsuffix##mf2(ptr, shr(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_wTpvec>::vlanes()); \
1465template<int n = 0> inline \
1466_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b, int N = n) \
1468 return rshr(vset(vlmul_ext_##suffix##m2(a), 1, b), N, VTraits<_Tpvec>::vlanes()); \
1470template<int n = 0> inline \
1471void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a, int N = n) \
1473 vse##hwidth##_v_##hsuffix##mf2(ptr, rshr(a, N, VTraits<_Tpvec>::vlanes()), VTraits<_wTpvec>::vlanes()); \
1476OPENCV_HAL_IMPL_RVV_PACK(v_uint8,
uchar, v_uint16, 8, u8, u16, vnclipu, vnclipu)
1477OPENCV_HAL_IMPL_RVV_PACK(v_int8,
schar, v_int16, 8, i8, i16, vnclip, vnclip)
1478OPENCV_HAL_IMPL_RVV_PACK(v_uint16,
ushort, v_uint32, 16, u16, u32, vnclipu, vnclipu)
1479OPENCV_HAL_IMPL_RVV_PACK(v_int16,
short, v_int32, 16, i16, i32, vnclip, vnclip)
1480OPENCV_HAL_IMPL_RVV_PACK(v_uint32,
unsigned, v_uint64, 32, u32, u64, vnclipu, vnsrl)
1481OPENCV_HAL_IMPL_RVV_PACK(v_int32,
int, v_int64, 32, i32, i64, vnclip, vnsra)
1483#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, cast, hvl, vl) \
1484inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
1486 return vnclipu(cast(vmax(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, vl)), 0, vl); \
1488inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \
1490 vse##hwidth##_v_##hsuffix##mf2(ptr, vnclipu(vreinterpret_u##width##m1(vmax(a, 0, vl)), 0, vl), hvl); \
1492template<int N = 0> inline \
1493_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b, int n = N) \
1495 return vnclipu(cast(vmax(vset(vlmul_ext_##suffix##m2(a), 1, b), 0, vl)), n, vl); \
1497template<int N = 0> inline \
1498void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a, int n = N) \
1500 vse##hwidth##_v_##hsuffix##mf2(ptr, vnclipu(vreinterpret_u##width##m1(vmax(a, 0, vl)), n, vl), hvl); \
1503OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8,
uchar, v_int16,
short, 8, 16, u8, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, VTraits<v_int16>::vlanes(), VTraits<v_uint8>::vlanes())
1504OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16,
ushort, v_int32,
int, 16, 32, u16, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, VTraits<v_int32>::vlanes(), VTraits<v_uint16>::vlanes())
1514#define OPENCV_HAL_IMPL_RVV_ZIP(_Tpvec, _wTpvec, suffix, width, width2, convert2um2, convert2um1) \
1515inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) { \
1516 _wTpvec temp = vreinterpret_##suffix##m2(convert2um2( \
1517 vor(vzext_vf2(convert2um1(a0), VTraits<_Tpvec>::vlanes()*2), \
1518 vreinterpret_u##width2##m2(vslide1up(vreinterpret_u##width##m2(vzext_vf2(convert2um1(a1), VTraits<_Tpvec>::vlanes()*2)), 0, VTraits<_Tpvec>::vlanes()*2)), \
1519 VTraits<_Tpvec>::vlanes()))); \
1520 b0 = vget_##suffix##m1(temp, 0); \
1521 b1 = vget_##suffix##m1(temp, 1); \
1523OPENCV_HAL_IMPL_RVV_ZIP(v_uint8, vuint8m2_t, u8, 8, 16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
1524OPENCV_HAL_IMPL_RVV_ZIP(v_int8, vint8m2_t, i8, 8, 16, vreinterpret_u8m2, vreinterpret_u8m1)
1525OPENCV_HAL_IMPL_RVV_ZIP(v_uint16, vuint16m2_t, u16, 16, 32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
1526OPENCV_HAL_IMPL_RVV_ZIP(v_int16, vint16m2_t, i16, 16, 32, vreinterpret_u16m2, vreinterpret_u16m1)
1527OPENCV_HAL_IMPL_RVV_ZIP(v_uint32, vuint32m2_t, u32, 32, 64, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
1528OPENCV_HAL_IMPL_RVV_ZIP(v_int32, vint32m2_t, i32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
1529OPENCV_HAL_IMPL_RVV_ZIP(v_float32, vfloat32m2_t, f32, 32, 64, vreinterpret_u32m2, vreinterpret_u32m1)
#if CV_SIMD_SCALABLE_64F
// float64 zip: build 16-bit interleave indices with the zext/slide1up/OR
// trick, concatenate a0|a1 into an m2 register, then gather. The inner
// preprocessor split (vcreate on new intrinsics vs vlmul_ext + vset on
// old ones) was lost in the pasted text and is restored here —
// NOTE(review): confirm the guard expression against upstream.
inline void v_zip(const v_float64& a0, const v_float64& a1, v_float64& b0, v_float64& b1) {
    vuint16mf4_t idx0 = vid_v_u16mf4(VTraits<v_float64>::vlanes());
    vuint16mf4_t idx1 = vadd(idx0, VTraits<v_float64>::vlanes(), VTraits<v_float64>::vlanes());
    vuint16mf2_t idx = vreinterpret_u16mf2((
        vor(vzext_vf2(idx0, VTraits<v_float64>::vlanes()),
            vreinterpret_u32mf2(vslide1up(vreinterpret_u16mf2(vzext_vf2(idx1, VTraits<v_float64>::vlanes())), 0, VTraits<v_uint32>::vlanes())),
            VTraits<v_uint32>::vlanes())));
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>11999
    vfloat64m2_t temp = __riscv_vcreate_v_f64m1_f64m2(a0, a1);
#else
    vfloat64m2_t temp = vlmul_ext_f64m2(a0);
    temp = vset(temp, 1, a1);
#endif
    temp = vrgatherei16(temp, idx, VTraits<v_float64>::vlanes()*2);
    b0 = vget_f64m1(temp, 0);
    b1 = vget_f64m1(temp, 1);
}
#endif
1551#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, width) \
1552inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
1554 return vslideup(a, b, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes());\
1556inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
1559 vslidedown(a, a, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes()), \
1560 vslidedown(b, b, VTraits<_Tpvec>::vlanes()/2, VTraits<_Tpvec>::vlanes()), \
1561 VTraits<_Tpvec>::vlanes()/2, \
1562 VTraits<_Tpvec>::vlanes()); \
1564inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
1566 c = v_combine_low(a, b); \
1567 d = v_combine_high(a, b); \
1570OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint8, 8)
1571OPENCV_HAL_IMPL_RVV_UNPACKS(v_int8, 8)
1572OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint16, 16)
1573OPENCV_HAL_IMPL_RVV_UNPACKS(v_int16, 16)
1574OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint32, 32)
1575OPENCV_HAL_IMPL_RVV_UNPACKS(v_int32, 32)
1576OPENCV_HAL_IMPL_RVV_UNPACKS(v_float32, 32)
1577#if CV_SIMD_SCALABLE_64F
1578OPENCV_HAL_IMPL_RVV_UNPACKS(v_float64, 64)
1581#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width, hwidth, vl) \
1582inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
1584 a = vlse##width##_v_##suffix##m1(ptr , sizeof(_Tp)*2, VTraits<v_##_Tpvec>::vlanes()); \
1585 b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*2, VTraits<v_##_Tpvec>::vlanes()); \
1587inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
1589 a = vlse##width##_v_##suffix##m1(ptr , sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
1590 b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
1591 c = vlse##width##_v_##suffix##m1(ptr+2, sizeof(_Tp)*3, VTraits<v_##_Tpvec>::vlanes()); \
1593inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
1594 v_##_Tpvec& c, v_##_Tpvec& d) \
1597 a = vlse##width##_v_##suffix##m1(ptr , sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
1598 b = vlse##width##_v_##suffix##m1(ptr+1, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
1599 c = vlse##width##_v_##suffix##m1(ptr+2, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
1600 d = vlse##width##_v_##suffix##m1(ptr+3, sizeof(_Tp)*4, VTraits<v_##_Tpvec>::vlanes()); \
1602inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
1603 hal::StoreMode =hal::STORE_UNALIGNED) \
1605 vsse##width(ptr, sizeof(_Tp)*2, a, VTraits<v_##_Tpvec>::vlanes()); \
1606 vsse##width(ptr+1, sizeof(_Tp)*2, b, VTraits<v_##_Tpvec>::vlanes()); \
1608inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
1609 const v_##_Tpvec& c, hal::StoreMode =hal::STORE_UNALIGNED) \
1611 vsse##width(ptr, sizeof(_Tp)*3, a, VTraits<v_##_Tpvec>::vlanes()); \
1612 vsse##width(ptr+1, sizeof(_Tp)*3, b, VTraits<v_##_Tpvec>::vlanes()); \
1613 vsse##width(ptr+2, sizeof(_Tp)*3, c, VTraits<v_##_Tpvec>::vlanes()); \
1615inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
1616 const v_##_Tpvec& c, const v_##_Tpvec& d, \
1617 hal::StoreMode =hal::STORE_UNALIGNED ) \
1619 vsse##width(ptr, sizeof(_Tp)*4, a, VTraits<v_##_Tpvec>::vlanes()); \
1620 vsse##width(ptr+1, sizeof(_Tp)*4, b, VTraits<v_##_Tpvec>::vlanes()); \
1621 vsse##width(ptr+2, sizeof(_Tp)*4, c, VTraits<v_##_Tpvec>::vlanes()); \
1622 vsse##width(ptr+3, sizeof(_Tp)*4, d, VTraits<v_##_Tpvec>::vlanes()); \
1625OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8,
uchar, u8, 8, 4, VTraits<v_uint8>::vlanes())
1626OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8,
schar, i8, 8, 4, VTraits<v_int8>::vlanes())
1627OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16,
ushort, u16, 16, 8, VTraits<v_uint16>::vlanes())
1628OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16,
short, i16, 16, 8, VTraits<v_int16>::vlanes())
1629OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32,
unsigned, u32, 32, 16, VTraits<v_uint32>::vlanes())
1630OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32,
int, i32, 32, 16, VTraits<v_int32>::vlanes())
1631OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32,
float, f32, 32, 16, VTraits<v_float32>::vlanes())
1632OPENCV_HAL_IMPL_RVV_INTERLEAVED(
uint64,
uint64, u64, 64, 32, VTraits<v_uint64>::vlanes())
1633OPENCV_HAL_IMPL_RVV_INTERLEAVED(
int64,
int64, i64, 64, 32, VTraits<v_int64>::vlanes())
1634#if CV_SIMD_SCALABLE_64F
1635OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64,
double, f64, 64, 32, VTraits<v_float64>::vlanes())
1638static uint64_t idx_interleave_pairs[] = { \
1639 0x0705060403010200, 0x0f0d0e0c0b090a08, 0x1715161413111210, 0x1f1d1e1c1b191a18, \
1640 0x2725262423212220, 0x2f2d2e2c2b292a28, 0x3735363433313230, 0x3f3d3e3c3b393a38, \
1641 0x4745464443414240, 0x4f4d4e4c4b494a48, 0x5755565453515250, 0x5f5d5e5c5b595a58, \
1642 0x6765666463616260, 0x6f6d6e6c6b696a68, 0x7775767473717270, 0x7f7d7e7c7b797a78};
1644static uint64_t idx_interleave_quads[] = { \
1645 0x0703060205010400, 0x0f0b0e0a0d090c08, 0x1713161215111410, 0x1f1b1e1a1d191c18, \
1646 0x2723262225212420, 0x2f2b2e2a2d292c28, 0x3733363235313430, 0x3f3b3e3a3d393c38, \
1647 0x4743464245414440, 0x4f4b4e4a4d494c48, 0x5753565255515450, 0x5f5b5e5a5d595c58, \
1648 0x6763666265616460, 0x6f6b6e6a6d696c68, 0x7773767275717470, 0x7f7b7e7a7d797c78};
1650#define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(_Tpvec, func) \
1651inline _Tpvec v_interleave_##func(const _Tpvec& vec) { \
1652 CV_CheckLE(VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, "RVV implementation only supports VLEN in the range [128, 1024]"); \
1653 vuint8m1_t vidx = vundefined_u8m1();\
1654 vidx = vreinterpret_u8m1(vle64_v_u64m1(idx_interleave_##func, 16)); \
1655 return vrgather(vec, vidx, VTraits<v_uint8>::vlanes()); \
1657OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, pairs)
1658OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, pairs)
1659OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, quads)
1660OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, quads)
1662#define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(_Tpvec, width, vzext_vfx, func) \
1663inline _Tpvec v_interleave_##func(const _Tpvec& vec) { \
1664 CV_CheckLE(VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, "RVV implementation only supports VLEN in the range [128, 1024]"); \
1665 vuint##width##m1_t vidx = vundefined_u##width##m1();\
1666 vidx = vget_u##width##m1(vzext_vfx(vreinterpret_u8m1(vle64_v_u64m1(idx_interleave_##func, 16)), VTraits<v_uint8>::vlanes()), 0); \
1667 return vrgather(vec, vidx, VTraits<_Tpvec>::vlanes()); \
1670OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, vzext_vf2, pairs)
1671OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, vzext_vf2, pairs)
1672OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, vzext_vf4, pairs)
1673OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, vzext_vf4, pairs)
1674OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, vzext_vf4, pairs)
1676OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, vzext_vf2, quads)
1677OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, vzext_vf2, quads)
1678OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, vzext_vf4, quads)
1679OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, vzext_vf4, quads)
1680OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, vzext_vf4, quads)
// Number of set bits for every possible byte value; used as an indexed-load
// lookup table by v_popcount below.
static const unsigned char popCountTable[256] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
1702#define OPENCV_HAL_IMPL_RVV_HADD(_Tpvec, _Tpvec2, _Tm2, width, width2, suffix, add) \
1703static inline _Tpvec2 v_hadd(_Tpvec a) { \
1704 vuint##width2##m1_t oneX2 = vmv_v_x_u##width2##m1(1, VTraits<v_uint##width2>::vlanes()); \
1705 vuint##width##m1_t one = vreinterpret_u##width##m1(oneX2); \
1706 _Tm2 res = add(a, vslide1down(a, 0, VTraits<v_uint##width>::vlanes()), VTraits<v_uint##width>::vlanes()); \
1707 return vget_##suffix##m1(vcompress(vmseq(one, 1, VTraits<v_uint##width>::vlanes()), res, res, VTraits<v_uint##width>::vlanes()), 0); \
1709OPENCV_HAL_IMPL_RVV_HADD(v_uint8, v_uint16, vuint16m2_t, 8, 16, u16, vwaddu_vv)
1710OPENCV_HAL_IMPL_RVV_HADD(v_uint16, v_uint32, vuint32m2_t, 16, 32, u32, vwaddu_vv)
1711OPENCV_HAL_IMPL_RVV_HADD(v_uint32, v_uint64, vuint64m2_t, 32, 64, u64, vwaddu_vv)
1712OPENCV_HAL_IMPL_RVV_HADD(v_int8, v_int16, vint16m2_t, 8, 16, i16, vwadd_vv)
1713OPENCV_HAL_IMPL_RVV_HADD(v_int16, v_int32, vint32m2_t, 16, 32, i32, vwadd_vv)
1714OPENCV_HAL_IMPL_RVV_HADD(v_int32, v_int64, vint64m2_t, 32, 64, i64, vwadd_vv)
1716OPENCV_HAL_IMPL_RVV_HADD(vint32m2_t, v_int32, vint32m2_t, 16, 32, i32, vadd)
1717OPENCV_HAL_IMPL_RVV_HADD(vint64m2_t, v_int64, vint64m2_t, 32, 64, i64, vadd)
1721 return vloxei8(popCountTable, a, VTraits<v_uint8>::vlanes());
1725 return v_hadd(
v_popcount(vreinterpret_u8m1(a)));
1729 return v_hadd(v_hadd(
v_popcount(vreinterpret_u8m1(a))));
1733 return v_hadd(v_hadd(v_hadd(
v_popcount(vreinterpret_u8m1(a)))));
1751 return v_popcount(v_reinterpret_as_u64(vmax(a, v_sub(v_setzero_s64(), a), VTraits<v_int64>::vlanes())));
1756#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec) \
1757inline int v_signmask(const _Tpvec& a) \
1759 uint8_t ans[4] = {0}; \
1760 vsm(ans, vmslt(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
1761 return *(reinterpret_cast<int*>(ans)) & (((__int128_t)1 << VTraits<_Tpvec>::vlanes()) - 1); \
1763inline int v_scan_forward(const _Tpvec& a) \
1765 return (int)vfirst(vmslt(a, 0, VTraits<_Tpvec>::vlanes()), VTraits<_Tpvec>::vlanes()); \
1768OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8)
1769OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16)
1770OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32)
1771OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64)
1774{
return v_signmask(v_reinterpret_as_s8(a)); }
1776{
return v_signmask(v_reinterpret_as_s16(a)); }
1778{
return v_signmask(v_reinterpret_as_s32(a)); }
1780{
return v_signmask(v_reinterpret_as_s32(a)); }
1782{
return v_signmask(v_reinterpret_as_s64(a)); }
1783#if CV_SIMD_SCALABLE_64F
1785{
return v_signmask(v_reinterpret_as_s64(a)); }
1799#if CV_SIMD_SCALABLE_64F
1807#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, v_trunc) \
1808inline _Tpvec v_pack_triplets(const _Tpvec& vec) { \
1809 size_t vl = __cv_rvv_e8m1_nlanes; \
1810 vuint32m1_t one = vmv_v_x_u32m1(1, __cv_rvv_e32m1_nlanes); \
1811 vuint8m1_t zero = vmv_v_x_u8m1(0, vl); \
1812 vuint8m1_t mask = vreinterpret_u8m1(one); \
1813 return vcompress(vmseq(v_trunc(vslideup(zero, mask, 3, vl)), 0, vl), vec, vec, VTraits<_Tpvec>::vlanes()); \
1816OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8, OPENCV_HAL_NOP)
1817OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8, OPENCV_HAL_NOP)
1818OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16, vlmul_trunc_u8mf2)
1819OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16, vlmul_trunc_u8mf2)
1820OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32, vlmul_trunc_u8mf4)
1821OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32, vlmul_trunc_u8mf4)
1822OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32, vlmul_trunc_u8mf4)
1823OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint64, vlmul_trunc_u8mf8)
1824OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int64, vlmul_trunc_u8mf8)
1825#if CV_SIMD_SCALABLE_64F
1826OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, vlmul_trunc_u8mf8)
1832#if defined(__riscv_zfh) && __riscv_zfh
1835 return vfwcvt_f(vle16_v_f16mf2((_Float16*)ptr, VTraits<v_float32>::vlanes()) ,VTraits<v_float32>::vlanes());;
1838inline void v_pack_store(hfloat* ptr,
const v_float32& v)
1840 vse16_v_f16mf2((_Float16*)ptr, vfncvt_f_f_w_f16mf2(v, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
1846 for(
int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) buf[i] = (
float)ptr[i];
1850inline void v_pack_store(hfloat* ptr,
const v_float32& v)
1854 for(
int i = 0; i < VTraits<v_float32>::vlanes(); i++ ) ptr[i] = hfloat(buf[i]);
1858inline v_int32
v_round(
const v_float32& a)
1861 return vfcvt_x(a, VTraits<v_float32>::vlanes());
1864inline v_int32
v_floor(
const v_float32& a)
1866 return vfcvt_x(vfsub(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
1870inline v_int32
v_ceil(
const v_float32& a)
1872 return vfcvt_x(vfadd(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()), VTraits<v_float32>::vlanes());
1875inline v_int32
v_trunc(
const v_float32& a)
1877 return vfcvt_rtz_x(a, VTraits<v_float32>::vlanes());
#if CV_SIMD_SCALABLE_64F
// f64 -> i32 conversions: the f64 m1 register is widened to m2 so the
// narrowing convert (vfncvt) yields a full i32 m1 result; the two-argument
// v_round packs a (low) and b (high) together first.
inline v_int32 v_round(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(a), VTraits<v_float32>::vlanes());
}

inline v_int32 v_round(const v_float64& a, const v_float64& b)
{
    return vfncvt_x(vset(vlmul_ext_f64m2(a), 1, b), VTraits<v_float32>::vlanes());
}

inline v_int32 v_floor(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(vfsub(a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())), VTraits<v_float32>::vlanes());
}

inline v_int32 v_ceil(const v_float64& a)
{
    return vfncvt_x(vlmul_ext_f64m2(vfadd(a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())), VTraits<v_float32>::vlanes());
}

inline v_int32 v_trunc(const v_float64& a)
{
    return vfncvt_rtz_x(vlmul_ext_f64m2(a), VTraits<v_float32>::vlanes());
}
#endif
1911inline v_int32
v_dotprod(
const v_int16& a,
const v_int16& b)
1913 vint32m2_t temp1 = vwmul(a, b, VTraits<v_int16>::vlanes());
1914 return v_hadd(temp1);
1917inline v_int32
v_dotprod(
const v_int16& a,
const v_int16& b,
const v_int32& c)
1919 vint32m2_t temp1 = vwmul(a, b, VTraits<v_int16>::vlanes());
1920 return vadd(v_hadd(temp1), c, VTraits<v_int32>::vlanes());
1924inline v_int64
v_dotprod(
const v_int32& a,
const v_int32& b)
1926 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
1927 vuint32m1_t one32 = vreinterpret_u32m1(one64); \
1928 vbool32_t
mask = vmseq(one32, 1, VTraits<v_uint32>::vlanes()); \
1929 vint64m2_t temp1 = vwmul(a, b, VTraits<v_int32>::vlanes()); \
1930 vint64m2_t temp2 = vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
1931 vint64m2_t res = vadd(temp1, temp2, VTraits<v_int32>::vlanes());
1932 res = vcompress(
mask, res, res, VTraits<v_int32>::vlanes()); \
1933 return vlmul_trunc_i64m1(res); \
1935inline v_int64
v_dotprod(
const v_int32& a,
const v_int32& b,
const v_int64& c)
1937 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
1938 vuint32m1_t one32 = vreinterpret_u32m1(one64); \
1939 vbool32_t
mask = vmseq(one32, 1, VTraits<v_uint32>::vlanes()); \
1940 vint64m2_t temp1 = vwmul(a, b, VTraits<v_int32>::vlanes()); \
1941 vint64m2_t temp2 = vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
1942 vint64m2_t res = vadd(temp1, temp2, VTraits<v_int32>::vlanes());
1943 res = vcompress(
mask, res, res, VTraits<v_int32>::vlanes()); \
1944 return vadd(vlmul_trunc_i64m1(res), c, VTraits<v_int64>::vlanes()); \
1950 vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes()); \
1951 vuint8m1_t one8 = vreinterpret_u8m1(one32); \
1952 vbool8_t
mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes()); \
1953 vuint16m2_t t0 = vwmulu(a, b, VTraits<v_uint8>::vlanes()); \
1954 vuint16m2_t t1= vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
1955 vuint16m2_t t2= vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
1956 vuint16m2_t t3= vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
1957 vuint32m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()), VTraits<v_uint8>::vlanes());
1958 res = vcompress(
mask, res, res, VTraits<v_uint8>::vlanes()); \
1959 return vlmul_trunc_u32m1(res);
1965 vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes()); \
1966 vuint8m1_t one8 = vreinterpret_u8m1(one32); \
1967 vbool8_t
mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes()); \
1968 vuint16m2_t t0 = vwmulu(a, b, VTraits<v_uint8>::vlanes()); \
1969 vuint16m2_t t1= vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
1970 vuint16m2_t t2= vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
1971 vuint16m2_t t3= vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
1972 vuint32m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()), VTraits<v_uint8>::vlanes());
1973 res = vcompress(
mask, res, res, VTraits<v_uint8>::vlanes()); \
1974 return vadd(vlmul_trunc_u32m1(res), c, VTraits<v_uint8>::vlanes());
1979 vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes()); \
1980 vuint8m1_t one8 = vreinterpret_u8m1(one32); \
1981 vbool8_t
mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes()); \
1982 vint16m2_t t0 = vwmul(a, b, VTraits<v_int8>::vlanes()); \
1983 vint16m2_t t1= vslide1down(t0, 0, VTraits<v_int8>::vlanes());
1984 vint16m2_t t2= vslide1down(t1, 0, VTraits<v_int8>::vlanes());
1985 vint16m2_t t3= vslide1down(t2, 0, VTraits<v_int8>::vlanes());
1986 vint32m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()), VTraits<v_int8>::vlanes());
1987 res = vcompress(
mask, res, res, VTraits<v_int8>::vlanes()); \
1988 return vlmul_trunc_i32m1(res);
1994 vuint32m1_t one32 = vmv_v_x_u32m1(1, VTraits<v_uint32>::vlanes()); \
1995 vuint8m1_t one8 = vreinterpret_u8m1(one32); \
1996 vbool8_t
mask = vmseq(one8, 1, VTraits<v_uint8>::vlanes()); \
1997 vint16m2_t t0 = vwmul(a, b, VTraits<v_int8>::vlanes()); \
1998 vint16m2_t t1= vslide1down(t0, 0, VTraits<v_int8>::vlanes());
1999 vint16m2_t t2= vslide1down(t1, 0, VTraits<v_int8>::vlanes());
2000 vint16m2_t t3= vslide1down(t2, 0, VTraits<v_int8>::vlanes());
2001 vint32m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()), VTraits<v_int8>::vlanes());
2002 res = vcompress(
mask, res, res, VTraits<v_int8>::vlanes()); \
2003 return vadd(vlmul_trunc_i32m1(res), c, VTraits<v_int8>::vlanes());
2010 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
2011 vuint16m1_t one16 = vreinterpret_u16m1(one64); \
2012 vbool16_t
mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes()); \
2013 vuint32m2_t t0 = vwmulu(a, b, VTraits<v_uint16>::vlanes()); \
2014 vuint32m2_t t1= vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
2015 vuint32m2_t t2= vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
2016 vuint32m2_t t3= vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
2017 vuint64m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()), VTraits<v_uint16>::vlanes());
2018 res = vcompress(
mask, res, res, VTraits<v_uint16>::vlanes()); \
2019 return vlmul_trunc_u64m1(res);
2021inline v_uint64
v_dotprod_expand(
const v_uint16& a,
const v_uint16& b,
const v_uint64& c)
2023 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
2024 vuint16m1_t one16 = vreinterpret_u16m1(one64); \
2025 vbool16_t
mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes()); \
2026 vuint32m2_t t0 = vwmulu(a, b, VTraits<v_uint16>::vlanes()); \
2027 vuint32m2_t t1= vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
2028 vuint32m2_t t2= vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
2029 vuint32m2_t t3= vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
2030 vuint64m4_t res = vadd(vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()), vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()), VTraits<v_uint16>::vlanes());
2031 res = vcompress(
mask, res, res, VTraits<v_uint16>::vlanes()); \
2032 return vadd(vlmul_trunc_u64m1(res), c, VTraits<v_uint16>::vlanes());
2037 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
2038 vuint16m1_t one16 = vreinterpret_u16m1(one64); \
2039 vbool16_t
mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes()); \
2040 vint32m2_t t0 = vwmul(a, b, VTraits<v_int16>::vlanes()); \
2041 vint32m2_t t1= vslide1down(t0, 0, VTraits<v_int16>::vlanes());
2042 vint32m2_t t2= vslide1down(t1, 0, VTraits<v_int16>::vlanes());
2043 vint32m2_t t3= vslide1down(t2, 0, VTraits<v_int16>::vlanes());
2044 vint64m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()), VTraits<v_int16>::vlanes());
2045 res = vcompress(
mask, res, res, VTraits<v_int16>::vlanes()); \
2046 return vlmul_trunc_i64m1(res);
2051 vuint64m1_t one64 = vmv_v_x_u64m1(1, VTraits<v_uint64>::vlanes()); \
2052 vuint16m1_t one16 = vreinterpret_u16m1(one64); \
2053 vbool16_t
mask = vmseq(one16, 1, VTraits<v_uint16>::vlanes()); \
2054 vint32m2_t t0 = vwmul(a, b, VTraits<v_int16>::vlanes()); \
2055 vint32m2_t t1= vslide1down(t0, 0, VTraits<v_int16>::vlanes());
2056 vint32m2_t t2= vslide1down(t1, 0, VTraits<v_int16>::vlanes());
2057 vint32m2_t t3= vslide1down(t2, 0, VTraits<v_int16>::vlanes());
2058 vint64m4_t res = vadd(vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()), vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()), VTraits<v_int16>::vlanes());
2059 res = vcompress(
mask, res, res, VTraits<v_int16>::vlanes()); \
2060 return vadd(vlmul_trunc_i64m1(res), c, VTraits<v_int16>::vlanes());
2064#if CV_SIMD_SCALABLE_64F
2076 v_int32 zero = v_setzero_s32();
2077 return vredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes());
2079inline v_int32
v_dotprod_fast(
const v_int16& a,
const v_int16& b,
const v_int32& c)
2081 v_int32 zero = v_setzero_s32();
2082 return vredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), vredsum(zero, c, zero, VTraits<v_int32>::vlanes()), VTraits<v_int16>::vlanes());
2088 v_int64 zero = v_setzero_s64();
2089 return vredsum(zero, vwmul(a, b, VTraits<v_int32>::vlanes()), zero, VTraits<v_int32>::vlanes());
2091inline v_int64
v_dotprod_fast(
const v_int32& a,
const v_int32& b,
const v_int64& c)
2093 v_int64 zero = v_setzero_s64();
2094 return vadd(vredsum(zero, vwmul(a, b, VTraits<v_int32>::vlanes()), zero, VTraits<v_int32>::vlanes()) , vredsum(zero, c, zero, VTraits<v_int64>::vlanes()), VTraits<v_int64>::vlanes());
2101 v_uint32 zero = v_setzero_u32();
2102 return vwredsumu(zero, vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero, VTraits<v_uint8>::vlanes());
2106 v_uint32 zero = v_setzero_u32();
2107 return vadd(vwredsumu(zero, vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero, VTraits<v_uint8>::vlanes()) , vredsum(zero, c, zero, VTraits<v_uint32>::vlanes()), VTraits<v_uint32>::vlanes());
2111 v_int32 zero = v_setzero_s32();
2112 return vwredsum(zero, vwmul(a, b, VTraits<v_int8>::vlanes()), zero, VTraits<v_int8>::vlanes());
2116 v_int32 zero = v_setzero_s32();
2117 return vadd(vwredsum(zero, vwmul(a, b, VTraits<v_int8>::vlanes()), zero, VTraits<v_int8>::vlanes()) , vredsum(zero, c, zero, VTraits<v_int32>::vlanes()), VTraits<v_int32>::vlanes());
2123 v_uint64 zero = v_setzero_u64();
2124 return vwredsumu(zero, vwmulu(a, b, VTraits<v_uint16>::vlanes()), zero, VTraits<v_uint16>::vlanes());
2128 v_uint64 zero = v_setzero_u64();
2129 return vadd(vwredsumu(zero, vwmulu(a, b, VTraits<v_uint16>::vlanes()), zero, VTraits<v_uint16>::vlanes()), vredsum(zero, c, zero, VTraits<v_uint64>::vlanes()), VTraits<v_uint64>::vlanes());
2133 v_int64 zero = v_setzero_s64();
2134 return vwredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes());
2138 v_int64 zero = v_setzero_s64();
2139 return vadd(vwredsum(zero, vwmul(a, b, VTraits<v_int16>::vlanes()), zero, VTraits<v_int16>::vlanes()), vredsum(zero, c, zero, VTraits<v_int64>::vlanes()), VTraits<v_int64>::vlanes());
2143#if CV_SIMD_SCALABLE_64F
2151inline v_float32
v_matmul(
const v_float32& v,
const v_float32& m0,
2152 const v_float32& m1,
const v_float32& m2,
2153 const v_float32& m3)
2156 res = vfmul_vf_f32m1(m0,
v_extract_n(v, 0), VTraits<v_float32>::vlanes());
2157 res = vfmacc_vf_f32m1(res,
v_extract_n(v, 1), m1, VTraits<v_float32>::vlanes());
2158 res = vfmacc_vf_f32m1(res,
v_extract_n(v, 2), m2, VTraits<v_float32>::vlanes());
2159 res = vfmacc_vf_f32m1(res,
v_extract_n(v, 3), m3, VTraits<v_float32>::vlanes());
2164inline v_float32
v_matmuladd(
const v_float32& v,
const v_float32& m0,
2165 const v_float32& m1,
const v_float32& m2,
2168 vfloat32m1_t res = vfmul_vf_f32m1(m0,
v_extract_n(v,0), VTraits<v_float32>::vlanes());
2169 res = vfmacc_vf_f32m1(res,
v_extract_n(v,1), m1, VTraits<v_float32>::vlanes());
2170 res = vfmacc_vf_f32m1(res,
v_extract_n(v,2), m2, VTraits<v_float32>::vlanes());
2171 return vfadd(res, a, VTraits<v_float32>::vlanes());
2176CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst)
Calculates the per-element absolute difference between two arrays or between an array and a scalar.
CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask=noArray(), int dtype=-1)
Calculates the per-element sum of two arrays or an array and a scalar.
int int idx1
Definition core_c.h:654
const int * idx
Definition core_c.h:668
const CvArr CvArr * x
Definition core_c.h:1195
int idx0
Definition core_c.h:652
signed char schar
Definition interface.h:48
uint32_t uint
Definition interface.h:42
unsigned char uchar
Definition interface.h:51
int64_t int64
Definition interface.h:61
unsigned short ushort
Definition interface.h:52
uint64_t uint64
Definition interface.h:62
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero.
Definition intrin_cpp.hpp:1433
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication.
Definition intrin_cpp.hpp:3193
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements.
Definition intrin_cpp.hpp:2424
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask.
Definition intrin_cpp.hpp:1392
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors.
Definition intrin_cpp.hpp:1554
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory.
Definition intrin_cpp.hpp:2190
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand.
Definition intrin_cpp.hpp:1142
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements.
Definition intrin_cpp.hpp:2462
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements.
Definition intrin_cpp.hpp:2449
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements.
Definition intrin_cpp.hpp:1077
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index.
Definition intrin_cpp.hpp:1409
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand.
Definition intrin_cpp.hpp:1872
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma.
Definition intrin_cpp.hpp:1057
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude.
Definition intrin_cpp.hpp:1033
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements.
Definition intrin_cpp.hpp:2475
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inversed square root.
Definition intrin_cpp.hpp:1007
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude.
Definition intrin_cpp.hpp:1020
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand.
Definition intrin_cpp.hpp:1185
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert to double high part of vector.
Definition intrin_cpp.hpp:2584
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums.
Definition intrin_cpp.hpp:1353
void v_pack_store(hfloat *ptr, const v_reg< float, n > &v)
Definition intrin_cpp.hpp:3289
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition intrin_cpp.hpp:1451
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory.
Definition intrin_cpp.hpp:1584
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double.
Definition intrin_cpp.hpp:2573
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand.
Definition intrin_cpp.hpp:1961
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
! For 16-bit boolean values
Definition intrin_cpp.hpp:3111
void v_cleanup()
Definition intrin_cpp.hpp:3297
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add.
Definition intrin_cpp.hpp:1046
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix.
Definition intrin_cpp.hpp:2761
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements.
Definition intrin_cpp.hpp:1116
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2626
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part.
Definition intrin_cpp.hpp:1233
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_quads(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2640
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float.
Definition intrin_cpp.hpp:2534
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero.
Definition intrin_cpp.hpp:1421
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_pairs(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2633
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add.
Definition intrin_cpp.hpp:3223
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract.
Definition intrin_cpp.hpp:2397
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Less-than comparison.
Definition intrin_cpp.hpp:890
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type.
Definition intrin_cpp.hpp:828
CvArr CvArr * temp
Definition imgproc_c.h:329
CV_EXPORTS OutputArray int double double InputArray mask
Definition imgproc.hpp:2132
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441