5 #ifndef CV__SIMD_FORWARD
6 #error "Need to pre-define forward width"
14 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
// Width-dependent naming helpers.
//
//   __CV_VX(fun)   - prefixes an intrinsic name for the selected width
//                    (v512_fun, v256_fun or plain v_fun).
//   __CV_V_<TYPE>  - concrete vector type holding <TYPE> lanes at that width.
//
// Every branch must be mutually exclusive: the 128-bit names are the fallback
// (#else) branch, and the whole chain is terminated by #endif so that the
// declarations that follow are always visible.
#if CV__SIMD_FORWARD == 1024
// 1024-bit vectors
#error "1024-long ops not implemented yet"
#elif CV__SIMD_FORWARD == 512
// 512-bit vectors
#define __CV_VX(fun)   v512_##fun
#define __CV_V_UINT8   v_uint8x64
#define __CV_V_INT8    v_int8x64
#define __CV_V_UINT16  v_uint16x32
#define __CV_V_INT16   v_int16x32
#define __CV_V_UINT32  v_uint32x16
#define __CV_V_INT32   v_int32x16
#define __CV_V_UINT64  v_uint64x8
#define __CV_V_INT64   v_int64x8
#define __CV_V_FLOAT32 v_float32x16
#define __CV_V_FLOAT64 v_float64x8
#elif CV__SIMD_FORWARD == 256
// 256-bit vectors
#define __CV_VX(fun)   v256_##fun
#define __CV_V_UINT8   v_uint8x32
#define __CV_V_INT8    v_int8x32
#define __CV_V_UINT16  v_uint16x16
#define __CV_V_INT16   v_int16x16
#define __CV_V_UINT32  v_uint32x8
#define __CV_V_INT32   v_int32x8
#define __CV_V_UINT64  v_uint64x4
#define __CV_V_INT64   v_int64x4
#define __CV_V_FLOAT32 v_float32x8
#define __CV_V_FLOAT64 v_float64x4
#else
// 128-bit vectors (used for every other width value)
#define __CV_VX(fun)   v_##fun
#define __CV_V_UINT8   v_uint8x16
#define __CV_V_INT8    v_int8x16
#define __CV_V_UINT16  v_uint16x8
#define __CV_V_INT16   v_int16x8
#define __CV_V_UINT32  v_uint32x4
#define __CV_V_INT32   v_int32x4
#define __CV_V_UINT64  v_uint64x2
#define __CV_V_INT64   v_int64x2
#define __CV_V_FLOAT32 v_float32x4
#define __CV_V_FLOAT64 v_float64x2
#endif
94 void v_expand(
const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
95 void v_expand(
const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
96 void v_expand(
const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
97 void v_expand(
const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
98 void v_expand(
const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
99 void v_expand(
const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
115 __CV_V_UINT16 __CV_VX(load_expand)(
const uchar*);
116 __CV_V_INT16 __CV_VX(load_expand)(
const schar*);
117 __CV_V_UINT32 __CV_VX(load_expand)(
const ushort*);
118 __CV_V_INT32 __CV_VX(load_expand)(
const short*);
119 __CV_V_UINT64 __CV_VX(load_expand)(
const uint*);
120 __CV_V_INT64 __CV_VX(load_expand)(
const int*);
122 __CV_V_UINT32 __CV_VX(load_expand_q)(
const uchar*);
123 __CV_V_INT32 __CV_VX(load_expand_q)(
const schar*);
126 __CV_V_UINT8 v_pack(
const __CV_V_UINT16&,
const __CV_V_UINT16&);
127 __CV_V_INT8 v_pack(
const __CV_V_INT16&,
const __CV_V_INT16&);
128 __CV_V_UINT16 v_pack(
const __CV_V_UINT32&,
const __CV_V_UINT32&);
129 __CV_V_INT16 v_pack(
const __CV_V_INT32&,
const __CV_V_INT32&);
131 __CV_V_UINT32 v_pack(
const __CV_V_UINT64&,
const __CV_V_UINT64&);
132 __CV_V_INT32 v_pack(
const __CV_V_INT64&,
const __CV_V_INT64&);
134 __CV_V_UINT8 v_pack_u(
const __CV_V_INT16&,
const __CV_V_INT16&);
135 __CV_V_UINT16 v_pack_u(
const __CV_V_INT32&,
const __CV_V_INT32&);
141 template<
typename Tvec>
142 Tvec v_mul_wrap(
const Tvec& a,
const Tvec& b);
144 __CV_V_UINT8 v_mul_wrap(
const __CV_V_UINT8&,
const __CV_V_UINT8&);
145 __CV_V_INT8 v_mul_wrap(
const __CV_V_INT8&,
const __CV_V_INT8&);
146 __CV_V_UINT16 v_mul_wrap(
const __CV_V_UINT16&,
const __CV_V_UINT16&);
147 __CV_V_INT16 v_mul_wrap(
const __CV_V_INT16&,
const __CV_V_INT16&);
152 template<
typename Tvec,
typename Twvec>
153 void v_mul_expand(
const Tvec& a,
const Tvec& b, Twvec& c, Twvec& d);
155 void v_mul_expand(
const __CV_V_UINT8&,
const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
156 void v_mul_expand(
const __CV_V_INT8&,
const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
157 void v_mul_expand(
const __CV_V_UINT16&,
const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
158 void v_mul_expand(
const __CV_V_INT16&,
const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
159 void v_mul_expand(
const __CV_V_UINT32&,
const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
160 void v_mul_expand(
const __CV_V_INT32&,
const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
164 __CV_V_FLOAT32
v_cvt_f32(
const __CV_V_INT32& a);
165 __CV_V_FLOAT32
v_cvt_f32(
const __CV_V_FLOAT64& a);
166 __CV_V_FLOAT32
v_cvt_f32(
const __CV_V_FLOAT64& a,
const __CV_V_FLOAT64& b);
167 __CV_V_FLOAT64
v_cvt_f64(
const __CV_V_INT32& a);
169 __CV_V_FLOAT64
v_cvt_f64(
const __CV_V_FLOAT32& a);
171 __CV_V_FLOAT64
v_cvt_f64(
const __CV_V_INT64& a);
174 #undef CV__SIMD_FORWARD
184 #undef __CV_V_FLOAT32
185 #undef __CV_V_FLOAT64
187 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
signed char schar
Definition: interface.h:48
uint32_t uint
Definition: interface.h:42
unsigned char uchar
Definition: interface.h:51
unsigned short ushort
Definition: interface.h:52
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type.
Definition: intrin_cpp.hpp:1515
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values.
Definition: intrin_cpp.hpp:491
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float.
Definition: intrin_cpp.hpp:2534
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values.
Definition: intrin_cpp.hpp:489
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values.
Definition: intrin_cpp.hpp:507
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values.
Definition: intrin_cpp.hpp:493
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double.
Definition: intrin_cpp.hpp:2573
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values.
Definition: intrin_cpp.hpp:499
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values.
Definition: intrin_cpp.hpp:497
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand.
Definition: intrin_cpp.hpp:1216
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type.
Definition: intrin_cpp.hpp:1474
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type.
Definition: intrin_cpp.hpp:1496
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values.
Definition: intrin_cpp.hpp:505
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision).
Definition: intrin_cpp.hpp:501
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert to double high part of vector.
Definition: intrin_cpp.hpp:2584
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values.
Definition: intrin_cpp.hpp:495
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision).
Definition: intrin_cpp.hpp:503
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441