45 #ifndef OPENCV_HAL_INTRIN_CPP_HPP
46 #define OPENCV_HAL_INTRIN_CPP_HPP
51 #include "opencv2/core/utility.hpp"
52 #include "opencv2/core/saturate.hpp"
55 #define CV_SIMD128_CPP 1
56 #if defined(CV_FORCE_SIMD128_CPP)
58 #define CV_SIMD128_64F 1
60 #if defined(CV_DOXYGEN)
62 #define CV_SIMD128_64F 1
64 #define CV_SIMD256_64F 1
66 #define CV_SIMD512_64F 1
77 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
369 template<
typename _Tp,
int n>
struct v_reg
372 typedef _Tp lane_type;
/** @brief Constructs the register from memory.

Copies n consecutive elements starting at ptr into the member array s
(lane i receives ptr[i]). The constructor is explicit, so a raw pointer is
never converted to a register implicitly.
@param ptr source array; the loop reads ptr[0] through ptr[n-1].
*/
380 explicit v_reg(
const _Tp* ptr) {
for(
int i = 0; i < n; i++ ) s[i] = ptr[i]; }
/** @brief Two-value constructor.

Stores s0 into s[0] and s1 into s[1]; no other lane is written here.
*/
385 v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
/** @brief Four-value constructor.

Stores s0..s3 into s[0]..s[3] in order; no other lane is written here.
*/
390 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
395 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
396 _Tp s4, _Tp s5, _Tp s6, _Tp s7)
398 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
399 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
405 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
406 _Tp s4, _Tp s5, _Tp s6, _Tp s7,
407 _Tp s8, _Tp s9, _Tp s10, _Tp s11,
408 _Tp s12, _Tp s13, _Tp s14, _Tp s15)
410 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
411 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
412 s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
413 s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
424 for(
int i = 0; i < n; i++ )
/** @brief Access the first lane.

@return a copy of s[0]. The register itself is not modified (const member).
*/
437 _Tp
get0()
const {
return s[0]; }
/** @brief Access one lane by index.

@param i lane index; it is used directly as the subscript of s, and this
         function performs no range check on it.
@return a copy of s[i].
*/
440 _Tp get(
const int i)
const {
return s[i]; }
441 v_reg<_Tp, n> high()
const
445 for( i = 0; i < n/2; i++ )
453 static v_reg<_Tp, n> zero()
456 for(
int i = 0; i < n; i++ )
461 static v_reg<_Tp, n> all(_Tp s)
464 for(
int i = 0; i < n; i++ )
469 template<
typename _Tp2,
int n2> v_reg<_Tp2, n2> reinterpret_as()
const
471 size_t bytes =
std::min(
sizeof(_Tp2)*n2,
sizeof(_Tp)*n);
477 v_reg& operator=(
const v_reg<_Tp, n> &
r)
479 for(
int i = 0; i < n; i++ )
// Invokes macro_name once for each integer lane type, in this order:
// uchar, schar, ushort, short, unsigned, int, uint64, int64.
// The type is passed as the first argument and the variadic arguments are
// forwarded unchanged after it. Each invocation is wrapped in __CV_EXPAND,
// which is defined outside this view.
// NOTE(review): __CV_EXPAND is presumably an extra expansion pass for
// compilers that treat __VA_ARGS__ as a single token - confirm.
621 #define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
622 __CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
623 __CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
624 __CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
625 __CV_EXPAND(macro_name(short, __VA_ARGS__)) \
626 __CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
627 __CV_EXPAND(macro_name(int, __VA_ARGS__)) \
628 __CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
629 __CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
// Invokes macro_name once for float and once for double, passing the type as
// the first argument and forwarding the variadic arguments after it.
631 #define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
632 __CV_EXPAND(macro_name(float, __VA_ARGS__)) \
633 __CV_EXPAND(macro_name(double, __VA_ARGS__)) \
// Invokes macro_name for every supported lane type by chaining the
// integer-type expansion followed by the floating-point expansion.
635 #define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
636 CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
637 CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
639 #define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
640 template<int n> inline \
641 v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
644 for( int i = 0; i < n; i++ ) \
645 c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
648 template<int n> inline \
649 v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
651 for( int i = 0; i < n; i++ ) \
652 a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
// Instantiates CV__HAL_INTRIN_IMPL_BIN_OP_ for every lane type (integer and
// floating-point) with the given operator token. That helper generates both
// `operator bin_op` and `operator bin_op=` for v_reg, with each lane result
// passed through saturate_cast<_Tp>.
656 #define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
663 #define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
664 template<int n> CV_INLINE \
665 v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
668 typedef typename V_TypeTraits<_Tp>::int_type itype; \
669 for( int i = 0; i < n; i++ ) \
670 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
671 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
674 template<int n> CV_INLINE \
675 v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
677 typedef typename V_TypeTraits<_Tp>::int_type itype; \
678 for( int i = 0; i < n; i++ ) \
679 a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
680 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
// Instantiates CV__HAL_INTRIN_IMPL_BIT_OP_ with the given bitwise operator
// token, first for all integer lane types and then for float and double.
// The helper applies the operator to the integer reinterpretation of each
// lane (V_TypeTraits<_Tp>::reinterpret_int) and converts the result back.
684 #define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
685 CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
686 CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op)
693 #define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
694 template<int n> CV_INLINE \
695 v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
698 for( int i = 0; i < n; i++ ) \
699 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
710 #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
711 template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
714 for( int i = 0; i < n; i++ ) \
715 c.s[i] = cfunc(a.s[i]); \
739 #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
740 template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
743 for( int i = 0; i < n; i++ ) \
744 c.s[i] = cfunc(a.s[i], b.s[i]); \
750 #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
751 template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
754 for( int i = 1; i < n; i++ ) \
755 c = cfunc(c, a.s[i]); \
// Defines v_min(a, b) through OPENCV_HAL_IMPL_MINMAX_FUNC: lane i of the
// result is std::min(a.s[i], b.s[i]).
769 OPENCV_HAL_IMPL_MINMAX_FUNC(v_min,
std::min)
// Defines v_max(a, b) through OPENCV_HAL_IMPL_MINMAX_FUNC: lane i of the
// result is std::max(a.s[i], b.s[i]).
781 OPENCV_HAL_IMPL_MINMAX_FUNC(v_max,
std::max)
// Defines v_reduce_min(a) through OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC: folds
// std::min over the lanes of a and returns a single _Tp value.
// NOTE(review): the macro's loop starts at lane 1, so the accumulator is
// presumably seeded with a.s[0] on a line not visible here - confirm.
790 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min,
std::min)
// Defines v_reduce_max(a) through OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC: folds
// std::max over the lanes of a and returns a single _Tp value.
// NOTE(review): the macro's loop starts at lane 1, so the accumulator is
// presumably seeded with a.s[0] on a line not visible here - confirm.
799 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max,
std::max)
801 static const unsigned char popCountTable[] =
803 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
804 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
805 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
806 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
807 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
808 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
809 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
810 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
811 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
812 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
813 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
814 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
815 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
816 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
817 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
818 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
827 template<
typename _Tp,
int n>
831 for (
int i = 0; i < n*(int)
sizeof(_Tp); i++)
832 b.s[i/
sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
838 template<
typename _Tp,
int n>
839 inline void v_minmax(
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b,
840 v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
842 for(
int i = 0; i < n; i++ )
844 minval.s[i] =
std::min(a.s[i], b.s[i]);
845 maxval.s[i] =
std::max(a.s[i], b.s[i]);
852 #define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
853 template<typename _Tp, int n> \
854 inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
856 typedef typename V_TypeTraits<_Tp>::int_type itype; \
858 for( int i = 0; i < n; i++ ) \
859 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
894 for (
int i = 0; i < n; i++)
903 for (
int i = 0; i < n; i++)
910 #define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
911 template<typename _Tp, int n> \
912 inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
914 typedef _Tp2 rtype; \
916 for( int i = 0; i < n; i++ ) \
917 c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
936 template<
typename T>
inline T _absdiff(
T a,
T b)
939 return a > b ? a - b : b - a;
952 template<
typename _Tp,
int n>
958 for(
int i = 0; i < n; i++ )
960 rtype ua = a.s[i] ^
mask;
961 rtype ub = b.s[i] ^
mask;
962 c.s[i] = _absdiff(ua, ub);
973 for(
int i = 0; i < c.nlanes; i++ )
974 c.s[i] = _absdiff(a.s[i], b.s[i]);
984 for(
int i = 0; i < c.nlanes; i++ )
985 c.s[i] = _absdiff(a.s[i], b.s[i]);
993 template<
typename _Tp,
int n>
997 for(
int i = 0; i < n; i++)
998 c.s[i] = saturate_cast<_Tp>(
std::abs(a.s[i] - b.s[i]));
1006 template<
typename _Tp,
int n>
1010 for(
int i = 0; i < n; i++ )
1019 template<
typename _Tp,
int n>
1023 for(
int i = 0; i < n; i++ )
1024 c.s[i] =
std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1032 template<
typename _Tp,
int n>
1036 for(
int i = 0; i < n; i++ )
1037 c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1045 template<
typename _Tp,
int n>
1050 for(
int i = 0; i < n; i++ )
1051 d.s[i] = a.s[i]*b.s[i] + c.s[i];
1056 template<
typename _Tp,
int n>
1060 return v_fma(a, b, c);
1076 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1080 v_reg<w_type, n/2> c;
1081 for(
int i = 0; i < (n/2); i++ )
1082 c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1097 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1102 v_reg<w_type, n/2> s;
1103 for(
int i = 0; i < (n/2); i++ )
1104 s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1115 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1123 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1141 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1145 v_reg<q_type, n/4> s;
1146 for(
int i = 0; i < (n/4); i++ )
1147 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1148 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1163 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1168 v_reg<q_type, n/4> s;
1169 for(
int i = 0; i < (n/4); i++ )
1170 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1171 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1184 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1192 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1221 for(
int i = 0; i < (n/2); i++ )
1223 c.s[i] = (w_type)a.s[i]*b.s[i];
1224 d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1237 for (
int i = 0; i < n; i++)
1238 c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >>
sizeof(_Tp)*8);
1243 template<
typename _Tp,
int n>
inline void v_hsum(
const v_reg<_Tp, n>& a,
1244 v_reg<
typename V_TypeTraits<_Tp>::w_type, n/2>& c)
1246 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1247 for(
int i = 0; i < (n/2); i++ )
1249 c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1256 #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
1257 template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
1260 for( int i = 0; i < n; i++ ) \
1261 c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1277 #define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
1278 template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
1281 for (int i = 0; i < n; i++) \
1283 int sIndex = i opA imm; \
1284 if (0 <= sIndex && sIndex < n) \
1286 b.s[i] = a.s[sIndex]; \
1295 template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
1298 for (int i = 0; i < n; i++) \
1300 int aIndex = i opA imm; \
1301 int bIndex = i opA imm opB n; \
1302 if (0 <= bIndex && bIndex < n) \
1304 c.s[i] = b.s[bIndex]; \
1306 else if (0 <= aIndex && aIndex < n) \
1308 c.s[i] = a.s[aIndex]; \
1338 for(
int i = 1; i < n; i++ )
1357 for(
int i = 0; i < (n/4); i++)
1359 r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1360 r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1361 r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1362 r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1377 for (
int i = 1; i < n; i++)
1378 c += _absdiff(a.s[i], b.s[i]);
1395 for(
int i = 0; i < n; i++ )
1411 for (
int i = 0; i < n; i++)
1423 for(
int i = 0; i < n; i++ )
1435 for(
int i = 0; i < n; i++ )
1455 typedef typename Traits::int_type int_type;
1457 for(
int i = 0; i < n; i++ )
1459 int_type m = Traits::reinterpret_int(
mask.s[i]);
1461 c.s[i] = m ? a.s[i] : b.s[i];
1478 for(
int i = 0; i < (n/2); i++ )
1481 b1.s[i] = a.s[i+(n/2)];
1494 template<
typename _Tp,
int n>
1495 inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1499 for(
int i = 0; i < (n/2); i++ )
1513 template<
typename _Tp,
int n>
1514 inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1518 for(
int i = 0; i < (n/2); i++ )
1519 b.s[i] = a.s[i+(n/2)];
1524 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1525 v_reinterpret_as_int(
const v_reg<_Tp, n>& a)
1527 v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1528 for(
int i = 0; i < n; i++ )
1529 c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1533 template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1534 v_reinterpret_as_uint(
const v_reg<_Tp, n>& a)
1536 v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1537 for(
int i = 0; i < n; i++ )
1538 c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1558 for( i = 0; i < n/2; i++ )
1560 b0.s[i*2] = a0.s[i];
1561 b0.s[i*2+1] = a1.s[i];
1565 b1.s[i*2-n] = a0.s[i];
1566 b1.s[i*2-n+1] = a1.s[i];
1583 template<
typename _Tp>
1586 #if CV_STRONG_ALIGNMENT
1607 template<
typename _Tp>
1608 inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load(
const _Tp* ptr)
1610 #if CV_STRONG_ALIGNMENT
1613 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1632 template<
typename _Tp>
1633 inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load(
const _Tp* ptr)
1635 #if CV_STRONG_ALIGNMENT
1638 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1648 template<
typename _Tp>
1663 template<
typename _Tp>
1664 inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_aligned(
const _Tp* ptr)
1667 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1679 template<
typename _Tp>
1680 inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_aligned(
const _Tp* ptr)
1683 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1698 template<
typename _Tp>
1701 #if CV_STRONG_ALIGNMENT
1705 for(
int i = 0; i < c.nlanes/2; i++ )
1725 template<
typename _Tp>
1726 inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_low(
const _Tp* ptr)
1728 #if CV_STRONG_ALIGNMENT
1731 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1732 for (
int i = 0; i < c.nlanes / 2; i++)
1753 template<
typename _Tp>
1754 inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_low(
const _Tp* ptr)
1756 #if CV_STRONG_ALIGNMENT
1759 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1760 for (
int i = 0; i < c.nlanes / 2; i++)
1780 template<
typename _Tp>
1783 #if CV_STRONG_ALIGNMENT
1788 for(
int i = 0; i < c.nlanes/2; i++ )
1791 c.s[i+c.nlanes/2] = hiptr[i];
1810 template<
typename _Tp>
1811 inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1813 #if CV_STRONG_ALIGNMENT
1817 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1818 for (
int i = 0; i < c.nlanes / 2; i++)
1821 c.s[i + c.nlanes / 2] = hiptr[i];
1841 template<
typename _Tp>
1842 inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1844 #if CV_STRONG_ALIGNMENT
1848 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1849 for (
int i = 0; i < c.nlanes / 2; i++)
1852 c.s[i + c.nlanes / 2] = hiptr[i];
1870 template<
typename _Tp>
1871 inline v_reg<typename V_TypeTraits<_Tp>::w_type,
simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1874 #if CV_STRONG_ALIGNMENT
1879 for(
int i = 0; i < c.nlanes; i++ )
1900 template<
typename _Tp>
1901 inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1902 v256_load_expand(
const _Tp* ptr)
1904 #if CV_STRONG_ALIGNMENT
1907 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1908 v_reg<w_type, simd256_width /
sizeof(w_type)> c;
1909 for (
int i = 0; i < c.nlanes; i++)
1931 template<
typename _Tp>
1932 inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1933 v512_load_expand(
const _Tp* ptr)
1935 #if CV_STRONG_ALIGNMENT
1938 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1939 v_reg<w_type, simd512_width /
sizeof(w_type)> c;
1940 for (
int i = 0; i < c.nlanes; i++)
1959 template<
typename _Tp>
1960 inline v_reg<typename V_TypeTraits<_Tp>::q_type,
simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1963 #if CV_STRONG_ALIGNMENT
1968 for(
int i = 0; i < c.nlanes; i++ )
1988 template<
typename _Tp>
1989 inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1990 v256_load_expand_q(
const _Tp* ptr)
1992 #if CV_STRONG_ALIGNMENT
1995 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1996 v_reg<q_type, simd256_width /
sizeof(q_type)> c;
1997 for (
int i = 0; i < c.nlanes; i++)
2018 template<
typename _Tp>
2019 inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
2020 v512_load_expand_q(
const _Tp* ptr)
2022 #if CV_STRONG_ALIGNMENT
2025 typedef typename V_TypeTraits<_Tp>::q_type q_type;
2026 v_reg<q_type, simd512_width /
sizeof(q_type)> c;
2027 for (
int i = 0; i < c.nlanes; i++)
2046 #if CV_STRONG_ALIGNMENT
2050 for( i = i2 = 0; i < n; i++, i2 += 2 )
2068 #if CV_STRONG_ALIGNMENT
2072 for( i = i3 = 0; i < n; i++, i3 += 3 )
2088 template<
typename _Tp,
int n>
2093 #if CV_STRONG_ALIGNMENT
2097 for( i = i4 = 0; i < n; i++, i4 += 4 )
2114 template<
typename _Tp,
int n>
2119 #if CV_STRONG_ALIGNMENT
2123 for( i = i2 = 0; i < n; i++, i2 += 2 )
2138 template<
typename _Tp,
int n>
2143 #if CV_STRONG_ALIGNMENT
2147 for( i = i3 = 0; i < n; i++, i3 += 3 )
2168 #if CV_STRONG_ALIGNMENT
2172 for( i = i4 = 0; i < n; i++, i4 += 4 )
2189 template<
typename _Tp,
int n>
2192 #if CV_STRONG_ALIGNMENT
2195 for(
int i = 0; i < n; i++ )
2199 template<
typename _Tp,
int n>
2202 #if CV_STRONG_ALIGNMENT
2215 template<
typename _Tp,
int n>
2218 #if CV_STRONG_ALIGNMENT
2221 for(
int i = 0; i < (n/2); i++ )
2232 template<
typename _Tp,
int n>
2235 #if CV_STRONG_ALIGNMENT
2238 for(
int i = 0; i < (n/2); i++ )
2239 ptr[i] = a.s[i+(n/2)];
2250 template<
typename _Tp,
int n>
2257 template<
typename _Tp,
int n>
2264 template<
typename _Tp,
int n>
2281 template<
typename _Tp,
int n>
2285 for(
int i = 0; i < (n/2); i++ )
2288 c.s[i+(n/2)] = b.s[i];
2303 template<
typename _Tp,
int n>
2307 for(
int i = 0; i < (n/2); i++ )
2309 c.s[i] = a.s[i+(n/2)];
2310 c.s[i+(n/2)] = b.s[i+(n/2)];
2321 template<
typename _Tp,
int n>
2325 for(
int i = 0; i < (n/2); i++ )
2328 low.s[i+(n/2)] = b.s[i];
2329 high.s[i] = a.s[i+(n/2)];
2330 high.s[i+(n/2)] = b.s[i+(n/2)];
2342 template<
typename _Tp,
int n>
2346 for(
int i = 0; i < n; i++ )
2347 c.s[i] = a.s[n-i-1];
2370 template<
int s,
typename _Tp,
int n>
2374 const int shift = n - s;
2376 for (; i < shift; ++i)
2379 r.s[i] = b.s[i-shift];
2396 template<
int s,
typename _Tp,
int n>
2412 template<
int i,
typename _Tp,
int n>
2427 for(
int i = 0; i < n; i++ )
2436 for(
int i = 0; i < n; i++ )
2452 for(
int i = 0; i < n; i++ )
2465 for(
int i = 0; i < n; i++ )
2478 for(
int i = 0; i < n; i++ )
2479 c.s[i] = (
int)(a.s[i]);
2487 for(
int i = 0; i < n; i++ )
2499 for(
int i = 0; i < n; i++ )
2511 for(
int i = 0; i < n; i++ )
2523 for(
int i = 0; i < n; i++ )
2525 c.s[i] = (int)(a.s[i]);
2537 for(
int i = 0; i < n; i++ )
2538 c.s[i] = (
float)a.s[i];
2548 for(
int i = 0; i < n; i++ )
2550 c.s[i] = (float)a.s[i];
2562 for(
int i = 0; i < n; i++ )
2564 c.s[i] = (float)a.s[i];
2565 c.s[i+n] = (
float)b.s[i];
2575 v_reg<double, (n/2)> c;
2576 for(
int i = 0; i < (n/2); i++ )
2577 c.s[i] = (
double)a.s[i];
2586 v_reg<double, (n/2)> c;
2587 for(
int i = 0; i < (n/2); i++ )
2588 c.s[i] = (
double)a.s[i + (n/2)];
2597 v_reg<double, (n/2)> c;
2598 for(
int i = 0; i < (n/2); i++ )
2599 c.s[i] = (
double)a.s[i];
2608 v_reg<double, (n/2)> c;
2609 for(
int i = 0; i < (n/2); i++ )
2610 c.s[i] = (
double)a.s[i + (n/2)];
2620 for(
int i = 0; i < n; i++ )
2621 c.s[i] = (
double)a.s[i];
2629 for (
int i = 0; i < c.nlanes; i++)
2630 c.s[i] = tab[
idx[i]];
2636 for (
int i = 0; i < c.nlanes; i++)
2637 c.s[i] = tab[
idx[i / 2] + i % 2];
2643 for (
int i = 0; i < c.nlanes; i++)
2644 c.s[i] = tab[
idx[i / 4] + i % 4];
2651 for(
int i = 0; i < n; i++ )
2652 c.s[i] = tab[
idx.s[i]];
2659 for (
int i = 0; i < n; i++)
2660 c.s[i] = tab[
idx.s[i]];
2667 for(
int i = 0; i < n; i++ )
2668 c.s[i] = tab[
idx.s[i]];
2674 v_reg<double, n/2> c;
2675 for(
int i = 0; i < n/2; i++ )
2676 c.s[i] = tab[
idx.s[i]];
2684 for(
int i = 0; i < n; i++ )
2695 for(
int i = 0; i < n; i++ )
2706 for (
int i = 0; i < n/4; i++)
2708 c.s[4*i ] = vec.s[4*i ];
2709 c.s[4*i+1] = vec.s[4*i+2];
2710 c.s[4*i+2] = vec.s[4*i+1];
2711 c.s[4*i+3] = vec.s[4*i+3];
2719 for (
int i = 0; i < n/8; i++)
2721 c.s[8*i ] = vec.s[8*i ];
2722 c.s[8*i+1] = vec.s[8*i+4];
2723 c.s[8*i+2] = vec.s[8*i+1];
2724 c.s[8*i+3] = vec.s[8*i+5];
2725 c.s[8*i+4] = vec.s[8*i+2];
2726 c.s[8*i+5] = vec.s[8*i+6];
2727 c.s[8*i+6] = vec.s[8*i+3];
2728 c.s[8*i+7] = vec.s[8*i+7];
2736 for (
int i = 0; i < n/4; i++)
2738 c.s[3*i ] = vec.s[4*i ];
2739 c.s[3*i+1] = vec.s[4*i+1];
2740 c.s[3*i+2] = vec.s[4*i+2];
2760 template<
typename _Tp,
int n>
2766 for (
int i = 0; i < n / 4; i++)
2768 b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2769 b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2770 b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2771 b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2772 b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2773 b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2774 b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2775 b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
// Generates an inline free function named <prefix>_setzero_<suffix>() that
// takes no arguments and returns _Tpvec::zero().
//   _Tpvec  - register type whose static zero() is called
//   prefix  - pasted before "_setzero_" to form the function name
//   suffix  - pasted after "_setzero_" to form the function name
2781 #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2782 inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
// Generates an inline free function named <prefix>_setall_<suffix>(_Tp val)
// that returns _Tpvec::all(val).
//   _Tpvec  - register type whose static all() is called
//   _Tp     - type of the scalar argument
//   prefix  - pasted before "_setall_" to form the function name
//   suffix  - pasted after "_setall_" to form the function name
2827 #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2828 inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
// Generates the function template v_reinterpret_as_<suffix>(a), which accepts
// any v_reg<_Tp0, n0> and returns a v_reg with lane type _Tp.
// The result has n0*sizeof(_Tp0)/sizeof(_Tp) lanes (integer division), so the
// total byte size is unchanged whenever the sizes divide evenly.
// The conversion itself is delegated to v_reg::reinterpret_as; the `template`
// keyword is needed because a's type depends on template parameters.
2873 #define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2874 template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
2875 v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
2876 { return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
// Generates v_shl<shift>(a) for registers with lane type _Tp. The shift
// amount is a template argument, and the function simply returns
// `a << shift`, i.e. it forwards to the v_reg operator<< taking an int.
2895 #define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2896 template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
2897 { return a << shift; }
// Generates v_shr<shift>(a) for registers with lane type _Tp. The shift
// amount is a template argument, and the function simply returns
// `a >> shift`, i.e. it forwards to the v_reg operator>> taking an int.
2912 #define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
2913 template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
2914 { return a >> shift; }
2929 #define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
2930 template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
2933 for( int i = 0; i < n; i++ ) \
2934 c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2951 #define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
2952 template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2954 v_reg<_Tpn, 2*n> c; \
2955 for( int i = 0; i < n; i++ ) \
2957 c.s[i] = cast<_Tpn>(a.s[i]); \
2958 c.s[i+n] = cast<_Tpn>(b.s[i]); \
2986 #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
2987 template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2989 v_reg<_Tpn, 2*n> c; \
2990 for( int i = 0; i < n; i++ ) \
2992 c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2993 c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3021 #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3022 template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3024 for( int i = 0; i < n; i++ ) \
3025 ptr[i] = cast<_Tpn>(a.s[i]); \
3051 #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3052 template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3054 for( int i = 0; i < n; i++ ) \
3055 ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3080 template<
typename _Tpm,
typename _Tp,
int n>
3081 inline void _pack_b(_Tpm* mptr,
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b)
3083 for (
int i = 0; i < n; ++i)
3085 mptr[i] = (_Tpm)a.s[i];
3086 mptr[i + n] = (_Tpm)b.s[i];
3114 _pack_b(
mask.s, a, b);
3138 _pack_b(
mask.s, a, b);
3139 _pack_b(
mask.s + 2*n, c, d);
3169 _pack_b(
mask.s, a, b);
3170 _pack_b(
mask.s + 2*n, c, d);
3171 _pack_b(
mask.s + 4*n, e, f);
3172 _pack_b(
mask.s + 6*n, g, h);
3198 for (
int i = 0; i < n / 4; i++)
3200 res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3201 res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3202 res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3203 res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3228 for (
int i = 0; i < n / 4; i++)
3230 res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3231 res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3232 res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3233 res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3257 for(
int i = 0; i < v.nlanes; i++ )
3264 inline v_reg<float, simd256_width /
sizeof(float)>
3265 v256_load_expand(
const hfloat* ptr)
3267 v_reg<float, simd256_width /
sizeof(float)> v;
3268 for (
int i = 0; i < v.nlanes; i++)
3276 inline v_reg<float, simd512_width /
sizeof(float)>
3277 v512_load_expand(
const hfloat* ptr)
3279 v_reg<float, simd512_width /
sizeof(float)> v;
3280 for (
int i = 0; i < v.nlanes; i++)
3288 template<
int n>
inline void
3291 for(
int i = 0; i < v.nlanes; i++ )
3293 ptr[i] = hfloat(v.s[i]);
// Empty body: calling v256_cleanup() does nothing in this implementation.
// NOTE(review): presumably provided so code written against backends that
// need a cleanup step still compiles here - confirm.
3299 inline void v256_cleanup() {}
// Empty body: calling v512_cleanup() does nothing in this implementation.
// NOTE(review): presumably provided so code written against backends that
// need a cleanup step still compiles here - confirm.
3302 inline void v512_cleanup() {}
3308 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3312 #if !defined(CV_DOXYGEN)
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition: calib3d.hpp:1867
const int * idx
Definition: core_c.h:668
const CvArr CvArr * x
Definition: core_c.h:1195
const CvArr * y
Definition: core_c.h:1187
signed char schar
Definition: interface.h:48
unsigned char uchar
Definition: interface.h:51
int64_t int64
Definition: interface.h:61
unsigned short ushort
Definition: interface.h:52
uint64_t uint64
Definition: interface.h:62
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)
Helper macro.
Definition: intrin_cpp.hpp:2827
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)
Helper macro.
Definition: intrin_cpp.hpp:2929
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)
Helper macro.
Definition: intrin_cpp.hpp:2912
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition: intrin_cpp.hpp:2986
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)
Helper macro: defines the v_rotate_<suffix> functions, which move lanes by a compile-time offset.
Definition: intrin_cpp.hpp:1277
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)
Helper macro.
Definition: intrin_cpp.hpp:852
OPENCV_HAL_IMPL_MATH_FUNC(v_abs,(typename V_TypeTraits< _Tp >::abs_type) std::abs, typename V_TypeTraits< _Tp >::abs_type) static const unsigned char popCountTable[]
Square root of elements.
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)
Helper macro.
Definition: intrin_cpp.hpp:2895
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)
Helper macro.
Definition: intrin_cpp.hpp:2781
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition: intrin_cpp.hpp:3051
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)
Helper macro.
Definition: intrin_cpp.hpp:2873
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition: intrin_cpp.hpp:2951
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)
Helper macro.
Definition: intrin_cpp.hpp:910
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition: intrin_cpp.hpp:3021
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)
Helper macro.
Definition: intrin_cpp.hpp:1256
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero.
Definition: intrin_cpp.hpp:1433
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name,...)
Definition: intrin_cpp.hpp:621
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy)
Definition: intrin_cpp.hpp:693
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op)
Definition: intrin_cpp.hpp:656
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type.
Definition: intrin_cpp.hpp:1515
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inverse square root.
Definition: intrin_cpp.hpp:1007
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_quads(const _Tp *tab, const int *idx)
Definition: intrin_cpp.hpp:2640
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements.
Definition: intrin_cpp.hpp:2424
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand.
Definition: intrin_cpp.hpp:1185
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values.
Definition: intrin_cpp.hpp:491
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float.
Definition: intrin_cpp.hpp:2534
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values.
Definition: intrin_cpp.hpp:489
void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (higher half)
Definition: intrin_cpp.hpp:2233
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask.
Definition: intrin_cpp.hpp:1392
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part.
Definition: intrin_cpp.hpp:1233
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors.
Definition: intrin_cpp.hpp:1554
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)
Load register contents from memory (aligned)
Definition: intrin_cpp.hpp:1649
CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise OR.
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory.
Definition: intrin_cpp.hpp:1584
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication.
Definition: intrin_cpp.hpp:3193
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values.
Definition: intrin_cpp.hpp:507
v_reg< _Tp, n > v_interleave_pairs(const v_reg< _Tp, n > &vec)
Definition: intrin_cpp.hpp:2703
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory.
Definition: intrin_cpp.hpp:2190
v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from last elements of two vectors.
Definition: intrin_cpp.hpp:2304
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Sum absolute differences of values.
Definition: intrin_cpp.hpp:1374
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values.
Definition: intrin_cpp.hpp:493
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double.
Definition: intrin_cpp.hpp:2573
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut(const _Tp *tab, const int *idx)
Definition: intrin_cpp.hpp:2626
V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)
Sum packed values.
Definition: intrin_cpp.hpp:1335
void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (lower half)
Definition: intrin_cpp.hpp:2216
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op)
Definition: intrin_cpp.hpp:684
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude.
Definition: intrin_cpp.hpp:1020
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add.
Definition: intrin_cpp.hpp:1046
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index.
Definition: intrin_cpp.hpp:1409
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements.
Definition: intrin_cpp.hpp:2475
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values.
Definition: intrin_cpp.hpp:499
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Check which elements are not NaN.
Definition: intrin_cpp.hpp:890
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums.
Definition: intrin_cpp.hpp:1353
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type.
Definition: intrin_cpp.hpp:828
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude.
Definition: intrin_cpp.hpp:1033
void v_store_aligned_nocache(_Tp *ptr, const v_reg< _Tp, n > &a)
Definition: intrin_cpp.hpp:2258
v_reg< _Tp, n > v_interleave_quads(const v_reg< _Tp, n > &vec)
Definition: intrin_cpp.hpp:2716
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values.
Definition: intrin_cpp.hpp:497
CV_INLINE v_reg< _Tp, n > & operator-=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op)
Definition: intrin_cpp.hpp:639
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand.
Definition: intrin_cpp.hpp:1961
v_reg< _Tp, n > v_pack_triplets(const v_reg< _Tp, n > &vec)
Definition: intrin_cpp.hpp:2733
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma.
Definition: intrin_cpp.hpp:1057
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements.
Definition: intrin_cpp.hpp:2449
CV_INLINE v_reg< _Tp, n > & operator*=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)
Broadcast i-th element of vector.
Definition: intrin_cpp.hpp:2413
void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
Combine two vectors from lower and higher parts of two other vectors.
Definition: intrin_cpp.hpp:2322
v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)
Vector reverse order.
Definition: intrin_cpp.hpp:2343
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand.
Definition: intrin_cpp.hpp:1216
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand.
Definition: intrin_cpp.hpp:1142
void v_pack_store(hfloat *ptr, const v_reg< float, n > &v)
Definition: intrin_cpp.hpp:3289
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name,...)
Definition: intrin_cpp.hpp:631
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type.
Definition: intrin_cpp.hpp:1474
v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from first elements of two vectors.
Definition: intrin_cpp.hpp:2282
void v_cleanup()
Definition: intrin_cpp.hpp:3297
CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise XOR.
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add.
Definition: intrin_cpp.hpp:3223
void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
Interleave and store (2 channels)
Definition: intrin_cpp.hpp:2115
void v_lut_deinterleave(const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)
Definition: intrin_cpp.hpp:2681
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)
Load 64 bits of data to lower part (high part is undefined).
Definition: intrin_cpp.hpp:1699
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix.
Definition: intrin_cpp.hpp:2761
CV_INLINE v_reg< _Tp, n > & operator/=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Divide values.
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type.
Definition: intrin_cpp.hpp:1496
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values.
Definition: intrin_cpp.hpp:505
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand.
Definition: intrin_cpp.hpp:1872
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements.
Definition: intrin_cpp.hpp:2462
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements.
Definition: intrin_cpp.hpp:1077
CV_INLINE v_reg< _Tp, n > & operator^=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Vector extract.
Definition: intrin_cpp.hpp:2371
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition: intrin_cpp.hpp:501
CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)
Bitwise NOT.
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero.
Definition: intrin_cpp.hpp:1421
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Absolute difference.
Definition: intrin_cpp.hpp:953
CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise AND.
CV_INLINE v_reg< _Tp, n > & operator&=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract.
Definition: intrin_cpp.hpp:2397
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)
Load register contents from two memory blocks.
Definition: intrin_cpp.hpp:1781
CV_INLINE v_reg< _Tp, n > & operator|=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Saturating absolute difference.
Definition: intrin_cpp.hpp:994
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition: intrin_cpp.hpp:1451
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert upper half to double.
Definition: intrin_cpp.hpp:2584
CV_INLINE v_reg< _Tp, n > & operator+=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements.
Definition: intrin_cpp.hpp:1116
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition: intrin_cpp.hpp:2251
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
For 16-bit boolean values.
Definition: intrin_cpp.hpp:3111
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values.
Definition: intrin_cpp.hpp:495
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition: intrin_cpp.hpp:503
void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
Load and deinterleave (2 channels)
Definition: intrin_cpp.hpp:2043
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_pairs(const _Tp *tab, const int *idx)
Definition: intrin_cpp.hpp:2633
@ simdmax_width
Definition: intrin_cpp.hpp:566
@ simd128_width
Definition: intrin_cpp.hpp:556
softfloat abs(softfloat a)
Absolute value.
Definition: softfloat.hpp:444
CV_INLINE int cvRound(double value)
Rounds floating-point number to the nearest integer.
Definition: fast_math.hpp:200
CV_INLINE int cvCeil(double value)
Rounds floating-point number to the nearest integer not smaller than the original.
Definition: fast_math.hpp:258
static _Tp saturate_cast(uchar v)
Template function for accurate conversion from one primitive type to another.
Definition: saturate.hpp:81
CV_INLINE int cvFloor(double value)
Rounds floating-point number to the nearest integer not larger than the original.
Definition: fast_math.hpp:231
static bool isAligned(const T &data)
Alignment check of passed values.
Definition: utility.hpp:517
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
#define CV_INLINE
Definition: cvdef.h:218
#define CV_DbgAssert(expr)
Definition: base.hpp:375
CvRect r
Definition: imgproc_c.h:984
CV_EXPORTS OutputArray int double double InputArray mask
Definition: imgproc.hpp:2132
StoreMode
Definition: intrin.hpp:100
@ STORE_UNALIGNED
Definition: intrin.hpp:101
"black box" representation of the file storage associated with a file on disk.
Definition: calib3d.hpp:441
DualQuat< T > operator+(const T a, const DualQuat< T > &q)
Definition: dualquaternion.inl.hpp:243
DualQuat< T > operator*(const T a, const DualQuat< T > &q)
Definition: dualquaternion.inl.hpp:274
DualQuat< T > operator-(const DualQuat< T > &q, const T a)
Definition: dualquaternion.inl.hpp:255
static uchar abs(uchar a)
Definition: cvstd.hpp:66
Definition: intrin.hpp:110
Definition: intrin_cpp.hpp:370
_Tp get0() const
Access first value.
Definition: intrin_cpp.hpp:437
v_reg(const v_reg< _Tp, n > &r)
Copy constructor.
Definition: intrin_cpp.hpp:422
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)
Constructor.
Definition: intrin_cpp.hpp:405
v_reg(const _Tp *ptr)
Constructor.
Definition: intrin_cpp.hpp:380
v_reg()
Default constructor.
Definition: intrin_cpp.hpp:419
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)
Constructor.
Definition: intrin_cpp.hpp:395
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
Constructor.
Definition: intrin_cpp.hpp:390
v_reg(_Tp s0, _Tp s1)
Constructor.
Definition: intrin_cpp.hpp:385