45#ifndef OPENCV_HAL_INTRIN_CPP_HPP
46#define OPENCV_HAL_INTRIN_CPP_HPP
51#include "opencv2/core/utility.hpp"
52#include "opencv2/core/saturate.hpp"
55#define CV_SIMD128_CPP 1
56#if defined(CV_FORCE_SIMD128_CPP)
58#define CV_SIMD128_64F 1
60#if defined(CV_DOXYGEN)
62#define CV_SIMD128_64F 1
64#define CV_SIMD256_64F 1
66#define CV_SIMD512_64F 1
77CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
369template<
typename _Tp,
int n>
struct v_reg
372 typedef _Tp lane_type;
380 explicit v_reg(
const _Tp* ptr) {
for(
int i = 0; i < n; i++ ) s[i] = ptr[i]; }
// Two-value constructor: stores s0 in s[0] and s1 in s[1]; no other lanes are written here.
// NOTE(review): presumably meant for 2-lane registers (n == 2) - confirm against the struct declaration.
385 v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
// Four-value constructor: stores s0..s3 in s[0]..s[3]; no other lanes are written here.
// NOTE(review): presumably meant for 4-lane registers (n == 4) - confirm against the struct declaration.
390 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
395 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
396 _Tp s4, _Tp s5, _Tp s6, _Tp s7)
398 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
399 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
405 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
406 _Tp s4, _Tp s5, _Tp s6, _Tp s7,
407 _Tp s8, _Tp s9, _Tp s10, _Tp s11,
408 _Tp s12, _Tp s13, _Tp s14, _Tp s15)
410 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
411 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
412 s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
413 s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
424 for(
int i = 0; i < n; i++ )
// Returns a copy of the value stored in the first lane, s[0].
437 _Tp
get0()
const {
return s[0]; }
// Returns a copy of the value stored in lane i, s[i].
// The index is used as-is: no range check is performed in this function.
440 _Tp get(
const int i)
const {
return s[i]; }
441 v_reg<_Tp, n> high()
const
445 for( i = 0; i < n/2; i++ )
453 static v_reg<_Tp, n> zero()
456 for(
int i = 0; i < n; i++ )
461 static v_reg<_Tp, n> all(_Tp s)
464 for(
int i = 0; i < n; i++ )
469 template<
typename _Tp2,
int n2> v_reg<_Tp2, n2> reinterpret_as()
const
471 size_t bytes =
std::min(
sizeof(_Tp2)*n2,
sizeof(_Tp)*n);
477 v_reg& operator=(
const v_reg<_Tp, n> &
r)
479 for(
int i = 0; i < n; i++ )
621#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
622__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
623__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
624__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
625__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
626__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
627__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
628__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
629__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
631#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
632__CV_EXPAND(macro_name(float, __VA_ARGS__)) \
633__CV_EXPAND(macro_name(double, __VA_ARGS__)) \
635#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
636CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
637CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
639#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
640template<int n> inline \
641v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
644 for( int i = 0; i < n; i++ ) \
645 c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
648template<int n> inline \
649v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
651 for( int i = 0; i < n; i++ ) \
652 a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
656#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
663#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
664template<int n> CV_INLINE \
665v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
668 typedef typename V_TypeTraits<_Tp>::int_type itype; \
669 for( int i = 0; i < n; i++ ) \
670 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
671 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
674template<int n> CV_INLINE \
675v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
677 typedef typename V_TypeTraits<_Tp>::int_type itype; \
678 for( int i = 0; i < n; i++ ) \
679 a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
680 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
684#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
685CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
686CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op)
693#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
694template<int n> CV_INLINE \
695v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
698 for( int i = 0; i < n; i++ ) \
699 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
710#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
711template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
714 for( int i = 0; i < n; i++ ) \
715 c.s[i] = cfunc(a.s[i]); \
739#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
740template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
743 for( int i = 0; i < n; i++ ) \
744 c.s[i] = cfunc(a.s[i], b.s[i]); \
750#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
751template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
754 for( int i = 1; i < n; i++ ) \
755 c = cfunc(c, a.s[i]); \
769OPENCV_HAL_IMPL_MINMAX_FUNC(v_min,
std::min)
781OPENCV_HAL_IMPL_MINMAX_FUNC(v_max,
std::max)
790OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min,
std::min)
799OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max,
std::max)
801static const unsigned char popCountTable[] =
803 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
804 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
805 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
806 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
807 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
808 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
809 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
810 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
811 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
812 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
813 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
814 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
815 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
816 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
817 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
818 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
827template<
typename _Tp,
int n>
831 for (
int i = 0; i < n*(int)
sizeof(_Tp); i++)
832 b.s[i/
sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
838template<
typename _Tp,
int n>
839inline void v_minmax(
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b,
840 v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
842 for(
int i = 0; i < n; i++ )
844 minval.s[i] =
std::min(a.s[i], b.s[i]);
845 maxval.s[i] =
std::max(a.s[i], b.s[i]);
852#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
853template<typename _Tp, int n> \
854inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
856 typedef typename V_TypeTraits<_Tp>::int_type itype; \
858 for( int i = 0; i < n; i++ ) \
859 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
894 for (
int i = 0; i < n; i++)
903 for (
int i = 0; i < n; i++)
910#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
911template<typename _Tp, int n> \
912inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
914 typedef _Tp2 rtype; \
916 for( int i = 0; i < n; i++ ) \
917 c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
937template<
typename T>
inline T _absdiff(
T a,
T b)
939 return a > b ? a - b : b - a;
952template<
typename _Tp,
int n>
958 for(
int i = 0; i < n; i++ )
960 rtype ua = a.s[i] ^
mask;
961 rtype ub = b.s[i] ^
mask;
962 c.s[i] = _absdiff(ua, ub);
973 for(
int i = 0; i < c.nlanes; i++ )
974 c.s[i] = _absdiff(a.s[i], b.s[i]);
984 for(
int i = 0; i < c.nlanes; i++ )
985 c.s[i] = _absdiff(a.s[i], b.s[i]);
993template<
typename _Tp,
int n>
997 for(
int i = 0; i < n; i++)
998 c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
1006template<
typename _Tp,
int n>
1010 for(
int i = 0; i < n; i++ )
1019template<
typename _Tp,
int n>
1023 for(
int i = 0; i < n; i++ )
1024 c.s[i] =
std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1032template<
typename _Tp,
int n>
1036 for(
int i = 0; i < n; i++ )
1037 c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1045template<
typename _Tp,
int n>
1050 for(
int i = 0; i < n; i++ )
1051 d.s[i] = a.s[i]*b.s[i] + c.s[i];
1056template<
typename _Tp,
int n>
1060 return v_fma(a, b, c);
1076template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1080 v_reg<w_type, n/2> c;
1081 for(
int i = 0; i < (n/2); i++ )
1082 c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1097template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1102 v_reg<w_type, n/2> s;
1103 for(
int i = 0; i < (n/2); i++ )
1104 s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1115template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1123template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1141template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1145 v_reg<q_type, n/4> s;
1146 for(
int i = 0; i < (n/4); i++ )
1147 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1148 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1163template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1168 v_reg<q_type, n/4> s;
1169 for(
int i = 0; i < (n/4); i++ )
1170 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1171 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1184template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1192template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1221 for(
int i = 0; i < (n/2); i++ )
1223 c.s[i] = (w_type)a.s[i]*b.s[i];
1224 d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1237 for (
int i = 0; i < n; i++)
1238 c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >>
sizeof(_Tp)*8);
1243template<
typename _Tp,
int n>
inline void v_hsum(
const v_reg<_Tp, n>& a,
1244 v_reg<
typename V_TypeTraits<_Tp>::w_type, n/2>& c)
1246 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1247 for(
int i = 0; i < (n/2); i++ )
1249 c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1256#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
1257template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
1260 for( int i = 0; i < n; i++ ) \
1261 c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1277#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
1278template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
1281 for (int i = 0; i < n; i++) \
1283 int sIndex = i opA imm; \
1284 if (0 <= sIndex && sIndex < n) \
1286 b.s[i] = a.s[sIndex]; \
1295template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
1298 for (int i = 0; i < n; i++) \
1300 int aIndex = i opA imm; \
1301 int bIndex = i opA imm opB n; \
1302 if (0 <= bIndex && bIndex < n) \
1304 c.s[i] = b.s[bIndex]; \
1306 else if (0 <= aIndex && aIndex < n) \
1308 c.s[i] = a.s[aIndex]; \
1338 for(
int i = 1; i < n; i++ )
1357 for(
int i = 0; i < (n/4); i++)
1359 r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1360 r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1361 r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1362 r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1377 for (
int i = 1; i < n; i++)
1378 c += _absdiff(a.s[i], b.s[i]);
1395 for(
int i = 0; i < n; i++ )
1411 for (
int i = 0; i < n; i++)
1423 for(
int i = 0; i < n; i++ )
1435 for(
int i = 0; i < n; i++ )
1455 typedef typename Traits::int_type int_type;
1457 for(
int i = 0; i < n; i++ )
1459 int_type m = Traits::reinterpret_int(
mask.s[i]);
1461 c.s[i] = m ? a.s[i] : b.s[i];
1478 for(
int i = 0; i < (n/2); i++ )
1481 b1.s[i] = a.s[i+(n/2)];
1494template<
typename _Tp,
int n>
1495inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1499 for(
int i = 0; i < (n/2); i++ )
1513template<
typename _Tp,
int n>
1514inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1518 for(
int i = 0; i < (n/2); i++ )
1519 b.s[i] = a.s[i+(n/2)];
1524template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1525 v_reinterpret_as_int(
const v_reg<_Tp, n>& a)
1527 v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1528 for(
int i = 0; i < n; i++ )
1529 c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1533template<
typename _Tp,
int n>
inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1534 v_reinterpret_as_uint(
const v_reg<_Tp, n>& a)
1536 v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1537 for(
int i = 0; i < n; i++ )
1538 c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1558 for( i = 0; i < n/2; i++ )
1560 b0.s[i*2] = a0.s[i];
1561 b0.s[i*2+1] = a1.s[i];
1565 b1.s[i*2-n] = a0.s[i];
1566 b1.s[i*2-n+1] = a1.s[i];
1583template<
typename _Tp>
1586#if CV_STRONG_ALIGNMENT
1607template<
typename _Tp>
1608inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load(
const _Tp* ptr)
1610#if CV_STRONG_ALIGNMENT
1613 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1632template<
typename _Tp>
1633inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load(
const _Tp* ptr)
1635#if CV_STRONG_ALIGNMENT
1638 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1648template<
typename _Tp>
1663template<
typename _Tp>
1664inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_aligned(
const _Tp* ptr)
1667 return v_reg<_Tp, simd256_width /
sizeof(_Tp)>(ptr);
1679template<
typename _Tp>
1680inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_aligned(
const _Tp* ptr)
1683 return v_reg<_Tp, simd512_width /
sizeof(_Tp)>(ptr);
1698template<
typename _Tp>
1701#if CV_STRONG_ALIGNMENT
1705 for(
int i = 0; i < c.nlanes/2; i++ )
1725template<
typename _Tp>
1726inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_low(
const _Tp* ptr)
1728#if CV_STRONG_ALIGNMENT
1731 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1732 for (
int i = 0; i < c.nlanes / 2; i++)
1753template<
typename _Tp>
1754inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_low(
const _Tp* ptr)
1756#if CV_STRONG_ALIGNMENT
1759 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1760 for (
int i = 0; i < c.nlanes / 2; i++)
1780template<
typename _Tp>
1783#if CV_STRONG_ALIGNMENT
1788 for(
int i = 0; i < c.nlanes/2; i++ )
1791 c.s[i+c.nlanes/2] = hiptr[i];
1810template<
typename _Tp>
1811inline v_reg<_Tp, simd256_width /
sizeof(_Tp)> v256_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1813#if CV_STRONG_ALIGNMENT
1817 v_reg<_Tp, simd256_width /
sizeof(_Tp)> c;
1818 for (
int i = 0; i < c.nlanes / 2; i++)
1821 c.s[i + c.nlanes / 2] = hiptr[i];
1841template<
typename _Tp>
1842inline v_reg<_Tp, simd512_width /
sizeof(_Tp)> v512_load_halves(
const _Tp* loptr,
const _Tp* hiptr)
1844#if CV_STRONG_ALIGNMENT
1848 v_reg<_Tp, simd512_width /
sizeof(_Tp)> c;
1849 for (
int i = 0; i < c.nlanes / 2; i++)
1852 c.s[i + c.nlanes / 2] = hiptr[i];
1870template<
typename _Tp>
1871inline v_reg<typename V_TypeTraits<_Tp>::w_type,
simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1874#if CV_STRONG_ALIGNMENT
1879 for(
int i = 0; i < c.nlanes; i++ )
1900template<
typename _Tp>
1901inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1902v256_load_expand(
const _Tp* ptr)
1904#if CV_STRONG_ALIGNMENT
1907 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1908 v_reg<w_type, simd256_width /
sizeof(w_type)> c;
1909 for (
int i = 0; i < c.nlanes; i++)
1931template<
typename _Tp>
1932inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::w_type)>
1933v512_load_expand(
const _Tp* ptr)
1935#if CV_STRONG_ALIGNMENT
1938 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1939 v_reg<w_type, simd512_width /
sizeof(w_type)> c;
1940 for (
int i = 0; i < c.nlanes; i++)
1959template<
typename _Tp>
1960inline v_reg<typename V_TypeTraits<_Tp>::q_type,
simd128_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1963#if CV_STRONG_ALIGNMENT
1968 for(
int i = 0; i < c.nlanes; i++ )
1988template<
typename _Tp>
1989inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
1990v256_load_expand_q(
const _Tp* ptr)
1992#if CV_STRONG_ALIGNMENT
1995 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1996 v_reg<q_type, simd256_width /
sizeof(q_type)> c;
1997 for (
int i = 0; i < c.nlanes; i++)
2018template<
typename _Tp>
2019inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width /
sizeof(
typename V_TypeTraits<_Tp>::q_type)>
2020v512_load_expand_q(
const _Tp* ptr)
2022#if CV_STRONG_ALIGNMENT
2025 typedef typename V_TypeTraits<_Tp>::q_type q_type;
2026 v_reg<q_type, simd512_width /
sizeof(q_type)> c;
2027 for (
int i = 0; i < c.nlanes; i++)
2046#if CV_STRONG_ALIGNMENT
2050 for( i = i2 = 0; i < n; i++, i2 += 2 )
2068#if CV_STRONG_ALIGNMENT
2072 for( i = i3 = 0; i < n; i++, i3 += 3 )
2088template<
typename _Tp,
int n>
2093#if CV_STRONG_ALIGNMENT
2097 for( i = i4 = 0; i < n; i++, i4 += 4 )
2114template<
typename _Tp,
int n>
2119#if CV_STRONG_ALIGNMENT
2123 for( i = i2 = 0; i < n; i++, i2 += 2 )
2138template<
typename _Tp,
int n>
2143#if CV_STRONG_ALIGNMENT
2147 for( i = i3 = 0; i < n; i++, i3 += 3 )
2168#if CV_STRONG_ALIGNMENT
2172 for( i = i4 = 0; i < n; i++, i4 += 4 )
2189template<
typename _Tp,
int n>
2192#if CV_STRONG_ALIGNMENT
2195 for(
int i = 0; i < n; i++ )
2199template<
typename _Tp,
int n>
2202#if CV_STRONG_ALIGNMENT
2215template<
typename _Tp,
int n>
2218#if CV_STRONG_ALIGNMENT
2221 for(
int i = 0; i < (n/2); i++ )
2232template<
typename _Tp,
int n>
2235#if CV_STRONG_ALIGNMENT
2238 for(
int i = 0; i < (n/2); i++ )
2239 ptr[i] = a.s[i+(n/2)];
2250template<
typename _Tp,
int n>
2257template<
typename _Tp,
int n>
2264template<
typename _Tp,
int n>
2281template<
typename _Tp,
int n>
2285 for(
int i = 0; i < (n/2); i++ )
2288 c.s[i+(n/2)] = b.s[i];
2303template<
typename _Tp,
int n>
2307 for(
int i = 0; i < (n/2); i++ )
2309 c.s[i] = a.s[i+(n/2)];
2310 c.s[i+(n/2)] = b.s[i+(n/2)];
2321template<
typename _Tp,
int n>
2325 for(
int i = 0; i < (n/2); i++ )
2328 low.s[i+(n/2)] = b.s[i];
2329 high.s[i] = a.s[i+(n/2)];
2330 high.s[i+(n/2)] = b.s[i+(n/2)];
2342template<
typename _Tp,
int n>
2346 for(
int i = 0; i < n; i++ )
2347 c.s[i] = a.s[n-i-1];
2370template<
int s,
typename _Tp,
int n>
2374 const int shift = n - s;
2376 for (; i < shift; ++i)
2379 r.s[i] = b.s[i-shift];
2396template<
int s,
typename _Tp,
int n>
2412template<
int i,
typename _Tp,
int n>
2427 for(
int i = 0; i < n; i++ )
2436 for(
int i = 0; i < n; i++ )
2452 for(
int i = 0; i < n; i++ )
2465 for(
int i = 0; i < n; i++ )
2478 for(
int i = 0; i < n; i++ )
2479 c.s[i] = (
int)(a.s[i]);
2487 for(
int i = 0; i < n; i++ )
2499 for(
int i = 0; i < n; i++ )
2511 for(
int i = 0; i < n; i++ )
2523 for(
int i = 0; i < n; i++ )
2525 c.s[i] = (int)(a.s[i]);
2537 for(
int i = 0; i < n; i++ )
2538 c.s[i] = (
float)a.s[i];
2548 for(
int i = 0; i < n; i++ )
2550 c.s[i] = (float)a.s[i];
2562 for(
int i = 0; i < n; i++ )
2564 c.s[i] = (float)a.s[i];
2565 c.s[i+n] = (float)b.s[i];
2575 v_reg<double, (n/2)> c;
2576 for(
int i = 0; i < (n/2); i++ )
2577 c.s[i] = (
double)a.s[i];
2586 v_reg<double, (n/2)> c;
2587 for(
int i = 0; i < (n/2); i++ )
2588 c.s[i] = (
double)a.s[i + (n/2)];
2597 v_reg<double, (n/2)> c;
2598 for(
int i = 0; i < (n/2); i++ )
2599 c.s[i] = (
double)a.s[i];
2608 v_reg<double, (n/2)> c;
2609 for(
int i = 0; i < (n/2); i++ )
2610 c.s[i] = (
double)a.s[i + (n/2)];
2620 for(
int i = 0; i < n; i++ )
2621 c.s[i] = (
double)a.s[i];
2629 for (
int i = 0; i < c.nlanes; i++)
2630 c.s[i] = tab[
idx[i]];
2636 for (
int i = 0; i < c.nlanes; i++)
2637 c.s[i] = tab[
idx[i / 2] + i % 2];
2643 for (
int i = 0; i < c.nlanes; i++)
2644 c.s[i] = tab[
idx[i / 4] + i % 4];
2651 for(
int i = 0; i < n; i++ )
2652 c.s[i] = tab[
idx.s[i]];
2659 for (
int i = 0; i < n; i++)
2660 c.s[i] = tab[
idx.s[i]];
2667 for(
int i = 0; i < n; i++ )
2668 c.s[i] = tab[
idx.s[i]];
2674 v_reg<double, n/2> c;
2675 for(
int i = 0; i < n/2; i++ )
2676 c.s[i] = tab[
idx.s[i]];
2684 for(
int i = 0; i < n; i++ )
2695 for(
int i = 0; i < n; i++ )
2706 for (
int i = 0; i < n/4; i++)
2708 c.s[4*i ] = vec.s[4*i ];
2709 c.s[4*i+1] = vec.s[4*i+2];
2710 c.s[4*i+2] = vec.s[4*i+1];
2711 c.s[4*i+3] = vec.s[4*i+3];
2719 for (
int i = 0; i < n/8; i++)
2721 c.s[8*i ] = vec.s[8*i ];
2722 c.s[8*i+1] = vec.s[8*i+4];
2723 c.s[8*i+2] = vec.s[8*i+1];
2724 c.s[8*i+3] = vec.s[8*i+5];
2725 c.s[8*i+4] = vec.s[8*i+2];
2726 c.s[8*i+5] = vec.s[8*i+6];
2727 c.s[8*i+6] = vec.s[8*i+3];
2728 c.s[8*i+7] = vec.s[8*i+7];
2736 for (
int i = 0; i < n/4; i++)
2738 c.s[3*i ] = vec.s[4*i ];
2739 c.s[3*i+1] = vec.s[4*i+1];
2740 c.s[3*i+2] = vec.s[4*i+2];
2760template<
typename _Tp,
int n>
2766 for (
int i = 0; i < n / 4; i++)
2768 b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2769 b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2770 b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2771 b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2772 b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2773 b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2774 b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2775 b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
// Helper macro: defines an inline function named prefix##_setzero_##suffix(),
// taking no arguments, that returns _Tpvec::zero().
2781#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2782inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
// Helper macro: defines an inline function named prefix##_setall_##suffix(_Tp val)
// that returns _Tpvec::all(val).
2827#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2828inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
// Helper macro: defines a function template v_reinterpret_as_##suffix that accepts any
// v_reg<_Tp0, n0> and forwards to its reinterpret_as<>() member with element type _Tp.
// The result lane count is n0*sizeof(_Tp0)/sizeof(_Tp) (integer division), i.e. the same
// total byte size whenever that division is exact.
2873#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2874template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
2875 v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
2876{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
// Helper macro: for element type _Tp, defines v_shl<shift>(a), which returns a << shift.
// The shift amount is a template argument; the work is done by operator<< on v_reg.
2895#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2896template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
2897{ return a << shift; }
// Helper macro: for element type _Tp, defines v_shr<shift>(a), which returns a >> shift.
// The shift amount is a template argument; the work is done by operator>> on v_reg.
2912#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
2913template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
2914{ return a >> shift; }
2929#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
2930template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
2933 for( int i = 0; i < n; i++ ) \
2934 c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2951#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
2952template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2954 v_reg<_Tpn, 2*n> c; \
2955 for( int i = 0; i < n; i++ ) \
2957 c.s[i] = cast<_Tpn>(a.s[i]); \
2958 c.s[i+n] = cast<_Tpn>(b.s[i]); \
2986#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
2987template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2989 v_reg<_Tpn, 2*n> c; \
2990 for( int i = 0; i < n; i++ ) \
2992 c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2993 c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3021#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3022template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3024 for( int i = 0; i < n; i++ ) \
3025 ptr[i] = cast<_Tpn>(a.s[i]); \
3051#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3052template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3054 for( int i = 0; i < n; i++ ) \
3055 ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3080template<
typename _Tpm,
typename _Tp,
int n>
3081inline void _pack_b(_Tpm* mptr,
const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b)
3083 for (
int i = 0; i < n; ++i)
3085 mptr[i] = (_Tpm)a.s[i];
3086 mptr[i + n] = (_Tpm)b.s[i];
3114 _pack_b(
mask.s, a, b);
3138 _pack_b(
mask.s, a, b);
3139 _pack_b(
mask.s + 2*n, c, d);
3169 _pack_b(
mask.s, a, b);
3170 _pack_b(
mask.s + 2*n, c, d);
3171 _pack_b(
mask.s + 4*n, e, f);
3172 _pack_b(
mask.s + 6*n, g, h);
3198 for (
int i = 0; i < n / 4; i++)
3200 res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3201 res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3202 res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3203 res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3228 for (
int i = 0; i < n / 4; i++)
3230 res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3231 res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3232 res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3233 res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3257 for(
int i = 0; i < v.nlanes; i++ )
3264inline v_reg<float, simd256_width /
sizeof(float)>
3265v256_load_expand(
const hfloat* ptr)
3267 v_reg<float, simd256_width /
sizeof(float)> v;
3268 for (
int i = 0; i < v.nlanes; i++)
3276inline v_reg<float, simd512_width /
sizeof(float)>
3277v512_load_expand(
const hfloat* ptr)
3279 v_reg<float, simd512_width /
sizeof(float)> v;
3280 for (
int i = 0; i < v.nlanes; i++)
3288template<
int n>
inline void
3291 for(
int i = 0; i < v.nlanes; i++ )
3293 ptr[i] = hfloat(v.s[i]);
// No-op: the body is empty.
// NOTE(review): presumably kept for API parity with SIMD backends that need a cleanup step - confirm.
3299inline void v256_cleanup() {}
// No-op: the body is empty.
// NOTE(review): presumably kept for API parity with SIMD backends that need a cleanup step - confirm.
3302inline void v512_cleanup() {}
3308CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3312#if !defined(CV_DOXYGEN)
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition calib3d.hpp:1867
const int * idx
Definition core_c.h:668
const CvArr CvArr * x
Definition core_c.h:1195
const CvArr * y
Definition core_c.h:1187
signed char schar
Definition interface.h:48
unsigned char uchar
Definition interface.h:51
int64_t int64
Definition interface.h:61
unsigned short ushort
Definition interface.h:52
uint64_t uint64
Definition interface.h:62
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)
Helper macro.
Definition intrin_cpp.hpp:2827
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2929
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2912
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:2986
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)
Helper macro.
Definition intrin_cpp.hpp:1277
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)
Helper macro.
Definition intrin_cpp.hpp:852
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2895
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)
Helper macro.
Definition intrin_cpp.hpp:2781
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:3051
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)
Helper macro.
Definition intrin_cpp.hpp:2873
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2)
Helper macro.
Definition intrin_cpp.hpp:710
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:2951
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)
Helper macro.
Definition intrin_cpp.hpp:910
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:3021
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)
Helper macro.
Definition intrin_cpp.hpp:1256
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero.
Definition intrin_cpp.hpp:1433
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name,...)
Definition intrin_cpp.hpp:621
v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from last elements of two vectors.
Definition intrin_cpp.hpp:2304
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication.
Definition intrin_cpp.hpp:3193
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements.
Definition intrin_cpp.hpp:2424
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy)
Definition intrin_cpp.hpp:693
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op)
Definition intrin_cpp.hpp:656
CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise OR.
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values.
Definition intrin_cpp.hpp:491
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values.
Definition intrin_cpp.hpp:489
void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (higher half)
Definition intrin_cpp.hpp:2233
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask.
Definition intrin_cpp.hpp:1392
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors.
Definition intrin_cpp.hpp:1554
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values.
Definition intrin_cpp.hpp:507
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory.
Definition intrin_cpp.hpp:2190
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand.
Definition intrin_cpp.hpp:1142
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Sum absolute differences of values.
Definition intrin_cpp.hpp:1374
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements.
Definition intrin_cpp.hpp:2462
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values.
Definition intrin_cpp.hpp:493
v_reg< _Tp, n > v_pack_triplets(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2733
CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise AND.
CV_INLINE v_reg< _Tp, n > & operator^=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (lower half)
Definition intrin_cpp.hpp:2216
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op)
Definition intrin_cpp.hpp:684
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements.
Definition intrin_cpp.hpp:2449
CV_INLINE v_reg< _Tp, n > & operator|=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements.
Definition intrin_cpp.hpp:1077
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index.
Definition intrin_cpp.hpp:1409
CV_INLINE v_reg< _Tp, n > & operator/=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)
Vector reverse order.
Definition intrin_cpp.hpp:2343
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand.
Definition intrin_cpp.hpp:1872
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values.
Definition intrin_cpp.hpp:499
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Absolute difference.
Definition intrin_cpp.hpp:953
v_reg< _Tp, n > v_interleave_pairs(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2703
V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)
Sum packed values.
Definition intrin_cpp.hpp:1335
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma.
Definition intrin_cpp.hpp:1057
void v_store_aligned_nocache(_Tp *ptr, const v_reg< _Tp, n > &a)
Definition intrin_cpp.hpp:2258
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude.
Definition intrin_cpp.hpp:1033
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements.
Definition intrin_cpp.hpp:2475
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values.
Definition intrin_cpp.hpp:497
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op)
Definition intrin_cpp.hpp:639
CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Divide values.
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inverse square root.
Definition intrin_cpp.hpp:1007
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude.
Definition intrin_cpp.hpp:1020
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand.
Definition intrin_cpp.hpp:1185
CV_INLINE v_reg< _Tp, n > & operator&=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert to double high part of vector.
Definition intrin_cpp.hpp:2584
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)
Load 64 bits of data to lower part (high part is undefined).
Definition intrin_cpp.hpp:1699
void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
Combine two vectors from lower and higher parts of two other vectors.
Definition intrin_cpp.hpp:2322
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums.
Definition intrin_cpp.hpp:1353
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand.
Definition intrin_cpp.hpp:1216
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)
Load register contents from memory (aligned)
Definition intrin_cpp.hpp:1649
v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)
Broadcast i-th element of vector.
Definition intrin_cpp.hpp:2413
void v_pack_store(hfloat *ptr, const v_reg< float, n > &v)
Definition intrin_cpp.hpp:3289
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name,...)
Definition intrin_cpp.hpp:631
v_reg< _Tp, n > v_interleave_quads(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2716
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition intrin_cpp.hpp:1451
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory.
Definition intrin_cpp.hpp:1584
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type.
Definition intrin_cpp.hpp:1496
CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)
Bitwise NOT.
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double.
Definition intrin_cpp.hpp:2573
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand.
Definition intrin_cpp.hpp:1961
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type.
Definition intrin_cpp.hpp:1474
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
For 16-bit boolean values.
Definition intrin_cpp.hpp:3111
void v_cleanup()
Definition intrin_cpp.hpp:3297
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add.
Definition intrin_cpp.hpp:1046
CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise XOR.
void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
Interleave and store (2 channels)
Definition intrin_cpp.hpp:2115
void v_lut_deinterleave(const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)
Definition intrin_cpp.hpp:2681
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix.
Definition intrin_cpp.hpp:2761
v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Saturating absolute difference.
Definition intrin_cpp.hpp:994
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values.
Definition intrin_cpp.hpp:505
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type.
Definition intrin_cpp.hpp:1515
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements.
Definition intrin_cpp.hpp:1116
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)
Load register contents from two memory blocks.
Definition intrin_cpp.hpp:1781
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2626
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part.
Definition intrin_cpp.hpp:1233
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_quads(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2640
v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from first elements of two vectors.
Definition intrin_cpp.hpp:2282
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition intrin_cpp.hpp:501
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float.
Definition intrin_cpp.hpp:2534
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero.
Definition intrin_cpp.hpp:1421
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_pairs(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2633
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add.
Definition intrin_cpp.hpp:3223
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract.
Definition intrin_cpp.hpp:2397
CV_INLINE v_reg< _Tp, n > & operator-=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Per-lane check that the value is not NaN.
Definition intrin_cpp.hpp:890
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type.
Definition intrin_cpp.hpp:828
CV_INLINE v_reg< _Tp, n > & operator+=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition intrin_cpp.hpp:2251
CV_INLINE v_reg< _Tp, n > & operator*=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values.
Definition intrin_cpp.hpp:495
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition intrin_cpp.hpp:503
v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Vector extract.
Definition intrin_cpp.hpp:2371
void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
Load and deinterleave (2 channels)
Definition intrin_cpp.hpp:2043
@ simdmax_width
Definition intrin_cpp.hpp:566
@ simd128_width
Definition intrin_cpp.hpp:556
CV_INLINE int cvRound(double value)
Rounds floating-point number to the nearest integer.
Definition fast_math.hpp:200
CV_INLINE int cvCeil(double value)
Rounds floating-point number to the nearest integer not smaller than the original.
Definition fast_math.hpp:258
static _Tp saturate_cast(uchar v)
Template function for accurate conversion from one primitive type to another.
Definition saturate.hpp:81
CV_INLINE int cvFloor(double value)
Rounds floating-point number to the nearest integer not larger than the original.
Definition fast_math.hpp:231
static bool isAligned(const T &data)
Alignment check of passed values.
Definition utility.hpp:517
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition base.hpp:342
#define CV_INLINE
Definition cvdef.h:218
#define CV_DbgAssert(expr)
Definition base.hpp:375
CvRect r
Definition imgproc_c.h:984
CV_EXPORTS OutputArray int double double InputArray mask
Definition imgproc.hpp:2132
StoreMode
Definition intrin.hpp:100
@ STORE_UNALIGNED
Definition intrin.hpp:101
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441
DualQuat< T > operator-(const DualQuat< T > &q, const T a)
Definition dualquaternion.inl.hpp:255
DualQuat< T > operator+(const T a, const DualQuat< T > &q)
Definition dualquaternion.inl.hpp:243
DualQuat< T > operator*(const T a, const DualQuat< T > &q)
Definition dualquaternion.inl.hpp:274
Definition intrin.hpp:110
Definition intrin_cpp.hpp:370
_Tp get0() const
Access first value.
Definition intrin_cpp.hpp:437
v_reg(const v_reg< _Tp, n > &r)
Copy constructor.
Definition intrin_cpp.hpp:422
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)
Constructor.
Definition intrin_cpp.hpp:405
v_reg(const _Tp *ptr)
Constructor.
Definition intrin_cpp.hpp:380
v_reg()
Default constructor.
Definition intrin_cpp.hpp:419
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)
Constructor.
Definition intrin_cpp.hpp:395
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
Constructor.
Definition intrin_cpp.hpp:390
v_reg(_Tp s0, _Tp s1)
Constructor.
Definition intrin_cpp.hpp:385