EstervQrCode 1.1.1
Library for QR code manipulation
intrin_cpp.hpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
17 // Third party copyrights are property of their respective owners.
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // * Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // * Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // * The name of the copyright holders may not be used to endorse or promote products
30 // derived from this software without specific prior written permission.
31 //
32 // This software is provided by the copyright holders and contributors "as is" and
33 // any express or implied warranties, including, but not limited to, the implied
34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
35 // In no event shall the Intel Corporation or contributors be liable for any direct,
36 // indirect, incidental, special, exemplary, or consequential damages
37 // (including, but not limited to, procurement of substitute goods or services;
38 // loss of use, data, or profits; or business interruption) however caused
39 // and on any theory of liability, whether in contract, strict liability,
40 // or tort (including negligence or otherwise) arising in any way out of
41 // the use of this software, even if advised of the possibility of such damage.
42 //
43 //M*/
44 
45 #ifndef OPENCV_HAL_INTRIN_CPP_HPP
46 #define OPENCV_HAL_INTRIN_CPP_HPP
47 
48 #include <limits>
49 #include <cstring>
50 #include <algorithm>
51 #include "opencv2/core/utility.hpp"
52 #include "opencv2/core/saturate.hpp"
53 
55 #define CV_SIMD128_CPP 1
56 #if defined(CV_FORCE_SIMD128_CPP)
57 #define CV_SIMD128 1
58 #define CV_SIMD128_64F 1
59 #endif
60 #if defined(CV_DOXYGEN)
61 #define CV_SIMD128 1
62 #define CV_SIMD128_64F 1
63 #define CV_SIMD256 1
64 #define CV_SIMD256_64F 1
65 #define CV_SIMD512 1
66 #define CV_SIMD512_64F 1
67 #else
68 #define CV_SIMD256 0 // Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic implementation
69 #define CV_SIMD512 0 // to avoid warnings during compilation
70 #endif
72 
73 namespace cv
74 {
75 
76 #ifndef CV_DOXYGEN
77 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
78 #endif
79 
369 template<typename _Tp, int n> struct v_reg
370 {
372  typedef _Tp lane_type;
373  enum { nlanes = n };
374 // !@endcond
375 
380  explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
381 
385  v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
386 
390  v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
391 
395  v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
396  _Tp s4, _Tp s5, _Tp s6, _Tp s7)
397  {
398  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
399  s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
400  }
401 
405  v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
406  _Tp s4, _Tp s5, _Tp s6, _Tp s7,
407  _Tp s8, _Tp s9, _Tp s10, _Tp s11,
408  _Tp s12, _Tp s13, _Tp s14, _Tp s15)
409  {
410  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
411  s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
412  s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
413  s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
414  }
415 
419  v_reg() {}
420 
422  v_reg(const v_reg<_Tp, n> & r)
423  {
424  for( int i = 0; i < n; i++ )
425  s[i] = r.s[i];
426  }
437  _Tp get0() const { return s[0]; }
438 
440  _Tp get(const int i) const { return s[i]; }
441  v_reg<_Tp, n> high() const
442  {
443  v_reg<_Tp, n> c;
444  int i;
445  for( i = 0; i < n/2; i++ )
446  {
447  c.s[i] = s[i+(n/2)];
448  c.s[i+(n/2)] = 0;
449  }
450  return c;
451  }
452 
453  static v_reg<_Tp, n> zero()
454  {
455  v_reg<_Tp, n> c;
456  for( int i = 0; i < n; i++ )
457  c.s[i] = (_Tp)0;
458  return c;
459  }
460 
461  static v_reg<_Tp, n> all(_Tp s)
462  {
463  v_reg<_Tp, n> c;
464  for( int i = 0; i < n; i++ )
465  c.s[i] = s;
466  return c;
467  }
468 
469  template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
470  {
471  size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n);
472  v_reg<_Tp2, n2> c;
473  std::memcpy(&c.s[0], &s[0], bytes);
474  return c;
475  }
476 
477  v_reg& operator=(const v_reg<_Tp, n> & r)
478  {
479  for( int i = 0; i < n; i++ )
480  s[i] = r.s[i];
481  return *this;
482  }
483 
484  _Tp s[n];
486 };
487 
489 typedef v_reg<uchar, 16> v_uint8x16;
491 typedef v_reg<schar, 16> v_int8x16;
493 typedef v_reg<ushort, 8> v_uint16x8;
495 typedef v_reg<short, 8> v_int16x8;
497 typedef v_reg<unsigned, 4> v_uint32x4;
499 typedef v_reg<int, 4> v_int32x4;
501 typedef v_reg<float, 4> v_float32x4;
503 typedef v_reg<double, 2> v_float64x2;
505 typedef v_reg<uint64, 2> v_uint64x2;
507 typedef v_reg<int64, 2> v_int64x2;
508 
509 #if CV_SIMD256
511 typedef v_reg<uchar, 32> v_uint8x32;
513 typedef v_reg<schar, 32> v_int8x32;
515 typedef v_reg<ushort, 16> v_uint16x16;
517 typedef v_reg<short, 16> v_int16x16;
519 typedef v_reg<unsigned, 8> v_uint32x8;
521 typedef v_reg<int, 8> v_int32x8;
523 typedef v_reg<float, 8> v_float32x8;
525 typedef v_reg<double, 4> v_float64x4;
527 typedef v_reg<uint64, 4> v_uint64x4;
529 typedef v_reg<int64, 4> v_int64x4;
530 #endif
531 
532 #if CV_SIMD512
534 typedef v_reg<uchar, 64> v_uint8x64;
536 typedef v_reg<schar, 64> v_int8x64;
538 typedef v_reg<ushort, 32> v_uint16x32;
540 typedef v_reg<short, 32> v_int16x32;
542 typedef v_reg<unsigned, 16> v_uint32x16;
544 typedef v_reg<int, 16> v_int32x16;
546 typedef v_reg<float, 16> v_float32x16;
548 typedef v_reg<double, 8> v_float64x8;
550 typedef v_reg<uint64, 8> v_uint64x8;
552 typedef v_reg<int64, 8> v_int64x8;
553 #endif
554 
555 enum {
556  simd128_width = 16,
557 #if CV_SIMD256
558  simd256_width = 32,
559 #endif
560 #if CV_SIMD512
561  simd512_width = 64,
562  simdmax_width = simd512_width
563 #elif CV_SIMD256
564  simdmax_width = simd256_width
565 #else
566  simdmax_width = simd128_width
567 #endif
568 };
569 
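Illustrative usage (not part of the header): a minimal sketch of the scalar-fallback vector types defined above, used through the public wrapper header opencv2/core/hal/intrin.hpp; the buffer names are hypothetical.

#include <opencv2/core/hal/intrin.hpp>

// Add two 4-float blocks lane by lane (v_float32x4 == v_reg<float, 4> in the scalar fallback).
void add4(const float* a, const float* b, float* dst)
{
    cv::v_float32x4 va = cv::v_load(a);   // load 4 lanes
    cv::v_float32x4 vb = cv::v_load(b);
    cv::v_store(dst, va + vb);            // element-wise add, then store
}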
573 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
574 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
575 
579 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
580 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
581 
585 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
586 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
587 
591 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
592 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
593 
594 
598 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
599 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
600 
604 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
605 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
606 
610 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
611 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
612 
616 template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a);
617 
618 
619 #ifndef CV_DOXYGEN
620 
621 #define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
622 __CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
623 __CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
624 __CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
625 __CV_EXPAND(macro_name(short, __VA_ARGS__)) \
626 __CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
627 __CV_EXPAND(macro_name(int, __VA_ARGS__)) \
628 __CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
629 __CV_EXPAND(macro_name(int64, __VA_ARGS__)) \
630 
631 #define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
632 __CV_EXPAND(macro_name(float, __VA_ARGS__)) \
633 __CV_EXPAND(macro_name(double, __VA_ARGS__)) \
634 
635 #define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
636 CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
637 CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \
638 
639 #define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
640 template<int n> inline \
641 v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
642 { \
643  v_reg<_Tp, n> c; \
644  for( int i = 0; i < n; i++ ) \
645  c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
646  return c; \
647 } \
648 template<int n> inline \
649 v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
650 { \
651  for( int i = 0; i < n; i++ ) \
652  a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
653  return a; \
654 }
655 
656 #define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
657 
658 CV__HAL_INTRIN_IMPL_BIN_OP(+)
659 CV__HAL_INTRIN_IMPL_BIN_OP(-)
660 CV__HAL_INTRIN_IMPL_BIN_OP(*)
661 CV__HAL_INTRIN_IMPL_BIN_OP(/)
662 
663 #define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
664 template<int n> CV_INLINE \
665 v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
666 { \
667  v_reg<_Tp, n> c; \
668  typedef typename V_TypeTraits<_Tp>::int_type itype; \
669  for( int i = 0; i < n; i++ ) \
670  c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
671  V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
672  return c; \
673 } \
674 template<int n> CV_INLINE \
675 v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
676 { \
677  typedef typename V_TypeTraits<_Tp>::int_type itype; \
678  for( int i = 0; i < n; i++ ) \
679  a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
680  V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
681  return a; \
682 }
683 
684 #define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
685 CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
686 CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */
687 
688 
689 CV__HAL_INTRIN_IMPL_BIT_OP(&)
690 CV__HAL_INTRIN_IMPL_BIT_OP(|)
691 CV__HAL_INTRIN_IMPL_BIT_OP(^)
692 
693 #define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
694 template<int n> CV_INLINE \
695 v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
696 { \
697  v_reg<_Tp, n> c; \
698  for( int i = 0; i < n; i++ ) \
699  c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
700  return c; \
701 } \
702 
703 CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~)
704 
705 #endif // !CV_DOXYGEN
706 
707 
710 #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
711 template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
712 { \
713  v_reg<_Tp2, n> c; \
714  for( int i = 0; i < n; i++ ) \
715  c.s[i] = cfunc(a.s[i]); \
716  return c; \
717 }
718 
722 OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
723 
725 OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
726 OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
727 OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
728 OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
730 
734 OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
735  typename V_TypeTraits<_Tp>::abs_type)
736 
739 #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
740 template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
741 { \
742  v_reg<_Tp, n> c; \
743  for( int i = 0; i < n; i++ ) \
744  c.s[i] = cfunc(a.s[i], b.s[i]); \
745  return c; \
746 }
747 
750 #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
751 template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
752 { \
753  _Tp c = a.s[0]; \
754  for( int i = 1; i < n; i++ ) \
755  c = cfunc(c, a.s[i]); \
756  return c; \
757 }
758 
769 OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
770 
771 
781 OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
782 
783 
790 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
791 
792 
799 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
800 
801 static const unsigned char popCountTable[] =
802 {
803  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
804  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
805  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
806  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
807  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
808  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
809  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
810  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
811  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
812  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
813  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
814  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
815  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
816  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
817  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
818  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
819 };
827 template<typename _Tp, int n>
828 inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
829 {
830  v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
831  for (int i = 0; i < n*(int)sizeof(_Tp); i++)
832  b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
833  return b;
834 }
835 
836 
838 template<typename _Tp, int n>
839 inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
840  v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
841 {
842  for( int i = 0; i < n; i++ )
843  {
844  minval.s[i] = std::min(a.s[i], b.s[i]);
845  maxval.s[i] = std::max(a.s[i], b.s[i]);
846  }
847 }
849 
852 #define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
853 template<typename _Tp, int n> \
854 inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
855 { \
856  typedef typename V_TypeTraits<_Tp>::int_type itype; \
857  v_reg<_Tp, n> c; \
858  for( int i = 0; i < n; i++ ) \
859  c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
860  return c; \
861 }
862 
866 OPENCV_HAL_IMPL_CMP_OP(<)
867 
871 OPENCV_HAL_IMPL_CMP_OP(>)
872 
876 OPENCV_HAL_IMPL_CMP_OP(<=)
877 
881 OPENCV_HAL_IMPL_CMP_OP(>=)
882 
884 OPENCV_HAL_IMPL_CMP_OP(==)
885 
887 OPENCV_HAL_IMPL_CMP_OP(!=)
888 
889 template<int n>
890 inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
891 {
892  typedef typename V_TypeTraits<float>::int_type itype;
893  v_reg<float, n> c;
894  for (int i = 0; i < n; i++)
895  c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
896  return c;
897 }
898 template<int n>
899 inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
900 {
901  typedef typename V_TypeTraits<double>::int_type itype;
902  v_reg<double, n> c;
903  for (int i = 0; i < n; i++)
904  c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
905  return c;
906 }
907 
910 #define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
911 template<typename _Tp, int n> \
912 inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
913 { \
914  typedef _Tp2 rtype; \
915  v_reg<rtype, n> c; \
916  for( int i = 0; i < n; i++ ) \
917  c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
918  return c; \
919 }
920 
924 OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
925 
926 
929 OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
930 
931 
934 OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
935 
936 template<typename T> inline T _absdiff(T a, T b)
938 {
939  return a > b ? a - b : b - a;
940 }
942 
952 template<typename _Tp, int n>
953 inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
954 {
955  typedef typename V_TypeTraits<_Tp>::abs_type rtype;
956  v_reg<rtype, n> c;
957  const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
958  for( int i = 0; i < n; i++ )
959  {
960  rtype ua = a.s[i] ^ mask;
961  rtype ub = b.s[i] ^ mask;
962  c.s[i] = _absdiff(ua, ub);
963  }
964  return c;
965 }
966 
970 template<int n> inline v_reg<float, n> v_absdiff(const v_reg<float, n>& a, const v_reg<float, n>& b)
971 {
972  v_reg<float, n> c;
973  for( int i = 0; i < c.nlanes; i++ )
974  c.s[i] = _absdiff(a.s[i], b.s[i]);
975  return c;
976 }
977 
981 template<int n> inline v_reg<double, n> v_absdiff(const v_reg<double, n>& a, const v_reg<double, n>& b)
982 {
983  v_reg<double, n> c;
984  for( int i = 0; i < c.nlanes; i++ )
985  c.s[i] = _absdiff(a.s[i], b.s[i]);
986  return c;
987 }
988 
993 template<typename _Tp, int n>
994 inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
995 {
996  v_reg<_Tp, n> c;
997  for( int i = 0; i < n; i++)
998  c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
999  return c;
1000 }
1001 
1006 template<typename _Tp, int n>
1007 inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
1008 {
1009  v_reg<_Tp, n> c;
1010  for( int i = 0; i < n; i++ )
1011  c.s[i] = 1.f/std::sqrt(a.s[i]);
1012  return c;
1013 }
1014 
1019 template<typename _Tp, int n>
1020 inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1021 {
1022  v_reg<_Tp, n> c;
1023  for( int i = 0; i < n; i++ )
1024  c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1025  return c;
1026 }
1027 
1032 template<typename _Tp, int n>
1033 inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1034 {
1035  v_reg<_Tp, n> c;
1036  for( int i = 0; i < n; i++ )
1037  c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1038  return c;
1039 }
1040 
1045 template<typename _Tp, int n>
1046 inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1047  const v_reg<_Tp, n>& c)
1048 {
1049  v_reg<_Tp, n> d;
1050  for( int i = 0; i < n; i++ )
1051  d.s[i] = a.s[i]*b.s[i] + c.s[i];
1052  return d;
1053 }
1054 
1056 template<typename _Tp, int n>
1057 inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1058  const v_reg<_Tp, n>& c)
1059 {
1060  return v_fma(a, b, c);
1061 }
1062 
1076 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1077  v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1078 {
1079  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1080  v_reg<w_type, n/2> c;
1081  for( int i = 0; i < (n/2); i++ )
1082  c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1083  return c;
1084 }
1085 
1097 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1098  v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1099  const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
1100 {
1101  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1102  v_reg<w_type, n/2> s;
1103  for( int i = 0; i < (n/2); i++ )
1104  s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1105  return s;
1106 }
1107 
1115 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1116  v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1117 { return v_dotprod(a, b); }
1118 
1123 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1124  v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1125  const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
1126 { return v_dotprod(a, b, c); }
1127 
1141 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1142  v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1143 {
1144  typedef typename V_TypeTraits<_Tp>::q_type q_type;
1145  v_reg<q_type, n/4> s;
1146  for( int i = 0; i < (n/4); i++ )
1147  s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1148  (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1149  return s;
1150 }
1151 
1163 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1164  v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1165  const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
1166 {
1167  typedef typename V_TypeTraits<_Tp>::q_type q_type;
1168  v_reg<q_type, n/4> s;
1169  for( int i = 0; i < (n/4); i++ )
1170  s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1171  (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1172  return s;
1173 }
1174 
1184 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1185  v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1186 { return v_dotprod_expand(a, b); }
1187 
1192 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1193  v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1194  const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
1195 { return v_dotprod_expand(a, b, c); }
1196 
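A minimal sketch (not part of the header) of the dot-product helpers above, assuming the 128-bit scalar-fallback types v_int16x8 and v_int32x4:

cv::v_int16x8 a = cv::v_setall_s16(3);
cv::v_int16x8 b = cv::v_setall_s16(4);
cv::v_int32x4 p = cv::v_dotprod(a, b);      // each 32-bit lane: 3*4 + 3*4 = 24
cv::v_int32x4 q = cv::v_dotprod(a, b, p);   // same products plus the lanes of p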
1216 template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1217  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
1218  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
1219 {
1220  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1221  for( int i = 0; i < (n/2); i++ )
1222  {
1223  c.s[i] = (w_type)a.s[i]*b.s[i];
1224  d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1225  }
1226 }
1227 
1233 template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1234 {
1235  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1236  v_reg<_Tp, n> c;
1237  for (int i = 0; i < n; i++)
1238  c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
1239  return c;
1240 }
1241 
1243 template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
1244  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
1245 {
1246  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1247  for( int i = 0; i < (n/2); i++ )
1248  {
1249  c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1250  }
1251 }
1253 
1256 #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
1257 template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
1258 { \
1259  v_reg<_Tp, n> c; \
1260  for( int i = 0; i < n; i++ ) \
1261  c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1262  return c; \
1263 }
1264 
1268 OPENCV_HAL_IMPL_SHIFT_OP(<< )
1269 
1270 
1273 OPENCV_HAL_IMPL_SHIFT_OP(>> )
1274 
1277 #define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
1278 template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
1279 { \
1280  v_reg<_Tp, n> b; \
1281  for (int i = 0; i < n; i++) \
1282  { \
1283  int sIndex = i opA imm; \
1284  if (0 <= sIndex && sIndex < n) \
1285  { \
1286  b.s[i] = a.s[sIndex]; \
1287  } \
1288  else \
1289  { \
1290  b.s[i] = 0; \
1291  } \
1292  } \
1293  return b; \
1294 } \
1295 template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
1296 { \
1297  v_reg<_Tp, n> c; \
1298  for (int i = 0; i < n; i++) \
1299  { \
1300  int aIndex = i opA imm; \
1301  int bIndex = i opA imm opB n; \
1302  if (0 <= bIndex && bIndex < n) \
1303  { \
1304  c.s[i] = b.s[bIndex]; \
1305  } \
1306  else if (0 <= aIndex && aIndex < n) \
1307  { \
1308  c.s[i] = a.s[aIndex]; \
1309  } \
1310  else \
1311  { \
1312  c.s[i] = 0; \
1313  } \
1314  } \
1315  return c; \
1316 }
1317 
1321 OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +)
1322 
1323 
1326 OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
1327 
1335 template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
1336 {
1337  typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
1338  for( int i = 1; i < n; i++ )
1339  c += a.s[i];
1340  return c;
1341 }
1342 
1353 template<int n> inline v_reg<float, n> v_reduce_sum4(const v_reg<float, n>& a, const v_reg<float, n>& b,
1354  const v_reg<float, n>& c, const v_reg<float, n>& d)
1355 {
1356  v_reg<float, n> r;
1357  for(int i = 0; i < (n/4); i++)
1358  {
1359  r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1360  r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1361  r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1362  r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1363  }
1364  return r;
1365 }
1366 
1374 template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1375 {
1376  typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]);
1377  for (int i = 1; i < n; i++)
1378  c += _absdiff(a.s[i], b.s[i]);
1379  return c;
1380 }
1381 
1392 template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
1393 {
1394  int mask = 0;
1395  for( int i = 0; i < n; i++ )
1396  mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
1397  return mask;
1398 }
1399 
1409 template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
1410 {
1411  for (int i = 0; i < n; i++)
1412  if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0)
1413  return i;
1414  return 0;
1415 }
1416 
1421 template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
1422 {
1423  for( int i = 0; i < n; i++ )
1424  if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
1425  return false;
1426  return true;
1427 }
1428 
1433 template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
1434 {
1435  for( int i = 0; i < n; i++ )
1436  if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
1437  return true;
1438  return false;
1439 }
1440 
1451 template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
1452  const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1453 {
1454  typedef V_TypeTraits<_Tp> Traits;
1455  typedef typename Traits::int_type int_type;
1456  v_reg<_Tp, n> c;
1457  for( int i = 0; i < n; i++ )
1458  {
1459  int_type m = Traits::reinterpret_int(mask.s[i]);
1460  CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
1461  c.s[i] = m ? a.s[i] : b.s[i];
1462  }
1463  return c;
1464 }
1465 
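A short sketch (not part of the header) of masking with the comparison operators and v_select, assuming v_float32x4 and hypothetical src/dst buffers of 4 floats:

// Replace negative lanes with zero (a 4-lane "ReLU").
void relu4(const float* src, float* dst)
{
    cv::v_float32x4 x = cv::v_load(src);
    cv::v_float32x4 mask = x < cv::v_setzero_f32();   // all-ones lanes where x < 0
    cv::v_store(dst, cv::v_select(mask, cv::v_setzero_f32(), x));
}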
1474 template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
1475  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
1476  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
1477 {
1478  for( int i = 0; i < (n/2); i++ )
1479  {
1480  b0.s[i] = a.s[i];
1481  b1.s[i] = a.s[i+(n/2)];
1482  }
1483 }
1484 
1494 template<typename _Tp, int n>
1495 inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1496 v_expand_low(const v_reg<_Tp, n>& a)
1497 {
1498  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
1499  for( int i = 0; i < (n/2); i++ )
1500  b.s[i] = a.s[i];
1501  return b;
1502 }
1503 
1513 template<typename _Tp, int n>
1514 inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1515 v_expand_high(const v_reg<_Tp, n>& a)
1516 {
1517  v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
1518  for( int i = 0; i < (n/2); i++ )
1519  b.s[i] = a.s[i+(n/2)];
1520  return b;
1521 }
1522 
1524 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1525  v_reinterpret_as_int(const v_reg<_Tp, n>& a)
1526 {
1527  v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1528  for( int i = 0; i < n; i++ )
1529  c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1530  return c;
1531 }
1532 
1533 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1534  v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
1535 {
1536  v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1537  for( int i = 0; i < n; i++ )
1538  c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1539  return c;
1540 }
1542 
1554 template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
1555  v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
1556 {
1557  int i;
1558  for( i = 0; i < n/2; i++ )
1559  {
1560  b0.s[i*2] = a0.s[i];
1561  b0.s[i*2+1] = a1.s[i];
1562  }
1563  for( ; i < n; i++ )
1564  {
1565  b1.s[i*2-n] = a0.s[i];
1566  b1.s[i*2-n+1] = a1.s[i];
1567  }
1568 }
1569 
1583 template<typename _Tp>
1584 inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp* ptr)
1585 {
1586 #if CV_STRONG_ALIGNMENT
1587  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1588 #endif
1589  return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1590 }
1591 
1592 #if CV_SIMD256
1607 template<typename _Tp>
1608 inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp* ptr)
1609 {
1610 #if CV_STRONG_ALIGNMENT
1611  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1612 #endif
1613  return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr);
1614 }
1615 #endif
1616 
1617 #if CV_SIMD512
1632 template<typename _Tp>
1633 inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp* ptr)
1634 {
1635 #if CV_STRONG_ALIGNMENT
1636  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1637 #endif
1638  return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr);
1639 }
1640 #endif
1641 
1648 template<typename _Tp>
1649 inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp* ptr)
1650 {
1651  CV_Assert(isAligned<sizeof(v_reg<_Tp, simd128_width / sizeof(_Tp)>)>(ptr));
1652  return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1653 }
1654 
1655 #if CV_SIMD256
1663 template<typename _Tp>
1664 inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_aligned(const _Tp* ptr)
1665 {
1666  CV_Assert(isAligned<sizeof(v_reg<_Tp, simd256_width / sizeof(_Tp)>)>(ptr));
1667  return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr);
1668 }
1669 #endif
1670 
1671 #if CV_SIMD512
1679 template<typename _Tp>
1680 inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_aligned(const _Tp* ptr)
1681 {
1682  CV_Assert(isAligned<sizeof(v_reg<_Tp, simd512_width / sizeof(_Tp)>)>(ptr));
1683  return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr);
1684 }
1685 #endif
1686 
1698 template<typename _Tp>
1699 inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp* ptr)
1700 {
1701 #if CV_STRONG_ALIGNMENT
1702  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1703 #endif
1704  v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1705  for( int i = 0; i < c.nlanes/2; i++ )
1706  {
1707  c.s[i] = ptr[i];
1708  }
1709  return c;
1710 }
1711 
1712 #if CV_SIMD256
1725 template<typename _Tp>
1726 inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp* ptr)
1727 {
1728 #if CV_STRONG_ALIGNMENT
1729  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1730 #endif
1731  v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
1732  for (int i = 0; i < c.nlanes / 2; i++)
1733  {
1734  c.s[i] = ptr[i];
1735  }
1736  return c;
1737 }
1738 #endif
1739 
1740 #if CV_SIMD512
1753 template<typename _Tp>
1754 inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp* ptr)
1755 {
1756 #if CV_STRONG_ALIGNMENT
1757  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1758 #endif
1759  v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
1760  for (int i = 0; i < c.nlanes / 2; i++)
1761  {
1762  c.s[i] = ptr[i];
1763  }
1764  return c;
1765 }
1766 #endif
1767 
1780 template<typename _Tp>
1781 inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
1782 {
1783 #if CV_STRONG_ALIGNMENT
1784  CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1785  CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1786 #endif
1787  v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1788  for( int i = 0; i < c.nlanes/2; i++ )
1789  {
1790  c.s[i] = loptr[i];
1791  c.s[i+c.nlanes/2] = hiptr[i];
1792  }
1793  return c;
1794 }
1795 
1796 #if CV_SIMD256
1810 template<typename _Tp>
1811 inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_halves(const _Tp* loptr, const _Tp* hiptr)
1812 {
1813 #if CV_STRONG_ALIGNMENT
1814  CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1815  CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1816 #endif
1817  v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
1818  for (int i = 0; i < c.nlanes / 2; i++)
1819  {
1820  c.s[i] = loptr[i];
1821  c.s[i + c.nlanes / 2] = hiptr[i];
1822  }
1823  return c;
1824 }
1825 #endif
1826 
1827 #if CV_SIMD512
1841 template<typename _Tp>
1842 inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_halves(const _Tp* loptr, const _Tp* hiptr)
1843 {
1844 #if CV_STRONG_ALIGNMENT
1845  CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1846  CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1847 #endif
1848  v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
1849  for (int i = 0; i < c.nlanes / 2; i++)
1850  {
1851  c.s[i] = loptr[i];
1852  c.s[i + c.nlanes / 2] = hiptr[i];
1853  }
1854  return c;
1855 }
1856 #endif
1857 
1870 template<typename _Tp>
1871 inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
1872 v_load_expand(const _Tp* ptr)
1873 {
1874 #if CV_STRONG_ALIGNMENT
1875  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1876 #endif
1877  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1878  v_reg<w_type, simd128_width / sizeof(w_type)> c;
1879  for( int i = 0; i < c.nlanes; i++ )
1880  {
1881  c.s[i] = ptr[i];
1882  }
1883  return c;
1884 }
1885 
1886 #if CV_SIMD256
1900 template<typename _Tp>
1901 inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
1902 v256_load_expand(const _Tp* ptr)
1903 {
1904 #if CV_STRONG_ALIGNMENT
1905  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1906 #endif
1907  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1908  v_reg<w_type, simd256_width / sizeof(w_type)> c;
1909  for (int i = 0; i < c.nlanes; i++)
1910  {
1911  c.s[i] = ptr[i];
1912  }
1913  return c;
1914 }
1915 #endif
1916 
1917 #if CV_SIMD512
1931 template<typename _Tp>
1932 inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
1933 v512_load_expand(const _Tp* ptr)
1934 {
1935 #if CV_STRONG_ALIGNMENT
1936  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1937 #endif
1938  typedef typename V_TypeTraits<_Tp>::w_type w_type;
1939  v_reg<w_type, simd512_width / sizeof(w_type)> c;
1940  for (int i = 0; i < c.nlanes; i++)
1941  {
1942  c.s[i] = ptr[i];
1943  }
1944  return c;
1945 }
1946 #endif
1947 
1959 template<typename _Tp>
1960 inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
1961 v_load_expand_q(const _Tp* ptr)
1962 {
1963 #if CV_STRONG_ALIGNMENT
1964  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1965 #endif
1966  typedef typename V_TypeTraits<_Tp>::q_type q_type;
1967  v_reg<q_type, simd128_width / sizeof(q_type)> c;
1968  for( int i = 0; i < c.nlanes; i++ )
1969  {
1970  c.s[i] = ptr[i];
1971  }
1972  return c;
1973 }
1974 
1975 #if CV_SIMD256
1988 template<typename _Tp>
1989 inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
1990 v256_load_expand_q(const _Tp* ptr)
1991 {
1992 #if CV_STRONG_ALIGNMENT
1993  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1994 #endif
1995  typedef typename V_TypeTraits<_Tp>::q_type q_type;
1996  v_reg<q_type, simd256_width / sizeof(q_type)> c;
1997  for (int i = 0; i < c.nlanes; i++)
1998  {
1999  c.s[i] = ptr[i];
2000  }
2001  return c;
2002 }
2003 #endif
2004 
2005 #if CV_SIMD512
2018 template<typename _Tp>
2019 inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
2020 v512_load_expand_q(const _Tp* ptr)
2021 {
2022 #if CV_STRONG_ALIGNMENT
2023  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2024 #endif
2025  typedef typename V_TypeTraits<_Tp>::q_type q_type;
2026  v_reg<q_type, simd512_width / sizeof(q_type)> c;
2027  for (int i = 0; i < c.nlanes; i++)
2028  {
2029  c.s[i] = ptr[i];
2030  }
2031  return c;
2032 }
2033 #endif
2034 
2043 template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2044  v_reg<_Tp, n>& b)
2045 {
2046 #if CV_STRONG_ALIGNMENT
2047  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2048 #endif
2049  int i, i2;
2050  for( i = i2 = 0; i < n; i++, i2 += 2 )
2051  {
2052  a.s[i] = ptr[i2];
2053  b.s[i] = ptr[i2+1];
2054  }
2055 }
2056 
2065 template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2066  v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
2067 {
2068 #if CV_STRONG_ALIGNMENT
2069  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2070 #endif
2071  int i, i3;
2072  for( i = i3 = 0; i < n; i++, i3 += 3 )
2073  {
2074  a.s[i] = ptr[i3];
2075  b.s[i] = ptr[i3+1];
2076  c.s[i] = ptr[i3+2];
2077  }
2078 }
2079 
2088 template<typename _Tp, int n>
2089 inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2090  v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
2091  v_reg<_Tp, n>& d)
2092 {
2093 #if CV_STRONG_ALIGNMENT
2094  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2095 #endif
2096  int i, i4;
2097  for( i = i4 = 0; i < n; i++, i4 += 4 )
2098  {
2099  a.s[i] = ptr[i4];
2100  b.s[i] = ptr[i4+1];
2101  c.s[i] = ptr[i4+2];
2102  d.s[i] = ptr[i4+3];
2103  }
2104 }
2105 
2114 template<typename _Tp, int n>
2115 inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2116  const v_reg<_Tp, n>& b,
2117  hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
2118 {
2119 #if CV_STRONG_ALIGNMENT
2120  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2121 #endif
2122  int i, i2;
2123  for( i = i2 = 0; i < n; i++, i2 += 2 )
2124  {
2125  ptr[i2] = a.s[i];
2126  ptr[i2+1] = b.s[i];
2127  }
2128 }
2129 
2138 template<typename _Tp, int n>
2139 inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2140  const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
2141  hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
2142 {
2143 #if CV_STRONG_ALIGNMENT
2144  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2145 #endif
2146  int i, i3;
2147  for( i = i3 = 0; i < n; i++, i3 += 3 )
2148  {
2149  ptr[i3] = a.s[i];
2150  ptr[i3+1] = b.s[i];
2151  ptr[i3+2] = c.s[i];
2152  }
2153 }
2154 
2163 template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2164  const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
2165  const v_reg<_Tp, n>& d,
2166  hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
2167 {
2168 #if CV_STRONG_ALIGNMENT
2169  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2170 #endif
2171  int i, i4;
2172  for( i = i4 = 0; i < n; i++, i4 += 4 )
2173  {
2174  ptr[i4] = a.s[i];
2175  ptr[i4+1] = b.s[i];
2176  ptr[i4+2] = c.s[i];
2177  ptr[i4+3] = d.s[i];
2178  }
2179 }
2180 
2189 template<typename _Tp, int n>
2190 inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
2191 {
2192 #if CV_STRONG_ALIGNMENT
2193  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2194 #endif
2195  for( int i = 0; i < n; i++ )
2196  ptr[i] = a.s[i];
2197 }
2198 
2199 template<typename _Tp, int n>
2200 inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
2201 {
2202 #if CV_STRONG_ALIGNMENT
2203  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2204 #endif
2205  v_store(ptr, a);
2206 }
2207 
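A usage sketch (not part of the header) combining the load and store helpers above in the usual full-vector loop plus scalar tail; src, dst, len and k are hypothetical caller-supplied values:

void scale(const float* src, float* dst, int len, float k)
{
    const cv::v_float32x4 vk = cv::v_setall_f32(k);
    int i = 0;
    for (; i <= len - cv::v_float32x4::nlanes; i += cv::v_float32x4::nlanes)
        cv::v_store(dst + i, cv::v_load(src + i) * vk);   // whole vectors
    for (; i < len; i++)                                  // leftover lanes
        dst[i] = src[i] * k;
}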
2215 template<typename _Tp, int n>
2216 inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
2217 {
2218 #if CV_STRONG_ALIGNMENT
2219  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2220 #endif
2221  for( int i = 0; i < (n/2); i++ )
2222  ptr[i] = a.s[i];
2223 }
2224 
2232 template<typename _Tp, int n>
2233 inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
2234 {
2235 #if CV_STRONG_ALIGNMENT
2236  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2237 #endif
2238  for( int i = 0; i < (n/2); i++ )
2239  ptr[i] = a.s[i+(n/2)];
2240 }
2241 
2250 template<typename _Tp, int n>
2251 inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
2252 {
2253  CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2254  v_store(ptr, a);
2255 }
2256 
2257 template<typename _Tp, int n>
2258 inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
2259 {
2260  CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2261  v_store(ptr, a);
2262 }
2263 
2264 template<typename _Tp, int n>
2265 inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
2266 {
2267  CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2268  v_store(ptr, a);
2269 }
2270 
2281 template<typename _Tp, int n>
2282 inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
2283 {
2284  v_reg<_Tp, n> c;
2285  for( int i = 0; i < (n/2); i++ )
2286  {
2287  c.s[i] = a.s[i];
2288  c.s[i+(n/2)] = b.s[i];
2289  }
2290  return c;
2291 }
2292 
2303 template<typename _Tp, int n>
2304 inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
2305 {
2306  v_reg<_Tp, n> c;
2307  for( int i = 0; i < (n/2); i++ )
2308  {
2309  c.s[i] = a.s[i+(n/2)];
2310  c.s[i+(n/2)] = b.s[i+(n/2)];
2311  }
2312  return c;
2313 }
2314 
2321 template<typename _Tp, int n>
2322 inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
2323  v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
2324 {
2325  for( int i = 0; i < (n/2); i++ )
2326  {
2327  low.s[i] = a.s[i];
2328  low.s[i+(n/2)] = b.s[i];
2329  high.s[i] = a.s[i+(n/2)];
2330  high.s[i+(n/2)] = b.s[i+(n/2)];
2331  }
2332 }
2333 
2342 template<typename _Tp, int n>
2343 inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a)
2344 {
2345  v_reg<_Tp, n> c;
2346  for( int i = 0; i < n; i++ )
2347  c.s[i] = a.s[n-i-1];
2348  return c;
2349 }
2350 
2370 template<int s, typename _Tp, int n>
2371 inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
2372 {
2373  v_reg<_Tp, n> r;
2374  const int shift = n - s;
2375  int i = 0;
2376  for (; i < shift; ++i)
2377  r.s[i] = a.s[i+s];
2378  for (; i < n; ++i)
2379  r.s[i] = b.s[i-shift];
2380  return r;
2381 }
2382 
2396 template<int s, typename _Tp, int n>
2397 inline _Tp v_extract_n(const v_reg<_Tp, n>& v)
2398 {
2399  CV_DbgAssert(s >= 0 && s < n);
2400  return v.s[s];
2401 }
2402 
2412 template<int i, typename _Tp, int n>
2413 inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a)
2414 {
2415  CV_DbgAssert(i >= 0 && i < n);
2416  return v_reg<_Tp, n>::all(a.s[i]);
2417 }
2418 
2424 template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
2425 {
2426  v_reg<int, n> c;
2427  for( int i = 0; i < n; i++ )
2428  c.s[i] = cvRound(a.s[i]);
2429  return c;
2430 }
2431 
2433 template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
2434 {
2435  v_reg<int, n*2> c;
2436  for( int i = 0; i < n; i++ )
2437  {
2438  c.s[i] = cvRound(a.s[i]);
2439  c.s[i+n] = cvRound(b.s[i]);
2440  }
2441  return c;
2442 }
2443 
2449 template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
2450 {
2451  v_reg<int, n> c;
2452  for( int i = 0; i < n; i++ )
2453  c.s[i] = cvFloor(a.s[i]);
2454  return c;
2455 }
2456 
2462 template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
2463 {
2464  v_reg<int, n> c;
2465  for( int i = 0; i < n; i++ )
2466  c.s[i] = cvCeil(a.s[i]);
2467  return c;
2468 }
2469 
2475 template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
2476 {
2477  v_reg<int, n> c;
2478  for( int i = 0; i < n; i++ )
2479  c.s[i] = (int)(a.s[i]);
2480  return c;
2481 }
2482 
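A small sketch (not part of the header) of the float-to-int conversions above, assuming the scalar-fallback v_float32x4/v_int32x4 types:

cv::v_float32x4 f = cv::v_setall_f32(1.7f);
cv::v_int32x4 r  = cv::v_round(f);   // nearest integer (cvRound)
cv::v_int32x4 fl = cv::v_floor(f);   // 1
cv::v_int32x4 ce = cv::v_ceil(f);    // 2
cv::v_int32x4 tr = cv::v_trunc(f);   // 1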
2484 template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
2485 {
2486  v_reg<int, n*2> c;
2487  for( int i = 0; i < n; i++ )
2488  {
2489  c.s[i] = cvRound(a.s[i]);
2490  c.s[i+n] = 0;
2491  }
2492  return c;
2493 }
2494 
2496 template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
2497 {
2498  v_reg<int, n*2> c;
2499  for( int i = 0; i < n; i++ )
2500  {
2501  c.s[i] = cvFloor(a.s[i]);
2502  c.s[i+n] = 0;
2503  }
2504  return c;
2505 }
2506 
2508 template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
2509 {
2510  v_reg<int, n*2> c;
2511  for( int i = 0; i < n; i++ )
2512  {
2513  c.s[i] = cvCeil(a.s[i]);
2514  c.s[i+n] = 0;
2515  }
2516  return c;
2517 }
2518 
2520 template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
2521 {
2522  v_reg<int, n*2> c;
2523  for( int i = 0; i < n; i++ )
2524  {
2525  c.s[i] = (int)(a.s[i]);
2526  c.s[i+n] = 0;
2527  }
2528  return c;
2529 }
2530 
2534 template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
2535 {
2536  v_reg<float, n> c;
2537  for( int i = 0; i < n; i++ )
2538  c.s[i] = (float)a.s[i];
2539  return c;
2540 }
2541 
2545 template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a)
2546 {
2547  v_reg<float, n*2> c;
2548  for( int i = 0; i < n; i++ )
2549  {
2550  c.s[i] = (float)a.s[i];
2551  c.s[i+n] = 0;
2552  }
2553  return c;
2554 }
2555 
2559 template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
2560 {
2561  v_reg<float, n*2> c;
2562  for( int i = 0; i < n; i++ )
2563  {
2564  c.s[i] = (float)a.s[i];
2565  c.s[i+n] = (float)b.s[i];
2566  }
2567  return c;
2568 }
2569 
2573 template<int n> CV_INLINE v_reg<double, n/2> v_cvt_f64(const v_reg<int, n>& a)
2574 {
2575  v_reg<double, (n/2)> c;
2576  for( int i = 0; i < (n/2); i++ )
2577  c.s[i] = (double)a.s[i];
2578  return c;
2579 }
2580 
2584 template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<int, n>& a)
2585 {
2586  v_reg<double, (n/2)> c;
2587  for( int i = 0; i < (n/2); i++ )
2588  c.s[i] = (double)a.s[i + (n/2)];
2589  return c;
2590 }
2591 
2595 template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64(const v_reg<float, n>& a)
2596 {
2597  v_reg<double, (n/2)> c;
2598  for( int i = 0; i < (n/2); i++ )
2599  c.s[i] = (double)a.s[i];
2600  return c;
2601 }
2602 
2606 template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<float, n>& a)
2607 {
2608  v_reg<double, (n/2)> c;
2609  for( int i = 0; i < (n/2); i++ )
2610  c.s[i] = (double)a.s[i + (n/2)];
2611  return c;
2612 }
2613 
2617 template<int n> CV_INLINE v_reg<double, n> v_cvt_f64(const v_reg<int64, n>& a)
2618 {
2619  v_reg<double, n> c;
2620  for( int i = 0; i < n; i++ )
2621  c.s[i] = (double)a.s[i];
2622  return c;
2623 }
2624 
2625 
2626 template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp* tab, const int* idx)
2627 {
2628  v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2629  for (int i = 0; i < c.nlanes; i++)
2630  c.s[i] = tab[idx[i]];
2631  return c;
2632 }
2633 template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp* tab, const int* idx)
2634 {
2635  v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2636  for (int i = 0; i < c.nlanes; i++)
2637  c.s[i] = tab[idx[i / 2] + i % 2];
2638  return c;
2639 }
2640 template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp* tab, const int* idx)
2641 {
2642  v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2643  for (int i = 0; i < c.nlanes; i++)
2644  c.s[i] = tab[idx[i / 4] + i % 4];
2645  return c;
2646 }
2647 
2648 template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)
2649 {
2650  v_reg<int, n> c;
2651  for( int i = 0; i < n; i++ )
2652  c.s[i] = tab[idx.s[i]];
2653  return c;
2654 }
2655 
2656 template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx)
2657 {
2658  v_reg<unsigned, n> c;
2659  for (int i = 0; i < n; i++)
2660  c.s[i] = tab[idx.s[i]];
2661  return c;
2662 }
2663 
2664 template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)
2665 {
2666  v_reg<float, n> c;
2667  for( int i = 0; i < n; i++ )
2668  c.s[i] = tab[idx.s[i]];
2669  return c;
2670 }
2671 
2672 template<int n> inline v_reg<double, n/2> v_lut(const double* tab, const v_reg<int, n>& idx)
2673 {
2674  v_reg<double, n/2> c;
2675  for( int i = 0; i < n/2; i++ )
2676  c.s[i] = tab[idx.s[i]];
2677  return c;
2678 }
2679 
2680 
2681 template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
2682  v_reg<float, n>& x, v_reg<float, n>& y)
2683 {
2684  for( int i = 0; i < n; i++ )
2685  {
2686  int j = idx.s[i];
2687  x.s[i] = tab[j];
2688  y.s[i] = tab[j+1];
2689  }
2690 }
2691 
2692 template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
2693  v_reg<double, n>& x, v_reg<double, n>& y)
2694 {
2695  for( int i = 0; i < n; i++ )
2696  {
2697  int j = idx.s[i];
2698  x.s[i] = tab[j];
2699  y.s[i] = tab[j+1];
2700  }
2701 }
2702 
2703 template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
2704 {
2705  v_reg<_Tp, n> c;
2706  for (int i = 0; i < n/4; i++)
2707  {
2708  c.s[4*i ] = vec.s[4*i ];
2709  c.s[4*i+1] = vec.s[4*i+2];
2710  c.s[4*i+2] = vec.s[4*i+1];
2711  c.s[4*i+3] = vec.s[4*i+3];
2712  }
2713  return c;
2714 }
2715 
2716 template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
2717 {
2718  v_reg<_Tp, n> c;
2719  for (int i = 0; i < n/8; i++)
2720  {
2721  c.s[8*i ] = vec.s[8*i ];
2722  c.s[8*i+1] = vec.s[8*i+4];
2723  c.s[8*i+2] = vec.s[8*i+1];
2724  c.s[8*i+3] = vec.s[8*i+5];
2725  c.s[8*i+4] = vec.s[8*i+2];
2726  c.s[8*i+5] = vec.s[8*i+6];
2727  c.s[8*i+6] = vec.s[8*i+3];
2728  c.s[8*i+7] = vec.s[8*i+7];
2729  }
2730  return c;
2731 }
2732 
2733 template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
2734 {
2735  v_reg<_Tp, n> c;
2736  for (int i = 0; i < n/4; i++)
2737  {
2738  c.s[3*i ] = vec.s[4*i ];
2739  c.s[3*i+1] = vec.s[4*i+1];
2740  c.s[3*i+2] = vec.s[4*i+2];
2741  }
2742  return c;
2743 }
2744 
2760 template<typename _Tp, int n>
2761 inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
2762  const v_reg<_Tp, n>& a2, const v_reg<_Tp, n>& a3,
2763  v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1,
2764  v_reg<_Tp, n>& b2, v_reg<_Tp, n>& b3 )
2765 {
2766  for (int i = 0; i < n / 4; i++)
2767  {
2768  b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2769  b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2770  b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2771  b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2772  b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2773  b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2774  b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2775  b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
2776  }
2777 }
2778 
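A sketch (not part of the header) of v_transpose4x4 on a row-major 4x4 float block; m and mt are hypothetical buffers of 16 floats:

void transpose4x4(const float* m, float* mt)
{
    cv::v_float32x4 r0 = cv::v_load(m),     r1 = cv::v_load(m + 4),
                    r2 = cv::v_load(m + 8), r3 = cv::v_load(m + 12);
    cv::v_float32x4 c0, c1, c2, c3;
    cv::v_transpose4x4(r0, r1, r2, r3, c0, c1, c2, c3);   // rows -> columns
    cv::v_store(mt, c0);     cv::v_store(mt + 4, c1);
    cv::v_store(mt + 8, c2); cv::v_store(mt + 12, c3);
}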
2781 #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2782 inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
2783 
2787 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, v, u8)
2788 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, v, s8)
2789 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, v, u16)
2790 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, v, s16)
2791 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, v, u32)
2792 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, v, s32)
2793 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, v, f32)
2794 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, v, f64)
2795 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, v, u64)
2796 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, v, s64)
2797 
2798 #if CV_SIMD256
2799 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8)
2800 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8)
2801 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16)
2802 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16)
2803 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32)
2804 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32)
2805 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32)
2806 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64)
2807 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64)
2808 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64)
2809 #endif
2810 
2811 #if CV_SIMD512
2812 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8)
2813 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8)
2814 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16)
2815 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16)
2816 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32)
2817 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32)
2818 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32)
2819 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64)
2820 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64)
2821 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64)
2822 #endif
2824 
2827 #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2828 inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
2829 
2833 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, v, u8)
2834 OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, v, s8)
2835 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, v, u16)
2836 OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, v, s16)
2837 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, v, u32)
2838 OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, v, s32)
2839 OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, v, f32)
2840 OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, v, f64)
2841 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, v, u64)
2842 OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, v, s64)
2843 
2844 #if CV_SIMD256
2845 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8)
2846 OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8)
2847 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16)
2848 OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16, short, v256, s16)
2849 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8, unsigned, v256, u32)
2850 OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8, int, v256, s32)
2851 OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8, float, v256, f32)
2852 OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4, double, v256, f64)
2853 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64)
2854 OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64)
2855 #endif
2856 
2857 #if CV_SIMD512
2858 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8)
2859 OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8)
2860 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16)
2861 OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32, short, v512, s16)
2862 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16, unsigned, v512, u32)
2863 OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16, int, v512, s32)
2864 OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16, float, v512, f32)
2865 OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8, double, v512, f64)
2866 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64)
2867 OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64)
2868 #endif
2870 
2873 #define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2874 template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
2875  v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
2876 { return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
2877 
2881 OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8)
2882 OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8)
2883 OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16)
2884 OPENCV_HAL_IMPL_C_REINTERPRET(short, s16)
2885 OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32)
2886 OPENCV_HAL_IMPL_C_REINTERPRET(int, s32)
2887 OPENCV_HAL_IMPL_C_REINTERPRET(float, f32)
2888 OPENCV_HAL_IMPL_C_REINTERPRET(double, f64)
2889 OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64)
2890 OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64)
2892 
2895 #define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2896 template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
2897 { return a << shift; }
2898 
2902 OPENCV_HAL_IMPL_C_SHIFTL(ushort)
2903 OPENCV_HAL_IMPL_C_SHIFTL(short)
2904 OPENCV_HAL_IMPL_C_SHIFTL(unsigned)
2905 OPENCV_HAL_IMPL_C_SHIFTL(int)
2906 OPENCV_HAL_IMPL_C_SHIFTL(uint64)
2907 OPENCV_HAL_IMPL_C_SHIFTL(int64)
2909 
2912 #define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
2913 template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
2914 { return a >> shift; }
2915 
2919 OPENCV_HAL_IMPL_C_SHIFTR(ushort)
2920 OPENCV_HAL_IMPL_C_SHIFTR(short)
2921 OPENCV_HAL_IMPL_C_SHIFTR(unsigned)
2922 OPENCV_HAL_IMPL_C_SHIFTR(int)
2923 OPENCV_HAL_IMPL_C_SHIFTR(uint64)
2924 OPENCV_HAL_IMPL_C_SHIFTR(int64)
2926 
2929 #define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
2930 template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
2931 { \
2932  v_reg<_Tp, n> c; \
2933  for( int i = 0; i < n; i++ ) \
2934  c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2935  return c; \
2936 }
2937 
2941 OPENCV_HAL_IMPL_C_RSHIFTR(ushort)
2942 OPENCV_HAL_IMPL_C_RSHIFTR(short)
2943 OPENCV_HAL_IMPL_C_RSHIFTR(unsigned)
2944 OPENCV_HAL_IMPL_C_RSHIFTR(int)
2945 OPENCV_HAL_IMPL_C_RSHIFTR(uint64)
2946 OPENCV_HAL_IMPL_C_RSHIFTR(int64)
2948 
2951 #define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
2952 template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2953 { \
2954  v_reg<_Tpn, 2*n> c; \
2955  for( int i = 0; i < n; i++ ) \
2956  { \
2957  c.s[i] = cast<_Tpn>(a.s[i]); \
2958  c.s[i+n] = cast<_Tpn>(b.s[i]); \
2959  } \
2960  return c; \
2961 }
2962 
2977 OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast)
2978 OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast)
2979 OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast)
2983 
2986 #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
2987 template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
2988 { \
2989  v_reg<_Tpn, 2*n> c; \
2990  for( int i = 0; i < n; i++ ) \
2991  { \
2992  c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2993  c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
2994  } \
2995  return c; \
2996 }
2997 
3012 OPENCV_HAL_IMPL_C_RSHR_PACK(int, short, pack, saturate_cast)
3013 OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast)
3014 OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast)
3018 
3021 #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3022 template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3023 { \
3024  for( int i = 0; i < n; i++ ) \
3025  ptr[i] = cast<_Tpn>(a.s[i]); \
3026 }
3027 
3043 OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast)
3044 OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast)
3048 
3051 #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3052 template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
3053 { \
3054  for( int i = 0; i < n; i++ ) \
3055  ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3056 }
3057 
3073 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast)
3074 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast)
3078 
3080 template<typename _Tpm, typename _Tp, int n>
3081 inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
3082 {
3083  for (int i = 0; i < n; ++i)
3084  {
3085  mptr[i] = (_Tpm)a.s[i];
3086  mptr[i + n] = (_Tpm)b.s[i];
3087  }
3088 }
3090 
3096 
3099 
3111 template<int n> inline v_reg<uchar, 2*n> v_pack_b(const v_reg<ushort, n>& a, const v_reg<ushort, n>& b)
3112 {
3113  v_reg<uchar, 2*n> mask;
3114  _pack_b(mask.s, a, b);
3115  return mask;
3116 }
3117 
3134 template<int n> inline v_reg<uchar, 4*n> v_pack_b(const v_reg<unsigned, n>& a, const v_reg<unsigned, n>& b,
3135  const v_reg<unsigned, n>& c, const v_reg<unsigned, n>& d)
3136 {
3137  v_reg<uchar, 4*n> mask;
3138  _pack_b(mask.s, a, b);
3139  _pack_b(mask.s + 2*n, c, d);
3140  return mask;
3141 }
3142 
3163 template<int n> inline v_reg<uchar, 8*n> v_pack_b(const v_reg<uint64, n>& a, const v_reg<uint64, n>& b,
3164  const v_reg<uint64, n>& c, const v_reg<uint64, n>& d,
3165  const v_reg<uint64, n>& e, const v_reg<uint64, n>& f,
3166  const v_reg<uint64, n>& g, const v_reg<uint64, n>& h)
3167 {
3168  v_reg<uchar, 8*n> mask;
3169  _pack_b(mask.s, a, b);
3170  _pack_b(mask.s + 2*n, c, d);
3171  _pack_b(mask.s + 4*n, e, f);
3172  _pack_b(mask.s + 6*n, g, h);
3173  return mask;
3174 }
3176 
3192 template<int n>
3193 inline v_reg<float, n> v_matmul(const v_reg<float, n>& v,
3194  const v_reg<float, n>& a, const v_reg<float, n>& b,
3195  const v_reg<float, n>& c, const v_reg<float, n>& d)
3196 {
3197  v_reg<float, n> res;
3198  for (int i = 0; i < n / 4; i++)
3199  {
3200  res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3201  res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3202  res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3203  res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3204  }
3205  return res;
3206 }
3207 
3222 template<int n>
3223 inline v_reg<float, n> v_matmuladd(const v_reg<float, n>& v,
3224  const v_reg<float, n>& a, const v_reg<float, n>& b,
3225  const v_reg<float, n>& c, const v_reg<float, n>& d)
3226 {
3227  v_reg<float, n> res;
3228  for (int i = 0; i < n / 4; i++)
3229  {
3230  res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3231  res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3232  res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3233  res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3234  }
3235  return res;
3236 }
3237 
3238 
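v_matmuladd is the affine variant: the fourth operand d is added directly instead of being scaled by v3, matching a 3x3 linear part plus a translation column. A sketch under the same assumptions:

inline void matmuladd_demo()
{
    v_float32x4 col0(1.f, 0.f, 0.f, 0.f);
    v_float32x4 col1(0.f, 1.f, 0.f, 0.f);
    v_float32x4 col2(0.f, 0.f, 1.f, 0.f);
    v_float32x4 bias(10.f, 20.f, 30.f, 0.f);
    v_float32x4 v(1.f, 2.f, 3.f, 0.f);
    v_float32x4 r = v_matmuladd(v, col0, col1, col2, bias);   // r = { 11, 22, 33, 0 }
}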
3239 template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b)
3240 { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); }
3241 template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b,
3242  const v_reg<double, n/2>& c)
3243 { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); }
3244 
3245 template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b)
3246 { return v_dotprod_expand(a, b); }
3247 template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b,
3248  const v_reg<double, n/2>& c)
3249 { return v_dotprod_expand(a, b, c); }
3250 
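These overloads expand 32-bit integer lanes to double before multiplying, so the products cannot overflow; in this scalar implementation the low and high halves of the inputs are paired, producing n/2 double lanes, each the sum of two products (plus c in the accumulating form). A sketch:

inline void dotprod_expand_demo()
{
    v_int32x4 a(1, 2, 3, 4);
    v_int32x4 b(5, 6, 7, 8);
    v_float64x2 r = v_dotprod_expand(a, b);   // { 1*5 + 3*7, 2*6 + 4*8 } = { 26, 44 }
    double acc[2];
    v_store(acc, r);
}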
3252 
3253 inline v_reg<float, simd128_width / sizeof(float)>
3254 v_load_expand(const hfloat* ptr)
3255 {
3256  v_reg<float, simd128_width / sizeof(float)> v;
3257  for( int i = 0; i < v.nlanes; i++ )
3258  {
3259  v.s[i] = ptr[i];
3260  }
3261  return v;
3262 }
3263 #if CV_SIMD256
3264 inline v_reg<float, simd256_width / sizeof(float)>
3265 v256_load_expand(const hfloat* ptr)
3266 {
3267  v_reg<float, simd256_width / sizeof(float)> v;
3268  for (int i = 0; i < v.nlanes; i++)
3269  {
3270  v.s[i] = ptr[i];
3271  }
3272  return v;
3273 }
3274 #endif
3275 #if CV_SIMD512
3276 inline v_reg<float, simd512_width / sizeof(float)>
3277 v512_load_expand(const hfloat* ptr)
3278 {
3279  v_reg<float, simd512_width / sizeof(float)> v;
3280  for (int i = 0; i < v.nlanes; i++)
3281  {
3282  v.s[i] = ptr[i];
3283  }
3284  return v;
3285 }
3286 #endif
3287 
3288 template<int n> inline void
3289 v_pack_store(hfloat* ptr, const v_reg<float, n>& v)
3290 {
3291  for( int i = 0; i < v.nlanes; i++ )
3292  {
3293  ptr[i] = hfloat(v.s[i]);
3294  }
3295 }
3296 
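v_load_expand widens half-precision (hfloat) values to one full float register, and v_pack_store rounds float lanes back down to hfloat, so together they form the usual load/process/store pattern for FP16 buffers. A round-trip sketch; v_cleanup is a no-op in this scalar fallback but is part of the usual calling convention:

inline void half_roundtrip(const hfloat* src, hfloat* dst)
{
    v_float32x4 f = v_load_expand(src);   // 4 hfloat values widened to float
    f = v_muladd(f, f, f);                // any float math on the widened lanes (f*f + f)
    v_pack_store(dst, f);                 // narrow back to hfloat
    v_cleanup();
}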
3297 inline void v_cleanup() {}
3298 #if CV_SIMD256
3299 inline void v256_cleanup() {}
3300 #endif
3301 #if CV_SIMD512
3302 inline void v512_cleanup() {}
3303 #endif
3304 
3306 
3307 #ifndef CV_DOXYGEN
3308 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3309 #endif
3310 }
3311 
3312 #if !defined(CV_DOXYGEN)
3313 #undef CV_SIMD256
3314 #undef CV_SIMD512
3315 #endif
3316 
3317 #endif