EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
intrin_cpp.hpp
1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5// By downloading, copying, installing or using the software you agree to this license.
6// If you do not agree to this license, do not download, install,
7// copy or use the software.
8//
9//
10// License Agreement
11// For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Copyright (C) 2015, Itseez Inc., all rights reserved.
17// Third party copyrights are property of their respective owners.
18//
19// Redistribution and use in source and binary forms, with or without modification,
20// are permitted provided that the following conditions are met:
21//
22// * Redistribution's of source code must retain the above copyright notice,
23// this list of conditions and the following disclaimer.
24//
25// * Redistribution's in binary form must reproduce the above copyright notice,
26// this list of conditions and the following disclaimer in the documentation
27// and/or other materials provided with the distribution.
28//
29// * The name of the copyright holders may not be used to endorse or promote products
30// derived from this software without specific prior written permission.
31//
32// This software is provided by the copyright holders and contributors "as is" and
33// any express or implied warranties, including, but not limited to, the implied
34// warranties of merchantability and fitness for a particular purpose are disclaimed.
35// In no event shall the Intel Corporation or contributors be liable for any direct,
36// indirect, incidental, special, exemplary, or consequential damages
37// (including, but not limited to, procurement of substitute goods or services;
38// loss of use, data, or profits; or business interruption) however caused
39// and on any theory of liability, whether in contract, strict liability,
40// or tort (including negligence or otherwise) arising in any way out of
41// the use of this software, even if advised of the possibility of such damage.
42//
43//M*/
44
45#ifndef OPENCV_HAL_INTRIN_CPP_HPP
46#define OPENCV_HAL_INTRIN_CPP_HPP
47
48#include <limits>
49#include <cstring>
50#include <algorithm>
51#include "opencv2/core/utility.hpp"
52#include "opencv2/core/saturate.hpp"
53
// Marks that the scalar (plain C++) emulation of the universal intrinsics is in use.
#define CV_SIMD128_CPP 1
// When the scalar backend is forced, advertise 128-bit SIMD (including 64-bit float lanes).
#if defined(CV_FORCE_SIMD128_CPP)
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
#endif
// Documentation builds enable every width so that all declarations are visible to Doxygen.
#if defined(CV_DOXYGEN)
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
#define CV_SIMD256 1
#define CV_SIMD256_64F 1
#define CV_SIMD512 1
#define CV_SIMD512_64F 1
#else
#define CV_SIMD256 0 // Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic implementation
#define CV_SIMD512 0 // to avoid warnings during compilation
#endif
72
73namespace cv
74{
75
76#ifndef CV_DOXYGEN
77CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
78#endif
79
369template<typename _Tp, int n> struct v_reg
370{
372 typedef _Tp lane_type;
373 enum { nlanes = n };
374// !@endcond
375
380 explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
381
385 v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
386
390 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
391
395 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
396 _Tp s4, _Tp s5, _Tp s6, _Tp s7)
397 {
398 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
399 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
400 }
401
405 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
406 _Tp s4, _Tp s5, _Tp s6, _Tp s7,
407 _Tp s8, _Tp s9, _Tp s10, _Tp s11,
408 _Tp s12, _Tp s13, _Tp s14, _Tp s15)
409 {
410 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
411 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
412 s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
413 s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
414 }
415
419 v_reg() {}
420
423 {
424 for( int i = 0; i < n; i++ )
425 s[i] = r.s[i];
426 }
437 _Tp get0() const { return s[0]; }
438
440 _Tp get(const int i) const { return s[i]; }
441 v_reg<_Tp, n> high() const
442 {
443 v_reg<_Tp, n> c;
444 int i;
445 for( i = 0; i < n/2; i++ )
446 {
447 c.s[i] = s[i+(n/2)];
448 c.s[i+(n/2)] = 0;
449 }
450 return c;
451 }
452
453 static v_reg<_Tp, n> zero()
454 {
455 v_reg<_Tp, n> c;
456 for( int i = 0; i < n; i++ )
457 c.s[i] = (_Tp)0;
458 return c;
459 }
460
461 static v_reg<_Tp, n> all(_Tp s)
462 {
463 v_reg<_Tp, n> c;
464 for( int i = 0; i < n; i++ )
465 c.s[i] = s;
466 return c;
467 }
468
469 template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
470 {
471 size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n);
472 v_reg<_Tp2, n2> c;
473 std::memcpy(&c.s[0], &s[0], bytes);
474 return c;
475 }
476
477 v_reg& operator=(const v_reg<_Tp, n> & r)
478 {
479 for( int i = 0; i < n; i++ )
480 s[i] = r.s[i];
481 return *this;
482 }
483
484 _Tp s[n];
486};
487
508
#if CV_SIMD256
// 256-bit register aliases: lane type x lane count (32 bytes in total).
typedef v_reg<uchar, 32> v_uint8x32;
typedef v_reg<schar, 32> v_int8x32;
typedef v_reg<ushort, 16> v_uint16x16;
typedef v_reg<short, 16> v_int16x16;
typedef v_reg<unsigned, 8> v_uint32x8;
typedef v_reg<int, 8> v_int32x8;
typedef v_reg<float, 8> v_float32x8;
typedef v_reg<double, 4> v_float64x4;
typedef v_reg<uint64, 4> v_uint64x4;
typedef v_reg<int64, 4> v_int64x4;
#endif
531
#if CV_SIMD512
// 512-bit register aliases: lane type x lane count (64 bytes in total).
typedef v_reg<uchar, 64> v_uint8x64;
typedef v_reg<schar, 64> v_int8x64;
typedef v_reg<ushort, 32> v_uint16x32;
typedef v_reg<short, 32> v_int16x32;
typedef v_reg<unsigned, 16> v_uint32x16;
typedef v_reg<int, 16> v_int32x16;
typedef v_reg<float, 16> v_float32x16;
typedef v_reg<double, 8> v_float64x8;
typedef v_reg<uint64, 8> v_uint64x8;
typedef v_reg<int64, 8> v_int64x8;
#endif
554
// Register widths in bytes. simdmax_width is the widest register enabled in this build.
enum {
    simd128_width = 16,
#if CV_SIMD256
    simd256_width = 32,
#endif
#if CV_SIMD512
    simd512_width = 64,
    simdmax_width = simd512_width
#elif CV_SIMD256
    simdmax_width = simd256_width
#else
    simdmax_width = simd128_width
#endif
};
569
573template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
574template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
575
579template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
580template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
581
585template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
586template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
587
591template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
592template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
593
594
598template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
599template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
600
604template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
605template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
606
610template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
611template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b);
612
616template<typename _Tp, int n> CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a);
617
618
619#ifndef CV_DOXYGEN
620
// Invokes macro_name(type, args...) once for each of the eight integer lane types.
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
__CV_EXPAND(macro_name(short, __VA_ARGS__)) \
__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int, __VA_ARGS__)) \
__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
__CV_EXPAND(macro_name(int64, __VA_ARGS__))

// Invokes macro_name(type, args...) for float and double.
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
__CV_EXPAND(macro_name(float, __VA_ARGS__)) \
__CV_EXPAND(macro_name(double, __VA_ARGS__))

// Invokes macro_name(type, args...) for every integer and floating-point lane type.
#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__)

// Generates, for lane type _Tp, the binary operator `bin_op` and its compound-assignment form.
// Each lane result is passed through saturate_cast<_Tp>.
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \
template<int n> inline \
v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
    return c; \
} \
template<int n> inline \
v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    for( int i = 0; i < n; i++ ) \
        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
    return a; \
}

// Instantiates CV__HAL_INTRIN_IMPL_BIN_OP_ for every lane type.
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op)
657
662
// Generates, for lane type _Tp, the bitwise operator `bit_op` and its compound-assignment form.
// Lanes are converted with V_TypeTraits<_Tp>::reinterpret_int, combined, and converted back with
// reinterpret_from_int, which lets the same code serve floating-point lane types.
#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \
template<int n> CV_INLINE \
v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
    return c; \
} \
template<int n> CV_INLINE \
v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    for( int i = 0; i < n; i++ ) \
        a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
    return a; \
}

// Instantiates CV__HAL_INTRIN_IMPL_BIT_OP_ for the integer and the floating-point lane types.
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \
CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \
CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */
687
688
692
// Generates operator~ for lane type _Tp. The second macro argument is unused; it exists so the
// macro can be driven by the CV__HAL_INTRIN_EXPAND_WITH_* helpers, which always pass one.
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \
template<int n> CV_INLINE \
v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
    return c; \
}

704
705#endif // !CV_DOXYGEN
706
707
// Generates a unary lane-wise function `func` that applies `cfunc` to every lane of the input
// and stores the results in a register with lane type _Tp2.
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp2, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = cfunc(a.s[i]); \
    return c; \
}
718
723
724
730
734OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
735 typename V_TypeTraits<_Tp>::abs_type)
736
// Generates a binary lane-wise function `func` whose lane i is cfunc(a[i], b[i]).
#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = cfunc(a.s[i], b.s[i]); \
    return c; \
}
747
// Generates a reduction `func` that folds all lanes of one register with cfunc, starting from
// lane 0, and returns the scalar result.
#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
{ \
    _Tp c = a.s[0]; \
    for( int i = 1; i < n; i++ ) \
        c = cfunc(c, a.s[i]); \
    return c; \
}
758
769OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
770
771
781OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
782
783
790OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
791
792
799OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
800
// popCountTable[v] is the number of set bits in the byte value v (0..255).
static const unsigned char popCountTable[] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
827template<typename _Tp, int n>
829{
831 for (int i = 0; i < n*(int)sizeof(_Tp); i++)
832 b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
833 return b;
834}
835
836
838template<typename _Tp, int n>
839inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
840 v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
841{
842 for( int i = 0; i < n; i++ )
843 {
844 minval.s[i] = std::min(a.s[i], b.s[i]);
845 maxval.s[i] = std::max(a.s[i], b.s[i]);
846 }
847}
849
// Generates the lane-wise comparison operator `cmp_op`. Each output lane is built from the
// integer -(int)(a[i] cmp_op b[i]), i.e. -1 when the comparison holds and 0 otherwise, converted
// back to the lane type with V_TypeTraits<_Tp>::reinterpret_from_int.
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
template<typename _Tp, int n> \
inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef typename V_TypeTraits<_Tp>::int_type itype; \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
    return c; \
}
862
867
868
872
877
882
885
888
889template<int n>
890inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
891{
892 typedef typename V_TypeTraits<float>::int_type itype;
894 for (int i = 0; i < n; i++)
895 c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
896 return c;
897}
898template<int n>
900{
901 typedef typename V_TypeTraits<double>::int_type itype;
903 for (int i = 0; i < n; i++)
904 c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
905 return c;
906}
907
// Generates a binary lane-wise function `func`: lane i is cast_op(a[i] bin_op b[i]), stored in a
// register with lane type _Tp2.
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
template<typename _Tp, int n> \
inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    typedef _Tp2 rtype; \
    v_reg<rtype, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
    return c; \
}
920
924OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
925
926
929OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
930
931
934OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
935
936
// Returns |a - b|, always subtracting the smaller value from the larger one so that the
// subtraction itself never goes negative (safe for unsigned T).
template<typename T> inline T _absdiff(T a, T b)
{
    return a > b ? a - b : b - a;
}
942
952template<typename _Tp, int n>
954{
955 typedef typename V_TypeTraits<_Tp>::abs_type rtype;
957 const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
958 for( int i = 0; i < n; i++ )
959 {
960 rtype ua = a.s[i] ^ mask;
961 rtype ub = b.s[i] ^ mask;
962 c.s[i] = _absdiff(ua, ub);
963 }
964 return c;
965}
966
970template<int n> inline v_reg<float, n> v_absdiff(const v_reg<float, n>& a, const v_reg<float, n>& b)
971{
973 for( int i = 0; i < c.nlanes; i++ )
974 c.s[i] = _absdiff(a.s[i], b.s[i]);
975 return c;
976}
977
981template<int n> inline v_reg<double, n> v_absdiff(const v_reg<double, n>& a, const v_reg<double, n>& b)
982{
984 for( int i = 0; i < c.nlanes; i++ )
985 c.s[i] = _absdiff(a.s[i], b.s[i]);
986 return c;
987}
988
993template<typename _Tp, int n>
995{
997 for( int i = 0; i < n; i++)
998 c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
999 return c;
1000}
1001
1006template<typename _Tp, int n>
1008{
1009 v_reg<_Tp, n> c;
1010 for( int i = 0; i < n; i++ )
1011 c.s[i] = 1.f/std::sqrt(a.s[i]);
1012 return c;
1013}
1014
1019template<typename _Tp, int n>
1021{
1022 v_reg<_Tp, n> c;
1023 for( int i = 0; i < n; i++ )
1024 c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
1025 return c;
1026}
1027
1032template<typename _Tp, int n>
1034{
1035 v_reg<_Tp, n> c;
1036 for( int i = 0; i < n; i++ )
1037 c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
1038 return c;
1039}
1040
1045template<typename _Tp, int n>
1047 const v_reg<_Tp, n>& c)
1048{
1049 v_reg<_Tp, n> d;
1050 for( int i = 0; i < n; i++ )
1051 d.s[i] = a.s[i]*b.s[i] + c.s[i];
1052 return d;
1053}
1054
1056template<typename _Tp, int n>
1058 const v_reg<_Tp, n>& c)
1059{
1060 return v_fma(a, b, c);
1061}
1062
1076template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1078{
1079 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1080 v_reg<w_type, n/2> c;
1081 for( int i = 0; i < (n/2); i++ )
1082 c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
1083 return c;
1084}
1085
1097template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1099 const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
1100{
1101 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1102 v_reg<w_type, n/2> s;
1103 for( int i = 0; i < (n/2); i++ )
1104 s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
1105 return s;
1106}
1107
1115template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1117{ return v_dotprod(a, b); }
1118
1123template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1125 const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
1126{ return v_dotprod(a, b, c); }
1127
1141template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1143{
1144 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1145 v_reg<q_type, n/4> s;
1146 for( int i = 0; i < (n/4); i++ )
1147 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1148 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
1149 return s;
1150}
1151
1163template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1165 const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
1166{
1167 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1168 v_reg<q_type, n/4> s;
1169 for( int i = 0; i < (n/4); i++ )
1170 s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
1171 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
1172 return s;
1173}
1174
1184template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1186{ return v_dotprod_expand(a, b); }
1187
1192template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
1194 const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
1195{ return v_dotprod_expand(a, b, c); }
1196
1216template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
1217 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
1218 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
1219{
1220 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1221 for( int i = 0; i < (n/2); i++ )
1222 {
1223 c.s[i] = (w_type)a.s[i]*b.s[i];
1224 d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
1225 }
1226}
1227
1233template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1234{
1235 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1236 v_reg<_Tp, n> c;
1237 for (int i = 0; i < n; i++)
1238 c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
1239 return c;
1240}
1241
1243template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
1244 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
1245{
1246 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1247 for( int i = 0; i < (n/2); i++ )
1248 {
1249 c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
1250 }
1251}
1253
// Generates the lane-wise shift operator `shift_op` taking a run-time shift amount; the shifted
// value is cast back to the lane type.
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
{ \
    v_reg<_Tp, n> c; \
    for( int i = 0; i < n; i++ ) \
        c.s[i] = (_Tp)(a.s[i] shift_op imm); \
    return c; \
}
1264
1269
1270
1274
// Generates v_rotate_<suffix> in two forms, both moving whole lanes by the compile-time amount imm.
//  - One-register form: output lane i takes a[i opA imm]; lanes whose source index falls outside
//    [0, n) are set to 0.
//  - Two-register form: output lane i takes b[i opA imm opB n] when that index is in range,
//    otherwise a[i opA imm] when that is in range, otherwise 0.
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> b; \
    for (int i = 0; i < n; i++) \
    { \
        int sIndex = i opA imm; \
        if (0 <= sIndex && sIndex < n) \
        { \
            b.s[i] = a.s[sIndex]; \
        } \
        else \
        { \
            b.s[i] = 0; \
        } \
    } \
    return b; \
} \
template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tp, n> c; \
    for (int i = 0; i < n; i++) \
    { \
        int aIndex = i opA imm; \
        int bIndex = i opA imm opB n; \
        if (0 <= bIndex && bIndex < n) \
        { \
            c.s[i] = b.s[bIndex]; \
        } \
        else if (0 <= aIndex && aIndex < n) \
        { \
            c.s[i] = a.s[aIndex]; \
        } \
        else \
        { \
            c.s[i] = 0; \
        } \
    } \
    return c; \
}
1317
1322
1323
1327
1335template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
1336{
1337 typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
1338 for( int i = 1; i < n; i++ )
1339 c += a.s[i];
1340 return c;
1341}
1342
1353template<int n> inline v_reg<float, n> v_reduce_sum4(const v_reg<float, n>& a, const v_reg<float, n>& b,
1354 const v_reg<float, n>& c, const v_reg<float, n>& d)
1355{
1357 for(int i = 0; i < (n/4); i++)
1358 {
1359 r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
1360 r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
1361 r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
1362 r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
1363 }
1364 return r;
1365}
1366
1374template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1375{
1376 typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]);
1377 for (int i = 1; i < n; i++)
1378 c += _absdiff(a.s[i], b.s[i]);
1379 return c;
1380}
1381
1392template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
1393{
1394 int mask = 0;
1395 for( int i = 0; i < n; i++ )
1396 mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
1397 return mask;
1398}
1399
1409template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
1410{
1411 for (int i = 0; i < n; i++)
1413 return i;
1414 return 0;
1415}
1416
1421template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
1422{
1423 for( int i = 0; i < n; i++ )
1424 if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
1425 return false;
1426 return true;
1427}
1428
1433template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
1434{
1435 for( int i = 0; i < n; i++ )
1436 if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
1437 return true;
1438 return false;
1439}
1440
1451template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
1452 const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
1453{
1454 typedef V_TypeTraits<_Tp> Traits;
1455 typedef typename Traits::int_type int_type;
1456 v_reg<_Tp, n> c;
1457 for( int i = 0; i < n; i++ )
1458 {
1459 int_type m = Traits::reinterpret_int(mask.s[i]);
1460 CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
1461 c.s[i] = m ? a.s[i] : b.s[i];
1462 }
1463 return c;
1464}
1465
1474template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
1475 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
1476 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
1477{
1478 for( int i = 0; i < (n/2); i++ )
1479 {
1480 b0.s[i] = a.s[i];
1481 b1.s[i] = a.s[i+(n/2)];
1482 }
1483}
1484
1494template<typename _Tp, int n>
1495inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1497{
1499 for( int i = 0; i < (n/2); i++ )
1500 b.s[i] = a.s[i];
1501 return b;
1502}
1503
1513template<typename _Tp, int n>
1514inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
1516{
1518 for( int i = 0; i < (n/2); i++ )
1519 b.s[i] = a.s[i+(n/2)];
1520 return b;
1521}
1522
1524template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1525 v_reinterpret_as_int(const v_reg<_Tp, n>& a)
1526{
1527 v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1528 for( int i = 0; i < n; i++ )
1529 c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1530 return c;
1531}
1532
1533template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1534 v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
1535{
1536 v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1537 for( int i = 0; i < n; i++ )
1538 c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1539 return c;
1540}
1542
1554template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
1555 v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
1556{
1557 int i;
1558 for( i = 0; i < n/2; i++ )
1559 {
1560 b0.s[i*2] = a0.s[i];
1561 b0.s[i*2+1] = a1.s[i];
1562 }
1563 for( ; i < n; i++ )
1564 {
1565 b1.s[i*2-n] = a0.s[i];
1566 b1.s[i*2-n+1] = a1.s[i];
1567 }
1568}
1569
1583template<typename _Tp>
1584inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp* ptr)
1585{
1586#if CV_STRONG_ALIGNMENT
1587 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1588#endif
1589 return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1590}
1591
#if CV_SIMD256
/** @brief Loads a 256-bit register (simd256_width / sizeof(_Tp) lanes) from memory.

With CV_STRONG_ALIGNMENT, ptr is asserted to be aligned to sizeof(_Tp).
*/
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr);
}
#endif
1616
#if CV_SIMD512
/** @brief Loads a 512-bit register (simd512_width / sizeof(_Tp) lanes) from memory.

With CV_STRONG_ALIGNMENT, ptr is asserted to be aligned to sizeof(_Tp).
*/
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr);
}
#endif
1641
1648template<typename _Tp>
1649inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp* ptr)
1650{
1651 CV_Assert(isAligned<sizeof(v_reg<_Tp, simd128_width / sizeof(_Tp)>)>(ptr));
1652 return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1653}
1654
#if CV_SIMD256
/** @brief Loads a 256-bit register from memory that must be aligned to the full register size (checked with CV_Assert). */
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_aligned(const _Tp* ptr)
{
    CV_Assert(isAligned<sizeof(v_reg<_Tp, simd256_width / sizeof(_Tp)>)>(ptr));
    return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr);
}
#endif
1670
#if CV_SIMD512
/** @brief Loads a 512-bit register from memory that must be aligned to the full register size (checked with CV_Assert). */
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_aligned(const _Tp* ptr)
{
    CV_Assert(isAligned<sizeof(v_reg<_Tp, simd512_width / sizeof(_Tp)>)>(ptr));
    return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr);
}
#endif
1686
1698template<typename _Tp>
1699inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp* ptr)
1700{
1701#if CV_STRONG_ALIGNMENT
1702 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1703#endif
1704 v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1705 for( int i = 0; i < c.nlanes/2; i++ )
1706 {
1707 c.s[i] = ptr[i];
1708 }
1709 return c;
1710}
1711
#if CV_SIMD256
/** @brief Loads only the lower half of a 256-bit register from memory.

Reads nlanes/2 elements from ptr; the upper half of the returned register is left uninitialized.
*/
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
    for (int i = 0; i < c.nlanes / 2; i++)
    {
        c.s[i] = ptr[i];
    }
    return c;
}
#endif
1739
#if CV_SIMD512
/** @brief Loads only the lower half of a 512-bit register from memory.

Reads nlanes/2 elements from ptr; the upper half of the returned register is left uninitialized.
*/
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
    for (int i = 0; i < c.nlanes / 2; i++)
    {
        c.s[i] = ptr[i];
    }
    return c;
}
#endif
1767
1780template<typename _Tp>
1781inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
1782{
1783#if CV_STRONG_ALIGNMENT
1784 CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1785 CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1786#endif
1787 v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1788 for( int i = 0; i < c.nlanes/2; i++ )
1789 {
1790 c.s[i] = loptr[i];
1791 c.s[i+c.nlanes/2] = hiptr[i];
1792 }
1793 return c;
1794}
1795
#if CV_SIMD256
/** @brief Builds a 256-bit register from two separate memory blocks.
@param loptr source of the lower nlanes/2 lanes
@param hiptr source of the upper nlanes/2 lanes
*/
template<typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
    v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
    for (int i = 0; i < c.nlanes / 2; i++)
    {
        c.s[i] = loptr[i];
        c.s[i + c.nlanes / 2] = hiptr[i];
    }
    return c;
}
#endif
1826
#if CV_SIMD512
/** @brief Builds a 512-bit register from two separate memory blocks.
@param loptr source of the lower nlanes/2 lanes
@param hiptr source of the upper nlanes/2 lanes
*/
template<typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
    v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
    for (int i = 0; i < c.nlanes / 2; i++)
    {
        c.s[i] = loptr[i];
        c.s[i + c.nlanes / 2] = hiptr[i];
    }
    return c;
}
#endif
1857
1870template<typename _Tp>
1871inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
1872v_load_expand(const _Tp* ptr)
1873{
1874#if CV_STRONG_ALIGNMENT
1875 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1876#endif
1877 typedef typename V_TypeTraits<_Tp>::w_type w_type;
1878 v_reg<w_type, simd128_width / sizeof(w_type)> c;
1879 for( int i = 0; i < c.nlanes; i++ )
1880 {
1881 c.s[i] = ptr[i];
1882 }
1883 return c;
1884}
1885
#if CV_SIMD256
/** @brief Loads elements of type _Tp and widens them into a 256-bit register of w_type lanes.

Reads simd256_width / sizeof(w_type) elements from ptr.
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
v256_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    v_reg<w_type, simd256_width / sizeof(w_type)> c;
    for (int i = 0; i < c.nlanes; i++)
    {
        c.s[i] = ptr[i];
    }
    return c;
}
#endif
1916
#if CV_SIMD512
/** @brief Loads elements of type _Tp and widens them into a 512-bit register of w_type lanes.

Reads simd512_width / sizeof(w_type) elements from ptr.
*/
template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
v512_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    v_reg<w_type, simd512_width / sizeof(w_type)> c;
    for (int i = 0; i < c.nlanes; i++)
    {
        c.s[i] = ptr[i];
    }
    return c;
}
#endif
1947
1959template<typename _Tp>
1960inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
1961v_load_expand_q(const _Tp* ptr)
1962{
1963#if CV_STRONG_ALIGNMENT
1964 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1965#endif
1966 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1967 v_reg<q_type, simd128_width / sizeof(q_type)> c;
1968 for( int i = 0; i < c.nlanes; i++ )
1969 {
1970 c.s[i] = ptr[i];
1971 }
1972 return c;
1973}
1974
1975#if CV_SIMD256
1988template<typename _Tp>
1989inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
1990v256_load_expand_q(const _Tp* ptr)
1991{
1992#if CV_STRONG_ALIGNMENT
1993 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1994#endif
1995 typedef typename V_TypeTraits<_Tp>::q_type q_type;
1996 v_reg<q_type, simd256_width / sizeof(q_type)> c;
1997 for (int i = 0; i < c.nlanes; i++)
1998 {
1999 c.s[i] = ptr[i];
2000 }
2001 return c;
2002}
2003#endif
2004
2005#if CV_SIMD512
2018template<typename _Tp>
2019inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
2020v512_load_expand_q(const _Tp* ptr)
2021{
2022#if CV_STRONG_ALIGNMENT
2023 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2024#endif
2025 typedef typename V_TypeTraits<_Tp>::q_type q_type;
2026 v_reg<q_type, simd512_width / sizeof(q_type)> c;
2027 for (int i = 0; i < c.nlanes; i++)
2028 {
2029 c.s[i] = ptr[i];
2030 }
2031 return c;
2032}
2033#endif
2034
2043template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2044 v_reg<_Tp, n>& b)
2045{
2046#if CV_STRONG_ALIGNMENT
2047 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2048#endif
2049 int i, i2;
2050 for( i = i2 = 0; i < n; i++, i2 += 2 )
2051 {
2052 a.s[i] = ptr[i2];
2053 b.s[i] = ptr[i2+1];
2054 }
2055}
2056
2065template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2067{
2068#if CV_STRONG_ALIGNMENT
2069 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2070#endif
2071 int i, i3;
2072 for( i = i3 = 0; i < n; i++, i3 += 3 )
2073 {
2074 a.s[i] = ptr[i3];
2075 b.s[i] = ptr[i3+1];
2076 c.s[i] = ptr[i3+2];
2077 }
2078}
2079
2088template<typename _Tp, int n>
2089inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
2091 v_reg<_Tp, n>& d)
2092{
2093#if CV_STRONG_ALIGNMENT
2094 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2095#endif
2096 int i, i4;
2097 for( i = i4 = 0; i < n; i++, i4 += 4 )
2098 {
2099 a.s[i] = ptr[i4];
2100 b.s[i] = ptr[i4+1];
2101 c.s[i] = ptr[i4+2];
2102 d.s[i] = ptr[i4+3];
2103 }
2104}
2105
2114template<typename _Tp, int n>
2115inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2116 const v_reg<_Tp, n>& b,
2118{
2119#if CV_STRONG_ALIGNMENT
2120 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2121#endif
2122 int i, i2;
2123 for( i = i2 = 0; i < n; i++, i2 += 2 )
2124 {
2125 ptr[i2] = a.s[i];
2126 ptr[i2+1] = b.s[i];
2127 }
2128}
2129
2138template<typename _Tp, int n>
2139inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2140 const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
2142{
2143#if CV_STRONG_ALIGNMENT
2144 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2145#endif
2146 int i, i3;
2147 for( i = i3 = 0; i < n; i++, i3 += 3 )
2148 {
2149 ptr[i3] = a.s[i];
2150 ptr[i3+1] = b.s[i];
2151 ptr[i3+2] = c.s[i];
2152 }
2153}
2154
2163template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
2164 const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
2165 const v_reg<_Tp, n>& d,
2167{
2168#if CV_STRONG_ALIGNMENT
2169 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2170#endif
2171 int i, i4;
2172 for( i = i4 = 0; i < n; i++, i4 += 4 )
2173 {
2174 ptr[i4] = a.s[i];
2175 ptr[i4+1] = b.s[i];
2176 ptr[i4+2] = c.s[i];
2177 ptr[i4+3] = d.s[i];
2178 }
2179}
2180
2189template<typename _Tp, int n>
2190inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
2191{
2192#if CV_STRONG_ALIGNMENT
2193 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2194#endif
2195 for( int i = 0; i < n; i++ )
2196 ptr[i] = a.s[i];
2197}
2198
/** @brief Store register contents to memory; the store-mode argument is ignored.

Checks the alignment of @p ptr under CV_STRONG_ALIGNMENT and then forwards to
the two-argument v_store(ptr, a). */
2199template<typename _Tp, int n>
2200inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
2201{
2202#if CV_STRONG_ALIGNMENT
2203 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2204#endif
2205 v_store(ptr, a);
2206}
2207
2215template<typename _Tp, int n>
2216inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
2217{
2218#if CV_STRONG_ALIGNMENT
2219 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2220#endif
2221 for( int i = 0; i < (n/2); i++ )
2222 ptr[i] = a.s[i];
2223}
2224
2232template<typename _Tp, int n>
2233inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
2234{
2235#if CV_STRONG_ALIGNMENT
2236 CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2237#endif
2238 for( int i = 0; i < (n/2); i++ )
2239 ptr[i] = a.s[i+(n/2)];
2240}
2241
/** @brief Store register contents to memory that is aligned to the register size.

Asserts (unconditionally, via CV_Assert) that @p ptr is aligned to
sizeof(v_reg<_Tp, n>) and then forwards to v_store(ptr, a). */
2250template<typename _Tp, int n>
2251inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
2252{
2253 CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2254 v_store(ptr, a);
2255}
2256
/** @brief Aligned store; in this implementation identical to v_store_aligned.

Asserts that @p ptr is aligned to sizeof(v_reg<_Tp, n>) and forwards to
v_store(ptr, a); no cache-related handling is visible in this body. */
2257template<typename _Tp, int n>
2258inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
2259{
2260 CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2261 v_store(ptr, a);
2262}
2263
/** @brief Aligned store taking a store-mode argument, which is ignored.

Asserts that @p ptr is aligned to sizeof(v_reg<_Tp, n>) and forwards to
v_store(ptr, a). */
2264template<typename _Tp, int n>
2265inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
2266{
2267 CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2268 v_store(ptr, a);
2269}
2270
2281template<typename _Tp, int n>
2283{
2284 v_reg<_Tp, n> c;
2285 for( int i = 0; i < (n/2); i++ )
2286 {
2287 c.s[i] = a.s[i];
2288 c.s[i+(n/2)] = b.s[i];
2289 }
2290 return c;
2291}
2292
2303template<typename _Tp, int n>
2305{
2306 v_reg<_Tp, n> c;
2307 for( int i = 0; i < (n/2); i++ )
2308 {
2309 c.s[i] = a.s[i+(n/2)];
2310 c.s[i+(n/2)] = b.s[i+(n/2)];
2311 }
2312 return c;
2313}
2314
2321template<typename _Tp, int n>
2322inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
2323 v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
2324{
2325 for( int i = 0; i < (n/2); i++ )
2326 {
2327 low.s[i] = a.s[i];
2328 low.s[i+(n/2)] = b.s[i];
2329 high.s[i] = a.s[i+(n/2)];
2330 high.s[i+(n/2)] = b.s[i+(n/2)];
2331 }
2332}
2333
2342template<typename _Tp, int n>
2344{
2345 v_reg<_Tp, n> c;
2346 for( int i = 0; i < n; i++ )
2347 c.s[i] = a.s[n-i-1];
2348 return c;
2349}
2350
2370template<int s, typename _Tp, int n>
2372{
2374 const int shift = n - s;
2375 int i = 0;
2376 for (; i < shift; ++i)
2377 r.s[i] = a.s[i+s];
2378 for (; i < n; ++i)
2379 r.s[i] = b.s[i-shift];
2380 return r;
2381}
2382
/** @brief Return lane s of the register.

The index is a template argument; CV_DbgAssert checks that 0 <= s < n.
@tparam s lane index
@return value stored in v.s[s] */
2396template<int s, typename _Tp, int n>
2397inline _Tp v_extract_n(const v_reg<_Tp, n>& v)
2398{
2399 CV_DbgAssert(s >= 0 && s < n);
2400 return v.s[s];
2401}
2402
2412template<int i, typename _Tp, int n>
2414{
2415 CV_DbgAssert(i >= 0 && i < n);
2416 return v_reg<_Tp, n>::all(a.s[i]);
2417}
2418
2424template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
2425{
2426 v_reg<int, n> c;
2427 for( int i = 0; i < n; i++ )
2428 c.s[i] = cvRound(a.s[i]);
2429 return c;
2430}
2431
2433template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a, const v_reg<double, n>& b)
2434{
2436 for( int i = 0; i < n; i++ )
2437 {
2438 c.s[i] = cvRound(a.s[i]);
2439 c.s[i+n] = cvRound(b.s[i]);
2440 }
2441 return c;
2442}
2443
2449template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
2450{
2451 v_reg<int, n> c;
2452 for( int i = 0; i < n; i++ )
2453 c.s[i] = cvFloor(a.s[i]);
2454 return c;
2455}
2456
2462template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
2463{
2464 v_reg<int, n> c;
2465 for( int i = 0; i < n; i++ )
2466 c.s[i] = cvCeil(a.s[i]);
2467 return c;
2468}
2469
2475template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
2476{
2477 v_reg<int, n> c;
2478 for( int i = 0; i < n; i++ )
2479 c.s[i] = (int)(a.s[i]);
2480 return c;
2481}
2482
2484template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
2485{
2487 for( int i = 0; i < n; i++ )
2488 {
2489 c.s[i] = cvRound(a.s[i]);
2490 c.s[i+n] = 0;
2491 }
2492 return c;
2493}
2494
2496template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
2497{
2499 for( int i = 0; i < n; i++ )
2500 {
2501 c.s[i] = cvFloor(a.s[i]);
2502 c.s[i+n] = 0;
2503 }
2504 return c;
2505}
2506
2508template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
2509{
2511 for( int i = 0; i < n; i++ )
2512 {
2513 c.s[i] = cvCeil(a.s[i]);
2514 c.s[i+n] = 0;
2515 }
2516 return c;
2517}
2518
2520template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
2521{
2523 for( int i = 0; i < n; i++ )
2524 {
2525 c.s[i] = (int)(a.s[i]);
2526 c.s[i+n] = 0;
2527 }
2528 return c;
2529}
2530
2534template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
2535{
2537 for( int i = 0; i < n; i++ )
2538 c.s[i] = (float)a.s[i];
2539 return c;
2540}
2541
2545template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a)
2546{
2548 for( int i = 0; i < n; i++ )
2549 {
2550 c.s[i] = (float)a.s[i];
2551 c.s[i+n] = 0;
2552 }
2553 return c;
2554}
2555
2559template<int n> inline v_reg<float, n*2> v_cvt_f32(const v_reg<double, n>& a, const v_reg<double, n>& b)
2560{
2562 for( int i = 0; i < n; i++ )
2563 {
2564 c.s[i] = (float)a.s[i];
2565 c.s[i+n] = (float)b.s[i];
2566 }
2567 return c;
2568}
2569
2573template<int n> CV_INLINE v_reg<double, n/2> v_cvt_f64(const v_reg<int, n>& a)
2574{
2575 v_reg<double, (n/2)> c;
2576 for( int i = 0; i < (n/2); i++ )
2577 c.s[i] = (double)a.s[i];
2578 return c;
2579}
2580
2584template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<int, n>& a)
2585{
2586 v_reg<double, (n/2)> c;
2587 for( int i = 0; i < (n/2); i++ )
2588 c.s[i] = (double)a.s[i + (n/2)];
2589 return c;
2590}
2591
2595template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64(const v_reg<float, n>& a)
2596{
2597 v_reg<double, (n/2)> c;
2598 for( int i = 0; i < (n/2); i++ )
2599 c.s[i] = (double)a.s[i];
2600 return c;
2601}
2602
2606template<int n> CV_INLINE v_reg<double, (n/2)> v_cvt_f64_high(const v_reg<float, n>& a)
2607{
2608 v_reg<double, (n/2)> c;
2609 for( int i = 0; i < (n/2); i++ )
2610 c.s[i] = (double)a.s[i + (n/2)];
2611 return c;
2612}
2613
2618{
2620 for( int i = 0; i < n; i++ )
2621 c.s[i] = (double)a.s[i];
2622 return c;
2623}
2624
2625
2626template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp* tab, const int* idx)
2627{
2628 v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2629 for (int i = 0; i < c.nlanes; i++)
2630 c.s[i] = tab[idx[i]];
2631 return c;
2632}
2633template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp* tab, const int* idx)
2634{
2635 v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2636 for (int i = 0; i < c.nlanes; i++)
2637 c.s[i] = tab[idx[i / 2] + i % 2];
2638 return c;
2639}
2640template<typename _Tp> inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp* tab, const int* idx)
2641{
2642 v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2643 for (int i = 0; i < c.nlanes; i++)
2644 c.s[i] = tab[idx[i / 4] + i % 4];
2645 return c;
2646}
2647
2648template<int n> inline v_reg<int, n> v_lut(const int* tab, const v_reg<int, n>& idx)
2649{
2650 v_reg<int, n> c;
2651 for( int i = 0; i < n; i++ )
2652 c.s[i] = tab[idx.s[i]];
2653 return c;
2654}
2655
2656template<int n> inline v_reg<unsigned, n> v_lut(const unsigned* tab, const v_reg<int, n>& idx)
2657{
2658 v_reg<int, n> c;
2659 for (int i = 0; i < n; i++)
2660 c.s[i] = tab[idx.s[i]];
2661 return c;
2662}
2663
2664template<int n> inline v_reg<float, n> v_lut(const float* tab, const v_reg<int, n>& idx)
2665{
2667 for( int i = 0; i < n; i++ )
2668 c.s[i] = tab[idx.s[i]];
2669 return c;
2670}
2671
2672template<int n> inline v_reg<double, n/2> v_lut(const double* tab, const v_reg<int, n>& idx)
2673{
2674 v_reg<double, n/2> c;
2675 for( int i = 0; i < n/2; i++ )
2676 c.s[i] = tab[idx.s[i]];
2677 return c;
2678}
2679
2680
2681template<int n> inline void v_lut_deinterleave(const float* tab, const v_reg<int, n>& idx,
2683{
2684 for( int i = 0; i < n; i++ )
2685 {
2686 int j = idx.s[i];
2687 x.s[i] = tab[j];
2688 y.s[i] = tab[j+1];
2689 }
2690}
2691
2692template<int n> inline void v_lut_deinterleave(const double* tab, const v_reg<int, n*2>& idx,
2694{
2695 for( int i = 0; i < n; i++ )
2696 {
2697 int j = idx.s[i];
2698 x.s[i] = tab[j];
2699 y.s[i] = tab[j+1];
2700 }
2701}
2702
2703template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec)
2704{
2705 v_reg<_Tp, n> c;
2706 for (int i = 0; i < n/4; i++)
2707 {
2708 c.s[4*i ] = vec.s[4*i ];
2709 c.s[4*i+1] = vec.s[4*i+2];
2710 c.s[4*i+2] = vec.s[4*i+1];
2711 c.s[4*i+3] = vec.s[4*i+3];
2712 }
2713 return c;
2714}
2715
2716template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec)
2717{
2718 v_reg<_Tp, n> c;
2719 for (int i = 0; i < n/8; i++)
2720 {
2721 c.s[8*i ] = vec.s[8*i ];
2722 c.s[8*i+1] = vec.s[8*i+4];
2723 c.s[8*i+2] = vec.s[8*i+1];
2724 c.s[8*i+3] = vec.s[8*i+5];
2725 c.s[8*i+4] = vec.s[8*i+2];
2726 c.s[8*i+5] = vec.s[8*i+6];
2727 c.s[8*i+6] = vec.s[8*i+3];
2728 c.s[8*i+7] = vec.s[8*i+7];
2729 }
2730 return c;
2731}
2732
2733template<typename _Tp, int n> inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec)
2734{
2735 v_reg<_Tp, n> c;
2736 for (int i = 0; i < n/4; i++)
2737 {
2738 c.s[3*i ] = vec.s[4*i ];
2739 c.s[3*i+1] = vec.s[4*i+1];
2740 c.s[3*i+2] = vec.s[4*i+2];
2741 }
2742 return c;
2743}
2744
2760template<typename _Tp, int n>
2761inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
2762 const v_reg<_Tp, n>& a2, const v_reg<_Tp, n>& a3,
2764 v_reg<_Tp, n>& b2, v_reg<_Tp, n>& b3 )
2765{
2766 for (int i = 0; i < n / 4; i++)
2767 {
2768 b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4];
2769 b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4];
2770 b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4];
2771 b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4];
2772 b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4];
2773 b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4];
2774 b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4];
2775 b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4];
2776 }
2777}
2778
//! Helper macro: defines `prefix##_setzero_##suffix()`, which returns `_Tpvec::zero()`.
2781#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2782inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
2783
2797
// v256_setzero_* for the register types available when CV_SIMD256 is enabled
2798#if CV_SIMD256
2799OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8)
2800OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8)
2801OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16)
2802OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16)
2803OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32)
2804OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32)
2805OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32)
2806OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64)
2807OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64)
2808OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64)
2809#endif
2810
// v512_setzero_* for the register types available when CV_SIMD512 is enabled
2811#if CV_SIMD512
2812OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8)
2813OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8)
2814OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16)
2815OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16)
2816OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32)
2817OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32)
2818OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32)
2819OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64)
2820OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64)
2821OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64)
2822#endif
2824
//! Helper macro: defines `prefix##_setall_##suffix(val)`, which returns `_Tpvec::all(val)`.
2827#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2828inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
2829
2837OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, v, u32)
2843
// v256_setall_* for the register types available when CV_SIMD256 is enabled
2844#if CV_SIMD256
2845OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8)
2846OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8)
2847OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16)
2848OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16, short, v256, s16)
2849OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8, unsigned, v256, u32)
2850OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8, int, v256, s32)
2851OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8, float, v256, f32)
2852OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4, double, v256, f64)
2853OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64)
2854OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64)
2855#endif
2856
// v512_setall_* for the register types available when CV_SIMD512 is enabled
2857#if CV_SIMD512
2858OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8)
2859OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8)
2860OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16)
2861OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32, short, v512, s16)
2862OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16, unsigned, v512, u32)
2863OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16, int, v512, s32)
2864OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16, float, v512, f32)
2865OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8, double, v512, f64)
2866OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64)
2867OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64)
2868#endif
2870
//! Helper macro: defines `v_reinterpret_as_<suffix>(a)`, which forwards to
//! `a.reinterpret_as<_Tp, N>()` with N = n0*sizeof(_Tp0)/sizeof(_Tp), so the
//! lane count is rescaled by the ratio of the element sizes.
2873#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2874template<typename _Tp0, int n0> inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \
2875 v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
2876{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); }
2877
2885OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32)
2892
//! Helper macro: defines `v_shl<shift>(a)` for element type _Tp as `a << shift`,
//! with the shift amount given as a template argument.
2895#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2896template<int shift, int n> inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \
2897{ return a << shift; }
2898
2909
//! Helper macro: defines `v_shr<shift>(a)` for element type _Tp as `a >> shift`,
//! with the shift amount given as a template argument.
2912#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
2913template<int shift, int n> inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \
2914{ return a >> shift; }
2915
2926
//! Helper macro: defines `v_rshr<shift>(a)` for element type _Tp.
//! Each lane becomes (a + (1 << (shift - 1))) >> shift, i.e. half of the
//! divisor is added before the right shift.
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
template<int shift, int n> inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \
{ \
    v_reg<_Tp, n> shifted; \
    for( int lane = 0; lane < n; ++lane ) \
    { \
        shifted.s[lane] = (_Tp)((a.s[lane] + ((_Tp)1 << (shift - 1))) >> shift); \
    } \
    return shifted; \
}
2937
2948
//! Helper macro: defines `v_<pack_suffix>(a, b)`, which converts the lanes of two
//! registers of _Tp to _Tpn with `cast<_Tpn>` and concatenates them
//! (a fills lanes 0..n-1, b fills lanes n..2n-1).
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
template<int n> inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tpn, 2*n> packed; \
    for( int lane = 0; lane < n; ++lane ) \
    { \
        packed.s[lane] = cast<_Tpn>(a.s[lane]); \
        packed.s[n + lane] = cast<_Tpn>(b.s[lane]); \
    } \
    return packed; \
}
2962
// v_pack instantiations: int->short uses saturate_cast; the 64-bit to 32-bit
// variants use static_cast.
2977OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast)
2978OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast)
2979OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast)
2983
//! Helper macro: defines `v_rshr_<pack_suffix><shift>(a, b)`.
//! Every lane is computed as (x + (1 << (shift - 1))) >> shift, converted to _Tpn
//! with `cast<_Tpn>`, and the two inputs are concatenated (a first, then b).
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
template<int shift, int n> inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
{ \
    v_reg<_Tpn, 2*n> packed; \
    for( int lane = 0; lane < n; ++lane ) \
    { \
        packed.s[lane] = cast<_Tpn>((a.s[lane] + ((_Tp)1 << (shift - 1))) >> shift); \
        packed.s[n + lane] = cast<_Tpn>((b.s[lane] + ((_Tp)1 << (shift - 1))) >> shift); \
    } \
    return packed; \
}
2997
// v_rshr_pack instantiations for 64-bit to 32-bit narrowing (static_cast, no saturation).
3013OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast)
3014OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast)
3018
//! Helper macro: defines `v_<pack_suffix>_store(ptr, a)`, which converts every lane
//! of `a` to _Tpn with `cast<_Tpn>` and writes the n results to ptr[0..n-1].
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
template<int n> inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
{ \
    for( int lane = 0; lane < n; ++lane ) \
    { \
        ptr[lane] = cast<_Tpn>(a.s[lane]); \
    } \
}
3027
// v_pack_store instantiations for 64-bit to 32-bit narrowing (static_cast, no saturation).
3043OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast)
3044OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast)
3048
//! Helper macro: defines `v_rshr_<pack_suffix>_store<shift>(ptr, a)`.
//! Every lane is computed as (x + (1 << (shift - 1))) >> shift, converted to _Tpn
//! with `cast<_Tpn>`, and written to ptr[0..n-1].
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
template<int shift, int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \
{ \
    for( int lane = 0; lane < n; ++lane ) \
    { \
        ptr[lane] = cast<_Tpn>((a.s[lane] + ((_Tp)1 << (shift - 1))) >> shift); \
    } \
}
3057
// v_rshr_pack_store instantiations for 64-bit to 32-bit narrowing (static_cast, no saturation).
3073OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast)
3074OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast)
3078
3080template<typename _Tpm, typename _Tp, int n>
3081inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
3082{
3083 for (int i = 0; i < n; ++i)
3084 {
3085 mptr[i] = (_Tpm)a.s[i];
3086 mptr[i + n] = (_Tpm)b.s[i];
3087 }
3088}
3090
3096
3099
3111template<int n> inline v_reg<uchar, 2*n> v_pack_b(const v_reg<ushort, n>& a, const v_reg<ushort, n>& b)
3112{
3114 _pack_b(mask.s, a, b);
3115 return mask;
3116}
3117
3134template<int n> inline v_reg<uchar, 4*n> v_pack_b(const v_reg<unsigned, n>& a, const v_reg<unsigned, n>& b,
3135 const v_reg<unsigned, n>& c, const v_reg<unsigned, n>& d)
3136{
3138 _pack_b(mask.s, a, b);
3139 _pack_b(mask.s + 2*n, c, d);
3140 return mask;
3141}
3142
3163template<int n> inline v_reg<uchar, 8*n> v_pack_b(const v_reg<uint64, n>& a, const v_reg<uint64, n>& b,
3164 const v_reg<uint64, n>& c, const v_reg<uint64, n>& d,
3165 const v_reg<uint64, n>& e, const v_reg<uint64, n>& f,
3166 const v_reg<uint64, n>& g, const v_reg<uint64, n>& h)
3167{
3169 _pack_b(mask.s, a, b);
3170 _pack_b(mask.s + 2*n, c, d);
3171 _pack_b(mask.s + 4*n, e, f);
3172 _pack_b(mask.s + 6*n, g, h);
3173 return mask;
3174}
3176
3192template<int n>
3194 const v_reg<float, n>& a, const v_reg<float, n>& b,
3195 const v_reg<float, n>& c, const v_reg<float, n>& d)
3196{
3197 v_reg<float, n> res;
3198 for (int i = 0; i < n / 4; i++)
3199 {
3200 res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4];
3201 res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4];
3202 res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4];
3203 res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4];
3204 }
3205 return res;
3206}
3207
3222template<int n>
3224 const v_reg<float, n>& a, const v_reg<float, n>& b,
3225 const v_reg<float, n>& c, const v_reg<float, n>& d)
3226{
3227 v_reg<float, n> res;
3228 for (int i = 0; i < n / 4; i++)
3229 {
3230 res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3231 res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3232 res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3233 res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3234 }
3235 return res;
3236}
3237
3238
3239template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b)
3240{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); }
3241template<int n> inline v_reg<double, n/2> v_dotprod_expand(const v_reg<int, n>& a, const v_reg<int, n>& b,
3242 const v_reg<double, n/2>& c)
3243{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); }
3244
/** @brief "Fast" variant of v_dotprod_expand for int lanes; here it simply forwards to v_dotprod_expand(a, b). */
3245template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b)
3246{ return v_dotprod_expand(a, b); }
/** @brief "Fast" variant of v_dotprod_expand with accumulator; here it simply forwards to v_dotprod_expand(a, b, c). */
3247template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int, n>& a, const v_reg<int, n>& b,
3248 const v_reg<double, n/2>& c)
3249{ return v_dotprod_expand(a, b, c); }
3250
3252
3253inline v_reg<float, simd128_width / sizeof(float)>
3254v_load_expand(const hfloat* ptr)
3255{
3256 v_reg<float, simd128_width / sizeof(float)> v;
3257 for( int i = 0; i < v.nlanes; i++ )
3258 {
3259 v.s[i] = ptr[i];
3260 }
3261 return v;
3262}
3263#if CV_SIMD256
3264inline v_reg<float, simd256_width / sizeof(float)>
3265v256_load_expand(const hfloat* ptr)
3266{
3267 v_reg<float, simd256_width / sizeof(float)> v;
3268 for (int i = 0; i < v.nlanes; i++)
3269 {
3270 v.s[i] = ptr[i];
3271 }
3272 return v;
3273}
3274#endif
3275#if CV_SIMD512
3276inline v_reg<float, simd512_width / sizeof(float)>
3277v512_load_expand(const hfloat* ptr)
3278{
3279 v_reg<float, simd512_width / sizeof(float)> v;
3280 for (int i = 0; i < v.nlanes; i++)
3281 {
3282 v.s[i] = ptr[i];
3283 }
3284 return v;
3285}
3286#endif
3287
3288template<int n> inline void
3289v_pack_store(hfloat* ptr, const v_reg<float, n>& v)
3290{
3291 for( int i = 0; i < v.nlanes; i++ )
3292 {
3293 ptr[i] = hfloat(v.s[i]);
3294 }
3295}
3296
// Cleanup hooks: empty in this implementation. The 256/512 variants exist
// only when the corresponding CV_SIMD256 / CV_SIMD512 switch is enabled.
3297inline void v_cleanup() {}
3298#if CV_SIMD256
3299inline void v256_cleanup() {}
3300#endif
3301#if CV_SIMD512
3302inline void v512_cleanup() {}
3303#endif
3303#endif
3304
3306
3307#ifndef CV_DOXYGEN
3308CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3309#endif
3310}
3311
3312#if !defined(CV_DOXYGEN)
3313#undef CV_SIMD256
3314#undef CV_SIMD512
3315#endif
3316
3317#endif
T cos(T... args)
T exp(T... args)
InputArrayOfArrays InputArrayOfArrays InputOutputArray InputOutputArray InputOutputArray InputOutputArray Size InputOutputArray InputOutputArray T
Definition calib3d.hpp:1867
const int * idx
Definition core_c.h:668
const CvArr CvArr * x
Definition core_c.h:1195
const CvArr * y
Definition core_c.h:1187
signed char schar
Definition interface.h:48
unsigned char uchar
Definition interface.h:51
int64_t int64
Definition interface.h:61
unsigned short ushort
Definition interface.h:52
uint64_t uint64
Definition interface.h:62
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix)
Helper macro.
Definition intrin_cpp.hpp:2827
#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2929
#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2912
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:2986
#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB)
Helper macro.
Definition intrin_cpp.hpp:1277
#define OPENCV_HAL_IMPL_CMP_OP(cmp_op)
Helper macro.
Definition intrin_cpp.hpp:852
#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp)
Helper macro.
Definition intrin_cpp.hpp:2895
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix)
Helper macro.
Definition intrin_cpp.hpp:2781
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:3051
#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix)
Helper macro.
Definition intrin_cpp.hpp:2873
#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2)
Helper macro.
Definition intrin_cpp.hpp:710
#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:2951
#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2)
Helper macro.
Definition intrin_cpp.hpp:910
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast)
Helper macro.
Definition intrin_cpp.hpp:3021
#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op)
Helper macro.
Definition intrin_cpp.hpp:1256
bool v_check_any(const v_reg< _Tp, n > &a)
Check if any of packed values is less than zero.
Definition intrin_cpp.hpp:1433
#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name,...)
Definition intrin_cpp.hpp:621
v_reg< _Tp, n > v_combine_high(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from last elements of two vectors.
Definition intrin_cpp.hpp:2304
v_reg< float, n > v_matmul(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication.
Definition intrin_cpp.hpp:3193
v_reg< int, n > v_round(const v_reg< float, n > &a)
Round elements.
Definition intrin_cpp.hpp:2424
#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy)
Definition intrin_cpp.hpp:693
#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op)
Definition intrin_cpp.hpp:656
CV_INLINE v_reg< _Tp, n > operator|(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise OR.
v_reg< schar, 16 > v_int8x16
Sixteen 8-bit signed integer values.
Definition intrin_cpp.hpp:491
v_reg< uchar, 16 > v_uint8x16
Sixteen 8-bit unsigned integer values.
Definition intrin_cpp.hpp:489
void v_store_high(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (higher half)
Definition intrin_cpp.hpp:2233
int v_signmask(const v_reg< _Tp, n > &a)
Get negative values mask.
Definition intrin_cpp.hpp:1392
void v_zip(const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
Interleave two vectors.
Definition intrin_cpp.hpp:1554
v_reg< int64, 2 > v_int64x2
Two 64-bit signed integer values.
Definition intrin_cpp.hpp:507
void v_store(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory.
Definition intrin_cpp.hpp:2190
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements and expand.
Definition intrin_cpp.hpp:1142
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type v_reduce_sad(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Sum absolute differences of values.
Definition intrin_cpp.hpp:1374
v_reg< int, n > v_ceil(const v_reg< float, n > &a)
Ceil elements.
Definition intrin_cpp.hpp:2462
v_reg< ushort, 8 > v_uint16x8
Eight 16-bit unsigned integer values.
Definition intrin_cpp.hpp:493
v_reg< _Tp, n > v_pack_triplets(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2733
CV_INLINE v_reg< _Tp, n > operator&(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise AND.
CV_INLINE v_reg< _Tp, n > & operator^=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
void v_store_low(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (lower half)
Definition intrin_cpp.hpp:2216
#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op)
Definition intrin_cpp.hpp:684
v_reg< int, n > v_floor(const v_reg< float, n > &a)
Floor elements.
Definition intrin_cpp.hpp:2449
CV_INLINE v_reg< _Tp, n > & operator|=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Dot product of elements.
Definition intrin_cpp.hpp:1077
int v_scan_forward(const v_reg< _Tp, n > &a)
Get first negative lane index.
Definition intrin_cpp.hpp:1409
CV_INLINE v_reg< _Tp, n > & operator/=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< _Tp, n > v_reverse(const v_reg< _Tp, n > &a)
Vector reverse order.
Definition intrin_cpp.hpp:2343
v_reg< typename V_TypeTraits< _Tp >::w_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::w_type)> v_load_expand(const _Tp *ptr)
Load register contents from memory with double expand.
Definition intrin_cpp.hpp:1872
v_reg< int, 4 > v_int32x4
Four 32-bit signed integer values.
Definition intrin_cpp.hpp:499
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_absdiff(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Absolute difference.
Definition intrin_cpp.hpp:953
v_reg< _Tp, n > v_interleave_pairs(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2703
V_TypeTraits< _Tp >::sum_type v_reduce_sum(const v_reg< _Tp, n > &a)
Sum packed values.
Definition intrin_cpp.hpp:1335
v_reg< _Tp, n > v_muladd(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
A synonym for v_fma.
Definition intrin_cpp.hpp:1057
void v_store_aligned_nocache(_Tp *ptr, const v_reg< _Tp, n > &a)
Definition intrin_cpp.hpp:2258
v_reg< _Tp, n > v_sqr_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Square of the magnitude.
Definition intrin_cpp.hpp:1033
v_reg< int, n > v_trunc(const v_reg< float, n > &a)
Truncate elements.
Definition intrin_cpp.hpp:2475
v_reg< unsigned, 4 > v_uint32x4
Four 32-bit unsigned integer values.
Definition intrin_cpp.hpp:497
#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op)
Definition intrin_cpp.hpp:639
CV_INLINE v_reg< _Tp, n > operator/(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Divide values.
v_reg< _Tp, n > v_invsqrt(const v_reg< _Tp, n > &a)
Inverse square root.
Definition intrin_cpp.hpp:1007
v_reg< _Tp, n > v_magnitude(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Magnitude.
Definition intrin_cpp.hpp:1020
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > v_dotprod_expand_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements and expand.
Definition intrin_cpp.hpp:1185
CV_INLINE v_reg< _Tp, n > & operator&=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
CV_INLINE v_reg< double,(n/2)> v_cvt_f64_high(const v_reg< int, n > &a)
Convert to double high part of vector.
Definition intrin_cpp.hpp:2584
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_low(const _Tp *ptr)
Load 64-bits of data to lower part (high part is undefined).
Definition intrin_cpp.hpp:1699
void v_recombine(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
Combine two vectors from lower and higher parts of two other vectors.
Definition intrin_cpp.hpp:2322
v_reg< float, n > v_reduce_sum4(const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Sums all elements of each input vector, returns the vector of sums.
Definition intrin_cpp.hpp:1353
void v_mul_expand(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
Multiply and expand.
Definition intrin_cpp.hpp:1216
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_aligned(const _Tp *ptr)
Load register contents from memory (aligned)
Definition intrin_cpp.hpp:1649
v_reg< _Tp, n > v_broadcast_element(const v_reg< _Tp, n > &a)
Broadcast i-th element of vector.
Definition intrin_cpp.hpp:2413
void v_pack_store(hfloat *ptr, const v_reg< float, n > &v)
Definition intrin_cpp.hpp:3289
#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name,...)
Definition intrin_cpp.hpp:631
v_reg< _Tp, n > v_interleave_quads(const v_reg< _Tp, n > &vec)
Definition intrin_cpp.hpp:2716
v_reg< _Tp, n > v_select(const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Per-element select (blend operation)
Definition intrin_cpp.hpp:1451
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load(const _Tp *ptr)
Load register contents from memory.
Definition intrin_cpp.hpp:1584
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_low(const v_reg< _Tp, n > &a)
Expand lower values to the wider pack type.
Definition intrin_cpp.hpp:1496
CV_INLINE v_reg< _Tp, n > operator~(const v_reg< _Tp, n > &a)
Bitwise NOT.
CV_INLINE v_reg< double, n/2 > v_cvt_f64(const v_reg< int, n > &a)
Convert lower half to double.
Definition intrin_cpp.hpp:2573
v_reg< typename V_TypeTraits< _Tp >::q_type, simd128_width/sizeof(typename V_TypeTraits< _Tp >::q_type)> v_load_expand_q(const _Tp *ptr)
Load register contents from memory with quad expand.
Definition intrin_cpp.hpp:1961
void v_expand(const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
Expand values to the wider pack type.
Definition intrin_cpp.hpp:1474
v_reg< uchar, 2 *n > v_pack_b(const v_reg< ushort, n > &a, const v_reg< ushort, n > &b)
For 16-bit boolean values.
Definition intrin_cpp.hpp:3111
void v_cleanup()
Definition intrin_cpp.hpp:3297
v_reg< _Tp, n > v_fma(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
Multiply and add.
Definition intrin_cpp.hpp:1046
CV_INLINE v_reg< _Tp, n > operator^(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Bitwise XOR.
void v_store_interleave(_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
Interleave and store (2 channels)
Definition intrin_cpp.hpp:2115
void v_lut_deinterleave(const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)
Definition intrin_cpp.hpp:2681
void v_transpose4x4(v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, const v_reg< _Tp, n > &a2, const v_reg< _Tp, n > &a3, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1, v_reg< _Tp, n > &b2, v_reg< _Tp, n > &b3)
Transpose 4x4 matrix.
Definition intrin_cpp.hpp:2761
v_reg< _Tp, n > v_absdiffs(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Saturating absolute difference.
Definition intrin_cpp.hpp:994
v_reg< uint64, 2 > v_uint64x2
Two 64-bit unsigned integer values.
Definition intrin_cpp.hpp:505
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_expand_high(const v_reg< _Tp, n > &a)
Expand higher values to the wider pack type.
Definition intrin_cpp.hpp:1515
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > v_dotprod_fast(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Fast Dot product of elements.
Definition intrin_cpp.hpp:1116
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_load_halves(const _Tp *loptr, const _Tp *hiptr)
Load register contents from two memory blocks.
Definition intrin_cpp.hpp:1781
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2626
v_reg< _Tp, n > v_mul_hi(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Multiply and extract high part.
Definition intrin_cpp.hpp:1233
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_quads(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2640
v_reg< _Tp, n > v_combine_low(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Combine vector from first elements of two vectors.
Definition intrin_cpp.hpp:2282
v_reg< float, 4 > v_float32x4
Four 32-bit floating point values (single precision)
Definition intrin_cpp.hpp:501
v_reg< float, n > v_cvt_f32(const v_reg< int, n > &a)
Convert to float.
Definition intrin_cpp.hpp:2534
bool v_check_all(const v_reg< _Tp, n > &a)
Check if all packed values are less than zero.
Definition intrin_cpp.hpp:1421
v_reg< _Tp, simd128_width/sizeof(_Tp)> v_lut_pairs(const _Tp *tab, const int *idx)
Definition intrin_cpp.hpp:2633
v_reg< float, n > v_matmuladd(const v_reg< float, n > &v, const v_reg< float, n > &a, const v_reg< float, n > &b, const v_reg< float, n > &c, const v_reg< float, n > &d)
Matrix multiplication and add.
Definition intrin_cpp.hpp:3223
_Tp v_extract_n(const v_reg< _Tp, n > &v)
Vector extract.
Definition intrin_cpp.hpp:2397
CV_INLINE v_reg< _Tp, n > & operator-=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< float, n > v_not_nan(const v_reg< float, n > &a)
Not-NaN check (per-element mask of values that are not NaN).
Definition intrin_cpp.hpp:890
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > v_popcount(const v_reg< _Tp, n > &a)
Count the 1 bits in the vector lanes and return result as corresponding unsigned type.
Definition intrin_cpp.hpp:828
CV_INLINE v_reg< _Tp, n > & operator+=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
void v_store_aligned(_Tp *ptr, const v_reg< _Tp, n > &a)
Store data to memory (aligned)
Definition intrin_cpp.hpp:2251
CV_INLINE v_reg< _Tp, n > & operator*=(v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
v_reg< short, 8 > v_int16x8
Eight 16-bit signed integer values.
Definition intrin_cpp.hpp:495
v_reg< double, 2 > v_float64x2
Two 64-bit floating point values (double precision)
Definition intrin_cpp.hpp:503
v_reg< _Tp, n > v_extract(const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
Vector extract.
Definition intrin_cpp.hpp:2371
void v_load_deinterleave(const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
Load and deinterleave (2 channels)
Definition intrin_cpp.hpp:2043
@ simdmax_width
Definition intrin_cpp.hpp:566
@ simd128_width
Definition intrin_cpp.hpp:556
CV_INLINE int cvRound(double value)
Rounds floating-point number to the nearest integer.
Definition fast_math.hpp:200
CV_INLINE int cvCeil(double value)
Rounds floating-point number to the nearest integer not smaller than the original.
Definition fast_math.hpp:258
static _Tp saturate_cast(uchar v)
Template function for accurate conversion from one primitive type to another.
Definition saturate.hpp:81
CV_INLINE int cvFloor(double value)
Rounds floating-point number to the nearest integer not larger than the original.
Definition fast_math.hpp:231
static bool isAligned(const T &data)
Alignment check of passed values.
Definition utility.hpp:517
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition base.hpp:342
#define CV_INLINE
Definition cvdef.h:218
#define CV_DbgAssert(expr)
Definition base.hpp:375
CvRect r
Definition imgproc_c.h:984
CV_EXPORTS OutputArray int double double InputArray mask
Definition imgproc.hpp:2132
T log(T... args)
T max(T... args)
T memcpy(T... args)
T min(T... args)
StoreMode
Definition intrin.hpp:100
@ STORE_UNALIGNED
Definition intrin.hpp:101
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441
DualQuat< T > operator-(const DualQuat< T > &q, const T a)
Definition dualquaternion.inl.hpp:255
DualQuat< T > operator+(const T a, const DualQuat< T > &q)
Definition dualquaternion.inl.hpp:243
DualQuat< T > operator*(const T a, const DualQuat< T > &q)
Definition dualquaternion.inl.hpp:274
T sin(T... args)
T sqrt(T... args)
Definition intrin.hpp:110
Definition intrin_cpp.hpp:370
_Tp get0() const
Access first value.
Definition intrin_cpp.hpp:437
v_reg(const v_reg< _Tp, n > &r)
Copy constructor.
Definition intrin_cpp.hpp:422
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8, _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15)
Constructor.
Definition intrin_cpp.hpp:405
v_reg(const _Tp *ptr)
Constructor.
Definition intrin_cpp.hpp:380
v_reg()
Default constructor.
Definition intrin_cpp.hpp:419
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7)
Constructor.
Definition intrin_cpp.hpp:395
v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3)
Constructor.
Definition intrin_cpp.hpp:390
v_reg(_Tp s0, _Tp s1)
Constructor.
Definition intrin_cpp.hpp:385