EstervQrCode 2.0.0
Library for qr code manipulation
Loading...
Searching...
No Matches
intrin_sse_em.hpp
1// This file is part of OpenCV project.
2// It is subject to the license terms in the LICENSE file found in the top-level directory
3// of this distribution and at http://opencv.org/license.html
4
5#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
6#define OPENCV_HAL_INTRIN_SSE_EM_HPP
7
8namespace cv
9{
10
12
13CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
14
// Generates `_v128_<fun>(a)`: an inline forwarder that hands its single
// operand straight to `_mm_<fun>(a)`. `tp` is used as both the operand type
// and the return type.
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
    inline tp _v128_##fun(const tp& a) \
    { return _mm_##fun(a); }
18
// Generates `_v128_<fun>(a, b)`: an inline forwarder that passes both
// operands, in order, to `_mm_<fun>(a, b)`. `tp` is used as the type of both
// operands and of the result.
#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b) \
    { return _mm_##fun(a, b); }
22
// Generates `_v128_<fun>(a, b, c)`: an inline forwarder that passes the three
// operands, in order, to `_mm_<fun>(a, b, c)`. `tp` is used as the type of
// every operand and of the result.
#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
    { return _mm_##fun(a, b, c); }
26
28
// Unsigned 32-bit "greater than": each result lane is all ones where
// a > b (comparing the lanes as unsigned values) and zero otherwise.
//
// [todo] define CV_XOP
#if 1 // !CV_XOP
// Emulation: flipping the top bit of both operands maps unsigned order onto
// signed order, so the signed 32-bit compare gives the unsigned answer.
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
{
    const __m128i delta = _mm_set1_epi32(static_cast<int>(0x80000000u));
    return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
}
// wrapping XOP
#else
// The wrap macro adds the `_v128_` / `_mm_` prefixes itself, so it must be
// given the bare intrinsic suffix.
OPENCV_HAL_SSE_WRAP_2(comgt_epu32, __m128i)
#endif // !CV_XOP
40
42
43#if !CV_SSE4_1
44
// Bitwise select: every result bit comes from `b` where the matching bit of
// `mask` is set and from `a` where it is clear.
// NOTE(review): the select is per bit, not per byte, so a mask whose bytes
// are not uniformly 0x00 or 0xFF mixes bits of `a` and `b` within a byte.
inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
{
    // a ^ ((a ^ b) & mask) yields b under set mask bits and a elsewhere.
    const __m128i diff = _mm_xor_si128(b, a);
    return _mm_xor_si128(a, _mm_and_si128(diff, mask));
}
48
// 8 >> 16
// Widens the eight low unsigned 8-bit lanes of `a` to 16-bit lanes by
// interleaving them with zero bytes (zero extension).
inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
{
    return _mm_unpacklo_epi8(a, _mm_setzero_si128());
}
// Widens the eight low signed 8-bit lanes of `a` to 16-bit lanes with sign
// extension.
inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
{
    // Duplicating each byte puts the source byte in the upper half of every
    // 16-bit lane; the arithmetic shift then brings it down with its sign.
    const __m128i doubled = _mm_unpacklo_epi8(a, a);
    return _mm_srai_epi16(doubled, 8);
}
// 8 >> 32
// Widens the four low unsigned 8-bit lanes of `a` to 32-bit lanes with zero
// extension, done as two successive interleaves with zero (8 -> 16 -> 32).
inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i as16 = _mm_unpacklo_epi8(a, zero);
    return _mm_unpacklo_epi16(as16, zero);
}
// Widens the four low signed 8-bit lanes of `a` to 32-bit lanes with sign
// extension.
inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
{
    // Two self-interleaves replicate each of the four low bytes four times,
    // so every 32-bit lane holds its source byte in the top 8 bits.
    const __m128i x2 = _mm_unpacklo_epi8(a, a);
    const __m128i x4 = _mm_unpacklo_epi8(x2, x2);
    // The arithmetic shift moves that byte to the bottom, keeping the sign.
    return _mm_srai_epi32(x4, 24);
}
// 16 >> 32
// Widens the four low unsigned 16-bit lanes of `a` to 32-bit lanes by
// interleaving them with zero words (zero extension).
inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
{
    return _mm_unpacklo_epi16(a, _mm_setzero_si128());
}
// Widens the four low signed 16-bit lanes of `a` to 32-bit lanes with sign
// extension.
inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
{
    // Self-interleave places each source word in the upper half of a 32-bit
    // lane; the arithmetic shift brings it down and propagates the sign.
    const __m128i doubled = _mm_unpacklo_epi16(a, a);
    return _mm_srai_epi32(doubled, 16);
}
// 32 >> 64
// Widens the two low unsigned 32-bit lanes of `a` to 64-bit lanes by
// interleaving them with zero dwords (zero extension).
inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
{
    return _mm_unpacklo_epi32(a, _mm_setzero_si128());
}
// Widens the two low signed 32-bit lanes of `a` to 64-bit lanes with sign
// extension.
inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
{
    // Shifting right by 31 turns each lane into 0 or all ones according to
    // its sign; interleaving supplies that as the upper half of each result.
    const __m128i sign = _mm_srai_epi32(a, 31);
    return _mm_unpacklo_epi32(a, sign);
}
86
// Lane-wise 32-bit multiply that keeps the low 32 bits of each product.
inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
{
    // _mm_mul_epu32 multiplies only lanes 0 and 2 (giving two 64-bit
    // products), so the odd lanes are shifted down and multiplied separately.
    const __m128i even = _mm_mul_epu32(a, b);
    const __m128i odd = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
    // Interleave so that the low dwords of the four products end up in the
    // low 64 bits of each intermediate, then join those halves in lane order.
    const __m128i lanes01 = _mm_unpacklo_epi32(even, odd);
    const __m128i lanes23 = _mm_unpackhi_epi32(even, odd);
    return _mm_unpacklo_epi64(lanes01, lanes23);
}
96
98inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
99{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
100
101// wrapping SSE4.1
102#else
103OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
104OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
105OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
106OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
107OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
108OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
109OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
110OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
111OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
112OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
113OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
114#endif // !CV_SSE4_1
115
117
// 16 << 8
// Widens the eight high unsigned 8-bit lanes of `a` (bytes 8..15) to 16-bit
// lanes by interleaving them with zero bytes (zero extension).
inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
{
    return _mm_unpackhi_epi8(a, _mm_setzero_si128());
}
// Widens the eight high signed 8-bit lanes of `a` (bytes 8..15) to 16-bit
// lanes with sign extension.
inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
{
    // Self-interleave puts each source byte in the upper half of a 16-bit
    // lane; the arithmetic shift brings it down with its sign.
    const __m128i doubled = _mm_unpackhi_epi8(a, a);
    return _mm_srai_epi16(doubled, 8);
}
// 32 << 16
// Widens the four high unsigned 16-bit lanes of `a` (words 4..7) to 32-bit
// lanes by interleaving them with zero words (zero extension).
inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
{
    return _mm_unpackhi_epi16(a, _mm_setzero_si128());
}
// Widens the four high signed 16-bit lanes of `a` (words 4..7) to 32-bit
// lanes with sign extension.
inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
{
    // Self-interleave places each source word in the upper half of a 32-bit
    // lane; the arithmetic shift brings it down and propagates the sign.
    const __m128i doubled = _mm_unpackhi_epi16(a, a);
    return _mm_srai_epi32(doubled, 16);
}
// 64 << 32
// Widens the two high unsigned 32-bit lanes of `a` (dwords 2..3) to 64-bit
// lanes by interleaving them with zero dwords (zero extension).
inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
{
    return _mm_unpackhi_epi32(a, _mm_setzero_si128());
}
// Widens the two high signed 32-bit lanes of `a` (dwords 2..3) to 64-bit
// lanes with sign extension.
inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
{
    // Shifting right by 31 turns each lane into 0 or all ones according to
    // its sign; interleaving supplies that as the upper half of each result.
    const __m128i sign = _mm_srai_epi32(a, 31);
    return _mm_unpackhi_epi32(a, sign);
}
143
145inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
146{
147 const __m128i m = _mm_set1_epi32(65535);
148 __m128i am = _v128_min_epu32(a, m);
149 __m128i bm = _v128_min_epu32(b, m);
150#if CV_SSE4_1
151 return _mm_packus_epi32(am, bm);
152#else
153 const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
154 am = _mm_sub_epi32(am, d);
155 bm = _mm_sub_epi32(bm, d);
156 am = _mm_packs_epi32(am, bm);
157 return _mm_sub_epi16(am, nd);
158#endif
159}
160
161template<int i>
162inline int64 _v128_extract_epi64(const __m128i& a)
163{
164#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
165#define CV__SIMD_NATIVE_mm_extract_epi64 1
166 return _mm_extract_epi64(a, i);
167#else
168 CV_DECL_ALIGNED(16) int64 tmp[2];
169 _mm_store_si128((__m128i*)tmp, a);
170 return tmp[i];
171#endif
172}
173
174CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
175
177
178} // cv::
179
180#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
int64_t int64
Definition interface.h:61
#define CV_DECL_ALIGNED(x)
Definition cvdef.h:243
CvRect r
Definition imgproc_c.h:984
CvSize int int int CvPoint int delta
Definition imgproc_c.h:1168
CV_EXPORTS OutputArray int double double InputArray mask
Definition imgproc.hpp:2132
"black box" representation of the file storage associated with a file on disk.
Definition calib3d.hpp:441