EstervQrCode 2.0.0
Library for QR code manipulation
intrin.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP

#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"

#if defined(__GNUC__) && __GNUC__ == 12
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif

#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)

namespace {
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__clang__)
    // clang-cl doesn't export _tzcnt_u32 for non-BMI systems
    return value ? __builtin_ctz(value) : 32;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
    return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#elif defined(__clang__)
    return __builtin_ctz(value); // clang also defines __GNUC__, so this branch is effectively dead
#else
    // Portable fallback: a De Bruijn multiplication on the isolated lowest set bit.
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
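// Worked example of the De Bruijn fallback (illustrative, not part of the
// upstream header): for value = 20 (binary 10100), value & -value isolates the
// lowest set bit, 4; 4 * 0x077CB531U == 0x1DF2D4C4, whose top five bits
// (>> 27) are 3, and MultiplyDeBruijnBitPosition[3] == 2 -- the number of
// trailing zeros in 20.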

// unlike the HAL API, which lives in cv::hal,
// we put the intrinsics directly into the cv namespace,
// making them easier to use from the rest of the OpenCV code
namespace cv {

namespace hal {

enum StoreMode
{
    STORE_UNALIGNED = 0,
    STORE_ALIGNED = 1,
    STORE_ALIGNED_NOCACHE = 2
};

}

// TODO FIXIT: Don't use "God" traits. Split them into separate cases.
template<typename _Tp> struct V_TypeTraits
{
};

#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
    template<> struct V_TypeTraits<type> \
    { \
        typedef type value_type; \
        typedef int_type_ int_type; \
        typedef abs_type_ abs_type; \
        typedef uint_type_ uint_type; \
        typedef w_type_ w_type; \
        typedef q_type_ q_type; \
        typedef sum_type_ sum_type; \
    \
        static inline int_type reinterpret_int(type x) \
        { \
            union { type l; int_type i; } v; \
            v.l = x; \
            return v.i; \
        } \
    \
        static inline type reinterpret_from_int(int_type x) \
        { \
            union { type l; int_type i; } v; \
            v.i = x; \
            return v.l; \
        } \
    }

#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
    template<> struct V_TypeTraits<type> \
    { \
        typedef type value_type; \
        typedef int_type_ int_type; \
        typedef abs_type_ abs_type; \
        typedef uint_type_ uint_type; \
        typedef w_type_ w_type; \
        typedef sum_type_ sum_type; \
    \
        static inline int_type reinterpret_int(type x) \
        { \
            union { type l; int_type i; } v; \
            v.l = x; \
            return v.i; \
        } \
    \
        static inline type reinterpret_from_int(int_type x) \
        { \
            union { type l; int_type i; } v; \
            v.i = x; \
            return v.l; \
        } \
    }

CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
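// Illustrative note (not part of the upstream header): generic code uses these
// traits to derive companion types, e.g.
//   V_TypeTraits<short>::w_type is int (the widened accumulator type), and
//   V_TypeTraits<float>::reinterpret_int(-0.f) yields the bit pattern
//   0x80000000 as an int, since V_TypeTraits<float>::int_type is int.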

#ifndef CV_DOXYGEN

#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
#ifdef CV_FORCE_SIMD128_CPP
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#elif defined(CV_CPU_DISPATCH_MODE)
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
  #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}

#ifdef CV_DOXYGEN
# undef CV_AVX2
# undef CV_SSE2
# undef CV_NEON
# undef CV_VSX
# undef CV_FP16
# undef CV_MSA
# undef CV_RVV
#endif

#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_LSX) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif

#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"

#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_neon.hpp"

#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
#define CV_SIMD128_CPP 0
#include "opencv2/core/hal/intrin_rvv071.hpp"

#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_vsx.hpp"

#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_msa.hpp"

#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_wasm.hpp"

#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
#if defined(CV_RVV_SCALABLE)
#include "opencv2/core/hal/intrin_rvv_scalable.hpp"
#else
#include "opencv2/core/hal/intrin_rvv.hpp"
#endif

#elif CV_LSX && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_lsx.hpp"

#else

#include "opencv2/core/hal/intrin_cpp.hpp"

#endif

// AVX2 can be used together with SSE2, so
// we define both sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some AVX2 intrinsics get the v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) get the vx_ prefix
// and are mapped to their v256_ counterparts (e.g. vx_load() => v256_load()).
#if CV_AVX2

#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"

#endif

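// Illustrative sketch (not part of the upstream header): with AVX2 enabled
// both widths coexist, overload resolution picks the right variant, and the
// vx_ aliases defined below follow the widest available set:
//   v_float32x4 a = v_load(ptr4);    // 128-bit SSE2 register
//   v_float32x8 b = v256_load(ptr8); // 256-bit AVX2 register
//   v_float32   c = vx_load(ptr8);   // alias for v_float32x8 when AVX2 is widest
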
// AVX512 can be used together with SSE2 and AVX2, so
// we define all of those sets of intrinsics at once.
// Some AVX512 intrinsics get the v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// The wide intrinsics are mapped to their v512_ counterparts in this case (e.g. vx_load() => v512_load()).
#if CV_AVX512_SKX

#define CV__SIMD_FORWARD 512
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx512.hpp"

#endif

#if CV_LASX

#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_lasx.hpp"

#endif


namespace cv {

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif

#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif

#ifndef CV_SIMD128_CPP
#define CV_SIMD128_CPP 0
#endif

#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif

#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif

#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif

#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif

#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif

#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif

#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif

#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif

#ifndef CV_SIMD_SCALABLE
#define CV_SIMD_SCALABLE 0
#endif

#ifndef CV_SIMD_SCALABLE_64F
#define CV_SIMD_SCALABLE_64F 0
#endif

//==================================================================================================

template<typename _Tp> struct V_RegTraits
{
};

#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
    template<> struct V_RegTraits<_reg> \
    { \
        typedef _reg reg; \
        typedef _u_reg u_reg; \
        typedef _w_reg w_reg; \
        typedef _q_reg q_reg; \
        typedef _int_reg int_reg; \
        typedef _round_reg round_reg; \
    }

#if CV_SIMD128 || CV_SIMD128_CPP
    CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
    CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
    CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
    CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
    CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
    CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
    CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
    CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
    CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif

#if CV_SIMD256
    CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
    CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
    CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
    CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
    CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
    CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
    CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
    CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
    CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
    CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif

#if CV_SIMD512
    CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
    CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
    CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
    CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
    CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
    CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
    CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
    CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
    CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
    CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
#if CV_SIMD_SCALABLE
    CV_DEF_REG_TRAITS(v, v_uint8, uchar, u8, v_uint8, v_uint16, v_uint32, v_int8, void);
    CV_DEF_REG_TRAITS(v, v_int8, schar, s8, v_uint8, v_int16, v_int32, v_int8, void);
    CV_DEF_REG_TRAITS(v, v_uint16, ushort, u16, v_uint16, v_uint32, v_uint64, v_int16, void);
    CV_DEF_REG_TRAITS(v, v_int16, short, s16, v_uint16, v_int32, v_int64, v_int16, void);
    CV_DEF_REG_TRAITS(v, v_uint32, unsigned, u32, v_uint32, v_uint64, void, v_int32, void);
    CV_DEF_REG_TRAITS(v, v_int32, int, s32, v_uint32, v_int64, void, v_int32, void);
    CV_DEF_REG_TRAITS(v, v_float32, float, f32, v_float32, v_float64, void, v_int32, v_int32);
    CV_DEF_REG_TRAITS(v, v_uint64, uint64, u64, v_uint64, void, void, v_int64, void);
    CV_DEF_REG_TRAITS(v, v_int64, int64, s64, v_uint64, void, void, v_int64, void);
    CV_DEF_REG_TRAITS(v, v_float64, double, f64, v_float64, void, void, v_int64, v_int32);
#endif
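// Illustrative note (not part of the upstream header): V_RegTraits answers
// questions such as "which register does this type widen into?", e.g.
//   V_RegTraits<v_uint8x16>::w_reg is v_uint16x8;
// generic code uses it to pick the destination type of v_expand and friends.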

#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
#define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD 1
    #define CV_SIMD_64F CV_SIMD512_64F
    #define CV_SIMD_FP16 CV_SIMD512_FP16
    #define CV_SIMD_WIDTH 64
    typedef v_uint8x64 v_uint8;
    typedef v_int8x64 v_int8;
    typedef v_uint16x32 v_uint16;
    typedef v_int16x32 v_int16;
    typedef v_uint32x16 v_uint32;
    typedef v_int32x16 v_int32;
    typedef v_uint64x8 v_uint64;
    typedef v_int64x8 v_int64;
    typedef v_float32x16 v_float32;
    #if CV_SIMD512_64F
    typedef v_float64x8 v_float64;
    #endif

    #define VXPREFIX(func) v512##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
#define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD 1
    #define CV_SIMD_64F CV_SIMD256_64F
    #define CV_SIMD_FP16 CV_SIMD256_FP16
    #define CV_SIMD_WIDTH 32
    typedef v_uint8x32 v_uint8;
    typedef v_int8x32 v_int8;
    typedef v_uint16x16 v_uint16;
    typedef v_int16x16 v_int16;
    typedef v_uint32x8 v_uint32;
    typedef v_int32x8 v_int32;
    typedef v_uint64x4 v_uint64;
    typedef v_int64x4 v_int64;
    typedef v_float32x8 v_float32;
    #if CV_SIMD256_64F
    typedef v_float64x4 v_float64;
    #endif

    #define VXPREFIX(func) v256##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
#if defined CV_SIMD128_CPP
#define CV__SIMD_NAMESPACE simd128_cpp
#else
#define CV__SIMD_NAMESPACE simd128
#endif
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD CV_SIMD128
    #define CV_SIMD_64F CV_SIMD128_64F
    #define CV_SIMD_WIDTH 16
    typedef v_uint8x16 v_uint8;
    typedef v_int8x16 v_int8;
    typedef v_uint16x8 v_uint16;
    typedef v_int16x8 v_int16;
    typedef v_uint32x4 v_uint32;
    typedef v_int32x4 v_int32;
    typedef v_uint64x2 v_uint64;
    typedef v_int64x2 v_int64;
    typedef v_float32x4 v_float32;
    #if CV_SIMD128_64F
    typedef v_float64x2 v_float64;
    #endif

    #define VXPREFIX(func) v##func
} // namespace
using namespace CV__SIMD_NAMESPACE;

#elif CV_SIMD_SCALABLE
#define CV__SIMD_NAMESPACE simd
namespace CV__SIMD_NAMESPACE {
    #define CV_SIMD 0
    #define CV_SIMD_WIDTH 128 /* 1024/8 */

    #define VXPREFIX(func) v##func
} // namespace
using namespace CV__SIMD_NAMESPACE;

#endif
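// Illustrative note (not part of the upstream header): exactly one of the
// simd512 / simd256 / simd128 / simd namespaces above is selected and imported,
// so width-agnostic code can be written once against v_uint8, v_float32, ...
// with CV_SIMD_WIDTH giving the register width in bytes; e.g. with AVX2,
// v_float32 is v_float32x8 and VXPREFIX maps vx_load to v256_load.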

#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif

namespace CV__SIMD_NAMESPACE {
    inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
    inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
    inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
    inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
    inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
    inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
    inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
    inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
    inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
#endif

    inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
    inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
    inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
    inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
    inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
    inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
    inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
    inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
    inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
#endif

    inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
    inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
#endif
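
    // Illustrative sketch (not part of the upstream header): a typical
    // width-agnostic loop built from these wrappers. The function name and
    // arrays are hypothetical; vx_store, v_add, v_mul and VTraits are defined
    // further below in this header (assuming a non-scalable backend):
    //
    //   void scale_add(float a, const float* x, float* y, int n)
    //   {
    //       v_float32 va = vx_setall_f32(a);
    //       const int lanes = VTraits<v_float32>::vlanes();
    //       int i = 0;
    //       for (; i <= n - lanes; i += lanes)
    //           vx_store(y + i, v_add(v_mul(va, vx_load(x + i)), vx_load(y + i)));
    //       for (; i < n; i++)  // scalar tail for the remaining elements
    //           y[i] = a * x[i] + y[i];
    //       vx_cleanup();       // let the backend reset any global state
    //   }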

    inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
    inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#endif

    inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
    inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
#endif

    inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
    inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#endif

    inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
    inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#endif

    inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
    inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
    inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#endif

    inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
    inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }

    inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
    inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); }

    inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }

    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }

#if !CV_SIMD_SCALABLE && !(CV_NEON && !defined(CV_FORCE_SIMD128_CPP))
    // Compatibility layer

    template<typename T> struct VTraits {
        static inline int vlanes() { return T::nlanes; }
        enum { nlanes = T::nlanes, max_nlanes = T::nlanes };
        using lane_type = typename T::lane_type;
    };

    #define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \
    inline _Tpvec v_add(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a + b; \
    } \
    inline _Tpvec v_sub(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a - b; \
    } \
    template<typename... Args> \
    inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
        return v_add(f1 + f2, vf...); \
    }
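
    // Illustrative note (not part of the upstream header): the variadic
    // overload folds from the left, so v_add(a, b, c) expands to
    // v_add(a + b, c), i.e. (a + b) + c, matching scalar evaluation order;
    // the variadic v_mul defined further below folds the same way.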
    #define OPENCV_HAL_WRAP_SHIFT_OP(_Tpvec) \
    inline _Tpvec v_shr(const _Tpvec& a, int n) \
    { \
        return a >> n; \
    } \
    inline _Tpvec v_shl(const _Tpvec& a, int n) \
    { \
        return a << n; \
    }

    OPENCV_HAL_WRAP_SHIFT_OP(v_uint16)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint32)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint64)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int16)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int32)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int64)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    // when we use CV_SIMD128 with 256/512 bit SIMD (e.g. AVX2 or AVX512)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x4)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x2)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x4)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x2)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x4)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint16x8)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint32x4)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint64x2)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int16x8)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int32x4)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int64x2)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    // when we use CV_SIMD256 with 512 bit SIMD (e.g. AVX512)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x4)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x4)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x8)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint16x16)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint32x8)
    OPENCV_HAL_WRAP_SHIFT_OP(v_uint64x4)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int16x16)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int32x8)
    OPENCV_HAL_WRAP_SHIFT_OP(v_int64x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_BIN_OP_LOGIC(_Tpvec) \
    inline _Tpvec v_and(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a & b; \
    } \
    inline _Tpvec v_or(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a | b; \
    } \
    inline _Tpvec v_xor(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a ^ b; \
    }

    #define OPENCV_HAL_WRAP_NOT_OP(_Tpvec) \
    inline _Tpvec v_not(const _Tpvec& a) \
    { \
        return ~a; \
    }

    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32)
    OPENCV_HAL_WRAP_NOT_OP(v_uint8)
    OPENCV_HAL_WRAP_NOT_OP(v_uint16)
    OPENCV_HAL_WRAP_NOT_OP(v_uint32)
    OPENCV_HAL_WRAP_NOT_OP(v_uint64)
    OPENCV_HAL_WRAP_NOT_OP(v_int8)
    OPENCV_HAL_WRAP_NOT_OP(v_int16)
    OPENCV_HAL_WRAP_NOT_OP(v_int32)
    OPENCV_HAL_WRAP_NOT_OP(v_int64)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x4)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x2)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x4)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x2)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32x4)
    OPENCV_HAL_WRAP_NOT_OP(v_uint8x16)
    OPENCV_HAL_WRAP_NOT_OP(v_uint16x8)
    OPENCV_HAL_WRAP_NOT_OP(v_uint32x4)
    OPENCV_HAL_WRAP_NOT_OP(v_uint64x2)
    OPENCV_HAL_WRAP_NOT_OP(v_int8x16)
    OPENCV_HAL_WRAP_NOT_OP(v_int16x8)
    OPENCV_HAL_WRAP_NOT_OP(v_int32x4)
    OPENCV_HAL_WRAP_NOT_OP(v_int64x2)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x32)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x4)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x32)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x16)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x8)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x4)
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32x8)
    OPENCV_HAL_WRAP_NOT_OP(v_uint8x32)
    OPENCV_HAL_WRAP_NOT_OP(v_uint16x16)
    OPENCV_HAL_WRAP_NOT_OP(v_uint32x8)
    OPENCV_HAL_WRAP_NOT_OP(v_uint64x4)
    OPENCV_HAL_WRAP_NOT_OP(v_int8x32)
    OPENCV_HAL_WRAP_NOT_OP(v_int16x16)
    OPENCV_HAL_WRAP_NOT_OP(v_int32x8)
    OPENCV_HAL_WRAP_NOT_OP(v_int64x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \
    inline _Tpvec v_mul(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a * b; \
    } \
    template<typename... Args> \
    inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
        return v_mul(f1 * f2, vf...); \
    }
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x4)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x4)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x8)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_BIN_OP_DIV(_Tpvec) \
    inline _Tpvec v_div(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a / b; \
    }
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x8)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_CMP_OP(_Tpvec, intrin, op) \
    inline _Tpvec v_##intrin(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a op b; \
    }
    #define OPENCV_HAL_WRAP_EQ_OP(_Tpvec) \
    inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a == b; \
    } \
    inline _Tpvec v_ne(const _Tpvec& a, const _Tpvec& b) \
    { \
        return a != b; \
    }

    #define OPENCV_HAL_WRAP_CMP(_Tpvec) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, eq, ==) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, ne, !=) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, lt, <) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, gt, >) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, le, <=) \
    OPENCV_HAL_WRAP_CMP_OP(_Tpvec, ge, >=)

    OPENCV_HAL_WRAP_CMP(v_uint8)
    OPENCV_HAL_WRAP_CMP(v_uint16)
    OPENCV_HAL_WRAP_CMP(v_uint32)
    OPENCV_HAL_WRAP_EQ_OP(v_uint64)
    OPENCV_HAL_WRAP_CMP(v_int8)
    OPENCV_HAL_WRAP_CMP(v_int16)
    OPENCV_HAL_WRAP_CMP(v_int32)
    OPENCV_HAL_WRAP_EQ_OP(v_int64)
    OPENCV_HAL_WRAP_CMP(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_CMP(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_CMP(v_uint8x16)
    OPENCV_HAL_WRAP_CMP(v_uint16x8)
    OPENCV_HAL_WRAP_CMP(v_uint32x4)
    OPENCV_HAL_WRAP_EQ_OP(v_uint64x2)
    OPENCV_HAL_WRAP_CMP(v_int8x16)
    OPENCV_HAL_WRAP_CMP(v_int16x8)
    OPENCV_HAL_WRAP_CMP(v_int32x4)
    OPENCV_HAL_WRAP_EQ_OP(v_int64x2)
    OPENCV_HAL_WRAP_CMP(v_float32x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_CMP(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_CMP(v_uint8x32)
    OPENCV_HAL_WRAP_CMP(v_uint16x16)
    OPENCV_HAL_WRAP_CMP(v_uint32x8)
    OPENCV_HAL_WRAP_EQ_OP(v_uint64x4)
    OPENCV_HAL_WRAP_CMP(v_int8x32)
    OPENCV_HAL_WRAP_CMP(v_int16x16)
    OPENCV_HAL_WRAP_CMP(v_int32x8)
    OPENCV_HAL_WRAP_EQ_OP(v_int64x4)
    OPENCV_HAL_WRAP_CMP(v_float32x8)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_CMP(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
    inline typename VTraits<_Tpvec>::lane_type v_get0(const _Tpvec& v) \
    { \
        return v.get0(); \
    }

    OPENCV_HAL_WRAP_GRT0(v_uint8)
    OPENCV_HAL_WRAP_GRT0(v_int8)
    OPENCV_HAL_WRAP_GRT0(v_uint16)
    OPENCV_HAL_WRAP_GRT0(v_int16)
    OPENCV_HAL_WRAP_GRT0(v_uint32)
    OPENCV_HAL_WRAP_GRT0(v_int32)
    OPENCV_HAL_WRAP_GRT0(v_uint64)
    OPENCV_HAL_WRAP_GRT0(v_int64)
    OPENCV_HAL_WRAP_GRT0(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_GRT0(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_GRT0(v_uint8x16)
    OPENCV_HAL_WRAP_GRT0(v_uint16x8)
    OPENCV_HAL_WRAP_GRT0(v_uint32x4)
    OPENCV_HAL_WRAP_GRT0(v_uint64x2)
    OPENCV_HAL_WRAP_GRT0(v_int8x16)
    OPENCV_HAL_WRAP_GRT0(v_int16x8)
    OPENCV_HAL_WRAP_GRT0(v_int32x4)
    OPENCV_HAL_WRAP_GRT0(v_int64x2)
    OPENCV_HAL_WRAP_GRT0(v_float32x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_GRT0(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_GRT0(v_uint8x32)
    OPENCV_HAL_WRAP_GRT0(v_uint16x16)
    OPENCV_HAL_WRAP_GRT0(v_uint32x8)
    OPENCV_HAL_WRAP_GRT0(v_uint64x4)
    OPENCV_HAL_WRAP_GRT0(v_int8x32)
    OPENCV_HAL_WRAP_GRT0(v_int16x16)
    OPENCV_HAL_WRAP_GRT0(v_int32x8)
    OPENCV_HAL_WRAP_GRT0(v_int64x4)
    OPENCV_HAL_WRAP_GRT0(v_float32x8)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_GRT0(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \
    inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \
    { \
        return v_extract_n<VTraits<_Tpvec>::nlanes-1>(v); \
    }
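
    // Illustrative example (not part of the upstream header): for a four-lane
    // v_int32 holding {1, 2, 3, 4}, v_extract_highest returns
    // v_extract_n<3>(v), i.e. 4.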

    OPENCV_HAL_WRAP_EXTRACT(v_uint8)
    OPENCV_HAL_WRAP_EXTRACT(v_int8)
    OPENCV_HAL_WRAP_EXTRACT(v_uint16)
    OPENCV_HAL_WRAP_EXTRACT(v_int16)
    OPENCV_HAL_WRAP_EXTRACT(v_uint32)
    OPENCV_HAL_WRAP_EXTRACT(v_int32)
    OPENCV_HAL_WRAP_EXTRACT(v_uint64)
    OPENCV_HAL_WRAP_EXTRACT(v_int64)
    OPENCV_HAL_WRAP_EXTRACT(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_EXTRACT(v_float64)
    #endif
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_EXTRACT(v_uint8x16)
    OPENCV_HAL_WRAP_EXTRACT(v_uint16x8)
    OPENCV_HAL_WRAP_EXTRACT(v_uint32x4)
    OPENCV_HAL_WRAP_EXTRACT(v_uint64x2)
    OPENCV_HAL_WRAP_EXTRACT(v_int8x16)
    OPENCV_HAL_WRAP_EXTRACT(v_int16x8)
    OPENCV_HAL_WRAP_EXTRACT(v_int32x4)
    OPENCV_HAL_WRAP_EXTRACT(v_int64x2)
    OPENCV_HAL_WRAP_EXTRACT(v_float32x4)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_EXTRACT(v_float64x2)
    #endif
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_EXTRACT(v_uint8x32)
    OPENCV_HAL_WRAP_EXTRACT(v_uint16x16)
    OPENCV_HAL_WRAP_EXTRACT(v_uint32x8)
    OPENCV_HAL_WRAP_EXTRACT(v_uint64x4)
    OPENCV_HAL_WRAP_EXTRACT(v_int8x32)
    OPENCV_HAL_WRAP_EXTRACT(v_int16x16)
    OPENCV_HAL_WRAP_EXTRACT(v_int32x8)
    OPENCV_HAL_WRAP_EXTRACT(v_int64x4)
    OPENCV_HAL_WRAP_EXTRACT(v_float32x8)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_EXTRACT(v_float64x4)
    #endif
    #endif

    #define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \
    inline _Tpvec v_broadcast_highest(const _Tpvec& v) \
    { \
        return v_broadcast_element<VTraits<_Tpvec>::nlanes-1>(v); \
    }
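
    // Illustrative example (not part of the upstream header): for a four-lane
    // v_int32 holding {1, 2, 3, 4}, v_broadcast_highest yields {4, 4, 4, 4}.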

    OPENCV_HAL_WRAP_BROADCAST(v_uint32)
    OPENCV_HAL_WRAP_BROADCAST(v_int32)
    OPENCV_HAL_WRAP_BROADCAST(v_float32)
    #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
    OPENCV_HAL_WRAP_BROADCAST(v_uint32x4)
    OPENCV_HAL_WRAP_BROADCAST(v_int32x4)
    OPENCV_HAL_WRAP_BROADCAST(v_float32x4)
    #endif
    #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
    OPENCV_HAL_WRAP_BROADCAST(v_uint32x8)
    OPENCV_HAL_WRAP_BROADCAST(v_int32x8)
    OPENCV_HAL_WRAP_BROADCAST(v_float32x8)
    #endif

#endif

#if (CV_NEON /* || CV_others */) && !defined(CV_FORCE_SIMD128_CPP)
// Compatibility layer for backends that have already been cleaned up:
// they provide v_add/v_mul natively, so only the variadic overloads are added here.
    #define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \
    template<typename... Args> \
    inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
        return v_add(v_add(f1, f2), vf...); \
    }

    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64)
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64)
    #endif

    #define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \
    template<typename... Args> \
    inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
        return v_mul(v_mul(f1, f2), vf...); \
    }
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32)
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64)
    #endif

    #define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \
    inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \
    { \
        return v_extract_n<VTraits<_Tpvec>::nlanes-1>(v); \
    }

    OPENCV_HAL_WRAP_EXTRACT(v_uint8)
    OPENCV_HAL_WRAP_EXTRACT(v_int8)
    OPENCV_HAL_WRAP_EXTRACT(v_uint16)
    OPENCV_HAL_WRAP_EXTRACT(v_int16)
    OPENCV_HAL_WRAP_EXTRACT(v_uint32)
    OPENCV_HAL_WRAP_EXTRACT(v_int32)
    OPENCV_HAL_WRAP_EXTRACT(v_uint64)
    OPENCV_HAL_WRAP_EXTRACT(v_int64)
    OPENCV_HAL_WRAP_EXTRACT(v_float32)
    #if CV_SIMD_64F
    OPENCV_HAL_WRAP_EXTRACT(v_float64)
    #endif

    #define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \
    inline _Tpvec v_broadcast_highest(const _Tpvec& v) \
    { \
        return v_broadcast_element<VTraits<_Tpvec>::nlanes-1>(v); \
    }

    OPENCV_HAL_WRAP_BROADCAST(v_uint32)
    OPENCV_HAL_WRAP_BROADCAST(v_int32)
    OPENCV_HAL_WRAP_BROADCAST(v_float32)

#endif //CV_NEON


    // backward compatibility
    template<typename _Tp, typename _Tvec> static inline
    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
    // backward compatibility
    template<typename _Tp, typename _Tvec> static inline
    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }


    #undef VXPREFIX
} // namespace


#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0
#endif

#ifndef CV_SIMD
#define CV_SIMD 0
#endif

#include "simd_utils.impl.hpp"

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif

} // cv::


#if defined(__GNUC__) && __GNUC__ == 12
#pragma GCC diagnostic pop
#endif

#endif // OPENCV_HAL_INTRIN_HPP