// Section guarded by __OPENCV_BUILD: includes cv_cpu_config.h and
// cv_cpu_helper.h, then defines the namespace macros used for CPU-specific code.
// NOTE(review): the #if below ends in a line continuation; the remainder of its
// condition and the matching #endif are not visible in this excerpt.
5 #if defined __OPENCV_BUILD \
7 #include "cv_cpu_config.h"
8 #include "cv_cpu_helper.h"
// With CV_CPU_DISPATCH_MODE defined, the optimization namespace is the token
// `opt_` concatenated (via __CV_CAT) with the dispatch mode name.
10 #ifdef CV_CPU_DISPATCH_MODE
11 #define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
12 #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
13 #define CV_CPU_OPTIMIZATION_NAMESPACE_END }
// Alternative definitions: the namespace is `cpu_baseline` and
// CV_CPU_BASELINE_MODE is set to 1.
// NOTE(review): presumably the #else branch of the #ifdef above; the #else and
// #endif lines are not visible here -- confirm against the full file.
15 #define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
16 #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
17 #define CV_CPU_OPTIMIZATION_NAMESPACE_END }
18 #define CV_CPU_BASELINE_MODE 1
// Dispatch macro chain.
//   CV_CPU_DISPATCH(fn, args, modes...) appends the sentinel mode END to the
//   mode list and forwards to __CV_CPU_DISPATCH_EXPAND.
//   __CV_CPU_DISPATCH pastes the first mode onto __CV_CPU_DISPATCH_CHAIN_ and
//   invokes the resulting macro with fn, args and the remaining modes.
//   __CV_CPU_DISPATCH_CHAIN_END expands to nothing, which terminates the chain.
// NOTE(review): __CV_EXPAND and the per-mode __CV_CPU_DISPATCH_CHAIN_<mode>
// macros are not defined in this excerpt; presumably they come from
// cv_cpu_helper.h -- confirm.
22 #define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)
23 #define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
24 #define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
25 #define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END)
// Intrinsics section: active when CV_ENABLE_INTRINSICS is defined and neither
// CV_DISABLE_OPTIMIZATION nor __CUDACC__ is defined. Each CV_CPU_COMPILE_<ISA>
// check below includes the intrinsics header for that x86 SIMD extension.
// NOTE(review): the condition continues past the last visible backslash, and
// the #endif lines of the nested checks are not visible in this excerpt.
28 #if defined CV_ENABLE_INTRINSICS \
29 && !defined CV_DISABLE_OPTIMIZATION \
30 && !defined __CUDACC__ \
32 #ifdef CV_CPU_COMPILE_SSE2
33 # include <emmintrin.h>
38 #ifdef CV_CPU_COMPILE_SSE3
39 # include <pmmintrin.h>
42 #ifdef CV_CPU_COMPILE_SSSE3
43 # include <tmmintrin.h>
46 #ifdef CV_CPU_COMPILE_SSE4_1
47 # include <smmintrin.h>
50 #ifdef CV_CPU_COMPILE_SSE4_2
51 # include <nmmintrin.h>
// POPCNT: two alternative definitions of CV_POPCNT_U32 / CV_POPCNT_U64 appear.
// The first pair maps to the _mm_popcnt_* intrinsics after <nmmintrin.h>; the
// 64-bit result is cast to int.
// NOTE(review): the conditionals selecting between the two variants are not
// visible here -- confirm against the full file.
54 #ifdef CV_CPU_COMPILE_POPCNT
56 # include <nmmintrin.h>
58 # define CV_POPCNT_U64 (int)_mm_popcnt_u64
60 # define CV_POPCNT_U32 _mm_popcnt_u32
// Second pair: maps to the __builtin_popcount* builtins after <popcntintrin.h>;
// the 64-bit variant is defined under the __x86_64__ check.
62 # include <popcntintrin.h>
63 # if defined(__x86_64__)
64 # define CV_POPCNT_U64 __builtin_popcountll
66 # define CV_POPCNT_U32 __builtin_popcount
// AVX: include the <immintrin.h> intrinsics header.
70 #ifdef CV_CPU_COMPILE_AVX
71 # include <immintrin.h>
// FP16: ARM targets (detected through __arm__, __aarch64__, _M_ARM or _M_ARM64)
// include <arm_neon.h>; the <immintrin.h> include that follows is the
// alternative.
// NOTE(review): the #else separating the two includes is not visible here.
74 #ifdef CV_CPU_COMPILE_FP16
75 # if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
76 # include <arm_neon.h>
78 # include <immintrin.h>
// NEON dot-product: include <arm_neon.h> and set the CV_NEON_DOT flag to 1.
82 #ifdef CV_CPU_COMPILE_NEON_DOTPROD
83 # include <arm_neon.h>
84 # define CV_NEON_DOT 1
// AVX2: include the <immintrin.h> intrinsics header.
86 #ifdef CV_CPU_COMPILE_AVX2
87 # include <immintrin.h>
// AVX-512 feature flags. Each CV_CPU_COMPILE_AVX512_<GROUP> check sets the
// group flag CV_AVX512_<GROUP> to 1 together with the individual
// CV_AVX_512<feature> flags listed beneath it.
// NOTE(review): the #endif lines closing each check are not visible in this
// excerpt.
90 #ifdef CV_CPU_COMPILE_AVX_512F
91 # include <immintrin.h>
92 # define CV_AVX_512F 1
94 #ifdef CV_CPU_COMPILE_AVX512_COMMON
95 # define CV_AVX512_COMMON 1
96 # define CV_AVX_512CD 1
98 #ifdef CV_CPU_COMPILE_AVX512_KNL
99 # define CV_AVX512_KNL 1
100 # define CV_AVX_512ER 1
101 # define CV_AVX_512PF 1
103 #ifdef CV_CPU_COMPILE_AVX512_KNM
104 # define CV_AVX512_KNM 1
105 # define CV_AVX_5124FMAPS 1
106 # define CV_AVX_5124VNNIW 1
107 # define CV_AVX_512VPOPCNTDQ 1
109 #ifdef CV_CPU_COMPILE_AVX512_SKX
110 # define CV_AVX512_SKX 1
111 # define CV_AVX_512VL 1
112 # define CV_AVX_512BW 1
113 # define CV_AVX_512DQ 1
115 #ifdef CV_CPU_COMPILE_AVX512_CNL
116 # define CV_AVX512_CNL 1
117 # define CV_AVX_512IFMA 1
118 # define CV_AVX_512VBMI 1
120 #ifdef CV_CPU_COMPILE_AVX512_CLX
121 # define CV_AVX512_CLX 1
122 # define CV_AVX_512VNNI 1
// ICL: IFMA, VBMI and VNNI may already have been defined by the CNL / CLX
// checks above, so each is #undef'd before being defined again.
124 #ifdef CV_CPU_COMPILE_AVX512_ICL
125 # define CV_AVX512_ICL 1
126 # undef CV_AVX_512IFMA
127 # define CV_AVX_512IFMA 1
128 # undef CV_AVX_512VBMI
129 # define CV_AVX_512VBMI 1
130 # undef CV_AVX_512VNNI
131 # define CV_AVX_512VNNI 1
132 # define CV_AVX_512VBMI2 1
133 # define CV_AVX_512BITALG 1
134 # define CV_AVX_512VPOPCNTDQ 1
// FMA3 check.
// NOTE(review): the body of this check is not visible in this excerpt.
136 #ifdef CV_CPU_COMPILE_FMA3
// ARM NEON: <arm_neon.h> is included either on Windows ARM targets (when
// CV_CPU_COMPILE_NEON is defined or the compiler is not MSVC) or when the
// compiler defines __ARM_NEON.
140 #if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
142 # include <arm_neon.h>
144 #elif defined(__ARM_NEON)
145 # include <arm_neon.h>
// RISC-V vector extension, selected only when __riscv_vector_071 is defined.
149 #if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
150 # include<riscv_vector.h>
// VSX: include <altivec.h>.
154 #ifdef CV_CPU_COMPILE_VSX
155 # include <altivec.h>
// VSX3 check.
// NOTE(review): the body of this check is not visible in this excerpt.
162 #ifdef CV_CPU_COMPILE_VSX3
// MSA: uses the project-local header hal/msa_macros.h.
166 #ifdef CV_CPU_COMPILE_MSA
167 # include "hal/msa_macros.h"
// LSX and LASX: include <lsxintrin.h> and <lasxintrin.h> respectively.
171 #ifdef CV_CPU_COMPILE_LSX
172 # include <lsxintrin.h>
176 #ifdef CV_CPU_COMPILE_LASX
177 # include <lasxintrin.h>
// Emscripten builds: set CV_WASM_SIMD to 1 and include <wasm_simd128.h>.
181 #ifdef __EMSCRIPTEN__
182 # define CV_WASM_SIMD 1
183 # include <wasm_simd128.h>
// RVV: include <riscv_vector.h>.
186 #if defined CV_CPU_COMPILE_RVV
188 # include <riscv_vector.h>
// Defined only when CV_CPU_COMPILE_AVX is defined and CV_CPU_BASELINE_COMPILE_AVX
// is not.
// VZeroUpperGuard calls _mm256_zeroupper() in both its constructor and its
// destructor, so the instruction is issued when a guard object is created and
// again when it goes out of scope.
// NOTE(review): the struct's closing brace and the conditionals presumably
// wrapping each __attribute__((always_inline)) are not visible in this excerpt.
193 #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
194 struct VZeroUpperGuard {
196 __attribute__((always_inline))
198 inline VZeroUpperGuard() { _mm256_zeroupper(); }
200 __attribute__((always_inline))
202 inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
// __CV_AVX_GUARD declares a local VZeroUpperGuard named __vzeroupper_guard and
// passes it to CV_UNUSED.
204 #define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
// CV_AVX_GUARD expands to __CV_AVX_GUARD when the latter is defined.
// NOTE(review): the definition used when __CV_AVX_GUARD is absent is not
// visible here.
207 #ifdef __CV_AVX_GUARD
208 #define CV_AVX_GUARD __CV_AVX_GUARD
// Section for translation units where __OPENCV_BUILD is not defined (and not
// compiled by CUDA's __CUDACC__). The CV_CPU_COMPILE_* macros are not consulted
// for x86 or VSX here; intrinsics headers are chosen from compiler-predefined
// macros instead.
// NOTE(review): the condition continues past the last visible backslash; its
// remainder and the matching #endif are not visible in this excerpt.
217 #if !defined __OPENCV_BUILD \
218 && !defined __CUDACC__
219 #if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
220 # include <emmintrin.h>
224 #elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
226 # include <arm_neon.h>
228 #elif defined(__ARM_NEON)
229 # include <arm_neon.h>
231 #elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
232 # include <altivec.h>
// NOTE(review): the condition guarding this include is not visible here.
240 # include <immintrin.h>
// Fallback values: each feature flag below is defined as 0. Where the #ifndef
// guard is visible, the 0 applies only when the flag has not been defined
// already.
// NOTE(review): several defines appear without a visible #ifndef guard
// (CV_AVX_512F, BW, CD, DQ, ER, PF, VL, CV_WASM_SIMD); presumably the guards
// and all #endif lines exist in the full file -- confirm.
285 # define CV_AVX_512F 0
288 # define CV_AVX_512BW 0
291 # define CV_AVX_512CD 0
294 # define CV_AVX_512DQ 0
297 # define CV_AVX_512ER 0
299 #ifndef CV_AVX_512IFMA
300 # define CV_AVX_512IFMA 0
// CV_AVX_512IFMA512 is an alias that expands to CV_AVX_512IFMA.
302 #define CV_AVX_512IFMA512 CV_AVX_512IFMA
304 # define CV_AVX_512PF 0
306 #ifndef CV_AVX_512VBMI
307 # define CV_AVX_512VBMI 0
310 # define CV_AVX_512VL 0
312 #ifndef CV_AVX_5124FMAPS
313 # define CV_AVX_5124FMAPS 0
315 #ifndef CV_AVX_5124VNNIW
316 # define CV_AVX_5124VNNIW 0
318 #ifndef CV_AVX_512VPOPCNTDQ
319 # define CV_AVX_512VPOPCNTDQ 0
321 #ifndef CV_AVX_512VNNI
322 # define CV_AVX_512VNNI 0
324 #ifndef CV_AVX_512VBMI2
325 # define CV_AVX_512VBMI2 0
327 #ifndef CV_AVX_512BITALG
328 # define CV_AVX_512BITALG 0
// Fallbacks for the AVX-512 group flags.
330 #ifndef CV_AVX512_COMMON
331 # define CV_AVX512_COMMON 0
333 #ifndef CV_AVX512_KNL
334 # define CV_AVX512_KNL 0
336 #ifndef CV_AVX512_KNM
337 # define CV_AVX512_KNM 0
339 #ifndef CV_AVX512_SKX
340 # define CV_AVX512_SKX 0
342 #ifndef CV_AVX512_CNL
343 # define CV_AVX512_CNL 0
345 #ifndef CV_AVX512_CLX
346 # define CV_AVX512_CLX 0
348 #ifndef CV_AVX512_ICL
349 # define CV_AVX512_ICL 0
373 # define CV_WASM_SIMD 0