5 #ifndef OPENCV_HAL_INTRIN_WASM_HPP
6 #define OPENCV_HAL_INTRIN_WASM_HPP
11 #include <emscripten/version.h>
12 #include "opencv2/core/saturate.hpp"
15 #define CV_SIMD128_64F 0
16 #define CV_SIMD128_FP16 0
23 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
25 #if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046)
27 #define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4
28 #define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4
29 #define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2
30 #define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2
31 #define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4
32 #define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4
33 #define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2
34 #define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2
41 typedef uchar lane_type;
42 typedef v128_t vector_type;
50 uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
51 val = wasm_v128_load(v);
56 return (uchar)wasm_i8x16_extract_lane(val, 0);
64 typedef schar lane_type;
65 typedef v128_t vector_type;
73 schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
74 val = wasm_v128_load(v);
79 return wasm_i8x16_extract_lane(val, 0);
88 typedef v128_t vector_type;
95 ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
96 val = wasm_v128_load(v);
101 return (ushort)wasm_i16x8_extract_lane(val, 0);
109 typedef short lane_type;
110 typedef v128_t vector_type;
115 v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
117 short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
118 val = wasm_v128_load(v);
123 return wasm_i16x8_extract_lane(val, 0);
131 typedef unsigned lane_type;
132 typedef v128_t vector_type;
137 v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
139 unsigned v[] = {v0, v1, v2, v3};
140 val = wasm_v128_load(v);
143 unsigned get0() const
145 return (unsigned)wasm_i32x4_extract_lane(val, 0);
153 typedef int lane_type;
154 typedef v128_t vector_type;
159 v_int32x4(int v0, int v1, int v2, int v3)
161 int v[] = {v0, v1, v2, v3};
162 val = wasm_v128_load(v);
167 return wasm_i32x4_extract_lane(val, 0);
175 typedef float lane_type;
176 typedef v128_t vector_type;
181 v_float32x4(float v0, float v1, float v2, float v3)
183 float v[] = {v0, v1, v2, v3};
184 val = wasm_v128_load(v);
189 return wasm_f32x4_extract_lane(val, 0);
198 typedef v128_t vector_type;
206 val = wasm_v128_load(v);
211 return (uint64)wasm_i64x2_extract_lane(val, 0);
219 typedef int64 lane_type;
220 typedef v128_t vector_type;
227 int64 v[] = {v0, v1};
228 val = wasm_v128_load(v);
233 return wasm_i64x2_extract_lane(val, 0);
241 typedef double lane_type;
242 typedef v128_t vector_type;
249 double v[] = {v0, v1};
250 val = wasm_v128_load(v);
255 return wasm_f64x2_extract_lane(val, 0);
263 #define OPENCV_HAL_IMPL_REINTERPRET_INT(ft, tt) \
264 inline tt reinterpret_int(ft x) { union { ft l; tt i; } v; v.l = x; return v.i; }
267 OPENCV_HAL_IMPL_REINTERPRET_INT(ushort, short)
268 OPENCV_HAL_IMPL_REINTERPRET_INT(short, short)
269 OPENCV_HAL_IMPL_REINTERPRET_INT(unsigned, int)
270 OPENCV_HAL_IMPL_REINTERPRET_INT(int, int)
271 OPENCV_HAL_IMPL_REINTERPRET_INT(float, int)
274 OPENCV_HAL_IMPL_REINTERPRET_INT(double, int64)
276 static const unsigned char popCountTable[] =
278 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
279 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
280 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
281 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
282 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
283 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
284 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
285 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
286 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
287 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
288 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
289 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
290 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
291 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
292 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
293 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
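// Lookup table of per-byte population counts: popCountTable[b] is the number of set bits in byte b.
// For example, popCountTable[0x0d] == 3, since 0x0d == 0b00001101.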
297 static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) {
298 return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
301 static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) {
302 return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
305 static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) {
306 return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
309 static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) {
310 return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
313 static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) {
314 return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
317 static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) {
318 return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
321 static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) {
322 return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
325 static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) {
326 return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
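// These helpers emulate SSE-style unpack operations with a single byte shuffle:
// wasm_unpacklo_i8x16(a, b) interleaves the low 8 bytes of a and b as {a0,b0,a1,b1,...,a7,b7},
// and wasm_unpackhi_i8x16 does the same with the high 8 bytes; the 16/32/64-bit variants
// interleave correspondingly wider lanes.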
331 inline v128_t v128_cvtu8x16_i16x8(const v128_t& a)
333 const v128_t z = wasm_i8x16_splat(0);
334 return wasm_unpacklo_i8x16(a, z);
336 inline v128_t v128_cvti8x16_i16x8(const v128_t& a)
337 { return wasm_i16x8_shr(wasm_unpacklo_i8x16(a, a), 8); }
339 inline v128_t v128_cvtu8x16_i32x4(const v128_t& a)
341 const v128_t z = wasm_i8x16_splat(0);
342 return wasm_unpacklo_i16x8(wasm_unpacklo_i8x16(a, z), z);
344 inline v128_t v128_cvti8x16_i32x4(const v128_t& a)
346 v128_t r = wasm_unpacklo_i8x16(a, a);
347 r = wasm_unpacklo_i8x16(r, r);
348 return wasm_i32x4_shr(r, 24);
351 inline v128_t v128_cvtu16x8_i32x4(const v128_t& a)
353 const v128_t z = wasm_i8x16_splat(0);
354 return wasm_unpacklo_i16x8(a, z);
356 inline v128_t v128_cvti16x8_i32x4(const v128_t& a)
357 { return wasm_i32x4_shr(wasm_unpacklo_i16x8(a, a), 16); }
359 inline v128_t v128_cvtu32x4_i64x2(const v128_t& a)
361 const v128_t z = wasm_i8x16_splat(0);
362 return wasm_unpacklo_i32x4(a, z);
364 inline v128_t v128_cvti32x4_i64x2(const v128_t& a)
365 { return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31)); }
368 inline v128_t v128_cvtu8x16_i16x8_high(const v128_t& a)
370 const v128_t z = wasm_i8x16_splat(0);
371 return wasm_unpackhi_i8x16(a, z);
373 inline v128_t v128_cvti8x16_i16x8_high(const v128_t& a)
374 { return wasm_i16x8_shr(wasm_unpackhi_i8x16(a, a), 8); }
376 inline v128_t v128_cvtu16x8_i32x4_high(const v128_t& a)
378 const v128_t z = wasm_i8x16_splat(0);
379 return wasm_unpackhi_i16x8(a, z);
381 inline v128_t v128_cvti16x8_i32x4_high(const v128_t& a)
382 { return wasm_i32x4_shr(wasm_unpackhi_i16x8(a, a), 16); }
384 inline v128_t v128_cvtu32x4_i64x2_high(const v128_t& a)
386 const v128_t z = wasm_i8x16_splat(0);
387 return wasm_unpackhi_i32x4(a, z);
389 inline v128_t v128_cvti32x4_i64x2_high(const v128_t& a)
390 { return wasm_unpackhi_i32x4(a, wasm_i32x4_shr(a, 31)); }
392 #define OPENCV_HAL_IMPL_WASM_INITVEC(_Tpvec, _Tp, suffix, zsuffix, _Tps) \
393 inline _Tpvec v_setzero_##suffix() { return _Tpvec(wasm_##zsuffix##_splat((_Tps)0)); } \
394 inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(wasm_##zsuffix##_splat((_Tps)v)); } \
395 template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
396 { return _Tpvec(a.val); }
401 OPENCV_HAL_IMPL_WASM_INITVEC(v_int16x8, short, s16, i16x8, short)
402 OPENCV_HAL_IMPL_WASM_INITVEC(v_uint32x4, unsigned, u32, i32x4, int)
403 OPENCV_HAL_IMPL_WASM_INITVEC(v_int32x4, int, s32, i32x4, int)
404 OPENCV_HAL_IMPL_WASM_INITVEC(v_float32x4, float, f32, f32x4, float)
407 OPENCV_HAL_IMPL_WASM_INITVEC(v_float64x2, double, f64, f64x2, double)
412 v128_t maxval = wasm_i16x8_splat(255);
413 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
414 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
415 return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
419 v128_t maxval = wasm_i16x8_splat(127);
420 v128_t minval = wasm_i16x8_splat(-128);
421 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
422 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
423 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
424 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
425 return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
429 v128_t maxval = wasm_i32x4_splat(65535);
430 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
431 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
432 return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
436 v128_t maxval = wasm_i32x4_splat(32767);
437 v128_t minval = wasm_i32x4_splat(-32768);
438 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
439 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
440 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
441 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
442 return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
446 return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
450 return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
454 v128_t maxval = wasm_i16x8_splat(255);
455 v128_t minval = wasm_i16x8_splat(0);
456 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
457 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
458 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
459 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
460 return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
464 v128_t maxval = wasm_i32x4_splat(65535);
465 v128_t minval = wasm_i32x4_splat(0);
466 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
467 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
468 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
469 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
470 return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
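// The v_pack family narrows two wide vectors into one, saturating to the destination range;
// e.g. packing v_int16x8 lanes {300, -200, ...} into v_int8x16 yields {127, -128, ...}.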
476 v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
477 v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
478 v128_t b1 = wasm_u16x8_shr(wasm_i16x8_add(b.val, delta), n);
479 v128_t maxval = wasm_i16x8_splat(255);
480 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
481 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval));
482 return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
487 v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
488 v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
489 v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n);
490 v128_t maxval = wasm_i16x8_splat(127);
491 v128_t minval = wasm_i16x8_splat(-128);
492 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
493 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
494 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
495 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
496 return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
501 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
502 v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
503 v128_t b1 = wasm_u32x4_shr(wasm_i32x4_add(b.val, delta), n);
504 v128_t maxval = wasm_i32x4_splat(65535);
505 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
506 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval));
507 return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
512 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
513 v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
514 v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
515 v128_t maxval = wasm_i32x4_splat(32767);
516 v128_t minval = wasm_i32x4_splat(-32768);
517 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
518 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
519 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
520 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
521 return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
526 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
527 v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
528 v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n);
529 return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
534 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
535 v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
536 v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n);
537 return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
542 v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
543 v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
544 v128_t b1 = wasm_i16x8_shr(wasm_i16x8_add(b.val, delta), n);
545 v128_t maxval = wasm_i16x8_splat(255);
546 v128_t minval = wasm_i16x8_splat(0);
547 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
548 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
549 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
550 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
551 return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
556 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
557 v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
558 v128_t b1 = wasm_i32x4_shr(wasm_i32x4_add(b.val, delta), n);
559 v128_t maxval = wasm_i32x4_splat(65535);
560 v128_t minval = wasm_i16x8_splat(0);
561 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
562 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
563 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
564 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
565 return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
570 v128_t maxval = wasm_i16x8_splat(255);
571 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
572 v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
574 wasm_v128_store(t_ptr, r);
575 for (int i=0; i<8; ++i) {
581 v128_t maxval = wasm_i16x8_splat(127);
582 v128_t minval = wasm_i16x8_splat(-128);
583 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
584 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
585 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
587 wasm_v128_store(t_ptr, r);
588 for (int i=0; i<8; ++i) {
594 v128_t maxval = wasm_i32x4_splat(65535);
595 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
596 v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
598 wasm_v128_store(t_ptr, r);
599 for (int i=0; i<4; ++i) {
605 v128_t maxval = wasm_i32x4_splat(32767);
606 v128_t minval = wasm_i32x4_splat(-32768);
607 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
608 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
609 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
611 wasm_v128_store(t_ptr, r);
612 for (int i=0; i<4; ++i) {
618 v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
620 wasm_v128_store(t_ptr, r);
621 for (int i=0; i<2; ++i) {
627 v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
629 wasm_v128_store(t_ptr, r);
630 for (int i=0; i<2; ++i) {
636 v128_t maxval = wasm_i16x8_splat(255);
637 v128_t minval = wasm_i16x8_splat(0);
638 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
639 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
640 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
642 wasm_v128_store(t_ptr, r);
643 for (int i=0; i<8; ++i) {
649 v128_t maxval = wasm_i32x4_splat(65535);
650 v128_t minval = wasm_i32x4_splat(0);
651 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
652 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
653 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
655 wasm_v128_store(t_ptr, r);
656 for (int i=0; i<4; ++i) {
664 v128_t delta = wasm_i16x8_splat((short)(1 << (n-1)));
665 v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
666 v128_t maxval = wasm_i16x8_splat(255);
667 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
668 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
670 wasm_v128_store(t_ptr, r);
671 for (int i=0; i<8; ++i) {
678 v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
679 v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
680 v128_t maxval = wasm_i16x8_splat(127);
681 v128_t minval = wasm_i16x8_splat(-128);
682 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
683 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
684 v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
686 wasm_v128_store(t_ptr, r);
687 for (int i=0; i<8; ++i) {
694 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
695 v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
696 v128_t maxval = wasm_i32x4_splat(65535);
697 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
698 v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
700 wasm_v128_store(t_ptr, r);
701 for (int i=0; i<4; ++i) {
706 inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
708 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
709 v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
710 v128_t maxval = wasm_i32x4_splat(32767);
711 v128_t minval = wasm_i32x4_splat(-32768);
712 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
713 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
714 v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
716 wasm_v128_store(t_ptr, r);
717 for (int i=0; i<4; ++i) {
722 inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
724 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
725 v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
726 v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
728 wasm_v128_store(t_ptr, r);
729 for (int i=0; i<2; ++i) {
734 inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
736 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
737 v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
738 v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
740 wasm_v128_store(t_ptr, r);
741 for (int i=0; i<2; ++i) {
748 v128_t delta = wasm_i16x8_splat(((short)1 << (n-1)));
749 v128_t a1 = wasm_i16x8_shr(wasm_i16x8_add(a.val, delta), n);
750 v128_t maxval = wasm_i16x8_splat(255);
751 v128_t minval = wasm_i16x8_splat(0);
752 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
753 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
754 v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
756 wasm_v128_store(t_ptr, r);
757 for (int i=0; i<8; ++i) {
764 v128_t delta = wasm_i32x4_splat(((int)1 << (n-1)));
765 v128_t a1 = wasm_i32x4_shr(wasm_i32x4_add(a.val, delta), n);
766 v128_t maxval = wasm_i32x4_splat(65535);
767 v128_t minval = wasm_i32x4_splat(0);
768 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
769 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
770 v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
772 wasm_v128_store(t_ptr, r);
773 for (int i=0; i<4; ++i) {
780 v128_t maxval = wasm_i16x8_splat(255);
781 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
782 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
783 return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
789 v128_t maxval = wasm_i32x4_splat(255);
790 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
791 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
792 v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval));
793 v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval));
794 v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
795 v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
796 return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
803 v128_t maxval = wasm_i32x4_splat(255);
804 v128_t a1 = wasm_v128_bitselect(maxval, a.val, ((__u64x2)(a.val) > (__u64x2)maxval));
805 v128_t b1 = wasm_v128_bitselect(maxval, b.val, ((__u64x2)(b.val) > (__u64x2)maxval));
806 v128_t c1 = wasm_v128_bitselect(maxval, c.val, ((__u64x2)(c.val) > (__u64x2)maxval));
807 v128_t d1 = wasm_v128_bitselect(maxval, d.val, ((__u64x2)(d.val) > (__u64x2)maxval));
808 v128_t e1 = wasm_v128_bitselect(maxval, e.val, ((__u64x2)(e.val) > (__u64x2)maxval));
809 v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
810 v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
811 v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval));
812 v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
813 v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
814 v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
815 v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
816 v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
817 v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
818 return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
825 v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0));
826 v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1));
827 v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2));
828 v128_t v3 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 3));
829 v0 = wasm_f32x4_mul(v0, m0.val);
830 v1 = wasm_f32x4_mul(v1, m1.val);
831 v2 = wasm_f32x4_mul(v2, m2.val);
832 v3 = wasm_f32x4_mul(v3, m3.val);
834 return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, v3)));
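// v_matmul forms the linear combination dst = v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3:
// each lane of v is splatted, multiplied with the corresponding matrix vector, and the
// four products are summed.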
841 v128_t v0 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 0));
842 v128_t v1 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 1));
843 v128_t v2 = wasm_f32x4_splat(wasm_f32x4_extract_lane(v.val, 2));
844 v0 = wasm_f32x4_mul(v0, m0.val);
845 v1 = wasm_f32x4_mul(v1, m1.val);
846 v2 = wasm_f32x4_mul(v2, m2.val);
848 return v_float32x4(wasm_f32x4_add(wasm_f32x4_add(v0, v1), wasm_f32x4_add(v2, a.val)));
851 #define OPENCV_HAL_IMPL_WASM_BIN_OP(bin_op, _Tpvec, intrin) \
852 inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
854 return _Tpvec(intrin(a.val, b.val)); \
856 inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
858 a.val = intrin(a.val, b.val); \
862 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint8x16, wasm_u8x16_add_saturate)
863 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint8x16, wasm_u8x16_sub_saturate)
864 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int8x16, wasm_i8x16_add_saturate)
865 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int8x16, wasm_i8x16_sub_saturate)
866 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint16x8, wasm_u16x8_add_saturate)
867 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint16x8, wasm_u16x8_sub_saturate)
868 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int16x8, wasm_i16x8_add_saturate)
869 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int16x8, wasm_i16x8_sub_saturate)
870 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint32x4, wasm_i32x4_add)
871 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint32x4, wasm_i32x4_sub)
872 OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_uint32x4, wasm_i32x4_mul)
873 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int32x4, wasm_i32x4_add)
874 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int32x4, wasm_i32x4_sub)
875 OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_int32x4, wasm_i32x4_mul)
876 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float32x4, wasm_f32x4_add)
877 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float32x4, wasm_f32x4_sub)
878 OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float32x4, wasm_f32x4_mul)
879 OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float32x4, wasm_f32x4_div)
880 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_uint64x2, wasm_i64x2_add)
881 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_uint64x2, wasm_i64x2_sub)
882 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_int64x2, wasm_i64x2_add)
883 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_int64x2, wasm_i64x2_sub)
884 OPENCV_HAL_IMPL_WASM_BIN_OP(+, v_float64x2, wasm_f64x2_add)
885 OPENCV_HAL_IMPL_WASM_BIN_OP(-, v_float64x2, wasm_f64x2_sub)
886 OPENCV_HAL_IMPL_WASM_BIN_OP(*, v_float64x2, wasm_f64x2_mul)
887 OPENCV_HAL_IMPL_WASM_BIN_OP(/, v_float64x2, wasm_f64x2_div)
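// Note that the 8- and 16-bit add/sub operators map to the saturating WASM intrinsics, so e.g.
// v_setall_u8(200) + v_setall_u8(100) yields lanes of 255 rather than wrapping to 44.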
890 #define OPENCV_HAL_IMPL_WASM_MUL_SAT(_Tpvec, _Tpwvec) \
891 inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \
894 v_mul_expand(a, b, c, d); \
895 return v_pack(c, d); \
897 inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \
898 { a = a * b; return a; }
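// Saturating multiply for the 8/16-bit types: the product is computed in the wider type via
// v_mul_expand and then narrowed back with the saturating v_pack, so e.g. for v_uint8x16
// lanes 100 * 3 produce 255, not the wrapped value 44.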
912 c = v_mul_wrap(a0, b0);
913 d = v_mul_wrap(a1, b1);
922 c = v_mul_wrap(a0, b0);
923 d = v_mul_wrap(a1, b1);
932 c.val = wasm_i32x4_mul(a0.val, b0.val);
933 d.val = wasm_i32x4_mul(a1.val, b1.val);
942 c.val = wasm_i32x4_mul(a0.val, b0.val);
943 d.val = wasm_i32x4_mul(a1.val, b1.val);
952 c.val = ((__u64x2)(a0.val) * (__u64x2)(b0.val));
953 d.val = ((__u64x2)(a1.val) * (__u64x2)(b1.val));
961 v128_t c = wasm_i32x4_mul(a0.val, b0.val);
962 v128_t d = wasm_i32x4_mul(a1.val, b1.val);
963 return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
970 v128_t c = wasm_i32x4_mul(a0.val, b0.val);
971 v128_t d = wasm_i32x4_mul(a1.val, b1.val);
972 return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
979 v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16);
980 v128_t a1 = wasm_i32x4_shr(a.val, 16);
981 v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16);
982 v128_t b1 = wasm_i32x4_shr(b.val, 16);
983 v128_t c = wasm_i32x4_mul(a0, b0);
984 v128_t d = wasm_i32x4_mul(a1, b1);
993 v128_t a0 = wasm_i64x2_shr(wasm_i64x2_shl(a.val, 32), 32);
994 v128_t a1 = wasm_i64x2_shr(a.val, 32);
995 v128_t b0 = wasm_i64x2_shr(wasm_i64x2_shl(b.val, 32), 32);
996 v128_t b1 = wasm_i64x2_shr(b.val, 32);
997 v128_t c = (v128_t)((__i64x2)a0 * (__i64x2)b0);
998 v128_t d = (v128_t)((__i64x2)a1 * (__i64x2)b1);
1009 v128_t a0 = wasm_u16x8_shr(wasm_i16x8_shl(a.val, 8), 8);
1010 v128_t a1 = wasm_u16x8_shr(a.val, 8);
1011 v128_t b0 = wasm_u16x8_shr(wasm_i16x8_shl(b.val, 8), 8);
1012 v128_t b1 = wasm_u16x8_shr(b.val, 8);
1023 v128_t a0 = wasm_i16x8_shr(wasm_i16x8_shl(a.val, 8), 8);
1024 v128_t a1 = wasm_i16x8_shr(a.val, 8);
1025 v128_t b0 = wasm_i16x8_shr(wasm_i16x8_shl(b.val, 8), 8);
1026 v128_t b1 = wasm_i16x8_shr(b.val, 8);
1038 v128_t a0 = wasm_u32x4_shr(wasm_i32x4_shl(a.val, 16), 16);
1039 v128_t a1 = wasm_u32x4_shr(a.val, 16);
1040 v128_t b0 = wasm_u32x4_shr(wasm_i32x4_shl(b.val, 16), 16);
1041 v128_t b1 = wasm_u32x4_shr(b.val, 16);
1052 v128_t a0 = wasm_i32x4_shr(wasm_i32x4_shl(a.val, 16), 16);
1053 v128_t a1 = wasm_i32x4_shr(a.val, 16);
1054 v128_t b0 = wasm_i32x4_shr(wasm_i32x4_shl(b.val, 16), 16);
1055 v128_t b1 = wasm_i32x4_shr(b.val, 16);
1111 #define OPENCV_HAL_IMPL_WASM_LOGIC_OP(_Tpvec) \
1112 OPENCV_HAL_IMPL_WASM_BIN_OP(&, _Tpvec, wasm_v128_and) \
1113 OPENCV_HAL_IMPL_WASM_BIN_OP(|, _Tpvec, wasm_v128_or) \
1114 OPENCV_HAL_IMPL_WASM_BIN_OP(^, _Tpvec, wasm_v128_xor) \
1115 inline _Tpvec operator ~ (const _Tpvec& a) \
1117 return _Tpvec(wasm_v128_not(a.val)); \
1121 OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int8x16)
1123 OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int16x8)
1125 OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int32x4)
1127 OPENCV_HAL_IMPL_WASM_LOGIC_OP(v_int64x2)
1138 const v128_t _1_0 = wasm_f32x4_splat(1.0);
1139 return v_float32x4(wasm_f32x4_div(_1_0, wasm_f32x4_sqrt(x.val)));
1149 const v128_t _1_0 = wasm_f64x2_splat(1.0);
1150 return v_float64x2(wasm_f64x2_div(_1_0, wasm_f64x2_sqrt(x.val)));
1153 #define OPENCV_HAL_IMPL_WASM_ABS_INT_FUNC(_Tpuvec, _Tpsvec, suffix, zsuffix, shiftWidth) \
1154 inline _Tpuvec v_abs(const _Tpsvec& x) \
1156 v128_t s = wasm_##suffix##_shr(x.val, shiftWidth); \
1157 v128_t f = wasm_##zsuffix##_shr(x.val, shiftWidth); \
1158 return _Tpuvec(wasm_##zsuffix##_add(wasm_v128_xor(x.val, f), s)); \
1174 #define OPENCV_HAL_IMPL_WASM_BIN_FUNC(_Tpvec, func, intrin) \
1175 inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
1177 return _Tpvec(intrin(a.val, b.val)); \
1180 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min)
1181 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max)
1182 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min)
1183 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_max, wasm_f64x2_max)
1185 #define OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(_Tpvec, suffix) \
1186 inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \
1188 return _Tpvec(wasm_v128_bitselect(b.val, a.val, wasm_##suffix##_gt(a.val, b.val))); \
1190 inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \
1192 return _Tpvec(wasm_v128_bitselect(a.val, b.val, wasm_##suffix##_gt(a.val, b.val))); \
1195 OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int8x16, i8x16)
1196 OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int16x8, i16x8)
1197 OPENCV_HAL_IMPL_WASM_MINMAX_S_INIT_FUNC(v_int32x4, i32x4)
1199 #define OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(_Tpvec, suffix, deltaNum) \
1200 inline _Tpvec v_min(const _Tpvec& a, const _Tpvec& b) \
1202 v128_t delta = wasm_##suffix##_splat(deltaNum); \
1203 v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \
1204 return _Tpvec(wasm_v128_bitselect(b.val, a.val, mask)); \
1206 inline _Tpvec v_max(const _Tpvec& a, const _Tpvec& b) \
1208 v128_t delta = wasm_##suffix##_splat(deltaNum); \
1209 v128_t mask = wasm_##suffix##_gt(wasm_v128_xor(a.val, delta), wasm_v128_xor(b.val, delta)); \
1210 return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask)); \
1213 OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint8x16, i8x16, (schar)0x80)
1214 OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint16x8, i16x8, (short)0x8000)
1215 OPENCV_HAL_IMPL_WASM_MINMAX_U_INIT_FUNC(v_uint32x4, i32x4, (int)0x80000000)
1217 #define OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(_Tpvec, suffix, esuffix) \
1218 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
1219 { return _Tpvec(wasm_##esuffix##_eq(a.val, b.val)); } \
1220 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
1221 { return _Tpvec(wasm_##esuffix##_ne(a.val, b.val)); } \
1222 inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
1223 { return _Tpvec(wasm_##suffix##_lt(a.val, b.val)); } \
1224 inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
1225 { return _Tpvec(wasm_##suffix##_gt(a.val, b.val)); } \
1226 inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
1227 { return _Tpvec(wasm_##suffix##_le(a.val, b.val)); } \
1228 inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
1229 { return _Tpvec(wasm_##suffix##_ge(a.val, b.val)); }
1231 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint8x16, u8x16, i8x16)
1232 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int8x16, i8x16, i8x16)
1233 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint16x8, u16x8, i16x8)
1234 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int16x8, i16x8, i16x8)
1235 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_uint32x4, u32x4, i32x4)
1236 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_int32x4, i32x4, i32x4)
1237 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float32x4, f32x4, f32x4)
1238 OPENCV_HAL_IMPL_WASM_INIT_CMP_OP(v_float64x2, f64x2, f64x2)
1240 #define OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(_Tpvec, cast) \
1241 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
1242 { return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
1243 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
1244 { return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
1246 OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
1247 OPENCV_HAL_IMPL_WASM_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
1251 v128_t z = wasm_i32x4_splat(0x7fffffff);
1252 v128_t t = wasm_i32x4_splat(0x7f800000);
1253 return v_float32x4(wasm_u32x4_lt(wasm_v128_and(a.val, z), t));
1257 v128_t z = wasm_i64x2_splat(0x7fffffffffffffff);
1258 v128_t t = wasm_i64x2_splat(0x7ff0000000000000);
1259 return v_float64x2((__u64x2)(wasm_v128_and(a.val, z)) < (__u64x2)t);
1262 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_add_wrap, wasm_i8x16_add)
1263 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_add_wrap, wasm_i8x16_add)
1264 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_add_wrap, wasm_i16x8_add)
1265 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_add_wrap, wasm_i16x8_add)
1266 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub)
1267 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub)
1268 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub)
1269 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub)
1270 #if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) >= (1039012)
1275 uchar a_[16], b_[16];
1276 wasm_v128_store(a_, a.val);
1277 wasm_v128_store(b_, b.val);
1278 for (int i = 0; i < 16; i++)
1279 a_[i] = (uchar)(a_[i] * b_[i]);
1284 schar a_[16], b_[16];
1285 wasm_v128_store(a_, a.val);
1286 wasm_v128_store(b_, b.val);
1287 for (int i = 0; i < 16; i++)
1288 a_[i] = (schar)(a_[i] * b_[i]);
1292 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul)
1293 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul)
1295 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul)
1296 OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul)
1302 { return v_add_wrap(a - b, b - a); }
1304 { return v_add_wrap(a - b, b - a); }
1306 { return v_max(a, b) - v_min(a, b); }
1312 return v_reinterpret_as_u8(v_sub_wrap(d ^ m, m));
1316 return v_reinterpret_as_u16(v_sub_wrap(v_max(a, b), v_min(a, b)));
1322 return v_reinterpret_as_u32((d ^ m) - m);
1333 { return v_max(a, b) - v_min(a, b); }
1343 return v_fma(a, b, c);
1358 v128_t absmask_vec = wasm_i32x4_splat(0x7fffffff);
1359 return v_float32x4(wasm_v128_and(wasm_f32x4_sub(a.val, b.val), absmask_vec));
1363 v128_t absmask_vec = wasm_u64x2_shr(wasm_i32x4_splat(-1), 1);
1364 return v_float64x2(wasm_v128_and(wasm_f64x2_sub(a.val, b.val), absmask_vec));
1367 #define OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(_Tpvec, suffix) \
1368 inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
1370 v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \
1371 v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \
1372 return _Tpvec(wasm_##suffix##_sqrt(wasm_##suffix##_add(a_Square, b_Square))); \
1374 inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
1376 v128_t a_Square = wasm_##suffix##_mul(a.val, a.val); \
1377 v128_t b_Square = wasm_##suffix##_mul(b.val, b.val); \
1378 return _Tpvec(wasm_##suffix##_add(a_Square, b_Square)); \
1380 inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
1382 return _Tpvec(wasm_##suffix##_add(wasm_##suffix##_mul(a.val, b.val), c.val)); \
1385 OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float32x4, f32x4)
1386 OPENCV_HAL_IMPL_WASM_MISC_FLT_OP(v_float64x2, f64x2)
1388 #define OPENCV_HAL_IMPL_WASM_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, ssuffix) \
1389 inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
1391 return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
1393 inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
1395 return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
1397 inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
1399 return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
1401 inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
1403 return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \
1406 inline _Tpuvec v_shl(const _Tpuvec& a) \
1408 return _Tpuvec(wasm_##suffix##_shl(a.val, imm)); \
1411 inline _Tpsvec v_shl(const _Tpsvec& a) \
1413 return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
1416 inline _Tpuvec v_shr(const _Tpuvec& a) \
1418 return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
1421 inline _Tpsvec v_shr(const _Tpsvec& a) \
1423 return _Tpsvec(wasm_##suffix##_shr(a.val, imm)); \
1431 namespace hal_wasm_internal
1434 bool is_invalid = ((imm < 0) || (imm > 16)),
1435 bool is_first = (imm == 0),
1436 bool is_second = (imm == 16),
1437 bool is_other = (((imm > 0) && (imm < 16)))>
1438 class v_wasm_palignr_u8_class;
1441 class v_wasm_palignr_u8_class<imm, true, false, false, false>;
1444 class v_wasm_palignr_u8_class<imm, false, true, false, false>
1447 inline v128_t operator()(const v128_t& a, const v128_t&) const
1454 class v_wasm_palignr_u8_class<imm, false, false, true, false>
1457 inline v128_t operator()(const v128_t&, const v128_t& b) const
1464 class v_wasm_palignr_u8_class<imm, false, false, false, true>
1467 inline v128_t operator()(const v128_t& a, const v128_t& b) const
1469 enum { imm2 = (sizeof(v128_t) - imm) };
1470 return wasm_v8x16_shuffle(a, b,
1471 imm, imm+1, imm+2, imm+3,
1472 imm+4, imm+5, imm+6, imm+7,
1473 imm+8, imm+9, imm+10, imm+11,
1474 imm+12, imm+13, imm+14, imm+15);
1479 inline v128_t v_wasm_palignr_u8(const v128_t& a, const v128_t& b)
1481 CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_wasm_palignr_u8.");
1482 return v_wasm_palignr_u8_class<imm>()(a, b);
1486 template<int imm, typename _Tpvec>
1487 inline _Tpvec v_rotate_right(const _Tpvec &a)
1489 using namespace hal_wasm_internal;
1490 enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
1491 v128_t z = wasm_i8x16_splat(0);
1492 return _Tpvec(v_wasm_palignr_u8<imm2>(a.val, z));
1495 template<int imm, typename _Tpvec>
1496 inline _Tpvec v_rotate_left(const _Tpvec &a)
1498 using namespace hal_wasm_internal;
1499 enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
1500 v128_t z = wasm_i8x16_splat(0);
1501 return _Tpvec(v_wasm_palignr_u8<imm2>(z, a.val));
1504 template<int imm, typename _Tpvec>
1505 inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b)
1507 using namespace hal_wasm_internal;
1508 enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
1509 return _Tpvec(v_wasm_palignr_u8<imm2>(a.val, b.val));
1512 template<int imm, typename _Tpvec>
1513 inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b)
1515 using namespace hal_wasm_internal;
1516 enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
1517 return _Tpvec(v_wasm_palignr_u8<imm2>(b.val, a.val));
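// Lane rotation sketch (zero-filling): with a = v_int32x4(1, 2, 3, 4),
//   v_rotate_right<1>(a) == {2, 3, 4, 0}   and   v_rotate_left<1>(a) == {0, 1, 2, 3};
// the two-argument forms pull the vacated lanes from the second vector instead of zeros.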
1520 #define OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(_Tpvec, _Tp) \
1521 inline _Tpvec v_load(const _Tp* ptr) \
1522 { return _Tpvec(wasm_v128_load(ptr)); } \
1523 inline _Tpvec v_load_aligned(const _Tp* ptr) \
1524 { return _Tpvec(wasm_v128_load(ptr)); } \
1525 inline _Tpvec v_load_low(const _Tp* ptr) \
1527 _Tp tmp[_Tpvec::nlanes] = {0}; \
1528 for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
1531 return _Tpvec(wasm_v128_load(tmp)); \
1533 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
1535 _Tp tmp[_Tpvec::nlanes]; \
1536 for (int i=0; i<_Tpvec::nlanes/2; ++i) { \
1538 tmp[i+_Tpvec::nlanes/2] = ptr1[i]; \
1540 return _Tpvec(wasm_v128_load(tmp)); \
1542 inline void v_store(_Tp* ptr, const _Tpvec& a) \
1543 { wasm_v128_store(ptr, a.val); } \
1544 inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
1545 { wasm_v128_store(ptr, a.val); } \
1546 inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
1547 { wasm_v128_store(ptr, a.val); } \
1548 inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode ) \
1550 wasm_v128_store(ptr, a.val); \
1552 inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
1554 _Tpvec::lane_type a_[_Tpvec::nlanes]; \
1555 wasm_v128_store(a_, a.val); \
1556 for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \
1559 inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
1561 _Tpvec::lane_type a_[_Tpvec::nlanes]; \
1562 wasm_v128_store(a_, a.val); \
1563 for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \
1564 ptr[i] = a_[i + (_Tpvec::nlanes / 2)]; \
1570 OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int16x8, short)
1571 OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint32x4, unsigned)
1572 OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_int32x4, int)
1575 OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float)
1576 OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
1581 { return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
1584 { return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
1587 { return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
1590 { return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
1593 { return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
1596 { return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
1599 { return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
1602 { return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
1605 { return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
1608 { return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
1611 #define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \
1612 inline scalartype v_reduce_sum(const _Tpvec& a) \
1614 regtype val = a.val; \
1615 val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
1616 val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
1617 return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
1620 OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_uint32x4, unsigned, v128_t, i32x4, i32x4)
1621 OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_int32x4, int, v128_t, i32x4, i32x4)
1622 OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4)
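// v_reduce_sum adds all lanes into a scalar, e.g. v_reduce_sum(v_int32x4(1, 2, 3, 4)) == 10;
// the 4-lane version does it with two shuffle+add steps before extracting lane 0.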
1627 #define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \
1628 inline scalartype v_reduce_sum(const _Tpvec& a) \
1630 _Tpvec::lane_type a_[_Tpvec::nlanes]; \
1631 wasm_v128_store(a_, a.val); \
1632 scalartype c = a_[0]; \
1633 for (int i = 1; i < _Tpvec::nlanes; i++) \
1638 OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned)
1639 OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int8x16, int)
1640 OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint16x8, unsigned)
1641 OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int)
1644 #define OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \
1645 inline scalartype v_reduce_sum(const _Tpvec& a) \
1647 regtype val = a.val; \
1648 val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
1649 return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
1651 OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2)
1652 OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_int64x2, int64, v128_t, i64x2, i64x2)
1653 OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_float64x2, double, v128_t, f64x2, f64x2)
1658 v128_t ac = wasm_f32x4_add(wasm_unpacklo_i32x4(a.val, c.val), wasm_unpackhi_i32x4(a.val, c.val));
1659 v128_t bd = wasm_f32x4_add(wasm_unpacklo_i32x4(b.val, d.val), wasm_unpackhi_i32x4(b.val, d.val));
1660 return v_float32x4(wasm_f32x4_add(wasm_unpacklo_i32x4(ac, bd), wasm_unpackhi_i32x4(ac, bd)));
1663 #define OPENCV_HAL_IMPL_WASM_REDUCE_OP(_Tpvec, scalartype, func, scalar_func) \
1664 inline scalartype v_reduce_##func(const _Tpvec& a) \
1666 scalartype buf[_Tpvec::nlanes]; \
1668 scalartype tmp = buf[0]; \
1669 for (int i=1; i<_Tpvec::nlanes; ++i) { \
1670 tmp = scalar_func(tmp, buf[i]); \
1693 v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32;
1702 v_uint32x4 l16_l32, l16_h32, h16_l32, h16_h32;
1735 v128_t m1 = wasm_i32x4_splat(0x55555555);
1736 v128_t m2 = wasm_i32x4_splat(0x33333333);
1737 v128_t m4 = wasm_i32x4_splat(0x0f0f0f0f);
1739 p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 1), m1), wasm_v128_and(p, m1));
1740 p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 2), m2), wasm_v128_and(p, m2));
1741 p = wasm_i32x4_add(wasm_v128_and(wasm_u32x4_shr(p, 4), m4), wasm_v128_and(p, m4));
1747 p += v_rotate_right<1>(p);
1748 return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff);
1753 p += v_rotate_right<1>(p);
1754 p += v_rotate_right<2>(p);
1755 return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff);
1759 uint64 a_[2], b_[2] = { 0 };
1760 wasm_v128_store(a_, a.val);
1761 for (int i = 0; i < 16; i++)
1762 b_[i / 8] += popCountTable[((uint8_t*)a_)[i]];
1766 { return v_popcount(v_reinterpret_as_u8(a)); }
1768 { return v_popcount(v_reinterpret_as_u16(a)); }
1770 { return v_popcount(v_reinterpret_as_u32(a)); }
1772 { return v_popcount(v_reinterpret_as_u64(a)); }
1774 #define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \
1775 inline int v_signmask(const _Tpvec& a) \
1777 _Tpvec::lane_type a_[_Tpvec::nlanes]; \
1778 wasm_v128_store(a_, a.val); \
1780 for (int i = 0; i < _Tpvec::nlanes; i++) \
1781 mask |= (reinterpret_int(a_[i]) < 0) << i; \
1784 inline bool v_check_all(const _Tpvec& a) \
1785 { return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \
1786 inline bool v_check_any(const _Tpvec& a) \
1787 { return wasm_i8x16_any_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); }
1791 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint16x8, i16x8, short)
1792 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int16x8, i16x8, short)
1793 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_uint32x4, i32x4, int)
1794 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_int32x4, i32x4, int)
1795 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float32x4, i32x4, float)
1796 OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(v_float64x2, f64x2, double)
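// v_signmask packs the lane sign bits into an int, bit i being set when lane i is negative;
// e.g. v_signmask(v_setall_s16((short)-1)) == 0xFF (all 8 lanes negative).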
1798 #define OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(_Tpvec, suffix, esuffix) \
1799 inline bool v_check_all(const _Tpvec& a) \
1801 v128_t masked = v_reinterpret_as_##esuffix(a).val; \
1802 masked = wasm_i32x4_replace_lane(masked, 0, 0xffffffff); \
1803 masked = wasm_i32x4_replace_lane(masked, 2, 0xffffffff); \
1804 return wasm_i8x16_all_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \
1806 inline bool v_check_any(const _Tpvec& a) \
1808 v128_t masked = v_reinterpret_as_##esuffix(a).val; \
1809 masked = wasm_i32x4_replace_lane(masked, 0, 0x0); \
1810 masked = wasm_i32x4_replace_lane(masked, 2, 0x0); \
1811 return wasm_i8x16_any_true(wasm_##suffix##_lt(masked, wasm_##suffix##_splat(0))); \
1814 OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_int64x2, i32x4, s32)
1815 OPENCV_HAL_IMPL_WASM_CHECK_ALL_ANY(v_uint64x2, i32x4, u32)
1829 #define OPENCV_HAL_IMPL_WASM_SELECT(_Tpvec) \
1830 inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
1832 return _Tpvec(wasm_v128_bitselect(a.val, b.val, mask.val)); \
1846 #define OPENCV_HAL_IMPL_WASM_EXPAND(_Tpvec, _Tpwvec, _Tp, intrin) \
1847 inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
1849 b0.val = intrin(a.val); \
1850 b1.val = __CV_CAT(intrin, _high)(a.val); \
1852 inline _Tpwvec v_expand_low(const _Tpvec& a) \
1853 { return _Tpwvec(intrin(a.val)); } \
1854 inline _Tpwvec v_expand_high(const _Tpvec& a) \
1855 { return _Tpwvec(__CV_CAT(intrin, _high)(a.val)); } \
1856 inline _Tpwvec v_load_expand(const _Tp* ptr) \
1858 v128_t a = wasm_v128_load(ptr); \
1859 return _Tpwvec(intrin(a)); \
1869 #define OPENCV_HAL_IMPL_WASM_EXPAND_Q(_Tpvec, _Tp, intrin) \
1870 inline _Tpvec v_load_expand_q(const _Tp* ptr) \
1872 v128_t a = wasm_v128_load(ptr); \
1873 return _Tpvec(intrin(a)); \
1877 OPENCV_HAL_IMPL_WASM_EXPAND_Q(v_int32x4, schar, v128_cvti8x16_i32x4)
1879 #define OPENCV_HAL_IMPL_WASM_UNPACKS(_Tpvec, suffix) \
1880 inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
1882 b0.val = wasm_unpacklo_##suffix(a0.val, a1.val); \
1883 b1.val = wasm_unpackhi_##suffix(a0.val, a1.val); \
1885 inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
1887 return _Tpvec(wasm_unpacklo_i64x2(a.val, b.val)); \
1889 inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
1891 return _Tpvec(wasm_unpackhi_i64x2(a.val, b.val)); \
1893 inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
1895 c.val = wasm_unpacklo_i64x2(a.val, b.val); \
1896 d.val = wasm_unpackhi_i64x2(a.val, b.val); \
1899 OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint8x16, i8x16)
1900 OPENCV_HAL_IMPL_WASM_UNPACKS(v_int8x16, i8x16)
1901 OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint16x8, i16x8)
1902 OPENCV_HAL_IMPL_WASM_UNPACKS(v_int16x8, i16x8)
1903 OPENCV_HAL_IMPL_WASM_UNPACKS(v_uint32x4, i32x4)
1904 OPENCV_HAL_IMPL_WASM_UNPACKS(v_int32x4, i32x4)
1908 template<int s, typename _Tpvec>
1909 inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
1911 return v_rotate_right<s>(a, b);
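// v_extract<s> is an alias for v_rotate_right<s>(a, b): it returns 128 bits starting at lane s of
// the concatenation {a, b}; e.g. v_extract<1>(v_int32x4(1,2,3,4), v_int32x4(5,6,7,8)) == {2, 3, 4, 5}.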
1916 v128_t h = wasm_f32x4_splat(0.5);
1917 return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h)));
1922 v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
1923 v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1));
1929 v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
1930 v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1));
1935 { return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); }
1937 #define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc) \
1938 inline v_int32x4 func(const v_float64x2& a) \
1941 wasm_v128_store(a_, a.val); \
1943 c_[0] = cfunc(a_[0]); \
1944 c_[1] = cfunc(a_[1]); \
1947 return v_int32x4(wasm_v128_load(c_)); \
1953 OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int)
1957 double a_[2], b_[2];
1958 wasm_v128_store(a_, a.val);
1959 wasm_v128_store(b_, b.val);
1968 #define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \
1969 inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
1970 const _Tpvec& a2, const _Tpvec& a3, \
1971 _Tpvec& b0, _Tpvec& b1, \
1972 _Tpvec& b2, _Tpvec& b3) \
1974 v128_t t0 = wasm_unpacklo_##suffix(a0.val, a1.val); \
1975 v128_t t1 = wasm_unpacklo_##suffix(a2.val, a3.val); \
1976 v128_t t2 = wasm_unpackhi_##suffix(a0.val, a1.val); \
1977 v128_t t3 = wasm_unpackhi_##suffix(a2.val, a3.val); \
1979 b0.val = wasm_unpacklo_i64x2(t0, t1); \
1980 b1.val = wasm_unpackhi_i64x2(t0, t1); \
1981 b2.val = wasm_unpacklo_i64x2(t2, t3); \
1982 b3.val = wasm_unpackhi_i64x2(t2, t3); \
1985 OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_uint32x4, i32x4)
1986 OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_int32x4, i32x4)
1987 OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(v_float32x4, i32x4)
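// v_transpose4x4 treats a0..a3 as the rows of a 4x4 matrix and writes the transposed rows to
// b0..b3, so b0 == {a0[0], a1[0], a2[0], a3[0]}, b1 == {a0[1], a1[1], a2[1], a3[1]}, and so on.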
1992 v128_t t00 = wasm_v128_load(ptr);
1993 v128_t t01 = wasm_v128_load(ptr + 16);
1995 a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
1996 b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
2001 v128_t t00 = wasm_v128_load(ptr);
2002 v128_t t01 = wasm_v128_load(ptr + 16);
2003 v128_t t02 = wasm_v128_load(ptr + 32);
2005 v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
2006 v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
2007 v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
2009 a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
2010 b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
2011 c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
2016 v128_t u0 = wasm_v128_load(ptr);
2017 v128_t u1 = wasm_v128_load(ptr + 16);
2018 v128_t u2 = wasm_v128_load(ptr + 32);
2019 v128_t u3 = wasm_v128_load(ptr + 48);
2021 v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
2022 v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
2023 v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
2024 v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
2026 a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
2027 b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
2028 c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
2029 d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
2034 v128_t v0 = wasm_v128_load(ptr);
2035 v128_t v1 = wasm_v128_load(ptr + 8);
2037 a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29);
2038 b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31);
2043 v128_t t00 = wasm_v128_load(ptr);
2044 v128_t t01 = wasm_v128_load(ptr + 8);
2045 v128_t t02 = wasm_v128_load(ptr + 16);
2047 v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
2048 v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
2049 v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
2051 a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
2052 b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
2053 c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
2058 v128_t u0 = wasm_v128_load(ptr);
2059 v128_t u1 = wasm_v128_load(ptr + 8);
2060 v128_t u2 = wasm_v128_load(ptr + 16);
2061 v128_t u3 = wasm_v128_load(ptr + 24);
2063 v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27);
2064 v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27);
2065 v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31);
2066 v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31);
2068 a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
2069 b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
2070 c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
2071 d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
2076 v128_t v0 = wasm_v128_load(ptr);
2077 v128_t v1 = wasm_v128_load(ptr + 4);
2079 a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
2080 b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
2085 v128_t t00 = wasm_v128_load(ptr);
2086 v128_t t01 = wasm_v128_load(ptr + 4);
2087 v128_t t02 = wasm_v128_load(ptr + 8);
2089 v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
2090 v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
2091 v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
2093 a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
2094 b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
2095 c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
2110 v128_t v0 = wasm_v128_load(ptr);
2111 v128_t v1 = wasm_v128_load((ptr + 4));
2113 a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27);
2114 b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31);
2119 v128_t t00 = wasm_v128_load(ptr);
2120 v128_t t01 = wasm_v128_load(ptr + 4);
2121 v128_t t02 = wasm_v128_load(ptr + 8);
2123 v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
2124 v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
2125 v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
2127 a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
2128 b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
2129 c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
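// For 64-bit lanes the two- and four-channel deinterleaves reduce to
// unpacklo/unpackhi of whole 64-bit halves; only the three-channel case below
// still needs byte shuffles.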
2144 v128_t t0 = wasm_v128_load(ptr);
2145 v128_t t1 = wasm_v128_load(ptr + 2);
2147 a.val = wasm_unpacklo_i64x2(t0, t1);
2148 b.val = wasm_unpackhi_i64x2(t0, t1);
2153 v128_t t0 = wasm_v128_load(ptr);
2154 v128_t t1 = wasm_v128_load(ptr + 2);
2155 v128_t t2 = wasm_v128_load(ptr + 4);
2157 a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
2158 b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
2159 c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
2165 v128_t t0 = wasm_v128_load(ptr);
2166 v128_t t1 = wasm_v128_load(ptr + 2);
2167 v128_t t2 = wasm_v128_load(ptr + 4);
2168 v128_t t3 = wasm_v128_load(ptr + 6);
2170 a.val = wasm_unpacklo_i64x2(t0, t2);
2171 b.val = wasm_unpackhi_i64x2(t0, t2);
2172 c.val = wasm_unpacklo_i64x2(t1, t3);
2173 d.val = wasm_unpackhi_i64x2(t1, t3);
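// Interleaved stores mirror the loads above: unpacklo/unpackhi interleave two
// registers lane by lane, and the three-channel variants run two shuffle
// passes, where the first pass leaves placeholder lanes that the second pass
// fills in from c.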
2181 v128_t v0 = wasm_unpacklo_i8x16(a.val, b.val);
2182 v128_t v1 = wasm_unpackhi_i8x16(a.val, b.val);
2184 wasm_v128_store(ptr, v0);
2185 wasm_v128_store(ptr + 16, v1);
2191 v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
2192 v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
2193 v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
2195 v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
2196 v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
2197 v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
2199 wasm_v128_store(ptr, t10);
2200 wasm_v128_store(ptr + 16, t11);
2201 wasm_v128_store(ptr + 32, t12);
2212 v128_t u0 = wasm_unpacklo_i8x16(a.val, c.val);
2213 v128_t u1 = wasm_unpackhi_i8x16(a.val, c.val);
2214 v128_t u2 = wasm_unpacklo_i8x16(b.val, d.val);
2215 v128_t u3 = wasm_unpackhi_i8x16(b.val, d.val);
2217 v128_t v0 = wasm_unpacklo_i8x16(u0, u2);
2218 v128_t v1 = wasm_unpackhi_i8x16(u0, u2);
2219 v128_t v2 = wasm_unpacklo_i8x16(u1, u3);
2220 v128_t v3 = wasm_unpackhi_i8x16(u1, u3);
2222 wasm_v128_store(ptr, v0);
2223 wasm_v128_store(ptr + 16, v1);
2224 wasm_v128_store(ptr + 32, v2);
2225 wasm_v128_store(ptr + 48, v3);
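// Four-channel interleave is two unpack stages: a/c and b/d are interleaved
// first, then the intermediates are interleaved again, yielding a0,b0,c0,d0,...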
2231 v128_t v0 = wasm_unpacklo_i16x8(a.val, b.val);
2232 v128_t v1 = wasm_unpackhi_i16x8(a.val, b.val);
2234 wasm_v128_store(ptr, v0);
2235 wasm_v128_store(ptr + 8, v1);
2242 v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
2243 v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
2244 v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
2246 v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
2247 v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
2248 v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
2250 wasm_v128_store(ptr, t10);
2251 wasm_v128_store(ptr + 8, t11);
2252 wasm_v128_store(ptr + 16, t12);
2263 v128_t u0 = wasm_unpacklo_i16x8(a.val, c.val);
2264 v128_t u1 = wasm_unpackhi_i16x8(a.val, c.val);
2265 v128_t u2 = wasm_unpacklo_i16x8(b.val, d.val);
2266 v128_t u3 = wasm_unpackhi_i16x8(b.val, d.val);
2268 v128_t v0 = wasm_unpacklo_i16x8(u0, u2);
2269 v128_t v1 = wasm_unpackhi_i16x8(u0, u2);
2270 v128_t v2 = wasm_unpacklo_i16x8(u1, u3);
2271 v128_t v3 = wasm_unpackhi_i16x8(u1, u3);
2273 wasm_v128_store(ptr, v0);
2274 wasm_v128_store(ptr + 8, v1);
2275 wasm_v128_store(ptr + 16, v2);
2276 wasm_v128_store(ptr + 24, v3);
2282 v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val);
2283 v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val);
2285 wasm_v128_store(ptr, v0);
2286 wasm_v128_store(ptr + 4, v1);
2292 v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
2293 v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
2294 v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
2296 v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
2297 v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
2298 v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
2300 wasm_v128_store(ptr, t10);
2301 wasm_v128_store(ptr + 4, t11);
2302 wasm_v128_store(ptr + 8, t12);
2312 wasm_v128_store(ptr, v0.val);
2313 wasm_v128_store(ptr + 4, v1.val);
2314 wasm_v128_store(ptr + 8, v2.val);
2315 wasm_v128_store(ptr + 12, v3.val);
2322 v128_t v0 = wasm_unpacklo_i32x4(a.val, b.val);
2323 v128_t v1 = wasm_unpackhi_i32x4(a.val, b.val);
2325 wasm_v128_store(ptr, v0);
2326 wasm_v128_store(ptr + 4, v1);
2332 v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
2333 v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
2334 v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
2336 v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
2337 v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
2338 v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
2340 wasm_v128_store(ptr, t10);
2341 wasm_v128_store(ptr + 4, t11);
2342 wasm_v128_store(ptr + 8, t12);
2352 wasm_v128_store(ptr, v0.val);
2353 wasm_v128_store(ptr + 4, v1.val);
2354 wasm_v128_store(ptr + 8, v2.val);
2355 wasm_v128_store(ptr + 12, v3.val);
2361 v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val);
2362 v128_t v1 = wasm_unpackhi_i64x2(a.val, b.val);
2364 wasm_v128_store(ptr, v0);
2365 wasm_v128_store(ptr + 2, v1);
2371 v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
2372 v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
2373 v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
2375 wasm_v128_store(ptr, v0);
2376 wasm_v128_store(ptr + 2, v1);
2377 wasm_v128_store(ptr + 4, v2);
2384 v128_t v0 = wasm_unpacklo_i64x2(a.val, b.val);
2385 v128_t v1 = wasm_unpacklo_i64x2(c.val, d.val);
2386 v128_t v2 = wasm_unpackhi_i64x2(a.val, b.val);
2387 v128_t v3 = wasm_unpackhi_i64x2(c.val, d.val);
2389 wasm_v128_store(ptr, v0);
2390 wasm_v128_store(ptr + 2, v1);
2391 wasm_v128_store(ptr + 4, v2);
2392 wasm_v128_store(ptr + 6, v3);
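// Typical use of the (de)interleave helpers (illustrative sketch; `src`, `dst`
// and the length `n` are assumptions of this example, not defined here):
//
//   v_uint16x8 b, g, r;
//   for (int i = 0; i + 8 <= n; i += 8)
//   {
//       v_load_deinterleave(src + 3 * i, b, g, r);   // packed BGR -> planes
//       // ... per-channel processing ...
//       v_store_interleave(dst + 3 * i, b, g, r);    // planes -> packed BGR
//   }
//
// The macro below forwards these operations to further element types by
// casting the pointer and reinterpreting the vector registers, so additional
// overloads can reuse the shuffle-based implementations defined above.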
2395 #define OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
2396 inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
2399 v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
2400 a0 = v_reinterpret_as_##suffix0(a1); \
2401 b0 = v_reinterpret_as_##suffix0(b1); \
2403 inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
2405 _Tpvec1 a1, b1, c1; \
2406 v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
2407 a0 = v_reinterpret_as_##suffix0(a1); \
2408 b0 = v_reinterpret_as_##suffix0(b1); \
2409 c0 = v_reinterpret_as_##suffix0(c1); \
2411 inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
2413 _Tpvec1 a1, b1, c1, d1; \
2414 v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
2415 a0 = v_reinterpret_as_##suffix0(a1); \
2416 b0 = v_reinterpret_as_##suffix0(b1); \
2417 c0 = v_reinterpret_as_##suffix0(c1); \
2418 d0 = v_reinterpret_as_##suffix0(d1); \
2420 inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
2421 hal::StoreMode mode = hal::STORE_UNALIGNED ) \
2423 _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
2424 _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
2425 v_store_interleave((_Tp1*)ptr, a1, b1, mode); \
2427 inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
2428 const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \
2430 _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
2431 _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
2432 _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
2433 v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \
2435 inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
2436 const _Tpvec0& c0, const _Tpvec0& d0, \
2437 hal::StoreMode mode = hal::STORE_UNALIGNED ) \
2439 _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
2440 _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
2441 _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
2442 _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
2443 v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
2454 return v_float32x4(wasm_f32x4_convert_i32x4(a.val));
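// No packed f64x2 -> f32x4 narrowing is used here: the double lanes are
// spilled to a small local array and converted to float one element at a time.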
2460 wasm_v128_store(a_, a.val);
2462 c_[0] = (float)(a_[0]);
2463 c_[1] = (float)(a_[1]);
2471 double a_[2], b_[2];
2472 wasm_v128_store(a_, a.val);
2473 wasm_v128_store(b_, b.val);
2475 c_[0] = (float)(a_[0]);
2476 c_[1] = (float)(a_[1]);
2477 c_[2] = (float)(b_[0]);
2478 c_[3] = (float)(b_[1]);
2484 #ifdef __wasm_unimplemented_simd128__
2485 v128_t p = v128_cvti32x4_i64x2(a.val);
2489 wasm_v128_store(a_, a.val);
2491 c_[0] = (double)(a_[0]);
2492 c_[1] = (double)(a_[1]);
2499 #ifdef __wasm_unimplemented_simd128__
2500 v128_t p = v128_cvti32x4_i64x2_high(a.val);
2504 wasm_v128_store(a_, a.val);
2506 c_[0] = (double)(a_[2]);
2507 c_[1] = (double)(a_[3]);
2515 wasm_v128_store(a_, a.val);
2517 c_[0] = (double)(a_[0]);
2518 c_[1] = (double)(a_[1]);
2525 wasm_v128_store(a_, a.val);
2527 c_[0] = (double)(a_[2]);
2528 c_[1] = (double)(a_[3]);
2534 #ifdef __wasm_unimplemented_simd128__
2535 return v_float64x2(wasm_f64x2_convert_i64x2(a.val));
2538 wasm_v128_store(a_, a.val);
2540 c_[0] = (double)(a_[0]);
2541 c_[1] = (double)(a_[1]);
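// Look-up helpers: v_lut / v_lut_pairs / v_lut_quads gather 1, 2 or 4
// consecutive table elements per index and assemble the result through the
// vector constructors; wider element types reuse the narrower implementations
// via reinterpret casts.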
2561 tab[idx[2]], tab[idx[2]+1], tab[idx[2]+2], tab[idx[2]+3], tab[idx[3]], tab[idx[3]+1], tab[idx[3]+2], tab[idx[3]+3]);
2575 tab[idx[2]], tab[idx[2]+1], tab[idx[3]], tab[idx[3]+1]);
2580 tab[idx[1]], tab[idx[1]+1], tab[idx[1]+2], tab[idx[1]+3]);
2589 tab[idx[2]], tab[idx[3]]);
2594 tab[idx[1]], tab[idx[1]+1]);
2612 inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); }
2633 return v_int32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
2634 tab[wasm_i32x4_extract_lane(idxvec.val, 1)],
2635 tab[wasm_i32x4_extract_lane(idxvec.val, 2)],
2636 tab[wasm_i32x4_extract_lane(idxvec.val, 3)]);
2641 return v_reinterpret_as_u32(v_lut((const int *)tab, idxvec));
2646 return v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
2647 tab[wasm_i32x4_extract_lane(idxvec.val, 1)],
2648 tab[wasm_i32x4_extract_lane(idxvec.val, 2)],
2649 tab[wasm_i32x4_extract_lane(idxvec.val, 3)]);
2654 return v_float64x2(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
2655 tab[wasm_i32x4_extract_lane(idxvec.val, 1)]);
2666 x = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)],
2667                 tab[wasm_i32x4_extract_lane(idxvec.val, 1)],
2668                 tab[wasm_i32x4_extract_lane(idxvec.val, 2)],
2669                 tab[wasm_i32x4_extract_lane(idxvec.val, 3)]);
2670 y = v_float32x4(tab[wasm_i32x4_extract_lane(idxvec.val, 0)+1],
2671                 tab[wasm_i32x4_extract_lane(idxvec.val, 1)+1],
2672                 tab[wasm_i32x4_extract_lane(idxvec.val, 2)+1],
2673                 tab[wasm_i32x4_extract_lane(idxvec.val, 3)+1]);
2678 v128_t xy0 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 0));
2679 v128_t xy1 = wasm_v128_load(tab + wasm_i32x4_extract_lane(idxvec.val, 1));
2680 x.val = wasm_unpacklo_i64x2(xy0, xy1);
2681 y.val = wasm_unpackhi_i64x2(xy0, xy1);
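// Fixed lane permutations (v_interleave_pairs/quads, v_pack_triplets) are
// implemented as constant byte shuffles of the register with itself.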
2686 return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
2691 return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
2697 return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
2702 return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
2708 return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
2713 return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
2718 return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
2724 return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
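// v_extract_n<i> rotates lane i down to position 0 and reads it with get0();
// the v_broadcast_element overloads then splat that scalar via v_setall_*.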
2732 template<int i, typename _Tp>
2733 inline typename _Tp::lane_type v_extract_n(const _Tp& a)
2735 return v_rotate_right<i>(a).get0();
2741 return v_setall_u32(v_extract_n<i>(a));
2746 return v_setall_s32(v_extract_n<i>(a));
2751 return v_setall_f32(v_extract_n<i>(a));
2760 for (int i = 0; i < 4; i++)
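// v_pack_store for FP16: spill the float lanes to a local array and convert
// each one to hfloat with a scalar cast (no SIMD f16 support is used here).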
2768 wasm_v128_store(v_, v.val);
2769 ptr[0] = hfloat(v_[0]);
2770 ptr[1] = hfloat(v_[1]);
2771 ptr[2] = hfloat(v_[2]);
2772 ptr[3] = hfloat(v_[3]);
2777 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END