42 #ifndef OPENCV_HAL_NEON_UTILS_HPP
43 #define OPENCV_HAL_NEON_UTILS_HPP
45 #include "opencv2/core/cvdef.h"
52 inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
54 static int32x2_t v_sign = vdup_n_s32(1 << 31),
55 v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
57 int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
58 return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
61 inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
63 static int32x4_t v_sign = vdupq_n_s32(1 << 31),
64 v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
66 int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
67 return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
70 inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
72 static float32x2_t v_05 = vdup_n_f32(0.5f);
73 return vcvt_u32_f32(vadd_f32(v, v_05));
76 inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
78 static float32x4_t v_05 = vdupq_n_f32(0.5f);
79 return vcvtq_u32_f32(vaddq_f32(v, v_05));
82 inline float32x4_t cv_vrecpq_f32(float32x4_t val)
84 float32x4_t reciprocal = vrecpeq_f32(val);
85 reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
86 reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
90 inline float32x2_t cv_vrecp_f32(float32x2_t val)
92 float32x2_t reciprocal = vrecpe_f32(val);
93 reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
94 reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
98 inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
100 float32x4_t e = vrsqrteq_f32(val);
101 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
102 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
106 inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
108 float32x2_t e = vrsqrte_f32(val);
109 e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
110 e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
114 inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
116 return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
119 inline float32x2_t cv_vsqrt_f32(float32x2_t val)
121 return cv_vrecp_f32(cv_vrsqrt_f32(val));