24 #ifndef ARM_COMPUTE_NESYMM_H 25 #define ARM_COMPUTE_NESYMM_H 51 template <
bool is_bounded_relu>
53 int result_fixedpoint_multiplier,
60 in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << -result_shift));
61 in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << -result_shift));
63 in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
64 in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
69 in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);
70 in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);
77 int16x8_t out_s16 = vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1]));
81 out_s16 = vmaxq_s16(out_s16, min_s16);
82 out_s16 = vminq_s16(out_s16, max_s16);
100 template <
bool is_bounded_relu>
102 int32_t result_shift, int16_t min_s16, int16_t max_s16)
106 const int64_t in_64 = static_cast<int64_t>(in_value) * (1 << (-result_shift)) * static_cast<int64_t>(result_fixedpoint_multiplier);
107 in_value = static_cast<int32_t>((in_64 + (1 << 30)) >> 31);
112 const int64_t in_64 = static_cast<int64_t>(in_value) * static_cast<int64_t>(result_fixedpoint_multiplier);
118 int16_t out_s16 = static_cast<int16_t>(std::max<int32_t>(-32768, std::min<int32_t>(32767, in_value)));
122 out_s16 = static_cast<int16_t>(std::max(min_s16, std::min(max_s16, out_s16)));
137 const float32x4_t vscale = vdupq_n_f32(
scale);
138 const float32x4x2_t vdequantized_input =
141 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale),
142 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale)
145 return vdequantized_input;
157 const float32x4_t vinvscale = vdupq_n_f32(1.f /
scale);
159 const int32x4x2_t rf =
163 vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
164 vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
166 vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
167 vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale))
171 return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));
184 const float32x4_t vscale = vdupq_n_f32(
scale);
185 const float32x4x4_t vdequantized_input =
188 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[0]))), vscale),
189 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[0]))), vscale),
190 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[1]))), vscale),
191 vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[1]))), vscale),
194 return vdequantized_input;
208 const float32x4_t vinvscale = vdupq_n_f32(1.f /
scale);
209 const int32x4x4_t rf =
213 vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
214 vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
215 vcvtnq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
216 vcvtnq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
218 vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)),
219 vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)),
220 vcvtq_s32_f32(vmulq_f32(qv.val[2], vinvscale)),
221 vcvtq_s32_f32(vmulq_f32(qv.val[3], vinvscale)),
227 vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])),
228 vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])),
244 const auto left_shift = shift > 0 ? shift : 0;
245 const auto right_shift = shift > 0 ? 0 : -shift;
246 const auto one_shifted = 1 << left_shift;
256 #endif // ARM_COMPUTE_NESYMM_H
int8_t qsymm8_t
8 bit quantized symmetric scalar value
qsymm16x8x2_t vquantize_qsymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
Quantize a Neon vector holding 16 floating-point values.
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
Dequantize a neon vector holding 8 16-bit quantized values.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true, an error message is printed and an exception is thrown.
int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale)
Quantize a Neon vector holding 8 floating-point values.
Copyright (c) 2017-2021 Arm Limited.
int16x8_t qsymm16x8_t
16 bit quantized symmetric vector with 8 elements
int16x8_t finalize_quantization_int16(int32x4x2_t &in_s32, int result_fixedpoint_multiplier, int32_t result_shift, int16x8_t min_s16, int16x8_t max_s16)
Performs final quantization step on 8 signed 16-bit elements.
int32x4x2_t multiply_by_quantized_multiplier_2row(int32x4x2_t input, int32_t qmul, int32_t shift)
Multiply a neon vector using quantized multiplier and shift.
int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent)
Divide by a power of two given by the exponent, rounding the result to the nearest integer.
int16x8x2_t qsymm16x8x2_t
16 bit quantized symmetric vector with 16 elements
int16_t qsymm16_t
16 bit quantized symmetric scalar value