#ifndef ARM_COMPUTE_SVEASYMM_H
#define ARM_COMPUTE_SVEASYMM_H

#if defined(__ARM_FEATURE_SVE2)
/** Perform a multiply-accumulate on all lanes of a QASYMM8 vector.
 *
 * @param[in] pg Governing predicate.
 * @param[in] vd Input vector value in QASYMM8 format.
 * @param[in] vs Vector multiplier (presumably a per-lane scale — implementation not visible here, confirm).
 * @param[in] vo Vector addend (presumably a per-lane offset — confirm against the definition).
 *
 * @return A QASYMM8 vector holding the result.
 */
svuint8_t svmla_qasymm8_z(svbool_t pg, svuint8_t vd, svfloat32_t vs, svfloat32_t vo);

/** Perform a multiply-accumulate on all lanes of a QASYMM8_SIGNED vector.
 *
 * @param[in] pg Governing predicate.
 * @param[in] vd Input vector value in QASYMM8_SIGNED format.
 * @param[in] vs Vector multiplier (presumably a per-lane scale — confirm against the definition).
 * @param[in] vo Vector addend (presumably a per-lane offset — confirm against the definition).
 *
 * @return A QASYMM8_SIGNED vector holding the result.
 */
svint8_t svmla_qasymm8_signed_z(svbool_t pg, svint8_t vd, svfloat32_t vs, svfloat32_t vo);
/** Dequantize an unsigned 8-bit (QASYMM8) vector to four float32 vectors.
 *
 * Widens the u8 lanes via bottom/top moves (u8 -> u16 -> u32), so the four
 * output vectors hold interleaved lane groups, not contiguous quarters.
 *
 * @param[in] pg     Governing predicate.
 * @param[in] qv     Input quantized values.
 * @param[in] scale  Quantization scale.
 * @param[in] offset Quantization offset.
 *
 * @return Dequantized values, (q - offset) * scale, as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, float scale, int32_t offset)
{
    const auto voffset = svdup_n_s32(offset);
    const auto vscale  = svdup_n_f32(scale);
    // NOTE: the garbled source dropped the svcreate4_f32 aggregation around
    // the four partial results; restored here so the tuple is well-formed.
    const svfloat32x4_t vdequantized_input = svcreate4_f32(
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale));
    return vdequantized_input;
}
/** Dequantize an unsigned 8-bit (QASYMM8) vector using a UniformQuantizationInfo.
 *
 * Convenience overload: forwards qi.scale and qi.offset to the (scale, offset) overload.
 *
 * @param[in] pg Governing predicate.
 * @param[in] qv Input quantized values.
 * @param[in] qi Quantization information (scale and offset).
 *
 * @return Dequantized values as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svuint8_t &qv, const UniformQuantizationInfo &qi)
{
    return svdequantize_z(pg, qv, qi.scale, qi.offset);
}
/** Dequantize a signed 8-bit (QASYMM8_SIGNED) vector to four float32 vectors.
 *
 * Widens the s8 lanes via bottom/top moves (s8 -> s16 -> s32), so the four
 * output vectors hold interleaved lane groups, not contiguous quarters.
 *
 * @param[in] pg     Governing predicate.
 * @param[in] qv     Input quantized values.
 * @param[in] scale  Quantization scale.
 * @param[in] offset Quantization offset.
 *
 * @return Dequantized values, (q - offset) * scale, as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale, int32_t offset)
{
    const auto voffset = svdup_n_s32(offset);
    const auto vscale  = svdup_n_f32(scale);
    // NOTE: restored the svcreate4_f32 aggregation dropped in the garbled source.
    const svfloat32x4_t vdequantized_input = svcreate4_f32(
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale));
    return vdequantized_input;
}
/** Dequantize a signed 8-bit (QASYMM8_SIGNED) vector using a UniformQuantizationInfo.
 *
 * Convenience overload: forwards qi.scale and qi.offset to the (scale, offset) overload.
 *
 * @param[in] pg Governing predicate.
 * @param[in] qv Input quantized values.
 * @param[in] qi Quantization information (scale and offset).
 *
 * @return Dequantized values as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const UniformQuantizationInfo &qi)
{
    return svdequantize_z(pg, qv, qi.scale, qi.offset);
}
/** Dequantize a signed 8-bit vector with per-group vector scales (no offset).
 *
 * Each of the four widened s32 sub-vectors is multiplied by the matching
 * element of @p vscale, allowing different scales per lane group (e.g.
 * symmetric per-channel quantization — offset is implicitly zero here).
 *
 * @param[in] pg     Governing predicate.
 * @param[in] qv     Input quantized values.
 * @param[in] vscale Four float32 scale vectors, one per widened sub-vector.
 *
 * @return Dequantized values, q * scale, as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, const svfloat32x4_t vscale)
{
    // NOTE: restored the svcreate4_f32 aggregation dropped in the garbled source.
    const svfloat32x4_t vdequantized_input = svcreate4_f32(
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), svget4_f32(vscale, 0)),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), svget4_f32(vscale, 1)),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), svget4_f32(vscale, 2)),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), svget4_f32(vscale, 3)));
    return vdequantized_input;
}
/** Dequantize a signed 8-bit vector with a scalar scale and no offset.
 *
 * Symmetric-quantization variant: the offset is implicitly zero, so the
 * result is simply q * scale for every lane.
 *
 * @param[in] pg    Governing predicate.
 * @param[in] qv    Input quantized values.
 * @param[in] scale Quantization scale applied to all lanes.
 *
 * @return Dequantized values as an svfloat32x4_t.
 */
inline svfloat32x4_t svdequantize_z(svbool_t pg, const svint8_t &qv, float scale)
{
    const auto vscale = svdup_n_f32(scale);
    // NOTE: restored the svcreate4_f32 aggregation dropped in the garbled source.
    const svfloat32x4_t vdequantized_input = svcreate4_f32(
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlb_s16(qv))), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlb_s16(qv))), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svmovlt_s16(qv))), vscale),
        svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svmovlt_s16(qv))), vscale));
    return vdequantized_input;
}
/** Quantize four float32 vectors into one unsigned 8-bit (QASYMM8) vector.
 *
 * Computes v / scale + offset per lane, then saturating-narrows
 * u32 -> u16 -> u8 via the SVE2 qxtnb/qxtnt pairs, which re-interleaves
 * the four sub-vectors back into lane order.
 *
 * @param[in] pg Governing predicate.
 * @param[in] qv Four float32 vectors of values to quantize.
 * @param[in] qi Quantization information (scale and offset).
 *
 * @return Quantized values as an svuint8_t.
 */
inline svuint8_t svquantize_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
{
    const float scale  = qi.scale;
    const int   offset = qi.offset;

    const auto voffset   = svdup_n_f32(offset);
    const auto vinvscale = svdup_n_f32(1.f / scale);

    const auto rf_0 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
    const auto rf_1 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
    const auto rf_2 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
    const auto rf_3 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));

    // Saturating narrow with bottom/top interleave: u32 -> u16.
    const auto pa = svqxtnt_u32(svqxtnb_u32(rf_0), rf_1);
    const auto pb = svqxtnt_u32(svqxtnb_u32(rf_2), rf_3);

    // Final saturating narrow: u16 -> u8.
    return svqxtnt_u16(svqxtnb_u16(pa), pb);
}
/** Quantize four float32 vectors into one signed 8-bit (QASYMM8_SIGNED) vector.
 *
 * Computes v / scale + offset per lane, then saturating-narrows
 * s32 -> s16 -> s8 via the SVE2 qxtnb/qxtnt pairs, which re-interleaves
 * the four sub-vectors back into lane order.
 *
 * @param[in] pg Governing predicate.
 * @param[in] qv Four float32 vectors of values to quantize.
 * @param[in] qi Quantization information (scale and offset).
 *
 * @return Quantized values as an svint8_t.
 */
inline svint8_t svquantize_signed_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
{
    const float scale  = qi.scale;
    const int   offset = qi.offset;

    const auto voffset   = svdup_n_f32(offset);
    const auto vinvscale = svdup_n_f32(1.f / scale);

    const auto rf_0 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
    const auto rf_1 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
    const auto rf_2 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
    const auto rf_3 = svcvt_s32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));

    // Saturating narrow with bottom/top interleave: s32 -> s16.
    const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
    const auto pb = svqxtnt_s32(svqxtnb_s32(rf_2), rf_3);

    // Final saturating narrow: s16 -> s8.
    return svqxtnt_s16(svqxtnb_s16(pa), pb);
}
/** Quantize four float32 vectors into two unsigned 16-bit (QASYMM16) vectors.
 *
 * Computes v / scale + offset per lane, then saturating-narrows u32 -> u16
 * via the SVE2 qxtnb/qxtnt pair; the two u16 results are returned as a pair
 * rather than narrowed further.
 *
 * @param[in] pg Governing predicate.
 * @param[in] qv Four float32 vectors of values to quantize.
 * @param[in] qi Quantization information (scale and offset).
 *
 * @return Quantized values as an svuint16x2_t.
 */
inline svuint16x2_t svquantize_qasymm16_z(svbool_t pg, const svfloat32x4_t qv, const UniformQuantizationInfo &qi)
{
    const float scale  = qi.scale;
    const int   offset = qi.offset;

    const auto voffset   = svdup_n_f32(offset);
    const auto vinvscale = svdup_n_f32(1.f / scale);

    const auto rf_0 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 0), vinvscale));
    const auto rf_1 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 1), vinvscale));
    const auto rf_2 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 2), vinvscale));
    const auto rf_3 = svcvt_u32_f32_z(pg, svmla_f32_z(pg, voffset, svget4_f32(qv, 3), vinvscale));

    // Saturating narrow with bottom/top interleave: u32 -> u16.
    const auto pa = svqxtnt_u32(svqxtnb_u32(rf_0), rf_1);
    const auto pb = svqxtnt_u32(svqxtnb_u32(rf_2), rf_3);

    return svcreate2_u16(pa, pb);
}
#endif /* defined(__ARM_FEATURE_SVE2) */
#endif // ARM_COMPUTE_SVEASYMM_H