24 #ifndef ARM_COMPUTE_SVESYMM_H
25 #define ARM_COMPUTE_SVESYMM_H
29 #if defined(ARM_COMPUTE_ENABLE_SVE2)
44 inline svfloat32x2_t svdequantize_qsymm16_z(svbool_t pg,
const svint16_t &qv,
float scale)
46 const auto vscale = svdup_n_f32(
scale);
47 const svfloat32x2_t vdequantized_input =
48 svcreate2_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(qv)), vscale),
49 svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(qv)), vscale));
50 return vdequantized_input;
61 inline svint16_t svquantize_qsymm16_z(svbool_t pg,
const svfloat32x2_t qv,
float scale)
63 const svfloat32_t vinvscale = svdup_n_f32(1.f /
scale);
65 const auto rf_0 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget2_f32(qv, 0), vinvscale));
66 const auto rf_1 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget2_f32(qv, 1), vinvscale));
67 const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
80 inline svfloat32x4_t svdequantize_z(svbool_t pg,
const svint16x2_t qv,
const UniformQuantizationInfo &qi)
82 const float scale = qi.scale;
83 const auto vscale = svdup_n_f32(
scale);
84 const svfloat32x4_t vdequantized_input =
85 svcreate4_f32(svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 0))), vscale),
86 svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 0))), vscale),
87 svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlb_s32(svget2_s16(qv, 1))), vscale),
88 svmul_f32_z(pg, svcvt_f32_s32_z(pg, svmovlt_s32(svget2_s16(qv, 1))), vscale));
89 return vdequantized_input;
100 inline svint16x2_t svquantize_qsymm16_z(svbool_t pg,
const svfloat32x4_t qv,
const UniformQuantizationInfo &qi)
102 const float scale = qi.scale;
104 const auto vinvscale = svdup_n_f32(1.f /
scale);
105 const auto rf_0 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 0), vinvscale));
106 const auto rf_1 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 1), vinvscale));
107 const auto rf_2 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 2), vinvscale));
108 const auto rf_3 = svcvt_s32_f32_z(pg, svmul_f32_z(pg, svget4_f32(qv, 3), vinvscale));
110 const auto pa = svqxtnt_s32(svqxtnb_s32(rf_0), rf_1);
111 const auto pb = svqxtnt_s32(svqxtnb_s32(rf_2), rf_3);
113 return svcreate2_s16(pa, pb);
118 #endif // ARM_COMPUTE_SVESYMM_H