30 #if defined(ARM_COMPUTE_ENABLE_SVE)
// Kernel entry point implemented as one inline-assembly block using SVE instructions.
// The identifier suggests a u8-quantized, NHWC, 3x3 stride-1 depthwise kernel
// producing a 2x2 output tile; that reading comes from the name only.
//
// Visible parameters:
//   n_channels - channel count; the asm uses it as the upper bound of its `whilelt` predicates.
//   inptrs     - array of input row pointers; 16 entries are read from it below.
//   weights    - packed weight bytes, read one vector at a time by the asm.
//   bias       - 32-bit bias values, read two vectors at a time by the asm.
//   outptrs    - array of output pointers; four entries are loaded and stored through.
//
// NOTE(review): this listing carries embedded source line numbers and has gaps
// (the end of the parameter list, the Params struct header, braces, some asm
// lines and any loop labels/branches are not visible). The comments below
// describe only what the visible lines show.
35 void sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst_impl(
36 const unsigned int n_channels,
37 const uint8_t *
const *
const inptrs,
38 const uint8_t *
const weights,
39 const int32_t *
const bias,
43 uint8_t *
const *
const outptrs
// Fields of the argument bundle (`Params`, per the offsetof() operands further
// down); the struct header itself is not visible in this excerpt.
48 long unsigned int n_channels;
54 uint8_t *
const *
const outptrs;
// Local, reordered copy of the 16 input pointers (filled in by the constructor).
55 const uint8_t *inptrs[16];
// Constructor: stores the arguments in the bundle and permutes the input pointers.
58 long unsigned int n_channels,
59 const uint8_t *
const *inptrs_raw,
60 const void *
const weights,
61 const int32_t *
const bias,
65 uint8_t *
const *outptrs
66 ) : n_channels(n_channels), weights(weights),
bias(
bias),
// Permute the caller's pointer order into the order in which the assembly
// consumes them: the asm reads inptrs sequentially at byte offsets 0x0..0x78.
70 inptrs[0] = inptrs_raw[5];
71 inptrs[1] = inptrs_raw[0];
72 inptrs[2] = inptrs_raw[3];
73 inptrs[3] = inptrs_raw[6];
74 inptrs[4] = inptrs_raw[9];
75 inptrs[5] = inptrs_raw[12];
76 inptrs[6] = inptrs_raw[15];
77 inptrs[7] = inptrs_raw[1];
78 inptrs[8] = inptrs_raw[2];
79 inptrs[9] = inptrs_raw[10];
80 inptrs[10] = inptrs_raw[4];
81 inptrs[11] = inptrs_raw[7];
82 inptrs[12] = inptrs_raw[8];
83 inptrs[13] = inptrs_raw[11];
84 inptrs[14] = inptrs_raw[13];
85 inptrs[15] = inptrs_raw[14];
// Bundle everything so the assembly can reach all arguments through one pointer.
90 const Params params(n_channels, inptrs, weights,
bias, qp,
// --- Setup: x25 = requantisation parameter block, x24 = output pointer array,
// x15 = n_channels, x14 = weights.
95 "ldr x25, [%x[params], %[offsetof_Params_requant]]\n"
97 "ldr x24, [%x[params], %[offsetof_Params_outptrs]]\n"
99 "add x21, x25, %[offsetof_Requantize32_a_offset]\n"
100 "ldr x15, [%x[params], %[offsetof_Params_n_channels]]\n"
101 "ldr x14, [%x[params], %[offsetof_Params_weights]]\n"
102 "add x20, x25, %[offsetof_Requantize32_b_offset]\n"
103 "add x22, x25, %[offsetof_Requantize32_c_offset]\n"
// Broadcast the quantisation constants:
//   z12 = a_offset (bytes), z30 = b_offset (bytes),
//   z24 = c_offset, z11 = minval, z26 = maxval (halfwords).
104 "ld1rb { z12.b }, p4/Z, [x21]\n"
105 "ld1rb { z30.b }, p4/Z, [x20]\n"
106 "add x21, x25, %[offsetof_Requantize32_minval]\n"
107 "add x20, x25, %[offsetof_Requantize32_maxval]\n"
108 "ld1rh { z24.h }, p4/Z, [x22]\n"
109 "ld1rh { z11.h }, p4/Z, [x21]\n"
110 "ld1rh { z26.h }, p4/Z, [x20]\n"
// x13, x12, x11, x10 = the four output pointers (stored through further down).
111 "ldp x13, x12, [x24, #0x0]\n"
// Predicates for the current channel block: p3 for halfword lanes, p2/p1 for
// the two word-lane halves. (The initialisation of x16/x23 is not visible here.)
113 "whilelt p3.h, x16, x15\n"
114 "ldp x11, x10, [x24, #0x10]\n"
115 "whilelt p2.s, x16, x15\n"
116 "whilelt p1.s, x23, x15\n"
117 "ldr x9, [%x[params], %[offsetof_Params_bias]]\n"
// Load nine weight vectors as bytes widened to halfwords (z14, z21, z1, z6, z2,
// z18, z7, z10, then z8 after advancing x14) and subtract b_offset (z30) from each.
118 "ld1b { z14.h }, p4/Z, [x14]\n"
119 "ld1b { z21.h }, p4/Z, [x14, #1, MUL VL]\n"
// x28 = address of the reordered input pointer array inside params.
120 "add x28, %x[params], %[offsetof_Params_inptrs]\n"
122 "ld1b { z1.h }, p4/Z, [x14, #2, MUL VL]\n"
123 "ld1b { z6.h }, p4/Z, [x14, #3, MUL VL]\n"
124 ".inst 0x455e19ce // usublb z14.h, z14.b, z30.b\n"
125 ".inst 0x455e1ab5 // usublb z21.h, z21.b, z30.b\n"
126 "ld1b { z2.h }, p4/Z, [x14, #4, MUL VL]\n"
127 "ld1b { z18.h }, p4/Z, [x14, #5, MUL VL]\n"
128 ".inst 0x455e1821 // usublb z1.h, z1.b, z30.b\n"
129 ".inst 0x455e18c6 // usublb z6.h, z6.b, z30.b\n"
130 "ld1b { z7.h }, p4/Z, [x14, #6, MUL VL]\n"
131 "ld1b { z10.h }, p4/Z, [x14, #7, MUL VL]\n"
132 "inch x14, ALL, MUL #8\n"
133 ".inst 0x455e1842 // usublb z2.h, z2.b, z30.b\n"
// Load two vectors of bias and de-interleave them: uzp1 keeps the even-indexed
// words (z5), uzp2 the odd-indexed words (z9).
134 "ld1w { z17.s }, p2/Z, [x9]\n"
135 "ld1w { z16.s }, p1/Z, [x9, #1, MUL VL]\n"
136 "uzp1 z5.s, z17.s, z16.s\n"
137 "uzp2 z9.s, z17.s, z16.s\n"
138 "ld1b { z8.h }, p4/Z, [x14]\n"
// Fetch the first five input pointers and load one vector from each at channel
// offset x16; a_offset (z12) is subtracted from these inputs below.
139 "ldp x24, x23, [x28, #0x0]\n"
142 "ldp x22, x21, [x28, #0x10]\n"
143 "ldr x20, [x28, #0x20]\n"
146 "ld1b { z0.h }, p3/Z, [x24, x16]\n"
147 "ld1b { z29.h }, p3/Z, [x23, x16]\n"
150 "ld1b { z4.h }, p3/Z, [x22, x16]\n"
151 "ld1b { z13.h }, p3/Z, [x21, x16]\n"
153 ".inst 0x455e1a52 // usublb z18.h, z18.b, z30.b\n"
154 "ld1b { z20.h }, p3/Z, [x20, x16]\n"
// x26 = per-channel requantisation multipliers, x25 = per-channel shifts
// (x25 is reused; the Requantize32 pointer it held is no longer needed in the visible code).
155 "ldr x26, [%x[params], %[offsetof_Params_requant_muls]]\n"
156 ".inst 0x455e18e7 // usublb z7.h, z7.b, z30.b\n"
157 ".inst 0x455e194a // usublb z10.h, z10.b, z30.b\n"
158 "ldr x25, [%x[params], %[offsetof_Params_requant_shifts]]\n"
159 "str x9, [%x[params], %[offsetof_Params_bias]]\n"
160 ".inst 0x455e1908 // usublb z8.h, z8.b, z30.b\n"
161 ".inst 0x454c1800 // usublb z0.h, z0.b, z12.b\n"
162 ".inst 0x454c1bbd // usublb z29.h, z29.b, z12.b\n"
163 ".inst 0x454c1884 // usublb z4.h, z4.b, z12.b\n"
164 ".inst 0x454c19ad // usublb z13.h, z13.b, z12.b\n"
165 ".inst 0x454c1a94 // usublb z20.h, z20.b, z12.b\n"
// --- Multiply-accumulate. smlalb/smlalt accumulate the bottom/top halfword
// lanes into paired 32-bit accumulators, one pair per output point:
//   z5/z9, z17/z25, z16/z23, z22/z27.
// The remaining input vectors (inptrs offsets 0x28..0x78) are loaded and have
// a_offset subtracted in between the accumulate instructions.
// NOTE(review): the initialisation of z17/z25, z16/z23 and z22/z27 is not
// visible in this excerpt.
167 ".inst 0x44824005 // smlalb z5.s, p4/M, z0.h, z2.h\n"
168 ".inst 0x44824409 // smlalt z9.s, p4/M, z0.h, z2.h\n"
169 "ldr x20, [x28, #0x28]\n"
170 "ldr x21, [x28, #0x38]\n"
171 ".inst 0x448e43a5 // smlalb z5.s, p4/M, z29.h, z14.h\n"
172 ".inst 0x44864011 // smlalb z17.s, p4/M, z0.h, z6.h\n"
173 "ld1b { z3.h }, p3/Z, [x20, x16]\n"
174 "ldr x20, [x28, #0x30]\n"
175 ".inst 0x44954010 // smlalb z16.s, p4/M, z0.h, z21.h\n"
176 ".inst 0x448e4016 // smlalb z22.s, p4/M, z0.h, z14.h\n"
177 "ld1b { z31.h }, p3/Z, [x21, x16]\n"
178 ".inst 0x454c1863 // usublb z3.h, z3.b, z12.b\n"
179 ".inst 0x448e47a9 // smlalt z9.s, p4/M, z29.h, z14.h\n"
180 ".inst 0x449241a5 // smlalb z5.s, p4/M, z13.h, z18.h\n"
181 "ldr x21, [x28, #0x40]\n"
182 "ld1b { z15.h }, p3/Z, [x20, x16]\n"
183 ".inst 0x44864419 // smlalt z25.s, p4/M, z0.h, z6.h\n"
184 ".inst 0x44954417 // smlalt z23.s, p4/M, z0.h, z21.h\n"
185 ".inst 0x454c1bff // usublb z31.h, z31.b, z12.b\n"
186 "ldr x20, [x28, #0x48]\n"
187 ".inst 0x448e441b // smlalt z27.s, p4/M, z0.h, z14.h\n"
188 ".inst 0x44814091 // smlalb z17.s, p4/M, z4.h, z1.h\n"
189 "ld1b { z19.h }, p3/Z, [x21, x16]\n"
190 ".inst 0x454c19ef // usublb z15.h, z15.b, z12.b\n"
191 ".inst 0x448141b0 // smlalb z16.s, p4/M, z13.h, z1.h\n"
192 ".inst 0x449541b6 // smlalb z22.s, p4/M, z13.h, z21.h\n"
193 "ld1b { z28.h }, p3/Z, [x20, x16]\n"
194 ".inst 0x454c1a73 // usublb z19.h, z19.b, z12.b\n"
195 ".inst 0x449245a9 // smlalt z9.s, p4/M, z13.h, z18.h\n"
196 ".inst 0x448a4285 // smlalb z5.s, p4/M, z20.h, z10.h\n"
197 "ldr x21, [x28, #0x50]\n"
198 "ldr x20, [x28, #0x58]\n"
199 ".inst 0x44814499 // smlalt z25.s, p4/M, z4.h, z1.h\n"
200 ".inst 0x448145b7 // smlalt z23.s, p4/M, z13.h, z1.h\n"
201 ".inst 0x454c1b9c // usublb z28.h, z28.b, z12.b\n"
202 "ld1b { z4.h }, p3/Z, [x21, x16]\n"
203 ".inst 0x449545bb // smlalt z27.s, p4/M, z13.h, z21.h\n"
204 ".inst 0x448241b1 // smlalb z17.s, p4/M, z13.h, z2.h\n"
205 "ld1b { z29.h }, p3/Z, [x20, x16]\n"
206 "ldr x21, [x28, #0x60]\n"
207 ".inst 0x44874070 // smlalb z16.s, p4/M, z3.h, z7.h\n"
208 ".inst 0x44864296 // smlalb z22.s, p4/M, z20.h, z6.h\n"
209 "ldr x20, [x28, #0x68]\n"
210 ".inst 0x454c1884 // usublb z4.h, z4.b, z12.b\n"
211 ".inst 0x448a4689 // smlalt z9.s, p4/M, z20.h, z10.h\n"
212 ".inst 0x449543e5 // smlalb z5.s, p4/M, z31.h, z21.h\n"
213 ".inst 0x454c1bbd // usublb z29.h, z29.b, z12.b\n"
214 "ld1b { z0.h }, p3/Z, [x21, x16]\n"
215 ".inst 0x448245b9 // smlalt z25.s, p4/M, z13.h, z2.h\n"
216 ".inst 0x44874477 // smlalt z23.s, p4/M, z3.h, z7.h\n"
217 "ld1b { z3.h }, p3/Z, [x20, x16]\n"
218 "ldr x20, [x28, #0x70]\n"
219 ".inst 0x4486469b // smlalt z27.s, p4/M, z20.h, z6.h\n"
220 ".inst 0x44874291 // smlalb z17.s, p4/M, z20.h, z7.h\n"
221 ".inst 0x454c1800 // usublb z0.h, z0.b, z12.b\n"
222 "ld1b { z13.h }, p3/Z, [x20, x16]\n"
223 ".inst 0x44824290 // smlalb z16.s, p4/M, z20.h, z2.h\n"
224 ".inst 0x448841f6 // smlalb z22.s, p4/M, z15.h, z8.h\n"
225 ".inst 0x454c1863 // usublb z3.h, z3.b, z12.b\n"
226 "ldr x20, [x28, #0x78]\n"
227 ".inst 0x449547e9 // smlalt z9.s, p4/M, z31.h, z21.h\n"
228 ".inst 0x44814265 // smlalb z5.s, p4/M, z19.h, z1.h\n"
229 ".inst 0x454c19ad // usublb z13.h, z13.b, z12.b\n"
// p0 = store predicate for this block, indexed by the output offset x27.
230 "whilelt p0.h, x27, x15\n"
231 ".inst 0x44874699 // smlalt z25.s, p4/M, z20.h, z7.h\n"
232 ".inst 0x44824697 // smlalt z23.s, p4/M, z20.h, z2.h\n"
// Load two vectors of requantisation multipliers from x26; they are
// de-interleaved below into z2 (even-indexed) and z15 (odd-indexed).
233 "ld1w { z20.s }, p2/Z, [x26]\n"
235 ".inst 0x448845fb // smlalt z27.s, p4/M, z15.h, z8.h\n"
236 ".inst 0x448e43f1 // smlalb z17.s, p4/M, z31.h, z14.h\n"
237 "ld1w { z15.s }, p1/Z, [x26, #1, MUL VL]\n"
// Re-read the bias pointer from params into x21 for the reload further down.
238 "ldr x21, [%x[params], %[offsetof_Params_bias]]\n"
239 ".inst 0x44924390 // smlalb z16.s, p4/M, z28.h, z18.h\n"
240 ".inst 0x44824396 // smlalb z22.s, p4/M, z28.h, z2.h\n"
241 "addvl x26, x26, #2\n"
242 ".inst 0x44814669 // smlalt z9.s, p4/M, z19.h, z1.h\n"
243 ".inst 0x44884385 // smlalb z5.s, p4/M, z28.h, z8.h\n"
244 ".inst 0x448e47f9 // smlalt z25.s, p4/M, z31.h, z14.h\n"
245 ".inst 0x44924797 // smlalt z23.s, p4/M, z28.h, z18.h\n"
246 "ld1b { z31.h }, p3/Z, [x20, x16]\n"
247 ".inst 0x454c1bff // usublb z31.h, z31.b, z12.b\n"
248 ".inst 0x4482479b // smlalt z27.s, p4/M, z28.h, z2.h\n"
249 ".inst 0x44954271 // smlalb z17.s, p4/M, z19.h, z21.h\n"
250 "uzp1 z2.s, z20.s, z15.s\n"
252 ".inst 0x448e4090 // smlalb z16.s, p4/M, z4.h, z14.h\n"
253 ".inst 0x448143b6 // smlalb z22.s, p4/M, z29.h, z1.h\n"
254 "uzp2 z15.s, z20.s, z15.s\n"
// Load two vectors of requantisation shifts from x25; they are de-interleaved
// below into z21 (even-indexed) and z1 (odd-indexed).
255 "ld1w { z20.s }, p2/Z, [x25]\n"
256 ".inst 0x44884789 // smlalt z9.s, p4/M, z28.h, z8.h\n"
257 ".inst 0x44864085 // smlalb z5.s, p4/M, z4.h, z6.h\n"
260 ".inst 0x44954679 // smlalt z25.s, p4/M, z19.h, z21.h\n"
261 ".inst 0x448e4497 // smlalt z23.s, p4/M, z4.h, z14.h\n"
262 "ld1w { z19.s }, p1/Z, [x25, #1, MUL VL]\n"
263 "uzp1 z21.s, z20.s, z19.s\n"
264 ".inst 0x448147bb // smlalt z27.s, p4/M, z29.h, z1.h\n"
265 ".inst 0x448a4391 // smlalb z17.s, p4/M, z28.h, z10.h\n"
266 "uzp2 z1.s, z20.s, z19.s\n"
// Recompute the load predicates (the instructions that advance x16/x20 are not
// visible in this excerpt).
267 "whilelt p2.s, x16, x15\n"
268 ".inst 0x44864010 // smlalb z16.s, p4/M, z0.h, z6.h\n"
269 ".inst 0x44924076 // smlalb z22.s, p4/M, z3.h, z18.h\n"
270 "whilelt p1.s, x20, x15\n"
271 "whilelt p3.h, x16, x15\n"
272 ".inst 0x44864489 // smlalt z9.s, p4/M, z4.h, z6.h\n"
273 ".inst 0x44874005 // smlalb z5.s, p4/M, z0.h, z7.h\n"
// --- Requantise each accumulator: sqrdmulh by the multiplier (z2 / z15), then
// an and/asr/sqadd correction derived from the shift vector, then srshl by the
// shift (z21 / z1). These steps are interleaved with the remaining accumulates.
274 ".inst 0x04a274a5 // sqrdmulh z5.s, z5.s, z2.s\n"
275 "addvl x25, x25, #2\n"
276 ".inst 0x448a4799 // smlalt z25.s, p4/M, z28.h, z10.h\n"
277 ".inst 0x44864417 // smlalt z23.s, p4/M, z0.h, z6.h\n"
278 "and z19.d, z5.d, z21.d\n"
279 ".inst 0x4492447b // smlalt z27.s, p4/M, z3.h, z18.h\n"
280 ".inst 0x449243b1 // smlalb z17.s, p4/M, z29.h, z18.h\n"
281 "asr z19.s, z19.s, #0x1f\n"
282 ".inst 0x448a41b0 // smlalb z16.s, p4/M, z13.h, z10.h\n"
283 ".inst 0x448741b6 // smlalb z22.s, p4/M, z13.h, z7.h\n"
284 "sqadd z5.s, z5.s, z19.s\n"
285 ".inst 0x448292a5 // srshl z5.s, p4/M, z5.s, z21.s\n"
286 ".inst 0x44874409 // smlalt z9.s, p4/M, z0.h, z7.h\n"
287 ".inst 0x449247b9 // smlalt z25.s, p4/M, z29.h, z18.h\n"
288 ".inst 0x04af7529 // sqrdmulh z9.s, z9.s, z15.s\n"
289 ".inst 0x448a45b7 // smlalt z23.s, p4/M, z13.h, z10.h\n"
290 ".inst 0x448745bb // smlalt z27.s, p4/M, z13.h, z7.h\n"
291 "and z29.d, z9.d, z1.d\n"
292 ".inst 0x44884071 // smlalb z17.s, p4/M, z3.h, z8.h\n"
293 ".inst 0x448843f0 // smlalb z16.s, p4/M, z31.h, z8.h\n"
294 ".inst 0x04a27631 // sqrdmulh z17.s, z17.s, z2.s\n"
295 ".inst 0x448a43f6 // smlalb z22.s, p4/M, z31.h, z10.h\n"
296 ".inst 0x44884479 // smlalt z25.s, p4/M, z3.h, z8.h\n"
297 ".inst 0x04a27610 // sqrdmulh z16.s, z16.s, z2.s\n"
298 ".inst 0x448847f7 // smlalt z23.s, p4/M, z31.h, z8.h\n"
299 ".inst 0x448a47fb // smlalt z27.s, p4/M, z31.h, z10.h\n"
300 ".inst 0x04a276d6 // sqrdmulh z22.s, z22.s, z2.s\n"
301 "asr z29.s, z29.s, #0x1f\n"
302 "and z18.d, z17.d, z21.d\n"
303 ".inst 0x04af7739 // sqrdmulh z25.s, z25.s, z15.s\n"
304 "and z20.d, z16.d, z21.d\n"
305 ".inst 0x04af76f7 // sqrdmulh z23.s, z23.s, z15.s\n"
306 "and z19.d, z22.d, z21.d\n"
307 ".inst 0x04af777b // sqrdmulh z27.s, z27.s, z15.s\n"
308 "sqadd z9.s, z9.s, z29.s\n"
309 ".inst 0x44829029 // srshl z9.s, p4/M, z9.s, z1.s\n"
310 "asr z18.s, z18.s, #0x1f\n"
311 "and z7.d, z25.d, z1.d\n"
312 "asr z20.s, z20.s, #0x1f\n"
313 "and z6.d, z23.d, z1.d\n"
314 "asr z19.s, z19.s, #0x1f\n"
315 "and z2.d, z27.d, z1.d\n"
316 "sqadd z17.s, z17.s, z18.s\n"
317 "asr z7.s, z7.s, #0x1f\n"
318 ".inst 0x448292b1 // srshl z17.s, p4/M, z17.s, z21.s\n"
319 "sqadd z16.s, z16.s, z20.s\n"
320 "asr z6.s, z6.s, #0x1f\n"
321 ".inst 0x448292b0 // srshl z16.s, p4/M, z16.s, z21.s\n"
322 "sqadd z22.s, z22.s, z19.s\n"
323 "asr z2.s, z2.s, #0x1f\n"
324 ".inst 0x448292b6 // srshl z22.s, p4/M, z22.s, z21.s\n"
325 "sqadd z25.s, z25.s, z7.s\n"
326 "sqadd z23.s, z23.s, z6.s\n"
327 ".inst 0x44829039 // srshl z25.s, p4/M, z25.s, z1.s\n"
328 ".inst 0x44829037 // srshl z23.s, p4/M, z23.s, z1.s\n"
329 "sqadd z27.s, z27.s, z2.s\n"
// --- Narrow with saturation to halfwords: sqxtnb writes the bottom-lane
// accumulator, sqxtnt merges in the top-lane accumulator, re-interleaving each pair.
330 ".inst 0x453040a5 // sqxtnb z5.h, z5.s\n"
331 ".inst 0x4482903b // srshl z27.s, p4/M, z27.s, z1.s\n"
332 ".inst 0x45304231 // sqxtnb z17.h, z17.s\n"
333 ".inst 0x45304210 // sqxtnb z16.h, z16.s\n"
334 ".inst 0x453042d6 // sqxtnb z22.h, z22.s\n"
335 ".inst 0x45304525 // sqxtnt z5.h, z9.s\n"
336 ".inst 0x45304731 // sqxtnt z17.h, z25.s\n"
337 ".inst 0x453046f0 // sqxtnt z16.h, z23.s\n"
338 ".inst 0x45304776 // sqxtnt z22.h, z27.s\n"
// Add c_offset (z24), clamp to [minval (z11), maxval (z26)], and store the low
// byte of every halfword to the four output pointers at offset x27.
339 "sqadd z5.h, z5.h, z24.h\n"
340 "smax z5.h, p4/M, z5.h, z11.h\n"
341 "smin z5.h, p4/M, z5.h, z26.h\n"
342 "sqadd z17.h, z17.h, z24.h\n"
343 "sqadd z16.h, z16.h, z24.h\n"
344 "smax z17.h, p4/M, z17.h, z11.h\n"
345 "smax z16.h, p4/M, z16.h, z11.h\n"
346 "sqadd z22.h, z22.h, z24.h\n"
347 "smax z22.h, p4/M, z22.h, z11.h\n"
348 "smin z17.h, p4/M, z17.h, z26.h\n"
349 "st1b { z5.h }, p0, [x13, x27]\n"
350 "smin z16.h, p4/M, z16.h, z26.h\n"
351 "smin z22.h, p4/M, z22.h, z26.h\n"
352 "st1b { z17.h }, p0, [x12, x27]\n"
353 "st1b { z16.h }, p0, [x11, x27]\n"
354 "st1b { z22.h }, p0, [x10, x27]\n"
// --- Reload weights, bias and the first five input vectors, mirroring the setup
// sequence above. Presumably this prepares the next channel block; the loop
// labels and branches are not visible in this excerpt, so confirm against the full source.
355 "ld1b { z14.h }, p4/Z, [x14]\n"
356 "ld1b { z21.h }, p4/Z, [x14, #1, MUL VL]\n"
358 "ld1b { z1.h }, p4/Z, [x14, #2, MUL VL]\n"
359 "ld1b { z6.h }, p4/Z, [x14, #3, MUL VL]\n"
360 ".inst 0x455e19ce // usublb z14.h, z14.b, z30.b\n"
361 ".inst 0x455e1ab5 // usublb z21.h, z21.b, z30.b\n"
362 "ld1b { z2.h }, p4/Z, [x14, #4, MUL VL]\n"
363 "ld1b { z18.h }, p4/Z, [x14, #5, MUL VL]\n"
364 ".inst 0x455e1821 // usublb z1.h, z1.b, z30.b\n"
365 ".inst 0x455e18c6 // usublb z6.h, z6.b, z30.b\n"
366 "ld1b { z7.h }, p4/Z, [x14, #6, MUL VL]\n"
367 "ld1b { z10.h }, p4/Z, [x14, #7, MUL VL]\n"
368 "inch x14, ALL, MUL #8\n"
369 ".inst 0x455e1842 // usublb z2.h, z2.b, z30.b\n"
370 "ld1w { z17.s }, p2/Z, [x21]\n"
371 "ld1w { z16.s }, p1/Z, [x21, #1, MUL VL]\n"
372 "uzp1 z5.s, z17.s, z16.s\n"
373 "uzp2 z9.s, z17.s, z16.s\n"
374 "ld1b { z8.h }, p4/Z, [x14]\n"
375 "ldp x24, x23, [x28, #0x0]\n"
// Advance the bias pointer by two vectors and write it back to params.
376 "addvl x21, x21, #2\n"
377 "str x21, [%x[params], %[offsetof_Params_bias]]\n"
378 "ldp x22, x21, [x28, #0x10]\n"
379 "ldr x20, [x28, #0x20]\n"
382 "ld1b { z0.h }, p3/Z, [x24, x16]\n"
383 "ld1b { z29.h }, p3/Z, [x23, x16]\n"
386 "ld1b { z4.h }, p3/Z, [x22, x16]\n"
387 "ld1b { z13.h }, p3/Z, [x21, x16]\n"
390 "ld1b { z20.h }, p3/Z, [x20, x16]\n"
391 ".inst 0x455e1a52 // usublb z18.h, z18.b, z30.b\n"
392 ".inst 0x455e18e7 // usublb z7.h, z7.b, z30.b\n"
393 ".inst 0x455e194a // usublb z10.h, z10.b, z30.b\n"
394 ".inst 0x455e1908 // usublb z8.h, z8.b, z30.b\n"
395 ".inst 0x454c1800 // usublb z0.h, z0.b, z12.b\n"
396 ".inst 0x454c1bbd // usublb z29.h, z29.b, z12.b\n"
397 ".inst 0x454c1884 // usublb z4.h, z4.b, z12.b\n"
398 ".inst 0x454c19ad // usublb z13.h, z13.b, z12.b\n"
399 ".inst 0x454c1a94 // usublb z20.h, z20.b, z12.b\n"
// Asm operands: struct field offsets passed as immediates ("I") so the assembly
// can address Params / Requantize32 members, plus the params pointer in a register.
402 : [offsetof_Params_bias]
"I" (offsetof(Params,
bias)), [offsetof_Params_inptrs]
"I" (offsetof(Params, inptrs)), [offsetof_Params_n_channels]
"I" (offsetof(Params, n_channels)), [offsetof_Params_outptrs]
"I" (offsetof(Params, outptrs)), [offsetof_Params_requant]
"I" (offsetof(Params, requant)), [offsetof_Params_requant_muls]
"I" (offsetof(Params,
requant_muls)), [offsetof_Params_requant_shifts]
"I" (offsetof(Params,
requant_shifts)), [offsetof_Params_weights]
"I" (offsetof(Params, weights)), [offsetof_Requantize32_a_offset]
"I" (offsetof(
arm_gemm::Requantize32, a_offset)), [offsetof_Requantize32_b_offset]
"I" (offsetof(
arm_gemm::Requantize32, b_offset)), [offsetof_Requantize32_c_offset]
"I" (offsetof(
arm_gemm::Requantize32, c_offset)), [offsetof_Requantize32_maxval]
"I" (offsetof(
arm_gemm::Requantize32, maxval)), [offsetof_Requantize32_minval]
"I" (offsetof(
arm_gemm::Requantize32, minval)), [params]
// NOTE(review): the operand below looks like a mis-decoded `&params` (the
// address of the local `params` object) — confirm against the original source.
"r" (¶ms)
// Clobber list: condition flags, memory, and every predicate, general-purpose
// and vector register named in the assembly above.
403 :
"cc",
"memory",
"p0",
"p1",
"p2",
"p3",
"p4",
"x9",
"x10",
"x11",
"x12",
"x13",
"x14",
"x15",
"x16",
"x20",
"x21",
"x22",
"x23",
"x24",
"x25",
"x26",
"x27",
"x28",
"z0",
"z1",
"z2",
"z3",
"z4",
"z5",
"z6",
"z7",
"z8",
"z9",
"z10",
"z11",
"z12",
"z13",
"z14",
"z15",
"z16",
"z17",
"z18",
"z19",
"z20",
"z21",
"z22",
"z23",
"z24",
"z25",
"z26",
"z27",
"z28",
"z29",
"z30",
"z31"
410 #endif // defined(ARM_COMPUTE_ENABLE_SVE)