35 #if defined(__aarch64__)
36 #if defined(ARM_COMPUTE_ENABLE_SVE)
37 #if defined(ARM_COMPUTE_ENABLE_SME2)
42 #endif // defined(ARM_COMPUTE_ENABLE_SME2)
51 #endif // defined(ARM_COMPUTE_ENABLE_SVE)
61 #endif // defined(__aarch64__)
72 #if defined(__aarch64__)
// Predicate taking the depthwise arguments (unnamed, so not used by name) and
// the quantization parameters as an untyped pointer `_qp`. It is listed among
// the constraints of the "s8qs" entries in depthwise_s8q_methods.
// NOTE(review): the name suggests it tests for symmetric weight quantization —
// confirm against the function body.
73 bool qp_weights_are_symmetric(
const DepthwiseArgs &,
const void *_qp)
// Ignores both (unnamed) arguments and returns the largest value representable
// in uint64_t.
// NOTE(review): presumably used as a cost estimate that marks an implementation
// as least preferred — confirm at the use site.
79 uint64_t not_preferred(
const DepthwiseArgs &,
const Requantize32 &)
81 return std::numeric_limits<uint64_t>::max();
83 #endif // defined(__aarch64__)
// Table of depthwise implementations for int8 input, weights and output with
// Requantize32 output parameters. Entries name a DepthwiseMethod, a kernel name
// string, in most cases a constraint<Requantize32>(...) built from predicate
// functions, and a lambda that allocates the strategy with `new` and wraps it
// in a DepthwiseCommon<int8_t, int8_t, int8_t> subclass. The table ends with a
// DepthwiseMethod::DEFAULT entry with an empty name and null pointers.
// NOTE(review): entries are presumably tried in listed order — confirm against
// the code that searches this table.
86 static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
87 #if defined(__aarch64__)
88 #if defined(ARM_COMPUTE_ENABLE_SVE)
89 #if defined(ARM_COMPUTE_ENABLE_SME2)
// ---- Entries compiled only when ARM_COMPUTE_ENABLE_SME2 is defined
// ---- (nested inside the SVE and aarch64 guards). All use DepthwiseMethod::PLANAR.
// DepthwisePlanar<int8_t> over the sme2_s8q_planar_3x3_s1_4rows_dot_za strategy.
91 DepthwiseMethod::PLANAR,
92 "sme2_s8q_planar_3x3_s1_4rows_dot_za",
93 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
94 is_supported<sme2_s8q_planar_3x3_s1_4rows_dot_za>,
95 has_no_channel_multiplier,
96 qp_has_no_left_shift, no_prime_right_pad),
98 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
99 auto strat =
new sme2_s8q_planar_3x3_s1_4rows_dot_za(
args.cpu_info);
100 return new DepthwisePlanar<int8_t>(strat,
args, qp);
// DepthwisePlanar<int8_t> over the sme2_s8q_planar_3x3_s2_4rows_dot_za strategy.
104 DepthwiseMethod::PLANAR,
105 "sme2_s8q_planar_3x3_s2_4rows_dot_za",
106 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
107 is_supported<sme2_s8q_planar_3x3_s2_4rows_dot_za>,
108 has_no_channel_multiplier,
109 qp_has_no_left_shift, no_prime_right_pad),
111 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
112 auto strat =
new sme2_s8q_planar_3x3_s2_4rows_dot_za(
args.cpu_info);
113 return new DepthwisePlanar<int8_t>(strat,
args, qp);
// DepthwisePlanar<int8_t> over the sme2_s8q_planar_5x5_s1_4rows_dot_za strategy.
117 DepthwiseMethod::PLANAR,
118 "sme2_s8q_planar_5x5_s1_4rows_dot_za",
119 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
120 is_supported<sme2_s8q_planar_5x5_s1_4rows_dot_za>,
121 has_no_channel_multiplier,
122 qp_has_no_left_shift, no_prime_right_pad),
124 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
125 auto strat =
new sme2_s8q_planar_5x5_s1_4rows_dot_za(
args.cpu_info);
126 return new DepthwisePlanar<int8_t>(strat,
args, qp);
// DepthwisePlanar<int8_t> over the sme2_s8q_planar_5x5_s2_4rows_dot_za strategy.
130 DepthwiseMethod::PLANAR,
131 "sme2_s8q_planar_5x5_s2_4rows_dot_za",
132 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
133 is_supported<sme2_s8q_planar_5x5_s2_4rows_dot_za>,
134 has_no_channel_multiplier,
135 qp_has_no_left_shift, no_prime_right_pad),
137 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
138 auto strat =
new sme2_s8q_planar_5x5_s2_4rows_dot_za(
args.cpu_info);
139 return new DepthwisePlanar<int8_t>(strat,
args, qp);
142 #endif // defined(ARM_COMPUTE_ENABLE_SME2)
// ---- Entries compiled when ARM_COMPUTE_ENABLE_SVE is defined (outside the SME2 guard).
// ---- All use DepthwiseMethod::DEPTHFIRST.
// DepthwiseDepthfirst<int8_t> over sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst;
// its constraint list includes qp_weights_are_symmetric.
144 DepthwiseMethod::DEPTHFIRST,
145 "sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
146 constraint<Requantize32>(is_supported<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
147 qp_has_no_left_shift,
148 qp_weights_are_symmetric,
151 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
152 auto strat =
new sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst(
args.cpu_info);
153 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.
157 DepthwiseMethod::DEPTHFIRST,
158 "sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
159 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
160 qp_has_no_left_shift,
163 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
164 auto strat =
new sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst(
args.cpu_info);
165 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.
169 DepthwiseMethod::DEPTHFIRST,
170 "sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
171 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
172 qp_has_no_left_shift,
175 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
176 auto strat =
new sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst(
args.cpu_info);
177 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.
181 DepthwiseMethod::DEPTHFIRST,
182 "sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
183 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
184 qp_has_no_left_shift,
187 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
188 auto strat =
new sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst(
args.cpu_info);
189 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.
193 DepthwiseMethod::DEPTHFIRST,
194 "sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
195 constraint<Requantize32>(is_supported<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
196 qp_has_no_left_shift,
199 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
200 auto strat =
new sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst(
args.cpu_info);
201 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirstMultiplier<..., int32_t, false> over the SVE 3x3 s2 packed-to-NHWC
// strategy; its constraint list includes has_channel_multiplier.
205 DepthwiseMethod::DEPTHFIRST,
206 "sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
207 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
208 qp_has_no_left_shift,
209 has_channel_multiplier,
212 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
213 auto strat =
new sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(
args.cpu_info);
214 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat,
args, qp);
// DepthwiseDepthfirstMultiplier<..., int32_t, false> over the SVE 5x5 s1 packed-to-NHWC
// strategy; its constraint list includes has_channel_multiplier.
218 DepthwiseMethod::DEPTHFIRST,
219 "sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
220 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
221 qp_has_no_left_shift,
222 has_channel_multiplier,
225 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
226 auto strat =
new sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(
args.cpu_info);
227 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat,
args, qp);
230 #endif // defined(ARM_COMPUTE_ENABLE_SVE)
// ---- Entries guarded only by __aarch64__. All use DepthwiseMethod::DEPTHFIRST.
// DepthwiseDepthfirst<int8_t> over a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst;
// constrained by qp_weights_are_symmetric and cpu_has_dot_product among others.
232 DepthwiseMethod::DEPTHFIRST,
233 "a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
234 constraint<Requantize32>(is_supported<a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
235 qp_weights_are_symmetric,
236 qp_has_no_left_shift,
237 cpu_has_dot_product),
239 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
240 auto strat =
new a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst(
args.cpu_info);
241 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst;
// constrained by cpu_has_dot_product among others.
245 DepthwiseMethod::DEPTHFIRST,
246 "a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
247 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
248 qp_has_no_left_shift,
249 cpu_has_dot_product),
251 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
252 auto strat =
new a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst(
args.cpu_info);
253 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.
257 DepthwiseMethod::DEPTHFIRST,
258 "a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
259 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
260 qp_has_no_left_shift),
262 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
263 auto strat =
new a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst(
args.cpu_info);
264 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.
268 DepthwiseMethod::DEPTHFIRST,
269 "a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
270 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
271 qp_has_no_left_shift),
273 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
274 auto strat =
new a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst(
args.cpu_info);
275 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// DepthwiseDepthfirst<int8_t> over a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.
279 DepthwiseMethod::DEPTHFIRST,
280 "a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
281 constraint<Requantize32>(is_supported<a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
282 qp_has_no_left_shift),
284 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
285 auto strat =
new a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst(
args.cpu_info);
286 return new DepthwiseDepthfirst<int8_t>(strat,
args, qp);
// Generic entry: the name string says "output3x3" while the kernel class is
// a64_s8q_nhwc_generic_output9_mla_depthfirst; the kernel is wrapped in a
// GenericDepthfirstStrategy<int8_t> constructed with (kernel, 3, 3, args) and
// then in DepthwiseDepthfirstGeneric<int8_t>.
290 DepthwiseMethod::DEPTHFIRST,
291 "a64_s8q_nhwc_generic_output3x3_mla_depthfirst",
294 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
295 auto kernel =
new a64_s8q_nhwc_generic_output9_mla_depthfirst(
args.cpu_info);
296 auto strat =
new GenericDepthfirstStrategy<int8_t>(kernel, 3, 3,
args);
297 return new DepthwiseDepthfirstGeneric<int8_t>(strat,
args, qp);
// DepthwiseDepthfirstMultiplier<..., int32_t, false> over the A64 3x3 s2 packed-to-NHWC
// strategy; constrained by has_channel_multiplier and cpu_has_dot_product among others.
301 DepthwiseMethod::DEPTHFIRST,
302 "a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
303 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
304 qp_has_no_left_shift,
305 has_channel_multiplier,
306 cpu_has_dot_product),
308 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
309 auto strat =
new a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(
args.cpu_info);
310 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat,
args, qp);
// DepthwiseDepthfirstMultiplier<..., int32_t, false> over the A64 5x5 s1 packed-to-NHWC
// strategy; constrained by has_channel_multiplier and cpu_has_dot_product among others.
314 DepthwiseMethod::DEPTHFIRST,
315 "a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
316 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
317 qp_has_no_left_shift,
318 has_channel_multiplier,
319 cpu_has_dot_product),
321 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
322 auto strat =
new a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(
args.cpu_info);
323 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat,
args, qp);
// Generic channel-multiplier entry: the only visible constraint is
// has_channel_multiplier. The kernel is wrapped in a
// GenericDepthfirstMultiplierStrategy<int8_t> and then in
// DepthwiseDepthfirstMultiplier<..., int32_t, true> (last template argument
// is true here, false in the fixed-size multiplier entries).
327 DepthwiseMethod::DEPTHFIRST,
328 "a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
329 constraint<Requantize32>(has_channel_multiplier),
331 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
332 auto kern =
new a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(
args.cpu_info);
333 auto strat =
new GenericDepthfirstMultiplierStrategy<int8_t>(kern,
args);
334 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, true>(strat,
args, qp);
337 #endif // defined(__aarch64__)
// Terminating entry, present on every platform: DEFAULT method, empty name,
// and nullptr for the remaining three fields.
338 { DepthwiseMethod::DEFAULT,
"",
nullptr,
nullptr,
nullptr },
344 return depthwise_s8q_methods;
// Explicit instantiation of the `depthwise` function template for the
// (DepthwiseArgs, Requantize32) parameter list, returning
// UniqueDepthwiseCommon<int8_t, int8_t, int8_t>.
347 template UniqueDepthwiseCommon<int8_t, int8_t, int8_t>
depthwise(
const DepthwiseArgs &,
const Requantize32 &);