35 #if defined(__aarch64__)
36 #if defined(ARM_COMPUTE_ENABLE_SVE)
37 #if defined(ARM_COMPUTE_ENABLE_SME2)
42 #endif // defined(ARM_COMPUTE_ENABLE_SME2)
46 #endif // defined(ARM_COMPUTE_ENABLE_SVE)
52 #endif // defined(__aarch64__)
#if defined(__aarch64__)
// Returns the largest value representable in uint64_t, irrespective of the
// depthwise arguments and requantization parameters it is handed (both
// parameters are unnamed and unused).
//
// NOTE(review): presumably intended as a cost estimate that makes the
// selection logic rank an implementation last - confirm against the callers,
// none of which are visible in this part of the file.
uint64_t not_preferred(const DepthwiseArgs &, const Requantize32 &)
{
  return std::numeric_limits<uint64_t>::max();
}
#endif // defined(__aarch64__)
// Table of candidate depthwise implementations, instantiated as
// DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32>.
// Each visible entry lists a DepthwiseMethod, a name string, usually a
// constraint<Requantize32>(...) predicate list, and a factory lambda that
// heap-allocates a kernel strategy from args.cpu_info and returns a
// DepthwiseCommon<uint8_t, int8_t, uint8_t> * wrapping it.
//
// NOTE(review): the embedded original line numbers skip values (e.g. 74 -> 76,
// 81 -> 83, 132 -> 135, 200 -> 203), so some lines of every entry (most likely
// the delimiting braces and one or more slots between the name/constraint and
// the factory) are not present in this view. The comments below describe only
// what is visible.
// NOTE(review): entry order presumably determines selection priority - confirm
// against the code that walks this table.
71 static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
72 #if defined(__aarch64__)
73 #if defined(ARM_COMPUTE_ENABLE_SVE)
74 #if defined(ARM_COMPUTE_ENABLE_SME2)
// --- sme2_* planar kernels (only compiled with ARM_COMPUTE_ENABLE_SME2) ---
// All four share the same constraint list: cpu_has_sme, cpu_has_sme2,
// is_supported<kernel>, has_no_channel_multiplier and qp_has_no_left_shift.
// Each factory wraps the kernel strategy in DepthwisePlanar<uint8_t, int8_t>.
76 DepthwiseMethod::PLANAR,
77 "sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za",
78 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
79 is_supported<sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za>,
80 has_no_channel_multiplier,
81 qp_has_no_left_shift),
83 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
84 auto strat =
new sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za(
args.cpu_info);
// NOTE(review): strat is a raw owning pointer; presumably DepthwisePlanar
// takes ownership of it - confirm (the same pattern repeats in every factory).
85 return new DepthwisePlanar<uint8_t, int8_t>(strat,
args, qp);
89 DepthwiseMethod::PLANAR,
90 "sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za",
91 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
92 is_supported<sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za>,
93 has_no_channel_multiplier,
94 qp_has_no_left_shift),
96 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
97 auto strat =
new sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za(
args.cpu_info);
98 return new DepthwisePlanar<uint8_t, int8_t>(strat,
args, qp);
102 DepthwiseMethod::PLANAR,
103 "sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za",
104 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
105 is_supported<sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za>,
106 has_no_channel_multiplier,
107 qp_has_no_left_shift),
109 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
110 auto strat =
new sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za(
args.cpu_info);
111 return new DepthwisePlanar<uint8_t, int8_t>(strat,
args, qp);
115 DepthwiseMethod::PLANAR,
116 "sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za",
117 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
118 is_supported<sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za>,
119 has_no_channel_multiplier,
120 qp_has_no_left_shift),
122 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
123 auto strat =
new sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za(
args.cpu_info);
124 return new DepthwisePlanar<uint8_t, int8_t>(strat,
args, qp);
127 #endif // defined(ARM_COMPUTE_ENABLE_SME2)
// --- sve_* depth-first kernels (only compiled with ARM_COMPUTE_ENABLE_SVE) ---
// Each factory wraps the kernel strategy in DepthwiseDepthfirst<uint8_t, int8_t>.
// NOTE(review): in these three entries the constraint<Requantize32>(...) call
// is not closed within the visible lines (is_supported<kernel> and
// qp_has_no_left_shift are followed directly by the factory lambda); the
// remaining arguments fall in the numbering gaps and cannot be stated here.
129 DepthwiseMethod::DEPTHFIRST,
130 "sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
131 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
132 qp_has_no_left_shift,
135 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
136 auto strat =
new sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(
args.cpu_info);
137 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
141 DepthwiseMethod::DEPTHFIRST,
142 "sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
143 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
144 qp_has_no_left_shift,
147 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
148 auto strat =
new sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(
args.cpu_info);
149 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
153 DepthwiseMethod::DEPTHFIRST,
154 "sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
155 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
156 qp_has_no_left_shift,
159 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
160 auto strat =
new sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(
args.cpu_info);
161 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
164 #endif // defined(ARM_COMPUTE_ENABLE_SVE)
// --- a64_* depth-first kernels (compiled whenever __aarch64__ is defined) ---
// Constraint list: is_supported<kernel> and qp_has_no_left_shift.
// Each factory wraps the kernel strategy in DepthwiseDepthfirst<uint8_t, int8_t>.
166 DepthwiseMethod::DEPTHFIRST,
167 "a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
168 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
169 qp_has_no_left_shift),
171 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
172 auto strat =
new a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(
args.cpu_info);
173 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
177 DepthwiseMethod::DEPTHFIRST,
178 "a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
179 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
180 qp_has_no_left_shift),
182 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
183 auto strat =
new a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(
args.cpu_info);
184 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
188 DepthwiseMethod::DEPTHFIRST,
189 "a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
190 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
191 qp_has_no_left_shift),
193 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
194 auto strat =
new a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(
args.cpu_info);
195 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat,
args, qp);
// --- generic a64 depth-first kernel ---
// No constraint is visible for this entry (original lines 201-202 are absent).
// The factory builds the "output9" kernel, hands it to
// GenericDepthfirstStrategy together with the literals 3, 3 and args, and
// returns a DepthwiseDepthfirstGeneric<uint8_t, int8_t>.
// NOTE(review): the entry name says "output3x3" while the kernel class says
// "output9"; the 3, 3 arguments presumably give the output tile rows/columns
// (3 * 3 = 9) - confirm against GenericDepthfirstStrategy's constructor.
199 DepthwiseMethod::DEPTHFIRST,
200 "a64_u8s8u8q_nhwc_generic_output3x3_mla_depthfirst",
203 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
204 auto kernel =
new a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst(
args.cpu_info);
205 auto strat =
new GenericDepthfirstStrategy<uint8_t, int8_t>(kernel, 3, 3,
args);
206 return new DepthwiseDepthfirstGeneric<uint8_t, int8_t>(strat,
args, qp);
// --- channel-multiplier kernel ---
// The only visible entry constrained on has_channel_multiplier (the sme2_*
// entries above require has_no_channel_multiplier instead). The factory wraps
// the kernel in GenericDepthfirstMultiplierStrategy and returns a
// DepthwiseDepthfirstMultiplier<uint8_t, int8_t, uint8_t, int32_t, true>.
// NOTE(review): the meaning of the int32_t and `true` template arguments is
// not visible here - see DepthwiseDepthfirstMultiplier's declaration.
210 DepthwiseMethod::DEPTHFIRST,
211 "a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
212 constraint<Requantize32>(has_channel_multiplier),
214 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
215 auto kern =
new a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(
args.cpu_info);
216 auto strat =
new GenericDepthfirstMultiplierStrategy<uint8_t, int8_t>(kern,
args);
217 return new DepthwiseDepthfirstMultiplier<uint8_t, int8_t, uint8_t, int32_t, true>(strat,
args, qp);
220 #endif // defined(__aarch64__)
// Final entry, present on every architecture: DepthwiseMethod::DEFAULT, an
// empty name and three null pointers.
// NOTE(review): presumably the end-of-table marker - confirm with the code
// that iterates the table.
221 { DepthwiseMethod::DEFAULT,
"",
nullptr,
nullptr,
nullptr },
// NOTE(review): the statement below returns the table from an enclosing
// function; neither the array's closing brace nor that function's signature
// is visible in this view.
227 return depthwise_u8q_methods;
230 template UniqueDepthwiseCommon<uint8_t, int8_t, uint8_t>
depthwise(
const DepthwiseArgs &,
const Requantize32 &);