35 #if defined(__aarch64__) 36 #if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) 44 #endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) 54 #endif // defined(__aarch64__) 66 bool qp_weights_are_symmetric(
const DepthwiseArgs &,
const void *_qp)
// Candidate table of depthwise implementations for the
// <int8_t, int8_t, int8_t> element types with a Requantize32 parameter.
//
// Each visible entry supplies, in order:
//   - a DepthwiseMethod tag (DEPTHFIRST for every real entry shown here),
//   - a kernel name string,
//   - a constraint<Requantize32>(...) expression combining checks such as
//     is_supported<Kernel>, has_no_channel_multiplier, qp_has_no_left_shift,
//     qp_weights_are_symmetric and cpu_has_dot_product,
//   - a factory lambda taking (const DepthwiseArgs &, const Requantize32 &)
//     that allocates the matching wrapper with `new` and returns it as a raw
//     DepthwiseCommon<int8_t, int8_t, int8_t> *.
//
// NOTE(review): the factories hand back an owning raw pointer; presumably the
// caller wraps it (cf. UniqueDepthwiseCommon) -- confirm.
// NOTE(review): entry order presumably encodes selection priority (SVE before
// A64, specialised before generic) -- verify against the code that walks
// this table.
// NOTE(review): this listing has the original line numbers fused into the
// text and some lines (braces, a few initialiser fields) appear to be
// missing; cross-check against the upstream file.
74 static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
// --- SVE kernels: only compiled for __aarch64__ when both
// ARM_COMPUTE_ENABLE_SVE and ARM_COMPUTE_ENABLE_SVE2 are defined. ---
// The "s8qs" entry additionally lists qp_weights_are_symmetric.
75 #if defined(__aarch64__) 76 #if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) 78 DepthwiseMethod::DEPTHFIRST,
79 "sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
80 constraint<Requantize32>(is_supported<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
81 has_no_channel_multiplier,
83 qp_weights_are_symmetric,
86 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
87 return new DepthwiseDepthfirstQuantized<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>(
args, qp);
91 DepthwiseMethod::DEPTHFIRST,
92 "sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
93 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
94 has_no_channel_multiplier,
98 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
99 return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>(
args, qp);
103 DepthwiseMethod::DEPTHFIRST,
104 "sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
105 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
106 has_no_channel_multiplier,
107 qp_has_no_left_shift,
110 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
111 return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(
args, qp);
115 DepthwiseMethod::DEPTHFIRST,
116 "sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
117 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
118 has_no_channel_multiplier,
119 qp_has_no_left_shift,
122 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
123 return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(
args, qp);
127 DepthwiseMethod::DEPTHFIRST,
128 "sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
129 constraint<Requantize32>(is_supported<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
130 has_no_channel_multiplier,
131 qp_has_no_left_shift,
134 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
135 return new DepthwiseDepthfirstQuantized<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(
args, qp);
// SVE "with_multiplier" entries: the visible constraint lines do not list
// has_no_channel_multiplier, and the factory builds
// DepthwiseDepthfirstWithMultiplierQuantized instead of
// DepthwiseDepthfirstQuantized.
139 DepthwiseMethod::DEPTHFIRST,
140 "sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
141 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
142 qp_has_no_left_shift,
145 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
146 return new DepthwiseDepthfirstWithMultiplierQuantized<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>(
args, qp);
150 DepthwiseMethod::DEPTHFIRST,
151 "sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
152 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
153 qp_has_no_left_shift,
156 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
157 return new DepthwiseDepthfirstWithMultiplierQuantized<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>(
args, qp);
// --- A64 kernels: still inside the __aarch64__ guard, outside the SVE
// guard. The "dot" entries list cpu_has_dot_product in their constraint; the
// "mla" entries do not. ---
160 #endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2) 162 DepthwiseMethod::DEPTHFIRST,
163 "a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
164 constraint<Requantize32>(is_supported<a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
165 has_no_channel_multiplier,
166 qp_weights_are_symmetric,
167 qp_has_no_left_shift,
168 cpu_has_dot_product),
170 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
171 return new DepthwiseDepthfirstQuantized<a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>(
args, qp);
175 DepthwiseMethod::DEPTHFIRST,
176 "a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
177 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
178 has_no_channel_multiplier,
179 qp_has_no_left_shift,
180 cpu_has_dot_product),
182 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
183 return new DepthwiseDepthfirstQuantized<a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>(
args, qp);
187 DepthwiseMethod::DEPTHFIRST,
188 "a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
189 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
190 has_no_channel_multiplier,
191 qp_has_no_left_shift),
193 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
194 return new DepthwiseDepthfirstQuantized<a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(
args, qp);
198 DepthwiseMethod::DEPTHFIRST,
199 "a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
200 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
201 has_no_channel_multiplier,
202 qp_has_no_left_shift),
204 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
205 return new DepthwiseDepthfirstQuantized<a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(
args, qp);
209 DepthwiseMethod::DEPTHFIRST,
210 "a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
211 constraint<Requantize32>(is_supported<a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
212 has_no_channel_multiplier,
213 qp_has_no_left_shift),
215 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
216 return new DepthwiseDepthfirstQuantized<a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(
args, qp);
// Generic entry: its constraint lists only has_no_channel_multiplier (no
// is_supported<> or qp_has_no_left_shift check). The name string says
// "output3x3" while the kernel type is the "output9" variant passed to
// DepthwiseDepthfirstGenericQuantized together with the arguments 3, 3.
220 DepthwiseMethod::DEPTHFIRST,
221 "a64_s8q_nhwc_generic_output3x3_mla_depthfirst",
222 constraint<Requantize32>(has_no_channel_multiplier),
224 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
225 return new DepthwiseDepthfirstGenericQuantized<a64_s8q_nhwc_generic_output9_mla_depthfirst, 3, 3>(
args, qp);
// A64 "with_multiplier" entries: as with the SVE ones, the constraint does
// not list has_no_channel_multiplier and the factory builds
// DepthwiseDepthfirstWithMultiplierQuantized.
229 DepthwiseMethod::DEPTHFIRST,
230 "a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
231 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
232 qp_has_no_left_shift,
233 cpu_has_dot_product),
235 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
236 return new DepthwiseDepthfirstWithMultiplierQuantized<a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>(
args, qp);
240 DepthwiseMethod::DEPTHFIRST,
241 "a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
242 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
243 qp_has_no_left_shift,
244 cpu_has_dot_product),
246 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
247 return new DepthwiseDepthfirstWithMultiplierQuantized<a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>(
args, qp);
// Generic with-multiplier entry. No constraint<> expression is visible for it
// in this listing. NOTE(review): confirm whether it is intentionally
// unconstrained.
251 DepthwiseMethod::DEPTHFIRST,
252 "a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
255 [] (
const DepthwiseArgs &
args,
const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
256 return new DepthwiseDepthfirstGenericWithMultiplierQuantized<a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst>(
args, qp);
// Final entry, outside every architecture guard: DEFAULT method, empty name
// and three null pointers. Presumably the end-of-list sentinel -- confirm
// against the code that iterates this table.
259 #endif // defined(__aarch64__) 260 { DepthwiseMethod::DEFAULT,
"",
nullptr,
nullptr,
nullptr },
266 return depthwise_s8q_methods;
// Explicit instantiation of depthwise() returning
// UniqueDepthwiseCommon<int8_t, int8_t, int8_t> for the argument types
// (const DepthwiseArgs &, const Requantize32 &).
// NOTE(review): presumably the template definition lives in a shared
// implementation header and this line emits the s8q specialisation from this
// translation unit -- confirm.
269 template UniqueDepthwiseCommon<int8_t, int8_t, int8_t>
depthwise(
const DepthwiseArgs &,
const Requantize32 &);
const DepthwiseImplementation< float > * depthwise_implementation_list()
template std::vector< KernelDescription > get_compatible_kernels< int8_t, int8_t, int8_t, Requantize32 >(const DepthwiseArgs &, const Requantize32 &)
template UniqueDepthwiseCommon< float > depthwise(const DepthwiseArgs &, const Nothing &)
template UniqueDepthwiseCommon< int8_t, int8_t, int8_t > depthwise(const DepthwiseArgs &, const Requantize32 &)