// Default `mws` constants for the Neon fp32 kernels. Further down in this file
// they are assigned to a local `mws` when `_run_method` is the
// neon_fp32_elementwise_binary instantiation for MIN/MAX or DIV respectively.
// NOTE(review): "mws" presumably stands for minimum workload size, and the
// N1/V1 suffixes look like CPU model tags -- confirm. The code that chooses
// between the N1 and V1 value is not visible in this view.
36 #if defined(ENABLE_FP32_KERNELS)
39 static constexpr
size_t default_min_max_mws_N1_fp32_neon = 25308;
40 static constexpr
size_t default_min_max_mws_V1_fp32_neon = 34772;
41 static constexpr
size_t default_div_mws_N1_fp32_neon = 19043;
42 static constexpr
size_t default_div_mws_V1_fp32_neon = 25511;
// Table of arithmetic kernel descriptors, instantiated once per
// ArithmeticOperation. Each entry starts with the kernel's name string followed
// by a selector lambda that receives an ElementwiseDataTypeISASelectorData.
// Entries are listed as sve2_* first, then sve_*, then neon_*.
// NOTE(review): most selector bodies and any further entry fields are not
// visible in this view; the only visible condition is the sve_fp16 one, which
// requires DataType::F16 together with the sve and fp16 ISA flags (its
// continuation is cut off).
54 template <ArithmeticOperation op>
55 const std::vector<CpuElementwiseKernel<CpuArithmeticKernel>::ElementwiseKernel> available_kernels_arithmetic = {
56 {
"sve2_qu8_arithmetic",
57 [](
const ElementwiseDataTypeISASelectorData &data)
60 {
"sve2_qs8_arithmetic",
61 [](
const ElementwiseDataTypeISASelectorData &data) {
65 {
"sve_fp32_arithmetic",
66 [](
const ElementwiseDataTypeISASelectorData &data)
69 {
"sve_s32_arithmetic",
70 [](
const ElementwiseDataTypeISASelectorData &data)
73 {
"sve_s16_arithmetic",
74 [](
const ElementwiseDataTypeISASelectorData &data)
77 {
"sve_fp16_arithmetic",
78 [](
const ElementwiseDataTypeISASelectorData &data)
80 return data.dt ==
DataType::F16 && data.isa.sve && data.isa.fp16 &&
84 {
"neon_fp32_arithmetic",
86 [](
const ElementwiseDataTypeISASelectorData &data)
89 {
"neon_s32_arithmetic",
90 [](
const ElementwiseDataTypeISASelectorData &data)
93 {
"neon_fp16_arithmetic",
94 [](
const ElementwiseDataTypeISASelectorData &data)
97 {
"neon_s16_arithmetic",
98 [](
const ElementwiseDataTypeISASelectorData &data)
101 {
"neon_qu8_arithmetic",
102 [](
const ElementwiseDataTypeISASelectorData &data)
105 {
"neon_qs8_arithmetic",
106 [](
const ElementwiseDataTypeISASelectorData &data)
// Table of comparison kernel descriptors, instantiated once per
// ComparisonOperation. Each entry starts with the kernel's name string followed
// by a selector lambda that receives an ElementwiseDataTypeISASelectorData.
// Entries are listed as sve2_* first, then sve_*, then neon_*.
// NOTE(review): the identifier is spelled "comperison" (likely meant
// "comparison"); a rename would also have to update every use of this variable
// template elsewhere in the file.
// NOTE(review): most selector bodies are not visible in this view; the only
// visible condition is the sve_fp16 one, which requires DataType::F16 together
// with the sve and fp16 ISA flags (its continuation is cut off).
110 template <ComparisonOperation op>
111 const std::vector<CpuElementwiseKernel<CpuComparisonKernel>::ElementwiseKernel> available_kernels_comperison = {
112 {
"sve2_qu8_comparison",
113 [](
const ElementwiseDataTypeISASelectorData &data)
116 {
"sve2_qs8_comparison",
117 [](
const ElementwiseDataTypeISASelectorData &data) {
121 {
"sve_u8_comparison",
122 [](
const ElementwiseDataTypeISASelectorData &data)
125 {
"sve_fp32_comparison",
126 [](
const ElementwiseDataTypeISASelectorData &data)
129 {
"sve_s16_comparison",
130 [](
const ElementwiseDataTypeISASelectorData &data)
133 {
"sve_s32_comparison",
134 [](
const ElementwiseDataTypeISASelectorData &data)
137 {
"sve_fp16_comparison",
138 [](
const ElementwiseDataTypeISASelectorData &data)
140 return data.dt ==
DataType::F16 && data.isa.sve && data.isa.fp16 &&
144 {
"neon_u8_comparison",
145 [](
const ElementwiseDataTypeISASelectorData &data)
148 {
"neon_fp32_comparison",
149 [](
const ElementwiseDataTypeISASelectorData &data)
152 {
"neon_s16_comparison",
153 [](
const ElementwiseDataTypeISASelectorData &data)
156 {
"neon_s32_comparison",
157 [](
const ElementwiseDataTypeISASelectorData &data)
160 {
"neon_qu8_comparison",
161 [](
const ElementwiseDataTypeISASelectorData &data)
164 {
"neon_qs8_comparison",
165 [](
const ElementwiseDataTypeISASelectorData &data)
168 {
"neon_fp16_comparison",
169 [](
const ElementwiseDataTypeISASelectorData &data)
// Builds and returns (by const reference) the combined list of arithmetic
// kernels: the per-operation tables for ADD, SUB, DIV, MIN, MAX, SQUARED_DIFF,
// POWER and PRELU are appended, in that order, to a function-local static
// vector. The function's name line is not visible in this view.
// NOTE(review): std::move here is the <algorithm> range overload. The source
// tables are declared const, so their elements end up being copied rather than
// moved.
// NOTE(review): no guard against repeated execution is visible, so each call
// appears to append all entries to the static vector again -- confirm, and
// consider building the list only once.
175 const std::vector<CpuElementwiseKernel<CpuArithmeticKernel>::ElementwiseKernel> &
178 static std::vector<CpuElementwiseKernel<CpuArithmeticKernel>::ElementwiseKernel> available_kernels;
179 std::move(available_kernels_arithmetic<ArithmeticOperation::ADD>.begin(),
180 available_kernels_arithmetic<ArithmeticOperation::ADD>.
end(), std::back_inserter(available_kernels));
181 std::move(available_kernels_arithmetic<ArithmeticOperation::SUB>.begin(),
182 available_kernels_arithmetic<ArithmeticOperation::SUB>.
end(), std::back_inserter(available_kernels));
183 std::move(available_kernels_arithmetic<ArithmeticOperation::DIV>.begin(),
184 available_kernels_arithmetic<ArithmeticOperation::DIV>.
end(), std::back_inserter(available_kernels));
185 std::move(available_kernels_arithmetic<ArithmeticOperation::MIN>.begin(),
186 available_kernels_arithmetic<ArithmeticOperation::MIN>.
end(), std::back_inserter(available_kernels));
187 std::move(available_kernels_arithmetic<ArithmeticOperation::MAX>.begin(),
188 available_kernels_arithmetic<ArithmeticOperation::MAX>.
end(), std::back_inserter(available_kernels));
189 std::move(available_kernels_arithmetic<ArithmeticOperation::SQUARED_DIFF>.begin(),
190 available_kernels_arithmetic<ArithmeticOperation::SQUARED_DIFF>.
end(),
191 std::back_inserter(available_kernels));
192 std::move(available_kernels_arithmetic<ArithmeticOperation::POWER>.begin(),
193 available_kernels_arithmetic<ArithmeticOperation::POWER>.
end(), std::back_inserter(available_kernels));
194 std::move(available_kernels_arithmetic<ArithmeticOperation::PRELU>.begin(),
195 available_kernels_arithmetic<ArithmeticOperation::PRELU>.
end(), std::back_inserter(available_kernels));
197 return available_kernels;
// Builds and returns (by const reference) the combined list of comparison
// kernels: the per-operation tables for Equal, NotEqual, Greater, GreaterEqual,
// Less and LessEqual are appended, in that order, to a function-local static
// vector. The function's name line is not visible in this view.
// NOTE(review): std::move here is the <algorithm> range overload. The source
// tables are declared const, so their elements end up being copied rather than
// moved.
// NOTE(review): no guard against repeated execution is visible, so each call
// appears to append all entries to the static vector again -- confirm, and
// consider building the list only once.
200 const std::vector<CpuElementwiseKernel<CpuComparisonKernel>::ElementwiseKernel> &
203 static std::vector<CpuElementwiseKernel<CpuComparisonKernel>::ElementwiseKernel> available_kernels;
204 std::move(available_kernels_comperison<ComparisonOperation::Equal>.begin(),
205 available_kernels_comperison<ComparisonOperation::Equal>.
end(), std::back_inserter(available_kernels));
206 std::move(available_kernels_comperison<ComparisonOperation::NotEqual>.begin(),
207 available_kernels_comperison<ComparisonOperation::NotEqual>.
end(), std::back_inserter(available_kernels));
208 std::move(available_kernels_comperison<ComparisonOperation::Greater>.begin(),
209 available_kernels_comperison<ComparisonOperation::Greater>.
end(), std::back_inserter(available_kernels));
210 std::move(available_kernels_comperison<ComparisonOperation::GreaterEqual>.begin(),
211 available_kernels_comperison<ComparisonOperation::GreaterEqual>.
end(),
212 std::back_inserter(available_kernels));
213 std::move(available_kernels_comperison<ComparisonOperation::Less>.begin(),
214 available_kernels_comperison<ComparisonOperation::Less>.
end(), std::back_inserter(available_kernels));
215 std::move(available_kernels_comperison<ComparisonOperation::LessEqual>.begin(),
216 available_kernels_comperison<ComparisonOperation::LessEqual>.
end(),
217 std::back_inserter(available_kernels));
219 return available_kernels;
222 template <
class Derived>
235 if (
dst.total_size() > 0)
238 "Wrong shape for output");
// Shared configuration step for arithmetic kernels (only part of the body is
// visible in this view). The visible statements:
//  - build an ElementwiseDataTypeISASelectorData from src0's data type, the
//    CPU ISA information and _op cast to int;
//  - store uk->ukernel in _run_method and set _name to
//    "CpuArithmeticKernel/" followed by uk->name;
//  - branch on whether either input is dynamic (the branch body is not visible);
//  - compute the output shape and window from both input shapes and pass the
//    window to ICpuKernel::configure.
// NOTE(review): how `uk` is obtained and whether it is null-checked is not
// visible here -- presumably a kernel lookup using the selector data; confirm.
244 void CpuArithmeticKernel::configure_common(
const ITensorInfo *src0,
const ITensorInfo *src1, ITensorInfo *
dst)
249 ElementwiseDataTypeISASelectorData{src0->data_type(),
CPUInfo::get().get_isa(),
static_cast<int>(_op)});
253 _run_method = uk->ukernel;
254 _name = std::string(
"CpuArithmeticKernel").append(
"/").append(uk->name);
257 if (src0->is_dynamic() || src1->is_dynamic())
262 auto shape_and_window = compute_output_shape_and_window(src0->tensor_shape(), src1->tensor_shape());
264 ICpuKernel::configure(shape_and_window.second);
// Shared configuration step for comparison kernels (only part of the body is
// visible in this view). The visible statements:
//  - build an ElementwiseDataTypeISASelectorData from src0's data type, the
//    CPU ISA information and _op cast to int;
//  - store uk->ukernel in _run_method and set _name to
//    "CpuComparisonKernel/" followed by uk->name;
//  - branch on whether either input is dynamic (the branch body is not visible);
//  - compute the output shape and window from both input shapes and pass the
//    window to ICpuKernel::configure.
// NOTE(review): how `uk` is obtained and whether it is null-checked is not
// visible here -- presumably a kernel lookup using the selector data; confirm.
267 void CpuComparisonKernel::configure_common(
const ITensorInfo *src0,
const ITensorInfo *src1, ITensorInfo *
dst)
272 ElementwiseDataTypeISASelectorData{src0->data_type(),
CPUInfo::get().get_isa(),
static_cast<int>(_op)});
276 _run_method = uk->ukernel;
277 _name = std::string(
"CpuComparisonKernel").append(
"/").append(uk->name);
280 if (src0->is_dynamic() || src1->is_dynamic())
285 auto shape_and_window = compute_output_shape_and_window(src0->tensor_shape(), src1->tensor_shape());
287 ICpuKernel::configure(shape_and_window.second);
290 template <
class Derived>
300 _run_method(src0, src1,
dst, window);
307 template <
class Derived>
310 return _name.c_str();
323 CpuArithmeticKernel::configure_common(src0, src1,
dst);
331 if (
dst.total_size() > 0)
335 return validate_arguments_common(src0, src1,
dst);
// Fragment of an mws computation (the enclosing function header is not visible
// in this view). When the selected run method is the Neon fp32 MIN or MAX
// kernel, one of the two default min/max constants is assigned to mws; the
// condition that picks the N1 or V1 value is not visible here.
// The visible return clamps the result so that it is never below 1.
353 #if defined(ENABLE_FP32_KERNELS)
354 if (this->_run_method == &neon_fp32_elementwise_binary<ArithmeticOperation::MIN> ||
355 this->_run_method == &neon_fp32_elementwise_binary<ArithmeticOperation::MAX>)
360 mws = default_min_max_mws_N1_fp32_neon;
364 mws = default_min_max_mws_V1_fp32_neon;
382 return std::max(
static_cast<size_t>(1), mws);
397 CpuArithmeticKernel::configure_common(src0, src1,
dst);
// Fragment of an mws computation (the enclosing function header is not visible
// in this view). When the selected run method is the Neon fp32 DIV kernel, one
// of the two default div constants is assigned to mws; the condition that
// picks the N1 or V1 value is not visible here.
// The visible return clamps the result so that it is never below 1.
404 #if defined(ENABLE_FP32_KERNELS)
405 if (this->_run_method == &neon_fp32_elementwise_binary<ArithmeticOperation::DIV>)
410 mws = default_div_mws_N1_fp32_neon;
414 mws = default_div_mws_V1_fp32_neon;
432 return std::max(
static_cast<size_t>(1), mws);
444 return CpuArithmeticKernel::validate_arguments(src0, src1,
dst);
459 CpuArithmeticKernel::configure_common(src0, src1,
dst);
465 return CpuArithmeticKernel::validate_arguments(src0, src1,
dst);
483 CpuComparisonKernel::configure_common(src0, src1,
dst);
492 if (
dst.total_size() > 0)
496 return validate_arguments_common(src0, src1,
dst);