24.04
|
Go to the documentation of this file.
38 #if defined(ENABLE_FP32_KERNELS)
41 static constexpr
size_t default_mws_N1_fp32_neon = 24536;
42 static constexpr
size_t default_mws_V1_fp32_neon = 40510;
54 static const std::vector<CpuAddKernel::AddKernel> available_kernels = {
55 {
"neon_qu8_add_fixedpoint",
56 [](
const CpuAddKernelDataTypeISASelectorData &data)
59 {
"neon_qs8_add_fixedpoint",
60 [](
const CpuAddKernelDataTypeISASelectorData &data)
64 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::QASYMM8) && data.isa.sve2; },
67 [](
const CpuAddKernelDataTypeISASelectorData &data)
71 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::QSYMM16) && data.isa.sve2; },
74 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::F32) && data.isa.sve; },
77 [](
const CpuAddKernelDataTypeISASelectorData &data)
78 {
return (data.dt ==
DataType::F16) && data.isa.sve && data.isa.fp16; },
81 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::U8) && data.isa.sve; },
84 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::S16) && data.isa.sve; },
87 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::S32) && data.isa.sve; },
89 {
"neon_fp32_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::F32); },
92 [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::F16) && data.isa.fp16; },
94 {
"neon_u8_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::U8); },
96 {
"neon_s16_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::S16); },
98 {
"neon_s32_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::S32); },
100 {
"neon_qu8_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::QASYMM8); },
105 {
"neon_qs16_add", [](
const CpuAddKernelDataTypeISASelectorData &data) {
return (data.dt ==
DataType::QSYMM16); },
123 (src0.tensor_shape().x() != src1.tensor_shape().x()) &&
124 ((src0.data_type() != src1.data_type()) || (src0.data_type() !=
dst.data_type()) ||
125 (src1.data_type() !=
dst.data_type())),
126 "Broadcasting across width is supported on configurations where all tensors have the same data type");
129 if (
dst.total_size() > 0)
133 "Wrong shape for dst");
137 const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(
138 CpuAddKernelDataTypeISASelectorData{src0.data_type(),
CPUInfo::get().get_isa(), can_use_fixedpoint});
151 const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(
157 _run_method = uk->ukernel;
158 _name = std::string(
"CpuAddKernel").append(
"/").append(uk->name);
169 ICpuKernel::configure(win);
195 _run_method(src0, src1,
dst, _policy,
window);
200 return _name.c_str();
205 return available_kernels;
212 #if defined(ENABLE_FP32_KERNELS)
218 mws = default_mws_N1_fp32_neon;
222 mws = default_mws_V1_fp32_neon;
240 return std::max(
static_cast<size_t>(1), mws);
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void add_fp32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void add_fp32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
template void add_q8_neon_fixedpoint< int8_t >(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
@ QASYMM8
quantized, asymmetric fixed-point unsigned 8-bit number
void add_qasymm8_signed_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
Set the data type and number of channels to the specified value if the current data type is unknown.
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
static CPUInfo & get()
Access the CPUInfo singleton.
constexpr size_t num_iterations(size_t dimension) const
Return the number of iterations needed to iterate through a given dimension.
Interface for CPU tensor.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
#define REGISTER_FP16_NEON(func_name)
static const std::vector< AddKernel > & get_available_kernels()
void add_s32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void add_u8_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
#define REGISTER_QSYMM16_SVE2(func_name)
#define REGISTER_QASYMM8_NEON(func_name)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
@ QSYMM16
quantized, symmetric fixed-point 16-bit number
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
#define REGISTER_FP32_NEON(func_name)
void add_s16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
bool empty() const
Checks if pack is empty.
#define REGISTER_QSYMM16_NEON(func_name)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
#define REGISTER_FP32_SVE(func_name)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
void add_qasymm8_signed_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void add_qsymm16_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void add_fp16_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
@ U8
unsigned 8-bit number
size_t get_mws(const CPUInfo &platform, size_t thread_count) const override
Return minimum workload size of the relevant kernel.
void add_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
@ S16
signed 16-bit number
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
const char * name() const override
Name of the kernel.
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point signed 8-bit number
void add_fp16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
#define REGISTER_INTEGER_NEON(func_name)
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void add_s32_sve(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
#define REGISTER_QASYMM8_SIGNED_SVE2(func_name)
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void add_qasymm8_sve2(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
#define REGISTER_INTEGER_SVE(func_name)
void add_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
template void add_q8_neon_fixedpoint< uint8_t >(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Describe a multidimensional execution window.
void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ConvertPolicy policy)
Initialise the kernel's inputs, dst and overflow conversion policy.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
std::pair< Window, size_t > calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1)
Copyright (c) 2017-2024 Arm Limited.
#define REGISTER_FP16_SVE(func_name)
ConvertPolicy
Policy to handle integer overflow.
@ F16
16-bit floating-point number
@ S32
signed 32-bit number
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
size_t num_iterations_total() const
Return the total number of iterations needed to iterate through the entire window.
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
Static function to check if given info will lead to a valid configuration.
Store the tensor's metadata.
@ F32
32-bit floating-point number
static TensorShape broadcast_shape(const Shapes &...shapes)
If shapes are broadcast compatible, return the broadcasted shape.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void add_u8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
#define REGISTER_QASYMM8_SVE2(func_name)
static constexpr size_t default_mws
bool add_q8_neon_fixedpoint_possible(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
void add_qsymm16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
CPUModel get_cpu_model(unsigned int cpuid) const
Gets the cpu model for a given cpuid.