53 using namespace misc::shape_calculator;
55 static const std::vector<CpuPool2dKernel::PoolingKernel> available_kernels =
58 "neon_qu8_nhwc_poolMxN",
63 "neon_qs8_nhwc_poolMxN",
68 "neon_f16_nhwc_poolMxN",
73 "neon_fp32_nhwc_poolMxN",
77 #if defined(ENABLE_NCHW_KERNELS) 79 "neon_qu8_nchw_pool2",
80 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
84 "neon_qu8_nchw_pool3",
85 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
89 "neon_qu8_nchw_poolMxN",
94 "neon_qs8_nchw_pool2",
95 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
99 "neon_qs8_nchw_pool3",
100 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
104 "neon_qs8_nchw_poolMxN",
109 "neon_fp16_nchw_pool2",
110 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
114 "neon_fp16_nchw_pool3",
115 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16 && data.isa.fp16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
119 "neon_fp16_nchw_poolMxN",
124 "neon_fp32_nchw_pool2",
125 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
129 "neon_fp32_nchw_pool3",
130 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
134 "neon_fp32_nchw_pool7",
135 [](
const PoolDataTypeISASelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
139 "neon_fp32_nchw_poolMxN",
147 const ITensorInfo *indices, Size2D pool_size)
153 int pool_stride_x = 0;
154 int pool_stride_y = 0;
155 int output_width = 0;
156 int output_height = 0;
158 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
165 "Pooling region that is entirely outside input tensor is unsupported for non-float types");
168 pool_size.x(), pool_size.y(), pool_info.pad_stride_info);
171 TensorInfo out_info(TensorInfo(
compute_pool_shape(*src, pool_info), 1, dst->data_type()));
172 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
185 "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types");
187 if(dst->total_size() != 0)
205 std::pair<Status, Window>
validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst, ITensorInfo *indices,
const PoolingLayerInfo &pool_info,
207 int pool_size_x,
int pool_size_y)
220 int pool_stride_x = 0;
221 int pool_stride_y = 0;
224 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
226 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
227 const bool is_square = pool_size_x == pool_size_y;
228 const unsigned int pooled_w = dst->dimension(idx_width);
229 const unsigned int pooled_h = dst->dimension(idx_height);
232 num_elems_processed_per_iteration = 1;
236 switch(src->data_type())
243 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
246 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
252 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 254 num_elems_processed_per_iteration = 1;
258 num_elems_processed_per_iteration = 1;
266 bool window_changed =
false;
269 TensorShape
dst_shape{ src->tensor_shape() };
272 TensorInfo dst_info(src->clone()->set_tensor_shape(
dst_shape));
276 return std::make_pair(err, win);
303 _pool_info = pool_info;
305 _pool_size = pool_size;
306 _pool_stride_x = pad_stride_info.
stride().first;
307 _run_method = uk->ukernel;
308 _name = std::string(
"CpuPool2dKernel").append(
"/").append(uk->name);
314 ICpuKernel::configure(win);
320 pool_size.x(), pool_size.y());
322 ICpuKernel::configure(win_config.second);
330 unsigned int num_elems_processed_per_iteration = 0;
345 pool_size_x, pool_size_y)
362 const unsigned int pool_stride_x = _pool_info.pad_stride_info.stride().first;
363 const unsigned int pool_stride_y = _pool_info.pad_stride_info.stride().second;
364 const unsigned int pool_size = _pool_info.pool_size.width;
366 Window window_src(window);
370 unsigned int window_x_inc = 0;
376 window_x_inc = pool_stride_x;
377 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
379 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
387 window_x_inc = pool_stride_x;
404 _run_method(src, dst, indices, _pool_info, window_src, window);
409 return _name.c_str();
414 return available_kernels;
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices=nullptr)
Static function to check if given info will lead to a valid configuration.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void poolingMxN_qasymm8_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
static const auto * get_implementation(const SelectorType &selector, KernelSelectionType selection_type=KernelSelectionType::Supported)
Micro-kernel selector.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
#define REGISTER_FP16_NEON(func_name)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define REGISTER_FP32_NEON(func_name)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
static const std::vector< PoolingKernel > & get_available_kernels()
SimpleTensor< float > src
Copyright (c) 2017-2022 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
std::pair< int, int > scaled_dimensions_signed(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info)
Returns calculated width and height of output scaled tensor depending on dimensions rounding mode.
TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
Calculate the output pool shape of a tensor.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define REGISTER_QASYMM8_NEON(func_name)
1 channel, 1 U32 per channel
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
unsigned int num_elems_processed_per_iteration
Pooling Layer Information struct.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
void poolingMxN_fp16_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
const char * name() const override
Name of the kernel.
PoolingType
Available pooling types.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
std::pair< Status, Window > validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
PadStrideInfo pad_stride_info
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
void poolingMxN_qasymm8_signed_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info)
Check if the pool region is entirely outside the input tensor.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
quantized, asymmetric fixed-point 8-bit number signed
Includes all wrapper headers at once.
static CPUInfo & get()
Access the CPUInfo singleton.
constexpr int end() const
Return the end of the dimension.
void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices=nullptr)
Configure kernel for a given list of arguments.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
cpuinfo::CpuIsaInfo get_isa() const
Gets the current cpu's ISA information.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.