53 using namespace misc::shape_calculator;
55 struct PoolingSelectorData
72 static const PoolingKernel available_kernels[] =
75 "neon_qu8_nhwc_poolMxN",
80 "neon_qs8_nhwc_poolMxN",
84 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) 86 "neon_f16_nhwc_poolMxN",
92 "neon_fp32_nhwc_poolMxN",
96 #if defined(ENABLE_NCHW_KERNELS) 98 "neon_qu8_nchw_pool2",
99 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
103 "neon_qu8_nchw_pool3",
104 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
108 "neon_qu8_nchw_poolMxN",
113 "neon_qs8_nchw_pool2",
114 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
118 "neon_qs8_nchw_pool3",
119 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
123 "neon_qs8_nchw_poolMxN",
127 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) 129 "neon_fp16_nchw_pool2",
130 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
134 "neon_fp16_nchw_pool3",
135 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
139 "neon_fp16_nchw_poolMxN",
145 "neon_fp32_nchw_pool2",
146 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
150 "neon_fp32_nchw_pool3",
151 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
155 "neon_fp32_nchw_pool7",
156 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
160 "neon_fp32_nchw_poolMxN",
175 for(
const auto &uk : available_kernels)
177 if(uk.is_selected({ dt, dl, pool_stride_x, pool_size }))
185 Status validate_arguments(
const ITensorInfo *
src,
const ITensorInfo *
dst,
const PoolingLayerInfo &pool_info,
186 const ITensorInfo *indices, Size2D pool_size)
192 int pool_stride_x = 0;
193 int pool_stride_y = 0;
194 int output_width = 0;
195 int output_height = 0;
197 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
203 pool_size.x(), pool_size.y(), pool_info.pad_stride_info);
206 TensorInfo out_info(TensorInfo(
compute_pool_shape(*src, pool_info), 1, dst->data_type()));
207 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
220 "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types");
222 if(dst->total_size() != 0)
240 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst, ITensorInfo *indices,
const PoolingLayerInfo &pool_info,
242 BorderSize &border_size,
243 int pool_size_x,
int pool_size_y)
255 unsigned int num_elems_read_per_iteration = 0;
256 unsigned int num_elems_horizontal_window = 0;
257 int pool_stride_x = 0;
258 int pool_stride_y = 0;
261 const int src_width = src->dimension(idx_width);
262 const int src_height = src->dimension(idx_height);
263 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
264 std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
265 const int pool_pad_right = pad_stride_info.pad_right();
266 const int pool_pad_top = pad_stride_info.pad_top();
267 const int pool_pad_left = pad_stride_info.pad_left();
268 const int pool_pad_bottom = pad_stride_info.pad_bottom();
269 const bool is_square = pool_size_x == pool_size_y;
270 const unsigned int pooled_w = dst->dimension(idx_width);
271 const unsigned int pooled_h = dst->dimension(idx_height);
274 num_elems_read_per_iteration = 1;
275 num_elems_processed_per_iteration = 1;
276 num_elems_horizontal_window = 1;
280 switch(src->data_type())
287 num_elems_read_per_iteration = 16;
288 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
289 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
292 num_elems_read_per_iteration = 16;
293 num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
294 num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
300 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 306 num_elems_read_per_iteration = 4;
307 num_elems_processed_per_iteration = 1;
308 num_elems_horizontal_window = 1;
319 num_elems_read_per_iteration = 2;
322 num_elems_read_per_iteration = 4;
325 num_elems_read_per_iteration = 8;
330 num_elems_processed_per_iteration = 1;
331 num_elems_horizontal_window = 1;
339 bool window_changed =
false;
344 const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
346 const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - src_width;
347 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - src_height;
348 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
349 border_size.right = std::max(upper_bound_w, pool_pad_right);
350 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
351 TensorShape
dst_shape{ src->tensor_shape() };
354 TensorInfo dst_info(src->clone()->set_tensor_shape(
dst_shape));
356 AccessWindowStatic src_access(src, -pool_pad_left, -pool_pad_top,
ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom);
357 AccessWindowHorizontal dst_access(dst, 0, num_elems_horizontal_window);
360 AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window);
367 dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape()));
369 border_size = src->padding();
373 return std::make_pair(err, win);
405 _pool_info = pool_info;
408 _pool_stride_x = pad_stride_info.
stride().first;
409 _run_method = uk->ukernel;
410 _name = std::string(
"CpuPool2dKernel").append(
"/").append(uk->name);
416 ICpuKernel::configure(win);
421 auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration,
424 ICpuKernel::configure(win_config.second);
448 pool_size_x, pool_size_y)
465 const unsigned int pool_stride_x = _pool_info.pad_stride_info.stride().first;
466 const unsigned int pool_stride_y = _pool_info.pad_stride_info.stride().second;
467 const unsigned int pool_size = _pool_info.pool_size.width;
469 Window window_src(window);
473 unsigned int window_x_inc = 0;
480 if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
482 window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
507 _run_method(src, dst, indices, _pool_info, window_src, window);
512 return _name.c_str();
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices=nullptr)
Static function to check if given info will lead to a valid configuration.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void poolingMxN_qasymm8_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
#define REGISTER_FP16_NEON(func_name)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
Container for 2D border size.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
BorderSize border_size() const override
The size of the border for that kernel.
#define REGISTER_FP32_NEON(func_name)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
decltype(strategy::transforms) typedef type
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
std::pair< int, int > scaled_dimensions_signed(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info)
Returns calculated width and height of output scaled tensor depending on dimensions rounding mode...
TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
Calculate the output pool shape of a tensor.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define REGISTER_QASYMM8_NEON(func_name)
1 channel, 1 U32 per channel
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger than or equal to value that is a multiple of divisor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
unsigned int num_elems_processed_per_iteration
Pooling Layer Information struct.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
void poolingMxN_fp16_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
const char * name() const override
Name of the kernel.
PoolingType
Available pooling types.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
PadStrideInfo pad_stride_info
Information about executing thread and CPU.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
void poolingMxN_qasymm8_signed_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
const PoolingSelectorPtr is_selected
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
quantized, asymmetric fixed-point 8-bit number signed
Includes all wrapper headers at once.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
constexpr int end() const
Return the end of the dimension.
DataType
Available data types.
void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices=nullptr)
Configure kernel for a given list of arguments.
DataLayout
[DataLayout enum definition]
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.