53 using namespace misc::shape_calculator;
55 struct PoolingSelectorData
/** Function-pointer type for a kernel-selection predicate.
 *
 * std::add_pointer<F>::type on a function type F yields F*, so this is
 * equivalent to `bool (*)(const PoolingSelectorData &)`.
 * The predicate receives the selector data by const reference and returns
 * true when the associated kernel applies to it.
 */
63 using PoolingSelectorPtr = std::add_pointer<bool(
const PoolingSelectorData &data)>
::type;
/** Function-pointer type for a pooling kernel entry point.
 *
 * Equivalent to
 * `void (*)(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)`.
 * NOTE(review): the parameters are unnamed here; the call through `ukernel`
 * later in this file passes, in order, the source tensor, the destination
 * tensor, the indices tensor, the pooling info, the source window and the
 * execution window. Confirm that `ukernel` is declared with this type.
 */
64 using PoolingKernelPtr = std::add_pointer<void(
const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &,
const Window &,
const Window &)>
::type;
72 static const PoolingKernel available_kernels[] =
75 "poolingMxN_qasymm8_neon_nhwc",
80 "poolingMxN_qasymm8_signed_neon_nhwc",
84 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) 86 "poolingMxN_fp16_neon_nhwc",
92 "poolingMxN_fp32_neon_nhwc",
96 #if defined(ENABLE_NCHW_KERNELS) 98 "pooling2_qasymm8_neon_nchw",
99 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
103 "pooling3_qasymm8_neon_nchw",
104 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
108 "poolingMxN_qasymm8_neon_nchw",
113 "pooling2_qasymm8_signed_neon_nchw",
114 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
118 "pooling3_qasymm8_signed_neon_nchw",
119 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
123 "poolingMxN_qasymm8_signed_neon_nchw",
127 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) 129 "pooling2_fp16_neon_nchw",
130 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
134 "pooling3_fp16_neon_nchw",
135 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
139 "poolingMxN_fp16_neon_nchw",
145 "pooling2_fp32_neon_nchw",
146 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
150 "pooling3_fp32_neon_nchw",
151 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
155 "pooling7_fp32_neon_nchw",
156 [](
const PoolingSelectorData & data) {
return ((data.dl ==
DataLayout::NCHW) && (data.dt ==
DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
160 "poolingMxN_fp32_neon_nchw",
175 for(
const auto &uk : available_kernels)
177 if(uk.is_selected({ dt, dl, pool_stride_x, pool_size }))
186 unsigned int &pooled_w,
unsigned int pooled_h,
const ITensorInfo *indices, Size2D
pool_size)
191 int pool_stride_y = 0;
193 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
194 std::tie(
pool_stride_x, pool_stride_y) = pad_stride_info.stride();
207 "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types");
209 if(
dst->total_size() != 0)
230 Status validate_arguments_pool_info(
const unsigned int pool_size_x,
const unsigned int pool_size_y)
238 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *
src, ITensorInfo *
dst, ITensorInfo *indices,
const PoolingLayerInfo &pool_info,
240 BorderSize &border_size,
241 unsigned int pooled_w,
unsigned int pooled_h,
int pool_size_x,
int pool_size_y)
253 unsigned int num_elems_read_per_iteration = 0;
254 unsigned int num_elems_horizontal_window = 0;
256 int pool_stride_y = 0;
261 const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
262 std::tie(
pool_stride_x, pool_stride_y) = pad_stride_info.stride();
263 const int pool_pad_right = pad_stride_info.pad_right();
264 const int pool_pad_top = pad_stride_info.pad_top();
265 const int pool_pad_left = pad_stride_info.pad_left();
266 const int pool_pad_bottom = pad_stride_info.pad_bottom();
267 const bool is_square = pool_size_x == pool_size_y;
277 num_elems_read_per_iteration = 1;
279 num_elems_horizontal_window = 1;
283 switch(
src->data_type())
290 num_elems_read_per_iteration = 16;
295 num_elems_read_per_iteration = 16;
303 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 309 num_elems_read_per_iteration = 4;
311 num_elems_horizontal_window = 1;
322 num_elems_read_per_iteration = 2;
325 num_elems_read_per_iteration = 4;
328 num_elems_read_per_iteration = 8;
334 num_elems_horizontal_window = 1;
342 bool window_changed =
false;
350 const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - src_height;
351 border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
352 border_size.right = std::max(upper_bound_w, pool_pad_right);
353 border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
357 TensorInfo dst_info(
src->clone()->set_tensor_shape(
dst_shape));
359 AccessWindowStatic src_access(
src, -pool_pad_left, -pool_pad_top,
ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom);
360 AccessWindowHorizontal dst_access(
dst, 0, num_elems_horizontal_window);
363 AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window);
370 dst_access.set_valid_region(win, ValidRegion(Coordinates(),
dst->tensor_shape()));
372 border_size =
src->padding();
376 return std::make_pair(err, win);
405 unsigned int pooled_w;
406 unsigned int pooled_h;
417 _pool_info = pool_info;
418 _data_layout =
src->data_layout();
420 _pool_stride_x = pad_stride_info.
stride().first;
426 ICpuKernel::configure(win);
431 auto win_config = validate_and_configure_window(
src,
dst, indices, pool_info, _num_elems_processed_per_iteration,
434 ICpuKernel::configure(win_config.second);
442 unsigned int pooled_w = 0;
443 unsigned int pooled_h = 0;
448 unsigned int pool_size_x = 0;
449 unsigned int pool_size_y = 0;
472 pool_size_x, pool_size_y)
496 unsigned int window_x_inc = 0;
497 switch(
src->info()->data_type())
505 window_x_inc = (
pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
531 const auto *uk = get_implementation(
src->info()->data_type(), _data_layout, _pool_stride_x, _pool_size);
534 uk->ukernel(
src,
dst, indices, _pool_info, window_src,
window);
539 return "CpuPoolingKernel";
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void poolingMxN_qasymm8_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
#define REGISTER_FP16_NEON(func_name)
void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
Container for 2D border size.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define REGISTER_FP32_NEON(func_name)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
1 channel, 1 F32 per channel
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices=nullptr)
Static function to check if given info will lead to a valid configuration of CpuPoolingKernel.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
decltype(strategy::transforms) typedef type
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns the expected width and height of the scaled output tensor, depending on the dimension rounding mode.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
Calculate the output pool shape of a tensor.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define REGISTER_QASYMM8_NEON(func_name)
1 channel, 1 U32 per channel
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices=nullptr)
Configure kernel for a given list of arguments.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Pooling Layer Information struct.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
void poolingMxN_fp16_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
Padding and stride information class.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
BorderSize border_size() const override
The size of the border for that kernel.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Num samples, channels, height, width.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
PoolingType
Available pooling types.
const char * name() const override
Name of the kernel.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
PadStrideInfo pad_stride_info
Information about executing thread and CPU.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
size_t width
Width of the image region or rectangle.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
void poolingMxN_qasymm8_signed_neon_nhwc(const ITensor *src0, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &, const Window &window_src, const Window &window)
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
const PoolingSelectorPtr is_selected
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
quantized, asymmetric fixed-point 8-bit number signed
Includes all wrapper headers at once.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
constexpr int end() const
Return the end of the dimension.
DataType
Available data types.
DataLayout
[DataLayout enum definition]
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.