Compute Library 21.02
CpuPoolingKernel.cpp
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "src/core/CPP/Validate.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEMath.h"

#include <arm_neon.h>

namespace arm_compute
{
namespace cpu
{
namespace kernels
{
namespace
{
using namespace misc::shape_calculator;

struct PoolingSelectorData
{
    DataType   dt;
    DataLayout dl;
    int        pool_stride_x;
    Size2D     pool_size;
};

using PoolingSelectorPtr = std::add_pointer<bool(const PoolingSelectorData &data)>::type;
using PoolingKernelPtr   = std::add_pointer<void(const ITensor *, ITensor *, ITensor *, PoolingLayerInfo &, const Window &, const Window &)>::type;

struct PoolingKernel
{
    const char              *name;
    const PoolingSelectorPtr is_selected;
    PoolingKernelPtr         ukernel;
};

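// Table of candidate micro-kernels: get_implementation() below walks this list and returns the first entry whose selector matches.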
static const PoolingKernel available_kernels[] =
{
    {
        "poolingMxN_qasymm8_neon_nhwc",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_qasymm8_neon_nhwc)
    },
    {
        "poolingMxN_qasymm8_signed_neon_nhwc",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_qasymm8_signed_neon_nhwc)
    },
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
    {
        "poolingMxN_fp16_neon_nhwc",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); },
        REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nhwc)
    },
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
    {
        "poolingMxN_fp32_neon_nhwc",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); },
        REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nhwc)
    },
#if defined(ENABLE_NCHW_KERNELS)
    {
        "pooling2_qasymm8_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<uint8_t>)
    },
    {
        "pooling3_qasymm8_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<uint8_t>)
    },
    {
        "poolingMxN_qasymm8_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<uint8_t>)
    },
    {
        "pooling2_qasymm8_signed_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw<int8_t>)
    },
    {
        "pooling3_qasymm8_signed_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw<int8_t>)
    },
    {
        "poolingMxN_qasymm8_signed_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw<int8_t>)
    },
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
    {
        "pooling2_fp16_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
        REGISTER_FP16_NEON(arm_compute::cpu::pooling2_fp16_neon_nchw)
    },
    {
        "pooling3_fp16_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
        REGISTER_FP16_NEON(arm_compute::cpu::pooling3_fp16_neon_nchw)
    },
    {
        "poolingMxN_fp16_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16)); },
        REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nchw)
    },
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
    {
        "pooling2_fp32_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); },
        REGISTER_FP32_NEON(arm_compute::cpu::pooling2_fp32_neon_nchw)
    },
    {
        "pooling3_fp32_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); },
        REGISTER_FP32_NEON(arm_compute::cpu::pooling3_fp32_neon_nchw)
    },
    {
        "pooling7_fp32_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); },
        REGISTER_FP32_NEON(arm_compute::cpu::pooling7_fp32_neon_nchw)
    },
    {
        "poolingMxN_fp32_neon_nchw",
        [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); },
        REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nchw)
    },
#endif /* defined(ENABLE_NCHW_KERNELS) */
};

/** Micro-kernel selector
 *
 * @param[in] dt            Data type of the source tensor
 * @param[in] dl            Data layout of the source tensor
 * @param[in] pool_stride_x Pooling stride along the x dimension
 * @param[in] pool_size     Pooling window size
 *
 * @return The first matching micro-kernel, or nullptr if none matches
 */
const PoolingKernel *get_implementation(DataType dt, DataLayout dl, int pool_stride_x, Size2D pool_size)
{
    for(const auto &uk : available_kernels)
    {
        if(uk.is_selected({ dt, dl, pool_stride_x, pool_size }))
        {
            return &uk;
        }
    }
    return nullptr;
}

Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info,
                          unsigned int &pooled_w, unsigned int pooled_h, const ITensorInfo *indices, Size2D pool_size)
{
    int                 pool_stride_x   = 0;
    int                 pool_stride_y   = 0;
    PoolingType         pool_type       = pool_info.pool_type;
    const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();

    if(indices)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method");
    }
    ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(src->data_type()));
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(src->data_type()) && !pool_info.exclude_padding && (pool_info.pool_type == PoolingType::AVG) && pool_info.pad_stride_info.has_padding()
                                    && (src->data_layout() == DataLayout::NHWC),
                                    "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types");

    if(dst->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON((dst->dimension(get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
                                    || (dst->dimension(get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));

        if(indices)
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2");
            ARM_COMPUTE_RETURN_ERROR_ON((indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
                                        || (indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
        }
    }

    const auto *uk = get_implementation(src->data_type(), src->data_layout(), pool_stride_x, pool_size);
    ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);

    return Status{};
}

Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y)
{
    ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0);
    ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0);

    return Status{};
}

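// Auto-initializes dst (and indices) and computes the execution window; for NCHW it also works out the border/padding requirements.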
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst, ITensorInfo *indices, const PoolingLayerInfo &pool_info,
                                                        unsigned int &num_elems_processed_per_iteration,
                                                        BorderSize   &border_size,
                                                        unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
{
    // Auto-initialize dst if it is not yet initialized
    auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_pool_shape(*src, pool_info)));
    if(indices)
    {
        // Auto-initialize indices if they are not yet initialized
        auto_init_if_empty(*indices, (src->clone()->set_tensor_shape(compute_pool_shape(*src, pool_info))).set_data_type(DataType::U32) /* we store the offset to the element */);
    }
    const auto          data_layout                  = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
    unsigned int        num_elems_read_per_iteration = 0;
    unsigned int        num_elems_horizontal_window  = 0;
    int                 pool_stride_x                = 0;
    int                 pool_stride_y                = 0;
    const int           idx_width                    = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int           idx_height                   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int           src_width                    = src->dimension(idx_width);
    const int           src_height                   = src->dimension(idx_height);
    const PadStrideInfo pad_stride_info              = pool_info.pad_stride_info;
    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
    const int  pool_pad_right  = pad_stride_info.pad_right();
    const int  pool_pad_top    = pad_stride_info.pad_top();
    const int  pool_pad_left   = pad_stride_info.pad_left();
    const int  pool_pad_bottom = pad_stride_info.pad_bottom();
    const bool is_square       = pool_size_x == pool_size_y;

    // Check dst dimensions
    std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width),
                                                     src->dimension(idx_height),
                                                     pool_size_x,
                                                     pool_size_y,
                                                     pad_stride_info);

    // If the pool is not square, or no optimized kernel applies, the generic MxN kernel is executed
    num_elems_read_per_iteration      = 1;
    num_elems_processed_per_iteration = 1;
    num_elems_horizontal_window       = 1;

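    // For square pool sizes, the optimized NCHW kernels read and process more elements per iteration; the values below are tuned per data type and pool size.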
    if(is_square)
    {
        switch(src->data_type())
        {
            case DataType::QASYMM8:
            case DataType::QASYMM8_SIGNED:
                switch(pool_size_x)
                {
                    case 2:
                        num_elems_read_per_iteration      = 16;
                        num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
                        num_elems_horizontal_window       = (pool_stride_x == 2) ? 8 : 16;
                        break;
                    case 3:
                        num_elems_read_per_iteration      = 16;
                        num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
                        num_elems_horizontal_window       = (pool_stride_x == 2) ? 8 : 16;
                        break;
                    default:
                        break;
                }
                break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
                switch(pool_size_x)
                {
                    case 2:
                    case 3:
                        num_elems_read_per_iteration      = 4;
                        num_elems_processed_per_iteration = 1;
                        num_elems_horizontal_window       = 1;
                        break;
                    default:
                        break;
                }
                break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::F32:
                switch(pool_size_x)
                {
                    case 2:
                        num_elems_read_per_iteration = 2;
                        break;
                    case 3:
                        num_elems_read_per_iteration = 4; // We use vload4 for pooling3
                        break;
                    case 7:
                        num_elems_read_per_iteration = 8; // We use vload8 for pooling7
                        break;
                    default:
                        break;
                }
                num_elems_processed_per_iteration = 1;
                num_elems_horizontal_window       = 1;
                break;
            default:
                ARM_COMPUTE_ERROR("Element size not supported");
                break;
        }
    }

    bool   window_changed = false;
    Window win{};
    if(data_layout == DataLayout::NCHW)
    {
        // Number of iterations in X dimension
        const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
        // Upper limit for the number of right/bottom border elements that are accessed
        const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - src_width;
        const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - src_height;
        border_size        = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
        border_size.right  = std::max(upper_bound_w, pool_pad_right);
        border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
        TensorShape dst_shape{ src->tensor_shape() };
        dst_shape.set(0, pooled_w);
        dst_shape.set(1, pooled_h);
        TensorInfo dst_info(src->clone()->set_tensor_shape(dst_shape));
        win = calculate_max_window(dst_info, Steps(num_elems_processed_per_iteration));
        AccessWindowStatic     src_access(src, -pool_pad_left, -pool_pad_top, ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom);
        AccessWindowHorizontal dst_access(dst, 0, num_elems_horizontal_window);
        if(indices)
        {
            AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window);
            window_changed = update_window_and_padding(win, src_access, dst_access, indices_access);
        }
        else
        {
            window_changed = update_window_and_padding(win, src_access, dst_access);
        }
        dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape()));

        border_size = src->padding();
    }

    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
    return std::make_pair(err, win);
}
} // namespace

BorderSize CpuPoolingKernel::border_size() const
{
    return _border_size;
}

void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
{
    const PadStrideInfo pad_stride_info   = pool_info.pad_stride_info;
    const bool          is_global_pooling = pool_info.is_global_pooling;

    // Get data layout
    const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
    const int  idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int  idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    // Update pool size in case of global pooling
    const Size2D pool_size(
        is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width,
        is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height);

    // Validate pool info before calling scaled_dimensions
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size.x(), pool_size.y()));

    // Check dst dimensions
    unsigned int pooled_w;
    unsigned int pooled_h;
    std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width),
                                                     src->dimension(idx_height),
                                                     pool_size.x(),
                                                     pool_size.y(),
                                                     pad_stride_info);

    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, pooled_w, pooled_h, indices, pool_size));

    // Set instance variables
    _pool_info     = pool_info;
    _data_layout   = src->data_layout();
    _pool_size     = pool_size;
    _pool_stride_x = pad_stride_info.stride().first;

    if(_data_layout == DataLayout::NHWC)
    {
        // Configure kernel window
        Window      win = calculate_max_window(*dst, Steps());
        Coordinates coord;
        coord.set_num_dimensions(dst->num_dimensions());
        dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
        ICpuKernel::configure(win);
    }
    else
    {
        // Configure kernel window
        auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration,
                                                        _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
        ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
        ICpuKernel::configure(win_config.second);
    }
}

Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
    unsigned int pooled_w                          = 0;
    unsigned int pooled_h                          = 0;
    unsigned int num_elems_processed_per_iteration = 0;
    BorderSize   border_size(0);

    const bool   is_global_pooling = pool_info.is_global_pooling;
    unsigned int pool_size_x       = 0;
    unsigned int pool_size_y       = 0;

    // Get data layout
    const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
    const int  idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int  idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    pool_size_x = is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width;
    pool_size_y = is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height;

    // Validate pool info before calling scaled_dimensions
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y));

    // Check dst dimensions
    std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width),
                                                     src->dimension(idx_height),
                                                     pool_size_x,
                                                     pool_size_y,
                                                     pool_info.pad_stride_info);

    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info, pooled_w, pooled_h, indices, Size2D(pool_size_x, pool_size_y)));
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), dst->clone().get(),
                                                              (indices) ? indices->clone().get() : nullptr, pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
                                                              pool_size_x, pool_size_y)
                                .first);

    return Status{};
}

void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);

    const ITensor *src     = tensors.get_const_tensor(TensorType::ACL_SRC);
    ITensor       *dst     = tensors.get_tensor(TensorType::ACL_DST_0);
    ITensor       *indices = tensors.get_tensor(TensorType::ACL_DST_1);

    const unsigned int pool_stride_x = _pool_info.pad_stride_info.stride().first;
    const unsigned int pool_stride_y = _pool_info.pad_stride_info.stride().second;
    const unsigned int pool_size     = _pool_info.pool_size.width;

    Window window_src(window);
    if(_data_layout == DataLayout::NCHW)
    {
        // Set the step of the src window in the x and y directions
        unsigned int window_x_inc = 0;
        switch(src->info()->data_type())
        {
            case DataType::QASYMM8:
            case DataType::QASYMM8_SIGNED:
            {
                window_x_inc = pool_stride_x;
                if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
                {
                    window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
                }
                break;
            }

            case DataType::F16:
            case DataType::F32:
            {
                window_x_inc = pool_stride_x;
                break;
            }
            default:
            {
                ARM_COMPUTE_ERROR("Not supported");
            }
        }
        window_src.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc));
        window_src.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y));
    }
    else
    {
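        // NHWC: the micro-kernel vectorises along the channel dimension internally, so only the spatial src dimensions are stepped with the pool strides.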
        window_src.set(Window::DimX, Window::Dimension(0, 1, 1));
        window_src.set(Window::DimY, Window::Dimension(0, src->info()->dimension(1), pool_stride_x));
        window_src.set(Window::DimZ, Window::Dimension(0, src->info()->dimension(2), pool_stride_y));
    }

    const auto *uk = get_implementation(src->info()->data_type(), _data_layout, _pool_stride_x, _pool_size);
    ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);

    uk->ukernel(src, dst, indices, _pool_info, window_src, window);
}

const char *CpuPoolingKernel::name() const
{
    return "CpuPoolingKernel";
}
} // namespace kernels
} // namespace cpu
} // namespace arm_compute