quantized.h (Compute Library 23.11)
/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_CORE_NEON_KERNELS_QUANTIZED_H
#define SRC_CORE_NEON_KERNELS_QUANTIZED_H

#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"

#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include "src/cpu/kernels/pool2d/neon/PoolingHelpers.h"

#include <arm_neon.h>

namespace arm_compute
{
namespace cpu
{
template <typename T>
void poolingMxN_q8_neon_nhwc(const ITensor    *src,
                             ITensor          *dst0,
                             ITensor          *dst1,
                             PoolingLayerInfo &pool_info,
                             const Window     &window_src,
                             const Window     &window)
{
    ARM_COMPUTE_UNUSED(dst1);

    const int window_start_x     = window.x().start();
    const int window_end_x       = window.x().end();
    const int window_step_x      = 16;
    const int window_half_step_x = window_step_x / 2;

    Window window_out = window;
    window_out.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in(src, window_src);
    Iterator out(dst0, window_out);

    using q8x8_t  = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
    using q16_t   = typename wrapper::traits::promote_t<T>;
    using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q32_t   = typename wrapper::traits::promote_t<q16_t>;
    using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();

    int pool_stride_x = 0;
    int pool_stride_y = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const float32x4_t             half_scale_v = vdupq_n_f32(0.5f);
    const UniformQuantizationInfo src_qinfo    = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo    = dst0->info()->quantization_info().uniform();

    const float quant_rescale = dst_qinfo.scale / src_qinfo.scale;
88  // "new_offset" doesn't have to consider the "half_scale_v" in its computation
89  // With a requantization performed in a single step there won't be uncertainties introduced
90  const int32_t new_offset =
91  dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / quant_rescale);
92 
93  const float requant_scale = dst_qinfo.scale / src_qinfo.scale;
94  const int32_t requant_offset =
95  dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
96  const UniformQuantizationInfo requant_qinfo = UniformQuantizationInfo(requant_scale, requant_offset);
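    // A dequantize-then-quantize round trip collapses into the single affine map
    //   q_dst = q_src / requant_scale + requant_offset.
    // Worked example (illustrative values, not from the source): with src = (scale 0.5, offset 10)
    // and dst = (scale 1.0, offset 5), requant_scale = 1.0 / 0.5 = 2 and
    // requant_offset = 5 - 10 / 2 = 0; q_src = 30 dequantizes to 0.5 * (30 - 10) = 10, which
    // requantizes to 10 / 1.0 + 5 = 15, and the single-step map agrees: 30 / 2 + 0 = 15.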

    execute_window_loop(
        window_out,
        [&](const Coordinates &id)
        {
            const int idx_width    = id.y() * pool_stride_x;
            const int idx_height   = id.z() * pool_stride_y;
            const int pool_limit_y = pool_pad_top - idx_height;
            const int pool_limit_x = pool_pad_left - idx_width;

            const int pool_start_y = std::max(0, window_src.z().start() + pool_limit_y);
            const int pool_end_y   = std::min(pool_size_y, window_src.z().end() + pool_limit_y);
            const int pool_start_x = std::max(0, window_src.y().start() + pool_limit_x);
            const int pool_end_x   = std::min(pool_size_x, window_src.y().end() + pool_limit_x);

            int x_off = window_start_x;
            for (; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
            {
                if (pool_info.pool_type != PoolingType::MAX)
                {
                    q32x4_t vres1 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                    q32x4_t vres2 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                    q32x4_t vres3 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                    q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});

                    // Calculate scale
                    const float scale = calculate_avg_scale_pool2d(
                        pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w,
                        upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);

                    // Perform pooling
                    for (int y = pool_start_y; y < pool_end_y; ++y)
                    {
                        for (int x = pool_start_x; x < pool_end_x; ++x)
                        {
                            const q8x16_t data = wrapper::vloadq(
                                reinterpret_cast<const T *>(
                                    in.ptr() +
                                    (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                    (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) +
                                x_off);

                            const q16x8_t data_q16  = wrapper::vmovl(wrapper::vgetlow(data));
                            const q16x8_t data2_q16 = wrapper::vmovl(wrapper::vgethigh(data));
                            vres1 = wrapper::vadd(vres1, wrapper::vmovl(wrapper::vgetlow(data_q16)));
                            vres2 = wrapper::vadd(vres2, wrapper::vmovl(wrapper::vgethigh(data_q16)));
                            vres3 = wrapper::vadd(vres3, wrapper::vmovl(wrapper::vgetlow(data2_q16)));
                            vres4 = wrapper::vadd(vres4, wrapper::vmovl(wrapper::vgethigh(data2_q16)));
                        }
                    }

                    if (src_qinfo != dst_qinfo)
                    {
                        const float32x4x4_t vres = {{
                            vcvtq_f32_q32(vres1),
                            vcvtq_f32_q32(vres2),
                            vcvtq_f32_q32(vres3),
                            vcvtq_f32_q32(vres4),
                        }};
                        const auto requantized_dst =
                            vrequantize_pooling_with_scale<q8x16_t>(vres, quant_rescale, scale, new_offset);
                        // Store result
                        wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, wrapper::vgetlow(requantized_dst));
                        wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8,
                                        wrapper::vgethigh(requantized_dst));
                    }
                    else
                    {
                        const float32x4_t scale_v = vdupq_n_f32(scale);
                        // Multiply by scale (the reciprocal of the pooling-window area) and add 0.5f
                        // to round to nearest instead of truncating towards zero
                        vres1 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres1), scale_v));
                        vres2 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres2), scale_v));
                        vres3 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres3), scale_v));
                        vres4 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres4), scale_v));
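                        // Illustrative: vmla(half_scale_v, v, scale_v) computes 0.5f + v * scale, so the
                        // truncating float-to-int conversion above rounds to nearest. E.g. a pool sum of 7
                        // over a 2x2 window (scale = 0.25) gives 0.5 + 7 * 0.25 = 2.25, which truncates to 2
                        // and matches round(1.75) = 2, whereas truncation alone would give 1.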

                        const q8x8_t res1 =
                            wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres1), wrapper::vmovn(vres2)));
                        const q8x8_t res2 =
                            wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres3), wrapper::vmovn(vres4)));
                        // Store result
                        wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, res1);
                        wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, res2);
                    }
                }
                else
                {
                    q8x16_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_128_tag{});

                    for (int y = pool_start_y; y < pool_end_y; ++y)
                    {
                        for (int x = pool_start_x; x < pool_end_x; ++x)
                        {
                            const q8x16_t data = wrapper::vloadq(
                                reinterpret_cast<const T *>(
                                    in.ptr() +
                                    (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                    (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) +
                                x_off);
                            vres = wrapper::vmax(vres, data);
                        }
                    }

                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                    (src_qinfo != dst_qinfo)
                                        ? vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(vres),
                                                                               wrapper::vgethigh(vres), requant_qinfo)
                                        : vres);
                }
            }

            if (pool_info.pool_type == PoolingType::MAX)
            {
                for (; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
                {
                    q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
                    for (int y = pool_start_y; y < pool_end_y; ++y)
                    {
                        for (int x = pool_start_x; x < pool_end_x; ++x)
                        {
                            const q8x8_t data = wrapper::vload(
                                reinterpret_cast<const T *>(
                                    in.ptr() +
                                    (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                    (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) +
                                x_off);
                            vres = wrapper::vmax(vres, data);
                        }
                    }

                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                    (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
                }
            }

            // Left-overs loop
            for (; x_off < window_end_x; ++x_off)
            {
                if (pool_info.pool_type != PoolingType::MAX)
                {
                    q32_t res = static_cast<q32_t>(0.f);

                    // Calculate scale
                    const float scale = calculate_avg_scale_pool2d(
                        pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w,
                        upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);

                    // Perform pooling
                    for (int y = pool_start_y; y < pool_end_y; ++y)
                    {
                        for (int x = pool_start_x; x < pool_end_x; ++x)
                        {
                            const T data =
                                *(reinterpret_cast<const T *>(
                                      in.ptr() +
                                      (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                      (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) +
                                  x_off);
                            res += data;
                        }
                    }

                    if (src_qinfo != dst_qinfo)
                    {
                        const float res_f           = static_cast<float>(res);
                        const float new_scale       = quant_rescale / scale;
                        const auto  requantized_dst = quantize<T>(res_f, UniformQuantizationInfo(new_scale, new_offset));

                        // Store result
                        *(reinterpret_cast<T *>(out.ptr()) + x_off) = requantized_dst;
                    }
                    else
                    {
                        // Multiply by scale (the reciprocal of the pooling-window area) and add 0.5f
                        // to round to nearest instead of truncating towards zero
                        res = static_cast<T>(0.5f + static_cast<float>(res) * scale);

                        // Store result
                        *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                    }
                }
                else
                {
                    T res = std::numeric_limits<T>::min();

                    for (int y = pool_start_y; y < pool_end_y; ++y)
                    {
                        for (int x = pool_start_x; x < pool_end_x; ++x)
                        {
                            const T data =
                                *(reinterpret_cast<const T *>(
                                      in.ptr() +
                                      (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                      (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) +
                                  x_off);
                            res = std::max(res, data);
                        }
                    }

                    // Store result
                    if (src_qinfo != dst_qinfo)
                    {
                        const float res_f                           = static_cast<float>(res);
                        *(reinterpret_cast<T *>(out.ptr()) + x_off) = quantize<T>(res_f, requant_qinfo);
                    }
                    else
                    {
                        *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                    }
                }
            }
        },
        in, out);
}

#if defined(ENABLE_NCHW_KERNELS)
template <typename T, typename TVec>
inline void scale_vector_q16x8(bool               exclude_padding,
                               TVec              &v,
                               const Coordinates &id,
                               int                id_offset,
                               int                step,
                               const int          pool_size,
                               const int          upper_bound_w,
                               const int          upper_bound_h,
                               const int          pad_x,
                               const int          pad_y,
                               const int          stride_x,
                               const int          stride_y)
{
    int       start_x = (id.x() + id_offset) * stride_x - pad_x;
    int       start_y = id.y() * stride_y - pad_y;
    const int end_y   = std::min(start_y + pool_size, upper_bound_h);
    if (exclude_padding)
    {
        start_y = std::max(0, start_y);
    }

    std::array<T, 8> elems = {{
        wrapper::vgetlane(v, 0),
        wrapper::vgetlane(v, 1),
        wrapper::vgetlane(v, 2),
        wrapper::vgetlane(v, 3),
        wrapper::vgetlane(v, 4),
        wrapper::vgetlane(v, 5),
        wrapper::vgetlane(v, 6),
        wrapper::vgetlane(v, 7),
    }};

    for (auto &el : elems)
    {
        int       c_start_x = start_x;
        const int end_x     = std::min(c_start_x + pool_size, upper_bound_w);
        if (exclude_padding)
        {
            c_start_x = std::max(0, c_start_x);
        }
        float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
        el *= scale;
        start_x += step * stride_x;
    }

    v = wrapper::vsetlane(elems[0], v, 0);
    v = wrapper::vsetlane(elems[1], v, 1);
    v = wrapper::vsetlane(elems[2], v, 2);
    v = wrapper::vsetlane(elems[3], v, 3);
    v = wrapper::vsetlane(elems[4], v, 4);
    v = wrapper::vsetlane(elems[5], v, 5);
    v = wrapper::vsetlane(elems[6], v, 6);
    v = wrapper::vsetlane(elems[7], v, 7);
}
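// Illustrative note on scale_vector_q16x8 above: each of the eight lanes of "v" holds the
// pooled sum for a different output x position, so each lane needs its own 1/(pool area)
// factor once the window is clipped at the borders. For example (assumed values), with
// pool_size = 2, upper_bound_w = 5 and pad_x = 0, the lane whose window starts at x = 4
// covers min(4 + 2, 5) - 4 = 1 column instead of 2, halving its area relative to interior
// lanes with the same vertical extent.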

template <typename T>
auto load16_boundary_aware(
    int srcw, int srch, int pad_l, int pad_r, int pad_t, int pad_b, int x, int y, const T *ptr, T fval)
{
    ARM_COMPUTE_UNUSED(pad_b, pad_r);
    T vec[16];
    // Handle reading a row that may extend outside the tensor
    const bool row_in_bounds((y >= pad_t) && (y < (srch + pad_t)));
    for (int i = 0; i < 16; i++)
    {
        if (row_in_bounds && (x + i >= pad_l) && (x + i < (srcw + pad_l)))
        {
            vec[i] = *(ptr + i);
        }
        else
        {
            vec[i] = fval;
        }
    }
    return wrapper::vloadq(vec);
}
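// Illustrative note on load16_boundary_aware above: the scalar gather trades speed for
// safety near the borders. For example (assumed values), with srcw = 10, pad_l = 1 and
// x = 8, lanes 0..2 map to columns 7..9 of the row and load real data, while lanes 3..15
// fall past srcw + pad_l = 11 and are filled with fval: T(0) for AVG pooling leaves the
// sum unchanged, and std::numeric_limits<T>::min() for MAX pooling leaves the max unchanged.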

template <typename T, typename V, bool deinterleave>
inline void write16_boundary_aware(int x, int dst_w, const V &lower, const V &upper, T *ptr)
{
    if (deinterleave)
    {
        for (int i = 0; i < 8 && (i * 2 + x) < dst_w; ++i)
        {
            *(ptr + i * 2) = lower[i];
        }
        for (int i = 0; i < 8 && (i * 2 + x + 1) < dst_w; ++i)
        {
            *(ptr + 1 + i * 2) = upper[i];
        }
    }
    else
    {
        for (int i = 0; i < 8 && (i + x) < dst_w; ++i)
        {
            *(ptr + i) = lower[i];
        }
        for (int i = 0; i < 8 && (i + x + 8) < dst_w; ++i)
        {
            *(ptr + i + 8) = upper[i];
        }
    }
}
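// Illustrative note on write16_boundary_aware above: in the deinterleave case the two
// halves hold alternating outputs, with "lower" carrying the even destination columns
// x, x + 2, ... and "upper" the odd ones x + 1, x + 3, ..., because a stride-1 pass
// computes neighbouring windows from the plain and the vext_1-shifted vectors. The
// non-deinterleave case is a plain bounds-checked 16-element store. Both clamp against
// dst_w so the end of the row is never overrun.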

template <typename T, typename V>
inline void write8_boundary_aware(int x, int dst_w, const V &v, T *ptr)
{
    for (int i = 0; i < 8 && (i + x) < dst_w; ++i)
    {
        *(ptr + i) = v[i];
    }
}

template <typename T>
void pooling2_quantized_neon_nchw(const ITensor    *src,
                                  ITensor          *dst0,
                                  ITensor          *dst1,
                                  PoolingLayerInfo &pool_info,
                                  const Window     &window_src,
                                  const Window     &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x4_t   = typename wrapper::traits::neon_vector<q16_t, 4>::type;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 2;
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
    const T *const src_top_ptr = reinterpret_cast<const T *>(
        src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(
        src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));
    const int                     scale_step_x         = (pool_stride_x == 1) ? 2 : 1;
    const UniformQuantizationInfo src_qinfo            = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo            = dst0->info()->quantization_info().uniform();
    const bool                    have_different_qinfo = src_qinfo != dst_qinfo;

    const float   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t requant_offset =
        dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo = UniformQuantizationInfo(requant_scale, requant_offset);
    const int                     src_w         = src->info()->dimension(0);
    const int                     src_h         = src->info()->dimension(1);
    const int                     dst_w         = dst0->info()->dimension(0);

    const T fill_value = (pool_info.pool_type == PoolingType::MAX) ? std::numeric_limits<T>::min() : T(0);
475 
477  window,
478  [&](const Coordinates &id)
479  {
            const auto x_val   = id.x() * pool_stride_x;
            const auto y_val_0 = id.y() * pool_stride_y;
            const auto y_val_1 = (id.y() * pool_stride_y) + 1;

            auto top_data =
                load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom, x_val,
                                      y_val_0, reinterpret_cast<const T *>(src_top_ptr + in.offset()), fill_value);
            auto bottom_data =
                load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom, x_val,
                                      y_val_1, reinterpret_cast<const T *>(src_bottom_ptr + in.offset()), fill_value);

            q8x8_t lower_res = {};
            q8x8_t upper_res = {};

            if (pool_info.pool_type != PoolingType::MAX)
            {
                const q16x8x2_t top_data_q16 = {
                    {wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data))}};
                const q16x8x2_t bottom_data_q16 = {
                    {wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data))}};

                // Add rows
                const q16x8x2_t vrsum = {{
                    wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]),
                    wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]),
                }};

                // Pair-wise add row data
                const q16x4_t vpsum_1 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[0]), wrapper::vgethigh(vrsum.val[0]));
                const q16x4_t vpsum_2 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[1]), wrapper::vgethigh(vrsum.val[1]));

                q16x8_t res_lower = wrapper::vcombine(vpsum_1, vpsum_2);

                // Scale lower result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_lower, id, 0, scale_step_x, pool_size,
                                                   upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top,
                                                   pool_stride_x, pool_stride_y);
                lower_res = wrapper::vmovn(res_lower);

                // Compute upper result for stride_x == 1
                if (pool_stride_x == 1)
                {
                    // Shifted row sum
                    const q16x8x2_t vrsum_shifted = {
                        {wrapper::vext_1(vrsum.val[0], vrsum.val[1]), wrapper::vext_1(vrsum.val[1], vrsum.val[1])}};
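                    // Illustrative note: vext_1 shifts the row sums left by one lane, so lane i of
                    // vrsum_shifted holds the sum for column i + 1. Pairwise-adding the shifted copy
                    // below therefore produces the pooling windows that start at odd columns, which
                    // interleave with the even-column results computed from the unshifted vrsum above.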

                    // Pair-wise add shifted row
                    q16x8_t res_upper = wrapper::vcombine(
                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[0]), wrapper::vgethigh(vrsum_shifted.val[0])),
                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[1]),
                                       wrapper::vgethigh(vrsum_shifted.val[1])));

                    // Scale upper result
                    scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_upper, id, 1, 2, pool_size,
                                                       upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top,
                                                       pool_stride_x, pool_stride_y);
                    upper_res = wrapper::vmovn(res_upper);
                }
            }
            else
            {
                const q8x16_t max_data = wrapper::vmax(top_data, bottom_data);
                lower_res              = wrapper::vpmax(wrapper::vgetlow(max_data), wrapper::vgethigh(max_data));
                if (pool_stride_x == 1)
                {
                    const q8x16_t max_data_shifted = wrapper::vext_1(max_data, max_data);
                    upper_res = wrapper::vpmax(wrapper::vgetlow(max_data_shifted), wrapper::vgethigh(max_data_shifted));
                }
            }

            if (have_different_qinfo)
            {
                const auto requantized_dst = vrequantize_pooling<q8x8_t, q8x16_t>(lower_res, upper_res, requant_qinfo);
                lower_res                  = wrapper::vgetlow(requantized_dst);
                upper_res                  = wrapper::vgethigh(requantized_dst);
            }
            auto out_ptr = reinterpret_cast<T *>(out.ptr());
            // Store result
            if (pool_stride_x == 1)
            {
                write16_boundary_aware<T, q8x8_t, true>(id.x(), dst_w, lower_res, upper_res, out_ptr);
            }
            else
            {
                write8_boundary_aware<T, q8x8_t>(id.x(), dst_w, lower_res, out_ptr);
            }
        },
        in, out);
}

template <typename T>
void pooling3_quantized_neon_nchw(const ITensor    *src,
                                  ITensor          *dst0,
                                  ITensor          *dst1,
                                  PoolingLayerInfo &pool_info,
                                  const Window     &window_src,
                                  const Window     &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q8x8x2_t  = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 3;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo = dst0->info()->quantization_info().uniform();

    const float   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t requant_offset =
        dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo = UniformQuantizationInfo(requant_scale, requant_offset);

    const T *const src_top_ptr = reinterpret_cast<const T *>(
        src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_middle_ptr = reinterpret_cast<const T *>(
        src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(
        src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2)));

    const int src_w      = src->info()->dimension(0);
    const int src_h      = src->info()->dimension(1);
    const T   fill_value = (pool_info.pool_type == PoolingType::AVG) ? T(0) : std::numeric_limits<T>::min();
    const int dst_w      = dst0->info()->dimension(0);

    execute_window_loop(
        window,
        [&](const Coordinates &id)
        {
            const auto x_val   = id.x() * pool_stride_x;
            const auto y_val_0 = id.y() * pool_stride_y;
            const auto y_val_1 = (id.y() * pool_stride_y) + 1;
            const auto y_val_2 = (id.y() * pool_stride_y) + 2;

            auto top_data =
                load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom, x_val,
                                      y_val_0, reinterpret_cast<const T *>(src_top_ptr + in.offset()), fill_value);
            auto middle_data =
                load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom, x_val,
                                      y_val_1, reinterpret_cast<const T *>(src_middle_ptr + in.offset()), fill_value);
            auto bottom_data =
                load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom, x_val,
                                      y_val_2, reinterpret_cast<const T *>(src_bottom_ptr + in.offset()), fill_value);

            q8x8_t  fres  = {};
            q8x16_t fqres = {};

            if (pool_info.pool_type == PoolingType::AVG)
            {
                // Widen the data to 16 bits (u16 for uint8_t input, s16 for int8_t)
                const q16x8x2_t top_data_q16 = {
                    {wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data))}};
                const q16x8x2_t middle_data_q16 = {
                    {wrapper::vmovl(wrapper::vgetlow(middle_data)), wrapper::vmovl(wrapper::vgethigh(middle_data))}};
                const q16x8x2_t bottom_data_q16 = {
                    {wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data))}};

                // Calculate row sums
                const q16x8x2_t vrsum = {{
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]), middle_data_q16.val[0]),
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]), middle_data_q16.val[1]),
                }};
                const q16x8x2_t vrsum_shifted_1 = {
                    {wrapper::vext_1(vrsum.val[0], vrsum.val[1]), wrapper::vext_1(vrsum.val[1], vrsum.val[1])}};
                const q16x8x2_t vrsum_shifted_2 = {
                    {wrapper::vext_2(vrsum.val[0], vrsum.val[1]), wrapper::vext_2(vrsum.val[1], vrsum.val[1])}};
                // Calculate final sum
                q16x8x2_t final_sum = {{
                    wrapper::vadd(wrapper::vadd(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
                    wrapper::vadd(wrapper::vadd(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
                }};
                if (pool_stride_x == 2)
                {
                    q16x8_t res = {
                        wrapper::vgetlane(final_sum.val[0], 0), wrapper::vgetlane(final_sum.val[0], 2),
                        wrapper::vgetlane(final_sum.val[0], 4), wrapper::vgetlane(final_sum.val[0], 6),
                        wrapper::vgetlane(final_sum.val[1], 0), wrapper::vgetlane(final_sum.val[1], 2),
                        wrapper::vgetlane(final_sum.val[1], 4), wrapper::vgetlane(final_sum.val[1], 6),
                    };

                    scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res, id, 0, 1, pool_size,
                                                       upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top,
                                                       pool_stride_x, pool_stride_y);
                    fres = wrapper::vmovn(res);
                }
                else
                {
                    // Scale lower result
                    scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[0], id, 0, 1, pool_size,
                                                       upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top,
                                                       pool_stride_x, pool_stride_y);
                    // Scale upper result
                    scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[1], id, 8, 1, pool_size,
                                                       upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top,
                                                       pool_stride_x, pool_stride_y);
                    fqres = wrapper::vcombine(wrapper::vmovn(final_sum.val[0]), wrapper::vmovn(final_sum.val[1]));
                }
            }
            else
            {
                const q8x16_t max_data        = wrapper::vmax(wrapper::vmax(top_data, bottom_data), middle_data);
                const q8x16_t max_data_shift1 = wrapper::vext_1(max_data, max_data);
                const q8x16_t max_data_shift2 = wrapper::vext_2(max_data, max_data);
                const q8x16_t final_max       = wrapper::vmax(wrapper::vmax(max_data, max_data_shift1), max_data_shift2);

                if (pool_stride_x == 2)
                {
                    const q8x8x2_t      table      = {{wrapper::vgetlow(final_max), wrapper::vgethigh(final_max)}};
                    static const q8x8_t lookup_val = {0, 2, 4, 6, 8, 10, 12, 14};
                    fres                           = wrapper::vtbl(table, lookup_val);
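                    // Illustrative note: with stride 2 only every other horizontal max is a real
                    // output, so the vtbl lookup with indices {0, 2, ..., 14} compacts the 16
                    // candidate results in final_max down to the 8 even-indexed ones.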
                }
                else
                {
                    fqres = final_max;
                }
            }

            // Store result
            if (pool_stride_x == 1)
            {
                if (src_qinfo != dst_qinfo)
                {
                    fqres = vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(fqres), wrapper::vgethigh(fqres),
                                                                 requant_qinfo);
                }
                write16_boundary_aware<T, q8x8_t, false>(id.x(), dst_w, wrapper::vgetlow(fqres),
                                                         wrapper::vgethigh(fqres), reinterpret_cast<T *>(out.ptr()));
            }
            else
            {
                if (src_qinfo != dst_qinfo)
                {
                    fres = vrequantize_pooling<q8x8_t>(fres, requant_qinfo);
                }
                write8_boundary_aware<T, q8x8_t>(id.x(), dst_w, fres, reinterpret_cast<T *>(out.ptr()));
            }
        },
        in, out);
}

template <typename T>
void poolingMxN_quantized_neon_nchw(const ITensor    *src,
                                    ITensor          *dst0,
                                    ITensor          *dst1,
                                    PoolingLayerInfo &pool_info,
                                    const Window     &window_src,
                                    const Window     &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q16_t = typename wrapper::traits::promote_t<T>;
    using q32_t = typename wrapper::traits::promote_t<q16_t>;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().x() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int       pool_stride_x   = 0;
    int       pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo = dst0->info()->quantization_info().uniform();
    const int                      src_w     = src->info()->dimension(0);
    const int                      src_h     = src->info()->dimension(1);
    const T   fill_value        = (pool_info.pool_type == PoolingType::AVG) ? T(0) : std::numeric_limits<T>::min();
    const int stridex_in_bytes  = static_cast<int>(src->info()->strides_in_bytes().x());
    const int stridey_in_bytes  = static_cast<int>(src->info()->strides_in_bytes().y());

    execute_window_loop(
        window,
        [&](const Coordinates &id)
        {
            T res = std::numeric_limits<T>::min();

            if (pool_info.pool_type != PoolingType::MAX)
            {
                q32_t sres = 0;

                // Calculate scale
                const float scale = calculate_avg_scale_pool2d(
                    pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w,
                    upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);

                // Perform pooling
                for (int y = 0; y < pool_size_y; ++y)
                {
                    for (int x = 0; x < pool_size_x; ++x)
                    {
                        const auto in_ptr = reinterpret_cast<const T *>(
                            in.ptr() + (x - pool_pad_left) * stridex_in_bytes + (y - pool_pad_top) * stridey_in_bytes);

                        const int idx  = x + id.x() * pool_stride_x - pool_pad_left;
                        const int idy  = y + id.y() * pool_stride_y - pool_pad_top;
                        const T   data = (idx < 0 || idy < 0 || idx >= src_w || idy >= src_h) ? fill_value : *in_ptr;
                        sres += data;
                    }
                }
                // Multiply by scale (the reciprocal of the pooling-window area) and round to nearest
                res = static_cast<T>(support::cpp11::round(sres * scale));
            }
            else
            {
                for (int y = 0; y < pool_size_y; ++y)
                {
                    for (int x = 0; x < pool_size_x; ++x)
                    {
                        const auto in_ptr = reinterpret_cast<const T *>(
                            in.ptr() + (x - pool_pad_left) * stridex_in_bytes + (y - pool_pad_top) * stridey_in_bytes);

                        const int idx  = x + id.x() * pool_stride_x - pool_pad_left;
                        const int idy  = y + id.y() * pool_stride_y - pool_pad_top;
                        const T   data = (idx < 0 || idy < 0 || idx >= src_w || idy >= src_h) ? fill_value : *in_ptr;
                        res = std::max(res, data);
                    }
                }
            }
            // Store result
            res = (src_qinfo != dst_qinfo) ? Qasymm8QuantizationHelper<T>::quantize(
                                                 Qasymm8QuantizationHelper<T>::dequantize(res, src_qinfo), dst_qinfo)
                                           : res;
            *(reinterpret_cast<T *>(out.ptr())) = res;
        },
        in, out);
}
#endif /* defined(ENABLE_NCHW_KERNELS) */
} // namespace cpu
} // namespace arm_compute

#endif // SRC_CORE_NEON_KERNELS_QUANTIZED_H