Compute Library 21.02
quantized.h
/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_CORE_NEON_KERNELS_QUANTIZED_H
#define SRC_CORE_NEON_KERNELS_QUANTIZED_H

#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>

namespace arm_compute
{
namespace cpu
{
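// Scalar quantization helpers: pick the signed or unsigned QASYMM8
// quantization routine at compile time based on the element type T.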
template <typename T>
inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type
quantize(float val, const UniformQuantizationInfo &info)
{
    return quantize_qasymm8_signed(val, info);
}

template <typename T>
inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type
quantize(float val, const UniformQuantizationInfo &info)
{
    return quantize_qasymm8(val, info);
}

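// Type-dispatched vector conversions between float32x4_t and the matching
// 32-bit integer vector type (uint32x4_t or int32x4_t).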
template <typename T>
inline T vcvtq_q32_f32(float32x4_t values);

template <>
inline uint32x4_t vcvtq_q32_f32(float32x4_t values)
{
    return vcvtq_u32_f32(values);
}

template <>
inline int32x4_t vcvtq_q32_f32(float32x4_t values)
{
    return vcvtq_s32_f32(values);
}

template <typename T>
inline float32x4_t vcvtq_f32_q32(T values);

template <>
inline float32x4_t vcvtq_f32_q32(uint32x4_t values)
{
    return vcvtq_f32_u32(values);
}

template <>
inline float32x4_t vcvtq_f32_q32(int32x4_t values)
{
    return vcvtq_f32_s32(values);
}

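// Requantize a float32 accumulator while folding in the average-pooling
// scale: the combined scale quant_rescale / scale_pooling is applied in a
// single quantization step.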
template <typename Tout>
inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset);

template <>
inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
{
    const float new_scale = quant_rescale / scale_pooling;
    return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset));
}

template <>
inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
{
    const float new_scale = quant_rescale / scale_pooling;
    return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset));
}

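// Requantize two 8-bit vectors: widen each to float32 via 16- and 32-bit
// intermediates, then quantize back into one 16-lane vector.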
template <typename Tin, typename Tout>
inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo);

template <>
inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x4_t acc =
    {
        {
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))),
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))),
        }
    };
    return vquantize(acc, requant_qinfo);
}

template <>
inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x4_t acc =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))),
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))),
        }
    };
    return vquantize_signed(acc, requant_qinfo);
}

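// Single-vector (8-lane) variant of the requantization above.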
template <typename T>
inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo);

template <>
inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x2_t acc =
    {
        {
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))),
        }
    };
    return vquantize(acc, requant_qinfo);
}

template <>
inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x2_t acc =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))),
        }
    };
    return vquantize_signed(acc, requant_qinfo);
}

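// Reciprocal of the effective pooling-window area for average pooling, with
// the window clamped to the valid bounds. For example, a 3x3 pool with
// stride 1 and padding 1 at the top-left output (exclude_padding = true)
// covers a 2x2 region, so the returned scale is 1/4.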
inline float calculate_avg_scale(bool exclude_padding, DataLayout data_layout, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
                                 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    const unsigned int idx_width  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    int start_x = id[idx_width] * stride_x - pad_x;
    int start_y = id[idx_height] * stride_y - pad_y;

    const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
    const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
    if(exclude_padding)
    {
        start_x = std::max(0, start_x);
        start_y = std::max(0, start_y);
    }
    return 1.f / ((end_y - start_y) * (end_x - start_x));
}

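// Quantized MxN pooling for NHWC tensors. Channels are processed 16 at a
// time, then (for max pooling) 8 at a time, then in a scalar leftover loop.
// Average pooling accumulates into 32-bit lanes and either requantizes in a
// single step (when src and dst quantization differ) or rescales with
// round-to-nearest.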
template <typename T>
void poolingMxN_q8_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);

    const int window_start_x     = window.x().start();
    const int window_end_x       = window.x().end();
    const int window_step_x      = 16;
    const int window_half_step_x = window_step_x / 2;

    Window window_out = window;
    window_out.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in(src, window_src);
    Iterator out(dst0, window_out);

    using q8x8_t  = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
    using q16_t   = typename wrapper::traits::promote_t<T>;
    using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q32_t   = typename wrapper::traits::promote_t<q16_t>;
    using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();

    int pool_stride_x = 0;
    int pool_stride_y = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const float32x4_t             half_scale_v = vdupq_n_f32(0.5f);
    const UniformQuantizationInfo src_qinfo    = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo    = dst0->info()->quantization_info().uniform();

    const float quant_rescale = dst_qinfo.scale / src_qinfo.scale;
    // "new_offset" does not need to account for "half_scale_v": with the
    // requantization performed in a single step, no rounding uncertainty is introduced.
    const int32_t new_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / quant_rescale);

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);

    execute_window_loop(window_out, [&](const Coordinates & id)
    {
        const int idx_width    = id.y() * pool_stride_x;
        const int idx_height   = id.z() * pool_stride_y;
        const int pool_limit_y = pool_pad_top - idx_height;
        const int pool_limit_x = pool_pad_left - idx_width;

        const int pool_start_y = std::max(0, window_src.z().start() + pool_limit_y);
        const int pool_end_y   = std::min(pool_size_y, window_src.z().end() + pool_limit_y);
        const int pool_start_x = std::max(0, window_src.y().start() + pool_limit_x);
        const int pool_end_x   = std::min(pool_size_x, window_src.y().end() + pool_limit_x);

        int x_off = window_start_x;
        for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
        {
            if(pool_info.pool_type != PoolingType::MAX)
            {
                q32x4_t vres1 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres2 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres3 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});

                // Calculate scale
                const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                        pool_stride_y);

                // Perform pooling
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);

                        const q16x8_t data_q16  = wrapper::vmovl(wrapper::vgetlow(data));
                        const q16x8_t data2_q16 = wrapper::vmovl(wrapper::vgethigh(data));
                        vres1 = wrapper::vadd(vres1, wrapper::vmovl(wrapper::vgetlow(data_q16)));
                        vres2 = wrapper::vadd(vres2, wrapper::vmovl(wrapper::vgethigh(data_q16)));
                        vres3 = wrapper::vadd(vres3, wrapper::vmovl(wrapper::vgetlow(data2_q16)));
                        vres4 = wrapper::vadd(vres4, wrapper::vmovl(wrapper::vgethigh(data2_q16)));
                    }
                }

                if(src_qinfo != dst_qinfo)
                {
                    const float32x4x4_t vres =
                    {
                        {
                            vcvtq_f32_q32(vres1),
                            vcvtq_f32_q32(vres2),
                            vcvtq_f32_q32(vres3),
                            vcvtq_f32_q32(vres4),
                        }
                    };
                    const auto requantized_dst = vrequantize_pooling_with_scale<q8x16_t>(vres, quant_rescale, scale, new_offset);
                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, wrapper::vgetlow(requantized_dst));
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, wrapper::vgethigh(requantized_dst));
                }
                else
                {
                    const float32x4_t scale_v = vdupq_n_f32(scale);
                    // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
                    vres1 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres1), scale_v));
                    vres2 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres2), scale_v));
                    vres3 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres3), scale_v));
                    vres4 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres4), scale_v));

                    const q8x8_t res1 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres1), wrapper::vmovn(vres2)));
                    const q8x8_t res2 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres3), wrapper::vmovn(vres4)));
                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, res1);
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, res2);
                }
            }
            else
            {
                q8x16_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_128_tag{});

                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        vres = wrapper::vmax(vres, data);
                    }
                }

                // Store result
                wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(vres), wrapper::vgethigh(vres), requant_qinfo) : vres);
            }
        }

        if(pool_info.pool_type == PoolingType::MAX)
        {
            for(; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
            {
                q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        vres = wrapper::vmax(vres, data);
                    }
                }

                // Store result
                wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
            }
        }

        // Left-overs loop
        for(; x_off < window_end_x; ++x_off)
        {
            if(pool_info.pool_type != PoolingType::MAX)
            {
                q32_t res = static_cast<q32_t>(0.f);

                // Calculate scale
                const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                        pool_stride_y);

                // Perform pooling
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        res += data;
                    }
                }

                if(src_qinfo != dst_qinfo)
                {
                    const float res_f           = static_cast<float>(res);
                    const float new_scale       = quant_rescale / scale;
                    const auto  requantized_dst = quantize<T>(res_f, UniformQuantizationInfo(new_scale, new_offset));

                    // Store result
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = requantized_dst;
                }
                else
                {
                    // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
                    res = static_cast<T>(0.5f + static_cast<float>(res) * scale);

                    // Store result
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                }
            }
            else
            {
                T res = std::numeric_limits<T>::min();

                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        res = std::max(res, data);
                    }
                }

                // Store result
                if(src_qinfo != dst_qinfo)
                {
                    const float res_f                           = static_cast<float>(res);
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = quantize<T>(res_f, requant_qinfo);
                }
                else
                {
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                }
            }
        }
    },
    in, out);
}

#if defined(ENABLE_NCHW_KERNELS)
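// Apply a per-lane average-pooling scale to eight 16-bit accumulators; each
// lane corresponds to a different output column and therefore has its own
// effective window area.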
template <typename T, typename TVec>
inline void scale_vector_q16x8(bool exclude_padding, TVec &v, const Coordinates &id, int id_offset, int step,
                               const int pool_size, const int upper_bound_w, const int upper_bound_h,
                               const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    int       start_x = (id.x() + id_offset) * stride_x - pad_x;
    int       start_y = id.y() * stride_y - pad_y;
    const int end_y   = std::min(start_y + pool_size, upper_bound_h);
    if(exclude_padding)
    {
        start_y = std::max(0, start_y);
    }

    std::array<T, 8> elems =
    {
        {
            wrapper::vgetlane(v, 0),
            wrapper::vgetlane(v, 1),
            wrapper::vgetlane(v, 2),
            wrapper::vgetlane(v, 3),
            wrapper::vgetlane(v, 4),
            wrapper::vgetlane(v, 5),
            wrapper::vgetlane(v, 6),
            wrapper::vgetlane(v, 7),
        }
    };

    for(auto &el : elems)
    {
        int       c_start_x = start_x;
        const int end_x     = std::min(c_start_x + pool_size, upper_bound_w);
        if(exclude_padding)
        {
            c_start_x = std::max(0, c_start_x);
        }
        float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
        el *= scale;
        start_x += step * stride_x;
    }

    v = wrapper::vsetlane(elems[0], v, 0);
    v = wrapper::vsetlane(elems[1], v, 1);
    v = wrapper::vsetlane(elems[2], v, 2);
    v = wrapper::vsetlane(elems[3], v, 3);
    v = wrapper::vsetlane(elems[4], v, 4);
    v = wrapper::vsetlane(elems[5], v, 5);
    v = wrapper::vsetlane(elems[6], v, 6);
    v = wrapper::vsetlane(elems[7], v, 7);
}

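// Quantized 2x2 pooling for NCHW. When stride_x == 1 a second result vector
// is computed from the shifted row sums so that 16 outputs are stored per
// iteration.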
template <typename T>
void pooling2_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** Neon vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q8x8x2_t  = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x4_t   = typename wrapper::traits::neon_vector<q16_t, 4>::type;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 2;
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const T *const src_top_ptr    = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));

    const int scale_step_x = (pool_stride_x == 1) ? 2 : 1;

    const UniformQuantizationInfo src_qinfo            = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo            = dst0->info()->quantization_info().uniform();
    const bool                    have_different_qinfo = src_qinfo != dst_qinfo;

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto top_data    = wrapper::vloadq(src_top_ptr + in.offset());
        const auto bottom_data = wrapper::vloadq(src_bottom_ptr + in.offset());
        q8x8_t     lower_res   = {};
        q8x8_t     upper_res   = {};

        if(pool_info.pool_type != PoolingType::MAX)
        {
            const q16x8x2_t top_data_q16    = { { wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data)) } };
            const q16x8x2_t bottom_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data)) } };

            // Add rows
            const q16x8x2_t vrsum =
            {
                {
                    wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]),
                    wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]),
                }
            };

            // Pair-wise add row data
            const q16x4_t vpsum_1 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[0]), wrapper::vgethigh(vrsum.val[0]));
            const q16x4_t vpsum_2 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[1]), wrapper::vgethigh(vrsum.val[1]));

            q16x8_t res_lower = wrapper::vcombine(vpsum_1, vpsum_2);

            // Scale lower result
            scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_lower, id, 0, scale_step_x,
                                               pool_size, upper_bound_w, upper_bound_h,
                                               pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
            lower_res = wrapper::vmovn(res_lower);

            // Compute upper result for stride_x == 1
            if(pool_stride_x == 1)
            {
                // Shifted row sum
                const q16x8x2_t vrsum_shifted =
                {
                    {
                        wrapper::vext_1(vrsum.val[0], vrsum.val[1]),
                        wrapper::vext_1(vrsum.val[1], vrsum.val[1])
                    }
                };

                // Pair-wise add shifted row
                q16x8_t res_upper = wrapper::vcombine(
                                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[0]), wrapper::vgethigh(vrsum_shifted.val[0])),
                                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[1]), wrapper::vgethigh(vrsum_shifted.val[1])));

                // Scale upper result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_upper, id, 1, 2,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                upper_res = wrapper::vmovn(res_upper);
            }
        }
        else
        {
            const q8x16_t max_data = wrapper::vmax(top_data, bottom_data);
            lower_res              = wrapper::vpmax(wrapper::vgetlow(max_data), wrapper::vgethigh(max_data));
            if(pool_stride_x == 1)
            {
                const q8x16_t max_data_shifted = wrapper::vext_1(max_data, max_data);
                upper_res                      = wrapper::vpmax(wrapper::vgetlow(max_data_shifted), wrapper::vgethigh(max_data_shifted));
            }
        }

        if(have_different_qinfo)
        {
            const auto requantized_dst = vrequantize_pooling<q8x8_t, q8x16_t>(lower_res, upper_res, requant_qinfo);
            lower_res                  = wrapper::vgetlow(requantized_dst);
            upper_res                  = wrapper::vgethigh(requantized_dst);
        }

        // Store result
        if(pool_stride_x == 1)
        {
            const q8x8x2_t res = { { lower_res, upper_res } };
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()), res);
        }
        else
        {
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()), lower_res);
        }
    },
    in, out);
}

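// Quantized 3x3 pooling for NCHW: two shifted copies of the row sums (or row
// maxima) evaluate three overlapping windows per vector.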
template <typename T>
void pooling3_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** Neon vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q8x8x2_t  = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 3;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo = dst0->info()->quantization_info().uniform();

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);

    const T *const src_top_ptr    = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_middle_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2)));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto top_data    = wrapper::vloadq(src_top_ptr + in.offset());
        const auto middle_data = wrapper::vloadq(src_middle_ptr + in.offset());
        const auto bottom_data = wrapper::vloadq(src_bottom_ptr + in.offset());
        q8x8_t     fres        = {};
        q8x16_t    fqres       = {};

        if(pool_info.pool_type == PoolingType::AVG)
        {
            // Widen data to 16 bits
            const q16x8x2_t top_data_q16    = { { wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data)) } };
            const q16x8x2_t middle_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(middle_data)), wrapper::vmovl(wrapper::vgethigh(middle_data)) } };
            const q16x8x2_t bottom_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data)) } };

            // Calculate row sums
            const q16x8x2_t vrsum =
            {
                {
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]), middle_data_q16.val[0]),
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]), middle_data_q16.val[1]),
                }
            };
            const q16x8x2_t vrsum_shifted_1 =
            {
                {
                    wrapper::vext_1(vrsum.val[0], vrsum.val[1]),
                    wrapper::vext_1(vrsum.val[1], vrsum.val[1])
                }
            };
            const q16x8x2_t vrsum_shifted_2 =
            {
                {
                    wrapper::vext_2(vrsum.val[0], vrsum.val[1]),
                    wrapper::vext_2(vrsum.val[1], vrsum.val[1])
                }
            };
            // Calculate final sum
            q16x8x2_t final_sum =
            {
                {
                    wrapper::vadd(wrapper::vadd(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
                    wrapper::vadd(wrapper::vadd(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
                }
            };
            if(pool_stride_x == 2)
            {
                q16x8_t res =
                {
                    wrapper::vgetlane(final_sum.val[0], 0),
                    wrapper::vgetlane(final_sum.val[0], 2),
                    wrapper::vgetlane(final_sum.val[0], 4),
                    wrapper::vgetlane(final_sum.val[0], 6),
                    wrapper::vgetlane(final_sum.val[1], 0),
                    wrapper::vgetlane(final_sum.val[1], 2),
                    wrapper::vgetlane(final_sum.val[1], 4),
                    wrapper::vgetlane(final_sum.val[1], 6),
                };

                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res, id, 0, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                fres = wrapper::vmovn(res);
            }
            else
            {
                // Scale lower result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[0], id, 0, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                // Scale upper result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[1], id, 8, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                fqres = wrapper::vcombine(wrapper::vmovn(final_sum.val[0]), wrapper::vmovn(final_sum.val[1]));
            }
        }
        else
        {
            const q8x16_t max_data        = wrapper::vmax(wrapper::vmax(top_data, bottom_data), middle_data);
            const q8x16_t max_data_shift1 = wrapper::vext_1(max_data, max_data);
            const q8x16_t max_data_shift2 = wrapper::vext_2(max_data, max_data);
            const q8x16_t final_max       = wrapper::vmax(wrapper::vmax(max_data, max_data_shift1), max_data_shift2);

            if(pool_stride_x == 2)
            {
                const q8x8x2_t      table      = { { wrapper::vgetlow(final_max), wrapper::vgethigh(final_max) } };
                static const q8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
                fres                           = wrapper::vtbl(table, lookup_val);
            }
            else
            {
                fqres = final_max;
            }
        }

        // Store result
        if(pool_stride_x == 1)
        {
            if(src_qinfo != dst_qinfo)
            {
                fqres = vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(fqres), wrapper::vgethigh(fqres), requant_qinfo);
            }
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()), fqres);
        }
        else
        {
            if(src_qinfo != dst_qinfo)
            {
                fres = vrequantize_pooling<q8x8_t>(fres, requant_qinfo);
            }
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()), fres);
        }
    },
    in, out);
}

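// Generic quantized MxN pooling for NCHW: vectorizes along x in chunks of
// eight elements and reduces each window to a single scalar output.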
template <typename T>
void poolingMxN_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** Neon vector types */
    using q8x8_t  = typename wrapper::traits::neon_vector<T, 8>::type;
    using q16_t   = typename wrapper::traits::promote_t<T>;
    using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q32_t   = typename wrapper::traits::promote_t<q16_t>;
    using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().x() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int       pool_stride_x   = 0;
    int       pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo = dst0->info()->quantization_info().uniform();

    execute_window_loop(window, [&](const Coordinates & id)
    {
        T res = std::numeric_limits<T>::min();

        if(pool_info.pool_type != PoolingType::MAX)
        {
            q32x4_t vres = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
            q32_t   sres = 0;

            // Calculate scale
            const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                    pool_stride_y);

            // Perform pooling
            for(int y = 0; y < pool_size_y; ++y)
            {
                int x = 0;
                for(; x <= (pool_size_x - 8); x += 8)
                {
                    const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().y())));

                    const q16x8_t data_q16 = wrapper::vmovl(data);
                    vres                   = wrapper::vadd(vres, wrapper::vaddl(wrapper::vgethigh(data_q16), wrapper::vgetlow(data_q16)));
                }

                // Leftover for loop
                for(; x < pool_size_x; ++x)
                {
                    T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().y())));
                    sres += data;
                }
            }

            // Reduction
            const auto tmp = wrapper::vpadd(wrapper::vgethigh(vres), wrapper::vgetlow(vres));
            sres += wrapper::vgetlane(tmp, 0) + wrapper::vgetlane(tmp, 1);

            // Divide by scale
            res = static_cast<T>(support::cpp11::round(sres * scale));
        }
        else
        {
            q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});

            for(int y = 0; y < pool_size_y; ++y)
            {
                int x = 0;
                for(; x <= (pool_size_x - 8); x += 8)
                {
                    const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().y())));
                    vres              = wrapper::vmax(vres, data);
                }
                // Leftover for loop
                for(; x < pool_size_x; ++x)
                {
                    const T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().x()) + (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().y())));
                    res          = std::max(res, data);
                }
            }

            // Reduce max
            vres = wrapper::vpmax(vres, vres);
            vres = wrapper::vpmax(vres, vres);
            vres = wrapper::vpmax(vres, vres);

            // Get max value
            res = std::max(res, wrapper::vgetlane(vres, 0));
        }
        // Store result
        res                                 = (src_qinfo != dst_qinfo) ? Qasymm8QuantizationHelper<T>::quantize(Qasymm8QuantizationHelper<T>::dequantize(res, src_qinfo), dst_qinfo) : res;
        *(reinterpret_cast<T *>(out.ptr())) = res;
    },
    in, out);
}
#endif /* defined(ENABLE_NCHW_KERNELS) */
} // namespace cpu
} // namespace arm_compute

#endif // SRC_CORE_NEON_KERNELS_QUANTIZED_H