Compute Library 22.02 - quantized.h
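Quantized pooling kernels (QASYMM8 and QASYMM8_SIGNED) for Arm NEON: requantization helpers plus NHWC and NCHW pooling implementations.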
/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_CORE_NEON_KERNELS_QUANTIZED_H
#define SRC_CORE_NEON_KERNELS_QUANTIZED_H

#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/NEAsymm.h"
#include "src/core/NEON/NEFixedPoint.h"
#include "src/core/NEON/NEMath.h"
#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>

namespace arm_compute
{
namespace cpu
{
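/** Quantize a single float value to the 8-bit asymmetric type selected by T.
 *
 * Dispatches via SFINAE: int8_t maps to quantize_qasymm8_signed(), uint8_t to
 * quantize_qasymm8(). For example, quantize<uint8_t>(0.5f,
 * UniformQuantizationInfo(1.f / 255.f, 0)) yields 128.
 */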
template <typename T>
inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type
quantize(float val, const UniformQuantizationInfo &info)
{
    return quantize_qasymm8_signed(val, info);
}

template <typename T>
inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type
quantize(float val, const UniformQuantizationInfo &info)
{
    return quantize_qasymm8(val, info);
}

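/** Type-dispatched conversions between 32-bit integer and float32 NEON
 *  vectors, so the pooling kernels below can be written once for both
 *  uint32x4_t and int32x4_t accumulators.
 */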
template <typename T>
inline T vcvtq_q32_f32(float32x4_t values);

template <>
inline uint32x4_t vcvtq_q32_f32(float32x4_t values)
{
    return vcvtq_u32_f32(values);
}

template <>
inline int32x4_t vcvtq_q32_f32(float32x4_t values)
{
    return vcvtq_s32_f32(values);
}

template <typename T>
inline float32x4_t vcvtq_f32_q32(T values);

template <>
inline float32x4_t vcvtq_f32_q32(uint32x4_t values)
{
    return vcvtq_f32_u32(values);
}

template <>
inline float32x4_t vcvtq_f32_q32(int32x4_t values)
{
    return vcvtq_f32_s32(values);
}

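/** Requantize a float accumulator and apply the average-pooling scale in a
 *  single quantization step.
 *
 *  The effective quantization scale is (dst_scale / src_scale) / scale_pooling,
 *  so the result is acc * scale_pooling * src_scale / dst_scale + new_offset,
 *  rounded and saturated to 8 bits.
 */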
template <typename Tout>
inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset);

template <>
inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
{
    const float new_scale = quant_rescale / scale_pooling;
    return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset));
}

template <>
inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, const float quant_rescale, const float scale_pooling, const int32_t new_offset)
{
    const float new_scale = quant_rescale / scale_pooling;
    return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset));
}

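/** Requantize two 8-lane vectors: widen each to float32, then quantize the
 *  16 values back into a single 8-bit vector using requant_qinfo.
 */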
template <typename Tin, typename Tout>
inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo);

template <>
inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x4_t acc =
    {
        {
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))),
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))),
        }
    };
    return vquantize(acc, requant_qinfo);
}

template <>
inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x4_t acc =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))),
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))),
        }
    };
    return vquantize_signed(acc, requant_qinfo);
}

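/** Requantize a single 8-lane vector in the same way. */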
template <typename T>
inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo);

template <>
inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x2_t acc =
    {
        {
            vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))),
            vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))),
        }
    };
    return vquantize(acc, requant_qinfo);
}

template <>
inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
{
    const float32x4x2_t acc =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))),
        }
    };
    return vquantize_signed(acc, requant_qinfo);
}

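/** Scale factor for average pooling: 1 / (number of elements covered by the
 *  pooling window), where the count excludes padding if requested.
 *
 *  Worked example: a 3x3 pool with stride 1 and pad 1 at output position
 *  (0, 0) covers only a 2x2 patch of valid input when exclude_padding is
 *  true, so the returned scale is 1.f / 4.
 */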
inline float calculate_avg_scale(bool exclude_padding, DataLayout data_layout, const Coordinates &id, const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
                                 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    const unsigned int idx_width  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    int start_x = id[idx_width] * stride_x - pad_x;
    int start_y = id[idx_height] * stride_y - pad_y;

    const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
    const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
    if(exclude_padding)
    {
        start_x = std::max(0, start_x);
        start_y = std::max(0, start_y);
    }
    return 1.f / ((end_y - start_y) * (end_x - start_x));
}

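/** MxN quantized pooling for NHWC tensors.
 *
 * The channel dimension is mapped to the window's X dimension, so id.y() and
 * id.z() index the spatial W and H positions. The main loop processes 16
 * channels at a time; MAX pooling has an additional 8-wide step, and a scalar
 * loop handles the leftovers. Average pooling accumulates in 32 bits and
 * either requantizes in one step (when src and dst qinfo differ) or divides
 * by the pooling scale with round-to-nearest.
 */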
template <typename T>
void poolingMxN_q8_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);

    const int window_start_x     = window.x().start();
    const int window_end_x       = window.x().end();
    const int window_step_x      = 16;
    const int window_half_step_x = window_step_x / 2;

    Window window_out = window;
    window_out.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator in(src, window_src);
    Iterator out(dst0, window_out);

    using q8x8_t  = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
    using q16_t   = typename wrapper::traits::promote_t<T>;
    using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q32_t   = typename wrapper::traits::promote_t<q16_t>;
    using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();

    int pool_stride_x = 0;
    int pool_stride_y = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const float32x4_t             half_scale_v = vdupq_n_f32(0.5f);
    const UniformQuantizationInfo src_qinfo    = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo    = dst0->info()->quantization_info().uniform();

    const float quant_rescale = dst_qinfo.scale / src_qinfo.scale;
    // "new_offset" does not need to account for "half_scale_v": the
    // requantization is performed in a single step, so no extra rounding
    // uncertainty is introduced.
    const int32_t new_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / quant_rescale);

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);

    execute_window_loop(window_out, [&](const Coordinates & id)
    {
        const int idx_width    = id.y() * pool_stride_x;
        const int idx_height   = id.z() * pool_stride_y;
        const int pool_limit_y = pool_pad_top - idx_height;
        const int pool_limit_x = pool_pad_left - idx_width;

        const int pool_start_y = std::max(0, window_src.z().start() + pool_limit_y);
        const int pool_end_y   = std::min(pool_size_y, window_src.z().end() + pool_limit_y);
        const int pool_start_x = std::max(0, window_src.y().start() + pool_limit_x);
        const int pool_end_x   = std::min(pool_size_x, window_src.y().end() + pool_limit_x);

        int x_off = window_start_x;
        for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
        {
            if(pool_info.pool_type != PoolingType::MAX)
            {
                q32x4_t vres1 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres2 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres3 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
                q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});

                // Calculate scale
                const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                        pool_stride_y);

                // Perform pooling
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                                                                         (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);

                        const q16x8_t data_q16  = wrapper::vmovl(wrapper::vgetlow(data));
                        const q16x8_t data2_q16 = wrapper::vmovl(wrapper::vgethigh(data));
                        vres1 = wrapper::vadd(vres1, wrapper::vmovl(wrapper::vgetlow(data_q16)));
                        vres2 = wrapper::vadd(vres2, wrapper::vmovl(wrapper::vgethigh(data_q16)));
                        vres3 = wrapper::vadd(vres3, wrapper::vmovl(wrapper::vgetlow(data2_q16)));
                        vres4 = wrapper::vadd(vres4, wrapper::vmovl(wrapper::vgethigh(data2_q16)));
                    }
                }

                if(src_qinfo != dst_qinfo)
                {
                    const float32x4x4_t vres =
                    {
                        {
                            vcvtq_f32_q32(vres1),
                            vcvtq_f32_q32(vres2),
                            vcvtq_f32_q32(vres3),
                            vcvtq_f32_q32(vres4),
                        }
                    };
                    const auto requantized_dst = vrequantize_pooling_with_scale<q8x16_t>(vres, quant_rescale, scale, new_offset);
                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, wrapper::vgetlow(requantized_dst));
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, wrapper::vgethigh(requantized_dst));
                }
                else
                {
                    const float32x4_t scale_v = vdupq_n_f32(scale);
                    // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
                    vres1 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres1), scale_v));
                    vres2 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres2), scale_v));
                    vres3 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres3), scale_v));
                    vres4 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres4), scale_v));

                    const q8x8_t res1 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres1), wrapper::vmovn(vres2)));
                    const q8x8_t res2 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres3), wrapper::vmovn(vres4)));
                    // Store result
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, res1);
                    wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, res2);
                }
            }
            else
            {
                q8x16_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_128_tag{});

                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                                                                         (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        vres = wrapper::vmax(vres, data);
                    }
                }

                // Store result
                wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(vres), wrapper::vgethigh(vres), requant_qinfo) : vres);
            }
        }

        if(pool_info.pool_type == PoolingType::MAX)
        {
            for(; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
            {
                q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                                                                       (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        vres = wrapper::vmax(vres, data);
                    }
                }

                // Store result
                wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
                                (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
            }
        }

        // Left-overs loop
        for(; x_off < window_end_x; ++x_off)
        {
            if(pool_info.pool_type != PoolingType::MAX)
            {
                q32_t res = static_cast<q32_t>(0.f);

                // Calculate scale
                const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NHWC, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                        pool_stride_y);

                // Perform pooling
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                                                     (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        res += data;
                    }
                }

                if(src_qinfo != dst_qinfo)
                {
                    const float res_f           = static_cast<float>(res);
                    const float new_scale       = quant_rescale / scale;
                    const auto  requantized_dst = quantize<T>(res_f, UniformQuantizationInfo(new_scale, new_offset));

                    // Store result
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = requantized_dst;
                }
                else
                {
                    // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
                    res = static_cast<T>(0.5f + static_cast<float>(res) * scale);

                    // Store result
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                }
            }
            else
            {
                T res = std::numeric_limits<T>::min();

                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const T data = *(reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * static_cast<int>(src->info()->strides_in_bytes().y()) +
                                                                     (y - pool_pad_top) * static_cast<int>(src->info()->strides_in_bytes().z())) + x_off);
                        res = std::max(res, data);
                    }
                }

                // Store result
                if(src_qinfo != dst_qinfo)
                {
                    const float res_f                           = static_cast<float>(res);
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = quantize<T>(res_f, requant_qinfo);
                }
                else
                {
                    *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
                }
            }
        }
    },
    in, out);
}

#if defined(ENABLE_NCHW_KERNELS)
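/** Scale each 16-bit lane of an average-pooling row sum by its own
 *  1 / (pool area) factor.
 *
 *  Each lane corresponds to a different output x position (starting id_offset
 *  positions after id.x() and advancing by step positions per lane), so the
 *  valid pool area can differ per lane near the borders.
 */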
template <typename T, typename TVec>
inline void scale_vector_q16x8(bool exclude_padding, TVec &v, const Coordinates &id, int id_offset, int step,
                               const int pool_size, const int upper_bound_w, const int upper_bound_h,
                               const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    int       start_x = (id.x() + id_offset) * stride_x - pad_x;
    int       start_y = id.y() * stride_y - pad_y;
    const int end_y   = std::min(start_y + pool_size, upper_bound_h);
    if(exclude_padding)
    {
        start_y = std::max(0, start_y);
    }

    std::array<T, 8> elems =
    {
        {
            wrapper::vgetlane(v, 0),
            wrapper::vgetlane(v, 1),
            wrapper::vgetlane(v, 2),
            wrapper::vgetlane(v, 3),
            wrapper::vgetlane(v, 4),
            wrapper::vgetlane(v, 5),
            wrapper::vgetlane(v, 6),
            wrapper::vgetlane(v, 7),
        }
    };

    for(auto &el : elems)
    {
        int       c_start_x = start_x;
        const int end_x     = std::min(c_start_x + pool_size, upper_bound_w);
        if(exclude_padding)
        {
            c_start_x = std::max(0, c_start_x);
        }
        float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
        el *= scale;
        start_x += step * stride_x;
    }

    v = wrapper::vsetlane(elems[0], v, 0);
    v = wrapper::vsetlane(elems[1], v, 1);
    v = wrapper::vsetlane(elems[2], v, 2);
    v = wrapper::vsetlane(elems[3], v, 3);
    v = wrapper::vsetlane(elems[4], v, 4);
    v = wrapper::vsetlane(elems[5], v, 5);
    v = wrapper::vsetlane(elems[6], v, 6);
    v = wrapper::vsetlane(elems[7], v, 7);
}

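/** Load 16 elements, substituting fval for any position that falls in the
 *  padded border rather than inside the source tensor.
 */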
template <typename T>
auto load16_boundary_aware(int srcw, int srch, int pad_l, int pad_r, int pad_t, int pad_b, int x, int y, const T *ptr, T fval)
{
    ARM_COMPUTE_UNUSED(pad_b, pad_r);
    T vec[16];
    // Handle reading a row out of the tensor
    const bool row_in_bounds((y >= pad_t) && (y < (srch + pad_t)));
    for(int i = 0; i < 16; i++)
    {
        if(row_in_bounds && (x + i >= pad_l) && (x + i < (srcw + pad_l)))
        {
            vec[i] = *(ptr + i);
        }
        else
        {
            vec[i] = fval;
        }
    }
    return wrapper::vloadq(vec);
}

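/** Store up to 16 results without writing past the end of the output row.
 *  When deinterleave is true, lower and upper hold the even and odd output
 *  positions respectively (the stride-1 pooling case).
 */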
template <typename T, typename V, bool deinterleave>
inline void write16_boundary_aware(int x, int dst_w, const V &lower, const V &upper, T *ptr)
{
    if(deinterleave)
    {
        for(int i = 0; i < 8 && (i * 2 + x) < dst_w; ++i)
        {
            *(ptr + i * 2) = lower[i];
        }
        for(int i = 0; i < 8 && (i * 2 + x + 1) < dst_w; ++i)
        {
            *(ptr + 1 + i * 2) = upper[i];
        }
    }
    else
    {
        for(int i = 0; i < 8 && (i + x) < dst_w; ++i)
        {
            *(ptr + i) = lower[i];
        }
        for(int i = 0; i < 8 && (i + x + 8) < dst_w; ++i)
        {
            *(ptr + i + 8) = upper[i];
        }
    }
}

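/** Store up to 8 results without writing past the end of the output row. */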
template <typename T, typename V>
inline void write8_boundary_aware(int x, int dst_w, const V &v, T *ptr)
{
    for(int i = 0; i < 8 && (i + x) < dst_w; ++i)
    {
        *(ptr + i) = v[i];
    }
}

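/** 2x2 quantized pooling for NCHW tensors.
 *
 * Loads two adjacent input rows (16 elements each), combines them with either
 * a widened add (AVG) or a max (MAX), then pair-wise reduces across x. With
 * stride_x == 1 a second, one-element-shifted reduction produces the odd
 * output columns, and the two halves are written back interleaved.
 */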
template <typename T>
void pooling2_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x4_t   = typename wrapper::traits::neon_vector<q16_t, 4>::type;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 2;
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const T *const src_top_ptr    = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));

    const int                     scale_step_x         = (pool_stride_x == 1) ? 2 : 1;
    const UniformQuantizationInfo src_qinfo            = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo dst_qinfo            = dst0->info()->quantization_info().uniform();
    const bool                    have_different_qinfo = src_qinfo != dst_qinfo;

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);
    const int                     src_w          = src->info()->dimension(0);
    const int                     src_h          = src->info()->dimension(1);
    const int                     dst_w          = dst0->info()->dimension(0);

    const T fill_value = (pool_info.pool_type == PoolingType::MAX) ? std::numeric_limits<T>::min() : T(0);

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto x_val   = id.x() * pool_stride_x;
        const auto y_val_0 = id.y() * pool_stride_y;
        const auto y_val_1 = (id.y() * pool_stride_y) + 1;

        auto top_data = load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom,
                                              x_val, y_val_0, reinterpret_cast<const T *>(src_top_ptr + in.offset()), fill_value);
        auto bottom_data = load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom,
                                                 x_val, y_val_1, reinterpret_cast<const T *>(src_bottom_ptr + in.offset()), fill_value);

        q8x8_t lower_res = {};
        q8x8_t upper_res = {};

        if(pool_info.pool_type != PoolingType::MAX)
        {
            const q16x8x2_t top_data_q16    = { { wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data)) } };
            const q16x8x2_t bottom_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data)) } };

            // Add rows
            const q16x8x2_t vrsum =
            {
                {
                    wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]),
                    wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]),
                }
            };

            // Pair-wise add row data
            const q16x4_t vpsum_1 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[0]), wrapper::vgethigh(vrsum.val[0]));
            const q16x4_t vpsum_2 = wrapper::vpadd(wrapper::vgetlow(vrsum.val[1]), wrapper::vgethigh(vrsum.val[1]));

            q16x8_t res_lower = wrapper::vcombine(vpsum_1, vpsum_2);

            // Scale lower result
            scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_lower, id, 0, scale_step_x,
                                               pool_size, upper_bound_w, upper_bound_h,
                                               pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
            lower_res = wrapper::vmovn(res_lower);

            // Compute upper result for stride_x == 1
            if(pool_stride_x == 1)
            {
                // Shifted row sum
                const q16x8x2_t vrsum_shifted =
                {
                    {
                        wrapper::vext_1(vrsum.val[0], vrsum.val[1]),
                        wrapper::vext_1(vrsum.val[1], vrsum.val[1])
                    }
                };

                // Pair-wise add shifted row
                q16x8_t res_upper = wrapper::vcombine(
                                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[0]), wrapper::vgethigh(vrsum_shifted.val[0])),
                                        wrapper::vpadd(wrapper::vgetlow(vrsum_shifted.val[1]), wrapper::vgethigh(vrsum_shifted.val[1])));

                // Scale upper result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res_upper, id, 1, 2,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                upper_res = wrapper::vmovn(res_upper);
            }
        }
        else
        {
            const q8x16_t max_data = wrapper::vmax(top_data, bottom_data);
            lower_res              = wrapper::vpmax(wrapper::vgetlow(max_data), wrapper::vgethigh(max_data));
            if(pool_stride_x == 1)
            {
                const q8x16_t max_data_shifted = wrapper::vext_1(max_data, max_data);
                upper_res                      = wrapper::vpmax(wrapper::vgetlow(max_data_shifted), wrapper::vgethigh(max_data_shifted));
            }
        }

        if(have_different_qinfo)
        {
            const auto requantized_dst = vrequantize_pooling<q8x8_t, q8x16_t>(lower_res, upper_res, requant_qinfo);
            lower_res                  = wrapper::vgetlow(requantized_dst);
            upper_res                  = wrapper::vgethigh(requantized_dst);
        }
        auto out_ptr = reinterpret_cast<T *>(out.ptr());
        // Store result
        if(pool_stride_x == 1)
        {
            write16_boundary_aware<T, q8x8_t, true>(id.x(), dst_w, lower_res, upper_res, out_ptr);
        }
        else
        {
            write8_boundary_aware<T, q8x8_t>(id.x(), dst_w, lower_res, out_ptr);
        }
    },
    in, out);
}

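/** 3x3 quantized pooling for NCHW tensors.
 *
 * Loads three input rows, reduces them with widened adds (AVG) or max (MAX),
 * then combines each result with its one- and two-element-shifted copies to
 * form the horizontal 3-wide reduction. stride_x == 2 selects every other
 * column (via lane extraction for AVG, or a vtbl lookup for MAX).
 */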
template <typename T>
void pooling3_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q8x8_t    = typename wrapper::traits::neon_vector<T, 8>::type;
    using q8x16_t   = typename wrapper::traits::neon_vector<T, 16>::type;
    using q8x8x2_t  = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
    using q16_t     = typename wrapper::traits::promote_t<T>;
    using q16x8_t   = typename wrapper::traits::neon_vector<q16_t, 8>::type;
    using q16x8x2_t = typename wrapper::traits::neon_vector<q16_t, 16>::type;

    constexpr int pool_size       = 3;
    const int     pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int     pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int     pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int     pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int           pool_stride_x   = 0;
    int           pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo = dst0->info()->quantization_info().uniform();

    const float                   requant_scale  = dst_qinfo.scale / src_qinfo.scale;
    const int32_t                 requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
    const UniformQuantizationInfo requant_qinfo  = UniformQuantizationInfo(requant_scale, requant_offset);

    const T *const src_top_ptr    = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))));
    const T *const src_middle_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)));
    const T *const src_bottom_ptr = reinterpret_cast<const T *>(src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2)));

    const int src_w      = src->info()->dimension(0);
    const int src_h      = src->info()->dimension(1);
    const T   fill_value = (pool_info.pool_type == PoolingType::AVG) ? T(0) : std::numeric_limits<T>::min();
    const int dst_w      = dst0->info()->dimension(0);

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto x_val   = id.x() * pool_stride_x;
        const auto y_val_0 = id.y() * pool_stride_y;
        const auto y_val_1 = (id.y() * pool_stride_y) + 1;
        const auto y_val_2 = (id.y() * pool_stride_y) + 2;

        auto top_data = load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom,
                                              x_val, y_val_0, reinterpret_cast<const T *>(src_top_ptr + in.offset()), fill_value);
        auto middle_data = load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom,
                                                 x_val, y_val_1, reinterpret_cast<const T *>(src_middle_ptr + in.offset()), fill_value);
        auto bottom_data = load16_boundary_aware(src_w, src_h, pool_pad_left, pool_pad_right, pool_pad_top, pool_pad_bottom,
                                                 x_val, y_val_2, reinterpret_cast<const T *>(src_bottom_ptr + in.offset()), fill_value);

        q8x8_t  fres  = {};
        q8x16_t fqres = {};

        if(pool_info.pool_type == PoolingType::AVG)
        {
            // Widen data to 16 bits
            const q16x8x2_t top_data_q16    = { { wrapper::vmovl(wrapper::vgetlow(top_data)), wrapper::vmovl(wrapper::vgethigh(top_data)) } };
            const q16x8x2_t middle_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(middle_data)), wrapper::vmovl(wrapper::vgethigh(middle_data)) } };
            const q16x8x2_t bottom_data_q16 = { { wrapper::vmovl(wrapper::vgetlow(bottom_data)), wrapper::vmovl(wrapper::vgethigh(bottom_data)) } };

            // Calculate row sums
            const q16x8x2_t vrsum =
            {
                {
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[0], bottom_data_q16.val[0]), middle_data_q16.val[0]),
                    wrapper::vadd(wrapper::vadd(top_data_q16.val[1], bottom_data_q16.val[1]), middle_data_q16.val[1]),
                }
            };
            const q16x8x2_t vrsum_shifted_1 =
            {
                {
                    wrapper::vext_1(vrsum.val[0], vrsum.val[1]),
                    wrapper::vext_1(vrsum.val[1], vrsum.val[1])
                }
            };
            const q16x8x2_t vrsum_shifted_2 =
            {
                {
                    wrapper::vext_2(vrsum.val[0], vrsum.val[1]),
                    wrapper::vext_2(vrsum.val[1], vrsum.val[1])
                }
            };
            // Calculate final sum
            q16x8x2_t final_sum =
            {
                {
                    wrapper::vadd(wrapper::vadd(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
                    wrapper::vadd(wrapper::vadd(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
                }
            };
            if(pool_stride_x == 2)
            {
                q16x8_t res =
                {
                    wrapper::vgetlane(final_sum.val[0], 0),
                    wrapper::vgetlane(final_sum.val[0], 2),
                    wrapper::vgetlane(final_sum.val[0], 4),
                    wrapper::vgetlane(final_sum.val[0], 6),
                    wrapper::vgetlane(final_sum.val[1], 0),
                    wrapper::vgetlane(final_sum.val[1], 2),
                    wrapper::vgetlane(final_sum.val[1], 4),
                    wrapper::vgetlane(final_sum.val[1], 6),
                };

                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, res, id, 0, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                fres = wrapper::vmovn(res);
            }
            else
            {
                // Scale lower result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[0], id, 0, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                // Scale upper result
                scale_vector_q16x8<q16_t, q16x8_t>(pool_info.exclude_padding, final_sum.val[1], id, 8, 1,
                                                   pool_size, upper_bound_w, upper_bound_h,
                                                   pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
                fqres = wrapper::vcombine(wrapper::vmovn(final_sum.val[0]), wrapper::vmovn(final_sum.val[1]));
            }
        }
        else
        {
            const q8x16_t max_data        = wrapper::vmax(wrapper::vmax(top_data, bottom_data), middle_data);
            const q8x16_t max_data_shift1 = wrapper::vext_1(max_data, max_data);
            const q8x16_t max_data_shift2 = wrapper::vext_2(max_data, max_data);
            const q8x16_t final_max       = wrapper::vmax(wrapper::vmax(max_data, max_data_shift1), max_data_shift2);

            if(pool_stride_x == 2)
            {
                const q8x8x2_t      table      = { { wrapper::vgetlow(final_max), wrapper::vgethigh(final_max) } };
                static const q8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
                fres                           = wrapper::vtbl(table, lookup_val);
            }
            else
            {
                fqres = final_max;
            }
        }

        // Store result
        if(pool_stride_x == 1)
        {
            if(src_qinfo != dst_qinfo)
            {
                fqres = vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(fqres), wrapper::vgethigh(fqres), requant_qinfo);
            }
            write16_boundary_aware<T, q8x8_t, false>(id.x(), dst_w, wrapper::vgetlow(fqres), wrapper::vgethigh(fqres), reinterpret_cast<T *>(out.ptr()));
        }
        else
        {
            if(src_qinfo != dst_qinfo)
            {
                fres = vrequantize_pooling<q8x8_t>(fres, requant_qinfo);
            }
            write8_boundary_aware<T, q8x8_t>(id.x(), dst_w, fres, reinterpret_cast<T *>(out.ptr()));
        }
    },
    in, out);
}

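/** MxN quantized pooling for NCHW tensors, scalar fallback.
 *
 * Each output element is computed independently: out-of-bounds reads are
 * replaced with the fill value, AVG accumulates in 32 bits and rounds, and a
 * dequantize/quantize pair requantizes when src and dst qinfo differ.
 */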
template <typename T>
void poolingMxN_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
    ARM_COMPUTE_UNUSED(dst1);
    Iterator in(src, window_src);
    Iterator out(dst0, window);

    /** SIMD vector types */
    using q16_t = typename wrapper::traits::promote_t<T>;
    using q32_t = typename wrapper::traits::promote_t<q16_t>;

    const int pool_size_x     = pool_info.is_global_pooling ? src->info()->tensor_shape().x() : pool_info.pool_size.width;
    const int pool_size_y     = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.height;
    const int pool_pad_right  = pool_info.pad_stride_info.pad_right();
    const int pool_pad_top    = pool_info.pad_stride_info.pad_top();
    const int pool_pad_left   = pool_info.pad_stride_info.pad_left();
    const int pool_pad_bottom = pool_info.pad_stride_info.pad_bottom();
    int       pool_stride_x   = 0;
    int       pool_stride_y   = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
    const int upper_bound_w = src->info()->dimension(0) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);

    const UniformQuantizationInfo &src_qinfo        = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo &dst_qinfo        = dst0->info()->quantization_info().uniform();
    const int                      src_w            = src->info()->dimension(0);
    const int                      src_h            = src->info()->dimension(1);
    const T                        fill_value       = (pool_info.pool_type == PoolingType::AVG) ? T(0) : std::numeric_limits<T>::min();
    const int                      stridex_in_bytes = static_cast<int>(src->info()->strides_in_bytes().x());
    const int                      stridey_in_bytes = static_cast<int>(src->info()->strides_in_bytes().y());

    execute_window_loop(window, [&](const Coordinates & id)
    {
        T res = std::numeric_limits<T>::min();

        if(pool_info.pool_type != PoolingType::MAX)
        {
            q32_t sres = 0;

            // Calculate scale
            const float scale = calculate_avg_scale(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
                                                    pool_stride_y);

            // Perform pooling
            for(int y = 0; y < pool_size_y; ++y)
            {
                for(int x = 0; x < pool_size_x; ++x)
                {
                    const auto in_ptr = reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * stridex_in_bytes + (y - pool_pad_top) * stridey_in_bytes);

                    const int idx  = x + id.x() * pool_stride_x - pool_pad_left;
                    const int idy  = y + id.y() * pool_stride_y - pool_pad_top;
                    const T   data = (idx < 0 || idy < 0 || idx >= src_w || idy >= src_h) ? fill_value : *in_ptr;
                    sres += data;
                }
            }
            // Divide by scale
            res = static_cast<T>(support::cpp11::round(sres * scale));
        }
        else
        {
            for(int y = 0; y < pool_size_y; ++y)
            {
                for(int x = 0; x < pool_size_x; ++x)
                {
                    const auto in_ptr = reinterpret_cast<const T *>(in.ptr() + (x - pool_pad_left) * stridex_in_bytes + (y - pool_pad_top) * stridey_in_bytes);

                    const int idx  = x + id.x() * pool_stride_x - pool_pad_left;
                    const int idy  = y + id.y() * pool_stride_y - pool_pad_top;
                    const T   data = (idx < 0 || idy < 0 || idx >= src_w || idy >= src_h) ? fill_value : *in_ptr;
                    res            = std::max(res, data);
                }
            }
        }
        // Store result
        res                                 = (src_qinfo != dst_qinfo) ? Qasymm8QuantizationHelper<T>::quantize(Qasymm8QuantizationHelper<T>::dequantize(res, src_qinfo), dst_qinfo) : res;
        *(reinterpret_cast<T *>(out.ptr())) = res;
    },
    in, out);
}
#endif /* defined(ENABLE_NCHW_KERNELS) */
} // namespace cpu
} // namespace arm_compute

#endif // SRC_CORE_NEON_KERNELS_QUANTIZED_H