Compute Library
 22.11
quantized.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
25 #define SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
26 
28 #include "arm_compute/core/Types.h"
32 
33 namespace arm_compute
34 {
35 namespace cpu
36 {
37 template <typename T>
38 void avg_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
39  const int window_step_x)
40 
41 {
42  using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
43  using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
44  using q16_t = typename wrapper::traits::promote_t<T>;
45  using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
46  using q32_t = typename wrapper::traits::promote_t<q16_t>;
47  using q32x4_t = typename wrapper::traits::neon_vector<q32_t, 4>::type;
48 
49  int pool_stride_x = static_cast<int>(pool_info.stride.width);
50  int pool_stride_y = static_cast<int>(pool_info.stride.height);
51  int pool_stride_z = static_cast<int>(pool_info.stride.depth);
52 
53  const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
54  const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
55  const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
56 
57  const int pool_pad_top = static_cast<int>(pool_info.padding.top);
58  const int pool_pad_bottom = static_cast<int>(pool_info.padding.bottom);
59  const int pool_pad_left = static_cast<int>(pool_info.padding.left);
60  const int pool_pad_right = static_cast<int>(pool_info.padding.right);
61  const int pool_pad_front = static_cast<int>(pool_info.padding.front);
62  const int pool_pad_back = static_cast<int>(pool_info.padding.back);
63 
64  const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
65  const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
66  const int upper_bound_d = src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);
67 
68  const int input_dim_c = src->info()->dimension(0);
69  const int input_dim_w = src->info()->dimension(1);
70  const int input_dim_h = src->info()->dimension(2);
71  const int input_dim_d = src->info()->dimension(3);
72 
73  const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
74  const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
75  const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
76  const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);
77 
78  const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
79 
80  const int window_end_x = input_dim_c;
81  const int window_start_x = 0;
82 
83  Iterator out(dst0, window_out);
84 
85  const float32x4_t half_scale_v = vdupq_n_f32(0.5f);
86  const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
87  const UniformQuantizationInfo dst_qinfo = dst0->info()->quantization_info().uniform();
88 
89  const float quant_rescale = dst_qinfo.scale / src_qinfo.scale;
90  // "new_offset" doesn't have to consider the "half_scale_v" in its computation
91  // With a requantization performed in a single step there won't be uncertainties introduced
92  const int32_t new_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / quant_rescale);
93 
94  execute_window_loop(window_out, [&](const Coordinates & id)
95  {
96  // Computing the theoretical input starting/ending points
97  const int in_idx_width = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
98  const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
99  const int in_idx_depth = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;
100 
101  const int pool_start_x = std::max(0, -in_idx_width);
102  const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
103  const int pool_start_y = std::max(0, -in_idx_height);
104  const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
105 
106  const int pool_start_z = std::max(0, -in_idx_depth);
107  const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
108 
109  // The end of width to consider in calculation should exclude PAD_X, PAD_Y and PAD_Z
110  const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
111  const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
112  const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
113 
114  // Calculate scale
115  const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
116  pool_pad_top, pool_pad_front, pool_stride_x, pool_stride_y, pool_stride_z);
117 
118  const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
119 
120  int x_off = window_start_x;
121 
122  for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // C
123  {
124  q32x4_t vres1 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
125  q32x4_t vres2 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
126  q32x4_t vres3 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
127  q32x4_t vres4 = wrapper::vdup_n(static_cast<q32_t>(0.f), wrapper::traits::vector_128_tag{});
128 
129  // Perform pooling
130  for(int z = pool_start_z; z < pool_end_z; ++z)
131  {
132  const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
133  for(int y = pool_start_y; y < pool_end_y; ++y)
134  {
135  const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
136  for(int x = pool_start_x; x < pool_end_x; ++x)
137  {
138  const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
139  const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
140 
141  const q16x8_t data_q16 = wrapper::vmovl(wrapper::vgetlow(data));
142  const q16x8_t data2_q16 = wrapper::vmovl(wrapper::vgethigh(data));
143  vres1 = wrapper::vadd(vres1, wrapper::vmovl(wrapper::vgetlow(data_q16)));
144  vres2 = wrapper::vadd(vres2, wrapper::vmovl(wrapper::vgethigh(data_q16)));
145  vres3 = wrapper::vadd(vres3, wrapper::vmovl(wrapper::vgetlow(data2_q16)));
146  vres4 = wrapper::vadd(vres4, wrapper::vmovl(wrapper::vgethigh(data2_q16)));
147  }
148  }
149  }
150 
151  if(src_qinfo != dst_qinfo)
152  {
153  const float32x4x4_t vres =
154  {
155  {
156  vcvtq_f32_q32(vres1),
157  vcvtq_f32_q32(vres2),
158  vcvtq_f32_q32(vres3),
159  vcvtq_f32_q32(vres4),
160  }
161  };
162  const auto requantized_dst = vrequantize_pooling_with_scale<q8x16_t>(vres, quant_rescale, scale, new_offset);
163  // Store result
164  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, wrapper::vgetlow(requantized_dst));
165  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, wrapper::vgethigh(requantized_dst));
166  }
167  else
168  {
169  const float32x4_t scale_v = vdupq_n_f32(scale);
170  // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
171  vres1 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres1), scale_v));
172  vres2 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres2), scale_v));
173  vres3 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres3), scale_v));
174  vres4 = vcvtq_q32_f32<q32x4_t>(wrapper::vmla(half_scale_v, vcvtq_f32_q32(vres4), scale_v));
175 
176  const q8x8_t res1 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres1), wrapper::vmovn(vres2)));
177  const q8x8_t res2 = wrapper::vmovn(wrapper::vcombine(wrapper::vmovn(vres3), wrapper::vmovn(vres4)));
178  // Store result
179  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, res1);
180  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off + 8, res2);
181  }
182  }
183 
184  // Left-overs loop
185  for(; x_off < window_end_x; ++x_off)
186  {
187  q32_t res = static_cast<q32_t>(0.f);
188 
189  // Perform pooling
190  for(int z = pool_start_z; z < pool_end_z; ++z)
191  {
192  const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
193  for(int y = pool_start_y; y < pool_end_y; ++y)
194  {
195  const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
196  for(int x = pool_start_x; x < pool_end_x; ++x)
197  {
198  const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
199  const T data = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
200  res += data;
201  }
202  }
203  }
204 
205  if(src_qinfo != dst_qinfo)
206  {
207  const float res_f = static_cast<float>(res);
208  const float new_scale = quant_rescale / scale;
209  const auto requantized_dst = quantize<T>(res_f, UniformQuantizationInfo(new_scale, new_offset));
210 
211  // Store result
212  *(reinterpret_cast<T *>(out.ptr()) + x_off) = requantized_dst;
213  }
214  else
215  {
216  // Divide by scale and add 0.5f to round to nearest instead of rounding towards zero
217  res = static_cast<T>(0.5f + static_cast<float>(res) * scale);
218 
219  // Store result
220  *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
221  }
222  }
223  },
224  out);
225 }
226 
227 template <typename T>
228 void max_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
229  const int window_step_x)
230 
231 {
232  using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
233  using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
234 
235  const int window_half_step_x = window_step_x / 2;
236 
237  int pool_stride_x = static_cast<int>(pool_info.stride.width);
238  int pool_stride_y = static_cast<int>(pool_info.stride.height);
239  int pool_stride_z = static_cast<int>(pool_info.stride.depth);
240 
241  const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
242  const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
243  const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
244 
245  const int pool_pad_top = static_cast<int>(pool_info.padding.top);
246  const int pool_pad_left = static_cast<int>(pool_info.padding.left);
247  const int pool_pad_front = static_cast<int>(pool_info.padding.front);
248 
249  const int input_dim_c = src->info()->dimension(0);
250  const int input_dim_w = src->info()->dimension(1);
251  const int input_dim_h = src->info()->dimension(2);
252  const int input_dim_d = src->info()->dimension(3);
253 
254  const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
255  const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
256  const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
257  const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);
258 
259  const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
260 
261  const int window_end_x = input_dim_c;
262  const int window_start_x = 0;
263 
264  Iterator out(dst0, window_out);
265 
266  const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
267  const UniformQuantizationInfo dst_qinfo = dst0->info()->quantization_info().uniform();
268 
269  const float requant_scale = dst_qinfo.scale / src_qinfo.scale;
270  const int32_t requant_offset = dst_qinfo.offset - static_cast<int32_t>(static_cast<float>(src_qinfo.offset) / requant_scale);
271  const UniformQuantizationInfo requant_qinfo = UniformQuantizationInfo(requant_scale, requant_offset);
272 
273  execute_window_loop(window_out, [&](const Coordinates & id)
274  {
275  // Computing the theoretical input starting/ending points
276  const int in_idx_width = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
277  const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
278  const int in_idx_depth = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;
279 
280  const int pool_start_x = std::max(0, -in_idx_width);
281  const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
282  const int pool_start_y = std::max(0, -in_idx_height);
283  const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
284 
285  const int pool_start_z = std::max(0, -in_idx_depth);
286  const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
287 
288  // The end of width to consider in calculation should exclude PAD_X, PAD_Y and PAD_Z
289  const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
290  const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
291  const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
292 
293  const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;
294 
295  int x_off = window_start_x;
296 
297  for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // C
298  {
299  q8x16_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_128_tag{});
300 
301  // Perform pooling
302  for(int z = pool_start_z; z < pool_end_z; ++z)
303  {
304  const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
305  for(int y = pool_start_y; y < pool_end_y; ++y)
306  {
307  const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
308  for(int x = pool_start_x; x < pool_end_x; ++x)
309  {
310  const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
311  const q8x16_t data = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
312 
313  vres = wrapper::vmax(vres, data);
314  }
315  }
316  }
317 
318  // Store result
319  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t, q8x16_t>(wrapper::vgetlow(vres), wrapper::vgethigh(vres),
320  requant_qinfo) :
321  vres);
322  }
323 
324  // Leftovers using half the window step
325  for(; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
326  {
327  q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
328 
329  // Perform pooling
330  for(int z = pool_start_z; z < pool_end_z; ++z)
331  {
332  const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
333  for(int y = pool_start_y; y < pool_end_y; ++y)
334  {
335  const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
336  for(int x = pool_start_x; x < pool_end_x; ++x)
337  {
338  const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
339  const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(in_ptr_x) + x_off);
340 
341  vres = wrapper::vmax(vres, data);
342  }
343  }
344  }
345 
346  // Store result
347  wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off,
348  (src_qinfo != dst_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
349  }
350 
351  // Left-overs loop
352  for(; x_off < window_end_x; ++x_off)
353  {
354  T res = std::numeric_limits<T>::min();
355 
356  for(int z = pool_start_z; z < pool_end_z; ++z)
357  {
358  const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
359  for(int y = pool_start_y; y < pool_end_y; ++y)
360  {
361  const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
362  for(int x = pool_start_x; x < pool_end_x; ++x)
363  {
364  const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
365  const T data = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
366 
367  res = std::max(res, data);
368  }
369  }
370  }
371 
372  // Store result
373  if(src_qinfo != dst_qinfo)
374  {
375  const float res_f = static_cast<float>(res);
376  *(reinterpret_cast<T *>(out.ptr()) + x_off) = quantize<T>(res_f, requant_qinfo);
377  }
378  else
379  {
380  *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
381  }
382  }
383  },
384  out);
385 }
386 
387 } // namespace cpu
388 } // namespace arm_compute
389 
390 #endif // SRC_CORE_NEON_KERNELS_POOL3D_QUANTIZED_H
uint32x2_t vmovn(const uint64x2_t &a)
Definition: movn.h:39
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
uint8x16_t vloadq(const uint8_t *ptr)
Definition: load.h:58
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:39
Quantization info when assuming per layer quantization.
Interface for CPU tensor.
Definition: ITensor.h:36
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
typename promote< T >::type promote_t
Get promoted type.
Definition: traits.h:147
size_t front
Padding across the depth dimension on the front, in elements.
Definition: Types.h:820
Create the appropriate SIMD vector given its type and size in terms of elements.
Definition: traits.h:48
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
T z() const
Alias to access the size of the third dimension.
Definition: Dimensions.h:97
size_t height
Height of the 3D shape or object.
Definition: Size3D.h:93
void max_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out, const int window_step_x)
Definition: quantized.h:228
Coordinates of an item.
Definition: Coordinates.h:37
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
Pooling Layer Information struct.
Definition: Types.h:1295
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
size_t top
Padding across the height dimension on the top, in elements.
Definition: Types.h:818
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
uint8x8_t vgetlow(const uint8x16_t val)
Definition: getlow.h:39
size_t left
Padding across the width dimension on the left, in elements.
Definition: Types.h:816
size_t width
Width of the 3D shape or object.
Definition: Size3D.h:92
uint8x16_t vcombine(const uint8x8_t &a, const uint8x8_t &b)
Definition: combine.h:39
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
size_t back
Padding across the depth dimension on the back, in elements.
Definition: Types.h:821
uint8x8_t vgethigh(const uint8x16_t val)
Definition: gethigh.h:39
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
size_t right
Padding across the width dimension on the right, in elements.
Definition: Types.h:817
void avg_poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out, const int window_step_x)
Definition: quantized.h:38
size_t depth
Depth of the 3D shape or object.
Definition: Size3D.h:94
uint8x8_t vload(const uint8_t *ptr)
Definition: load.h:39
void vstore(uint8_t *ptr, uint8x8_t val)
Definition: store.h:39
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
Definition: dup_n.h:41
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
T y() const
Alias to access the size of the second dimension.
Definition: Dimensions.h:92
Includes all wrapper headers at once.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
uint8x8_t vmla(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
Definition: mla.h:46
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
uint16x8_t vmovl(const uint8x8_t &a)
Definition: movl.h:39
uint8x8_t vmax(const uint8x8_t &a, const uint8x8_t &b)
Definition: max.h:39
Describe a multidimensional execution window.
Definition: Window.h:39
size_t bottom
Padding across the height dimension on the bottom, in elements.
Definition: Types.h:819