impl.cpp
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
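// NEON implementation of 3D pooling (max, average, L2) for tensors in NDHWC layout
// (dimension 0 = C, 1 = W, 2 = H, 3 = D, 4 = N). The kernels rely on the library's
// NEON wrapper intrinsics (wrapper::vloadq, wrapper::vmax, wrapper::vadd, ...) and the
// Window/Iterator execution helpers.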
namespace arm_compute
{
namespace cpu
{
namespace
{
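// Max 3D pooling over an NDHWC tensor. For every output coordinate the pooling region
// is clamped to the valid (unpadded) input area and reduced with wrapper::vmax across
// 128-bit vectors of channels; the accumulator is initialised to -infinity, the identity
// of max. window_start_x/window_end_x/window_step_x describe the channel range handled
// by this call; channels that do not fill a whole vector fall through to the scalar
// left-over loop.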
template <typename T>
void max_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window_out,
                                    const int window_start_x, const int window_end_x, const int window_step_x)
{
    using vtype       = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
    using vector_type = typename vtype::type;
    using tag_type    = typename vtype::tag_type;

    int pool_stride_x = static_cast<int>(pool_info.stride.width);
    int pool_stride_y = static_cast<int>(pool_info.stride.height);
    int pool_stride_z = static_cast<int>(pool_info.stride.depth);

    const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;

    const int pool_pad_top   = static_cast<int>(pool_info.padding.top);
    const int pool_pad_left  = static_cast<int>(pool_info.padding.left);
    const int pool_pad_front = static_cast<int>(pool_info.padding.front);

    const int input_dim_w = src->info()->dimension(1);
    const int input_dim_h = src->info()->dimension(2);
    const int input_dim_d = src->info()->dimension(3);

    const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
    const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
    const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
    const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);

    const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();

    Iterator out(dst0, window_out);

    vector_type vres;
    execute_window_loop(window_out, [&](const Coordinates & id)
    {
        // Compute the theoretical input start points for this output coordinate
        const int in_idx_width  = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
        const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
        const int in_idx_depth  = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;

        const int pool_start_x = std::max(0, -in_idx_width);
        const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
        const int pool_start_y = std::max(0, -in_idx_height);
        const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);

        const int pool_start_z = std::max(0, -in_idx_depth);
        const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);

        // The end of the pooling region must exclude the padded area (PAD_X, PAD_Y and PAD_Z)
        const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
        const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
        const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);

        const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;

        int x_off = window_start_x;

        for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // Vectorized loop over the channel dimension (C)
        {
            // Start from -infinity so any in-bounds value replaces it
            vres = wrapper::vdup_n(static_cast<T>(-std::numeric_limits<float>::infinity()), tag_type());
            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t    *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const vector_type data     = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        vres = wrapper::vmax(vres, data);
                    }
                }
            }
            // Store result
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, vres);
        }

        // Left-over (scalar) loop for the remaining channels
        for(; x_off < window_end_x; ++x_off)
        {
            T res = static_cast<T>(-std::numeric_limits<float>::infinity());
            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const T        data     = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        res = std::max(res, data);
                    }
                }
            }
            // Store result
            *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
        }
    },
    out);
}

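// Average 3D pooling over an NDHWC tensor. The pooling region is clamped to the valid
// input area, channel vectors are accumulated with wrapper::vadd and then multiplied by
// a precomputed scale, i.e. the reciprocal of the pooling-region size (which honours
// pool_info.exclude_padding via calculate_avg_scale_pool3d).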
template <typename T>
void avg_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info,
                                    const Window &window_out, const int window_start_x, const int window_end_x, const int window_step_x)
{
    using vtype       = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
    using vector_type = typename vtype::type;
    using tag_type    = typename vtype::tag_type;

    int pool_stride_x = static_cast<int>(pool_info.stride.width);
    int pool_stride_y = static_cast<int>(pool_info.stride.height);
    int pool_stride_z = static_cast<int>(pool_info.stride.depth);

    const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;

    const int pool_pad_top    = static_cast<int>(pool_info.padding.top);
    const int pool_pad_bottom = static_cast<int>(pool_info.padding.bottom);
    const int pool_pad_left   = static_cast<int>(pool_info.padding.left);
    const int pool_pad_right  = static_cast<int>(pool_info.padding.right);
    const int pool_pad_front  = static_cast<int>(pool_info.padding.front);
    const int pool_pad_back   = static_cast<int>(pool_info.padding.back);

    const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
    const int upper_bound_d = src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);

    const int input_dim_w = src->info()->dimension(1);
    const int input_dim_h = src->info()->dimension(2);
    const int input_dim_d = src->info()->dimension(3);

    const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
    const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
    const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
    const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);

    const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();

    Iterator out(dst0, window_out);

    vector_type vres;
    execute_window_loop(window_out, [&](const Coordinates & id)
    {
        // Compute the theoretical input start points for this output coordinate
        const int in_idx_width  = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
        const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
        const int in_idx_depth  = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;

        const int pool_start_x = std::max(0, -in_idx_width);
        const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
        const int pool_start_y = std::max(0, -in_idx_height);
        const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);

        const int pool_start_z = std::max(0, -in_idx_depth);
        const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);

        // The end of the pooling region must exclude the padded area (PAD_X, PAD_Y and PAD_Z)
        const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
        const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
        const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);

        const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;

        // Calculate scale (reciprocal of the number of elements averaged over)
        const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
                                                       pool_pad_top, pool_pad_front, pool_stride_x,
                                                       pool_stride_y, pool_stride_z);
        const vector_type scale_v = wrapper::vdup_n(static_cast<T>(scale), tag_type());

        int x_off = window_start_x;

        for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // Vectorized loop over the channel dimension (C)
        {
            // Perform pooling
            vres = wrapper::vdup_n(static_cast<T>(0.0f), tag_type());
            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t    *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const vector_type data     = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        vres = wrapper::vadd(vres, data);
                    }
                }
            }

            // Divide by the pooling-region size (multiply by the reciprocal scale)
            vres = wrapper::vmul(vres, scale_v);

            // Store result
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, vres);
        }

        // Left-over (scalar) loop for the remaining channels
        for(; x_off < window_end_x; ++x_off)
        {
            T res(0);

            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const T        data     = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        res += data;
                    }
                }
            }

            // Divide by the pooling-region size (multiply by the reciprocal scale)
            res *= scale;

            // Store result
            *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
        }
    },
    out);
}

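// L2 3D pooling over an NDHWC tensor: accumulates the sum of squares with wrapper::vmla,
// scales it by the reciprocal of the pooling-region size and takes the square root
// (computed as vinv(vinvsqrt(x)) in the vector path, std::sqrt in the scalar path).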
template <typename T>
void l2_poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info,
                                   const Window &window_out, const int window_start_x, const int window_end_x, const int window_step_x)
{
    using vtype       = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
    using vector_type = typename vtype::type;
    using tag_type    = typename vtype::tag_type;

    int pool_stride_x = static_cast<int>(pool_info.stride.width);
    int pool_stride_y = static_cast<int>(pool_info.stride.height);
    int pool_stride_z = static_cast<int>(pool_info.stride.depth);

    const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
    const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
    const int pool_size_z = pool_info.is_global_pooling ? src->info()->tensor_shape()[3] : pool_info.pool_size.depth;

    const int pool_pad_top    = static_cast<int>(pool_info.padding.top);
    const int pool_pad_bottom = static_cast<int>(pool_info.padding.bottom);
    const int pool_pad_left   = static_cast<int>(pool_info.padding.left);
    const int pool_pad_right  = static_cast<int>(pool_info.padding.right);
    const int pool_pad_front  = static_cast<int>(pool_info.padding.front);
    const int pool_pad_back   = static_cast<int>(pool_info.padding.back);

    const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
    const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
    const int upper_bound_d = src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);

    const int input_dim_w = src->info()->dimension(1);
    const int input_dim_h = src->info()->dimension(2);
    const int input_dim_d = src->info()->dimension(3);

    const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
    const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
    const int w_stride = static_cast<int>(src->info()->strides_in_bytes()[3]);
    const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[4]);

    const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();

    Iterator out(dst0, window_out);

    vector_type vres;
    execute_window_loop(window_out, [&](const Coordinates & id)
    {
        // Compute the theoretical input start points for this output coordinate
        const int in_idx_width  = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
        const int in_idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
        const int in_idx_depth  = static_cast<int>(id[3]) * pool_stride_z - pool_pad_front;

        const int pool_start_x = std::max(0, -in_idx_width);
        const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
        const int pool_start_y = std::max(0, -in_idx_height);
        const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);

        const int pool_start_z = std::max(0, -in_idx_depth);
        const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);

        // The end of the pooling region must exclude the padded area (PAD_X, PAD_Y and PAD_Z)
        const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
        const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
        const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);

        const uint8_t *in_ptr_n = in_ptr_start + id[4] * n_stride;

        // Calculate scale (reciprocal of the number of elements in the pooling region)
        const float scale = calculate_avg_scale_pool3d(pool_info.exclude_padding, id, pool_size_x, pool_size_y, pool_size_z, upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left,
                                                       pool_pad_top, pool_pad_front, pool_stride_x,
                                                       pool_stride_y, pool_stride_z);

        int x_off = window_start_x;

        for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x) // Vectorized loop over the channel dimension (C)
        {
            // Perform pooling: accumulate the sum of squares
            vres = wrapper::vdup_n(static_cast<T>(0.0f), tag_type());
            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t    *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const vector_type data     = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        vres = wrapper::vmla(vres, data, data);
                    }
                }
            }

            const vector_type scale_v = wrapper::vdup_n(static_cast<T>(scale), tag_type());

            // Divide by the pooling-region size (multiply by the reciprocal scale)
            vres = wrapper::vmul(vres, scale_v);

            // Calculate the square root as the reciprocal of the inverse square root
            vres = wrapper::vinv(wrapper::vinvsqrt(vres));

            // Store result
            wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x_off, vres);
        }

        // Left-over (scalar) loop for the remaining channels
        for(; x_off < window_end_x; ++x_off)
        {
            T res(0);

            for(int z = pool_start_z; z < pool_end_z; ++z)
            {
                const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
                for(int y = pool_start_y; y < pool_end_y; ++y)
                {
                    const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
                    for(int x = pool_start_x; x < pool_end_x; ++x)
                    {
                        const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
                        const T        data     = *(reinterpret_cast<const T *>(in_ptr_x) + x_off);
                        res += data * data;
                    }
                }
            }

            // Divide by the pooling-region size (multiply by the reciprocal scale)
            res *= scale;

            // Square root
            res = std::sqrt(res);

            // Store result
            *(reinterpret_cast<T *>(out.ptr()) + x_off) = res;
        }
    },
    out);
}
} // namespace

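// Entry point for floating-point 3D pooling in NDHWC layout: computes the channel range
// covered by the execution window, collapses the window's X (channel) dimension and
// dispatches to the max/avg/L2 helper matching pool_info.pool_type.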
template <typename T>
void poolingMxNxD_fp_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window)
{
    const int     window_start_x = window.x().start();
    const int     window_end_x   = window.x().end();
    constexpr int window_step_x  = 16 / sizeof(T);
    Window        window_out     = window;

    // Collapse the X (channel) dimension: the helpers iterate over the channels
    // themselves so that the loop left-over can be handled
    window_out.set(Window::DimX, Window::Dimension(0, 1, 1));

    switch(pool_info.pool_type)
    {
        case PoolingType::MAX:
            max_poolingMxNxD_fp_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_start_x, window_end_x, window_step_x);
            break;
        case PoolingType::AVG:
            avg_poolingMxNxD_fp_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_start_x, window_end_x, window_step_x);
            break;
        case PoolingType::L2:
            l2_poolingMxNxD_fp_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_start_x, window_end_x, window_step_x);
            break;
        default:
            ARM_COMPUTE_ERROR("Pool operation not supported");
    }
}

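// Entry point for quantized (uint8_t/int8_t) 3D pooling in NDHWC layout. The quantized
// helpers max_poolingMxNxD_q8_neon_ndhwc / avg_poolingMxNxD_q8_neon_ndhwc are declared in
// an included header (not shown in this file); only MAX and AVG are supported for
// quantized types.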
template <typename T>
void poolingMxNxD_q8_neon_ndhwc(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window)
{
    constexpr int window_step_x = 16;
    Window        window_out    = window;

    // Collapse the X (channel) dimension: the helpers iterate over the channels
    // themselves so that the loop left-over can be handled
    window_out.set(Window::DimX, Window::Dimension(0, 1, 1));

    switch(pool_info.pool_type)
    {
        case PoolingType::MAX:
            max_poolingMxNxD_q8_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_step_x);
            break;
        case PoolingType::AVG:
            avg_poolingMxNxD_q8_neon_ndhwc<T>(src, dst0, pool_info, window_out, window_step_x);
            break;
        default:
            ARM_COMPUTE_ERROR("Pool operation not supported");
    }
}

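// Explicit instantiations for the element types reachable from the CPU 3D pooling kernel.
//
// Illustrative call sketch (hypothetical tensor/window names; in practice the CPU
// Pooling3d kernel configures the tensors and execution window and invokes these
// functions per thread slice):
//
//   Pooling3dLayerInfo info{};
//   info.pool_type = PoolingType::AVG;
//   info.pool_size = Size3D(2, 2, 2);
//   info.stride    = Size3D(2, 2, 2);
//   poolingMxNxD_fp_neon_ndhwc<float>(src_tensor, dst_tensor, info, window);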
template void poolingMxNxD_fp_neon_ndhwc<float>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
template void poolingMxNxD_fp_neon_ndhwc<float16_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
template void poolingMxNxD_q8_neon_ndhwc<uint8_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
template void poolingMxNxD_q8_neon_ndhwc<int8_t>(const ITensor *src, ITensor *dst0, Pooling3dLayerInfo &pool_info, const Window &window);
} // namespace cpu
} // namespace arm_compute