24 #ifndef SRC_CORE_POOLING_3D_LAYER_IMPL_H
25 #define SRC_CORE_POOLING_3D_LAYER_IMPL_H
// Max pooling over a 3D (x/y/z) pooling region for every output coordinate.
// For each coordinate the region is walked in z, then y, then x; the element offset
// x_off is advanced first in steps of window_step_x (vector loads) and then one element
// at a time for the remaining tail. The tail keeps a running std::max and stores it
// through out.ptr().
//
// Parameters visible in this listing:
//   src            - input tensor; its shape, byte strides and buffer are read.
//   pool_info      - supplies stride, pool_size, padding and is_global_pooling.
//   window_out     - window used to construct the output iterator.
//   window_start_x - first element offset (in units of T) handled per output coordinate.
//   window_end_x   - one past the last element offset.
//   window_step_x  - element offsets consumed by one vector-loop iteration.
//
// NOTE(review): the embedded original line numbers jump (e.g. 41 -> 43, 118 -> 128), so
// some lines (braces, some declarations, the vector max/store) are not shown here; only
// the visible statements are documented.
41 void max_poolingMxNxD_fp_neon_ndhwc(
const ITensor *
src,
43 Pooling3dLayerInfo &pool_info,
44 const Window &window_out,
45 const int window_start_x,
46 const int window_end_x,
47 const int window_step_x)
// Vector traits for element type T, selected with BitWidth::W128.
50 using vtype = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
52 using tag_type =
typename vtype::tag_type;
// Pooling strides along width / height / depth, converted to int.
54 int pool_stride_x =
static_cast<int>(pool_info.stride.width);
55 int pool_stride_y =
static_cast<int>(pool_info.stride.height);
56 int pool_stride_z =
static_cast<int>(pool_info.stride.depth);
// Pool extents: with global pooling they come from the source tensor shape
// (dimensions 1, 2 and 3), otherwise from pool_info.pool_size.
58 const int pool_size_x = pool_info.is_global_pooling ?
src->info()->tensor_shape().y() : pool_info.pool_size.width;
59 const int pool_size_y = pool_info.is_global_pooling ?
src->info()->tensor_shape().z() : pool_info.pool_size.height;
60 const int pool_size_z = pool_info.is_global_pooling ?
src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
// Leading padding on each pooled axis (top/left/front).
62 const int pool_pad_top =
static_cast<int>(pool_info.padding.top);
63 const int pool_pad_left =
static_cast<int>(pool_info.padding.left);
64 const int pool_pad_front =
static_cast<int>(pool_info.padding.front);
// Input extents along dimensions 1, 2 and 3 (used as width, height, depth below).
66 const int input_dim_w =
src->info()->dimension(1);
67 const int input_dim_h =
src->info()->dimension(2);
68 const int input_dim_d =
src->info()->dimension(3);
// Byte strides of the source tensor. Naming follows the tensor axis, not the pooled
// axis: y_stride steps along width, z_stride along height, w_stride along depth and
// n_stride along dimension 4.
// NOTE(review): strides are narrowed to int - confirm tensors cannot exceed INT_MAX bytes.
70 const int y_stride =
static_cast<int>(
src->info()->strides_in_bytes().y());
71 const int z_stride =
static_cast<int>(
src->info()->strides_in_bytes().z());
72 const int w_stride =
static_cast<int>(
src->info()->strides_in_bytes()[3]);
73 const int n_stride =
static_cast<int>(
src->info()->strides_in_bytes()[4]);
// Byte pointer to the first element of the source buffer.
75 const uint8_t *in_ptr_start =
src->buffer() +
src->info()->offset_first_element_in_bytes();
// Output iterator; dst0 is referenced here although its declaration is not visible
// in this listing.
77 Iterator out(dst0, window_out);
// Per-coordinate callback; `id` is the current output coordinate.
82 [&](
const Coordinates &
id)
// Input index of the first pooling-region element on each axis:
// output coordinate * stride - leading padding (may be negative inside the padding).
85 const int in_idx_width =
static_cast<int>(
id.y()) * pool_stride_x - pool_pad_left;
86 const int in_idx_height =
static_cast<int>(
id.z()) * pool_stride_y - pool_pad_top;
87 const int in_idx_depth =
static_cast<int>(
id[3]) * pool_stride_z - pool_pad_front;
// Start offsets skip positions that would map to a negative input index; the
// intermediate *_t ends are capped at the pool size.
89 const int pool_start_x = std::max(0, -in_idx_width);
90 const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
91 const int pool_start_y = std::max(0, -in_idx_height);
92 const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
94 const int pool_start_z = std::max(0, -in_idx_depth);
95 const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
// Final ends additionally keep (offset + in_idx_*) below the input extent, so the
// loops below never index past the input on any axis.
98 const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
99 const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
100 const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
// Base pointer for the slice selected by coordinate 4.
102 const uint8_t *in_ptr_n = in_ptr_start +
id[4] * n_stride;
104 int x_off = window_start_x;
// Vector loop: runs while a full step of window_step_x elements still fits.
106 for (; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
// Accumulator starts at negative infinity (converted to T) in every lane.
108 vres =
wrapper::vdup_n(
static_cast<T
>(-std::numeric_limits<float>::infinity()), tag_type());
109 for (
int z = pool_start_z; z < pool_end_z; ++z)
111 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
112 for (
int y = pool_start_y; y < pool_end_y; ++y)
114 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
115 for (
int x = pool_start_x; x < pool_end_x; ++x)
117 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
// Load one vector of T starting x_off elements into the addressed row.
// NOTE(review): the statement combining `data` into vres is not shown here.
118 const vector_type data =
wrapper::vloadq(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Scalar tail: handles the element offsets left over after the vector loop.
128 for (; x_off < window_end_x; ++x_off)
131 res = -std::numeric_limits<float>::infinity();
132 for (
int z = pool_start_z; z < pool_end_z; ++z)
134 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
135 for (
int y = pool_start_y; y < pool_end_y; ++y)
137 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
138 for (
int x = pool_start_x; x < pool_end_x; ++x)
140 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
141 const T data = *(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Running maximum over the pooling region.
142 res = std::max(res, data);
// Store the scalar result at the same element offset in the output.
147 *(
reinterpret_cast<T *
>(out.ptr()) + x_off) = res;
// Average pooling over a 3D (x/y/z) pooling region for every output coordinate.
// The visible code derives the clamped pooling bounds, calls calculate_avg_scale_pool3d
// once per output coordinate, then walks the region in z/y/x: first with vector loads in
// steps of window_step_x, then element by element for the tail, storing `res` through
// out.ptr().
//
// Parameters visible in this listing:
//   src            - input tensor; its shape, byte strides and buffer are read.
//   pool_info      - supplies stride, pool_size, padding, is_global_pooling and
//                    exclude_padding.
//   window_out     - window used to construct the output iterator.
//   window_start_x - first element offset (in units of T) handled per output coordinate.
//   window_end_x   - one past the last element offset.
//   window_step_x  - element offsets consumed by one vector-loop iteration.
//
// NOTE(review): the embedded original line numbers jump (e.g. 154 -> 156, 245 -> 259),
// so the accumulation and scaling statements are not shown here; only the visible
// statements are documented.
153 template <
typename T>
154 void avg_poolingMxNxD_fp_neon_ndhwc(
const ITensor *
src,
156 Pooling3dLayerInfo &pool_info,
157 const Window &window_out,
158 const int window_start_x,
159 const int window_end_x,
160 const int window_step_x)
// Vector traits for element type T, selected with BitWidth::W128.
162 using vtype = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
164 using tag_type =
typename vtype::tag_type;
// Pooling strides along width / height / depth, converted to int.
166 int pool_stride_x =
static_cast<int>(pool_info.stride.width);
167 int pool_stride_y =
static_cast<int>(pool_info.stride.height);
168 int pool_stride_z =
static_cast<int>(pool_info.stride.depth);
// Pool extents: with global pooling they come from the source tensor shape
// (dimensions 1, 2 and 3), otherwise from pool_info.pool_size.
170 const int pool_size_x = pool_info.is_global_pooling ?
src->info()->tensor_shape().y() : pool_info.pool_size.width;
171 const int pool_size_y = pool_info.is_global_pooling ?
src->info()->tensor_shape().z() : pool_info.pool_size.height;
172 const int pool_size_z = pool_info.is_global_pooling ?
src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
// Padding on both sides of each pooled axis.
174 const int pool_pad_top =
static_cast<int>(pool_info.padding.top);
175 const int pool_pad_bottom =
static_cast<int>(pool_info.padding.bottom);
176 const int pool_pad_left =
static_cast<int>(pool_info.padding.left);
177 const int pool_pad_right =
static_cast<int>(pool_info.padding.right);
178 const int pool_pad_front =
static_cast<int>(pool_info.padding.front);
179 const int pool_pad_back =
static_cast<int>(pool_info.padding.back);
// Upper bounds passed to the scale computation: the input extent, plus the trailing
// padding unless exclude_padding is set.
181 const int upper_bound_w =
src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
182 const int upper_bound_h =
src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
183 const int upper_bound_d =
src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);
// Input extents along dimensions 1, 2 and 3 (used as width, height, depth below).
185 const int input_dim_w =
src->info()->dimension(1);
186 const int input_dim_h =
src->info()->dimension(2);
187 const int input_dim_d =
src->info()->dimension(3);
// Byte strides of the source tensor. Naming follows the tensor axis, not the pooled
// axis: y_stride steps along width, z_stride along height, w_stride along depth and
// n_stride along dimension 4.
// NOTE(review): strides are narrowed to int - confirm tensors cannot exceed INT_MAX bytes.
189 const int y_stride =
static_cast<int>(
src->info()->strides_in_bytes().y());
190 const int z_stride =
static_cast<int>(
src->info()->strides_in_bytes().z());
191 const int w_stride =
static_cast<int>(
src->info()->strides_in_bytes()[3]);
192 const int n_stride =
static_cast<int>(
src->info()->strides_in_bytes()[4]);
// Byte pointer to the first element of the source buffer.
194 const uint8_t *in_ptr_start =
src->buffer() +
src->info()->offset_first_element_in_bytes();
// Output iterator; dst0 is referenced here although its declaration is not visible
// in this listing.
196 Iterator out(dst0, window_out);
// Per-coordinate callback; `id` is the current output coordinate.
201 [&](
const Coordinates &
id)
// Input index of the first pooling-region element on each axis:
// output coordinate * stride - leading padding (may be negative inside the padding).
204 const int in_idx_width =
static_cast<int>(
id.y()) * pool_stride_x - pool_pad_left;
205 const int in_idx_height =
static_cast<int>(
id.z()) * pool_stride_y - pool_pad_top;
206 const int in_idx_depth =
static_cast<int>(
id[3]) * pool_stride_z - pool_pad_front;
// Start offsets skip positions that would map to a negative input index; the
// intermediate *_t ends are capped at the pool size.
208 const int pool_start_x = std::max(0, -in_idx_width);
209 const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
210 const int pool_start_y = std::max(0, -in_idx_height);
211 const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
213 const int pool_start_z = std::max(0, -in_idx_depth);
214 const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
// Final ends additionally keep (offset + in_idx_*) below the input extent, so the
// loops below never index past the input on any axis.
217 const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
218 const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
219 const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
// Base pointer for the slice selected by coordinate 4.
221 const uint8_t *in_ptr_n = in_ptr_start +
id[4] * n_stride;
// Scale computation for this output coordinate.
// NOTE(review): the variable receiving this call's result is not shown here;
// presumably it is the averaging factor applied to the accumulated sum - confirm.
225 calculate_avg_scale_pool3d(pool_info.exclude_padding,
id, pool_size_x, pool_size_y, pool_size_z,
226 upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left, pool_pad_top,
227 pool_pad_front, pool_stride_x, pool_stride_y, pool_stride_z);
230 int x_off = window_start_x;
// Vector loop: runs while a full step of window_step_x elements still fits.
232 for (; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
236 for (
int z = pool_start_z; z < pool_end_z; ++z)
238 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
239 for (
int y = pool_start_y; y < pool_end_y; ++y)
241 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
242 for (
int x = pool_start_x; x < pool_end_x; ++x)
244 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
// Load one vector of T starting x_off elements into the addressed row.
// NOTE(review): the statement accumulating `data` is not shown here.
245 const vector_type data =
wrapper::vloadq(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Scalar tail: handles the element offsets left over after the vector loop.
259 for (; x_off < window_end_x; ++x_off)
263 for (
int z = pool_start_z; z < pool_end_z; ++z)
265 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
266 for (
int y = pool_start_y; y < pool_end_y; ++y)
268 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
269 for (
int x = pool_start_x; x < pool_end_x; ++x)
271 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
// Read a single element; how it is folded into `res` is not shown here.
272 const T data = *(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Store the scalar result at the same element offset in the output.
282 *(
reinterpret_cast<T *
>(out.ptr()) + x_off) = res;
// L2 pooling over a 3D (x/y/z) pooling region for every output coordinate.
// The visible code derives the clamped pooling bounds, calls calculate_avg_scale_pool3d
// once per output coordinate, then walks the region in z/y/x: first with vector loads in
// steps of window_step_x, then element by element for the tail. The tail applies
// std::sqrt to `res` before storing it through out.ptr().
//
// Parameters visible in this listing:
//   src            - input tensor; its shape, byte strides and buffer are read.
//   pool_info      - supplies stride, pool_size, padding, is_global_pooling and
//                    exclude_padding.
//   window_out     - window used to construct the output iterator.
//   window_start_x - first element offset (in units of T) handled per output coordinate.
//   window_end_x   - one past the last element offset.
//   window_step_x  - element offsets consumed by one vector-loop iteration.
//
// NOTE(review): the embedded original line numbers jump (e.g. 289 -> 291, 379 -> 398),
// so the accumulation and scaling statements are not shown here; only the visible
// statements are documented.
288 template <
typename T>
289 void l2_poolingMxNxD_fp_neon_ndhwc(
const ITensor *
src,
291 Pooling3dLayerInfo &pool_info,
292 const Window &window_out,
293 const int window_start_x,
294 const int window_end_x,
295 const int window_step_x)
// Vector traits for element type T, selected with BitWidth::W128.
297 using vtype = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>;
299 using tag_type =
typename vtype::tag_type;
// Pooling strides along width / height / depth, converted to int.
301 int pool_stride_x =
static_cast<int>(pool_info.stride.width);
302 int pool_stride_y =
static_cast<int>(pool_info.stride.height);
303 int pool_stride_z =
static_cast<int>(pool_info.stride.depth);
// Pool extents: with global pooling they come from the source tensor shape
// (dimensions 1, 2 and 3), otherwise from pool_info.pool_size.
305 const int pool_size_x = pool_info.is_global_pooling ?
src->info()->tensor_shape().y() : pool_info.pool_size.width;
306 const int pool_size_y = pool_info.is_global_pooling ?
src->info()->tensor_shape().z() : pool_info.pool_size.height;
307 const int pool_size_z = pool_info.is_global_pooling ?
src->info()->tensor_shape()[3] : pool_info.pool_size.depth;
// Padding on both sides of each pooled axis.
309 const int pool_pad_top =
static_cast<int>(pool_info.padding.top);
310 const int pool_pad_bottom =
static_cast<int>(pool_info.padding.bottom);
311 const int pool_pad_left =
static_cast<int>(pool_info.padding.left);
312 const int pool_pad_right =
static_cast<int>(pool_info.padding.right);
313 const int pool_pad_front =
static_cast<int>(pool_info.padding.front);
314 const int pool_pad_back =
static_cast<int>(pool_info.padding.back);
// Upper bounds passed to the scale computation: the input extent, plus the trailing
// padding unless exclude_padding is set.
316 const int upper_bound_w =
src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
317 const int upper_bound_h =
src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
318 const int upper_bound_d =
src->info()->dimension(3) + (pool_info.exclude_padding ? 0 : pool_pad_back);
// Input extents along dimensions 1, 2 and 3 (used as width, height, depth below).
320 const int input_dim_w =
src->info()->dimension(1);
321 const int input_dim_h =
src->info()->dimension(2);
322 const int input_dim_d =
src->info()->dimension(3);
// Byte strides of the source tensor. Naming follows the tensor axis, not the pooled
// axis: y_stride steps along width, z_stride along height, w_stride along depth and
// n_stride along dimension 4.
// NOTE(review): strides are narrowed to int - confirm tensors cannot exceed INT_MAX bytes.
324 const int y_stride =
static_cast<int>(
src->info()->strides_in_bytes().y());
325 const int z_stride =
static_cast<int>(
src->info()->strides_in_bytes().z());
326 const int w_stride =
static_cast<int>(
src->info()->strides_in_bytes()[3]);
327 const int n_stride =
static_cast<int>(
src->info()->strides_in_bytes()[4]);
// Byte pointer to the first element of the source buffer.
329 const uint8_t *in_ptr_start =
src->buffer() +
src->info()->offset_first_element_in_bytes();
// Output iterator; dst0 is referenced here although its declaration is not visible
// in this listing.
331 Iterator out(dst0, window_out);
// Per-coordinate callback; `id` is the current output coordinate.
336 [&](
const Coordinates &
id)
// Input index of the first pooling-region element on each axis:
// output coordinate * stride - leading padding (may be negative inside the padding).
339 const int in_idx_width =
static_cast<int>(
id.y()) * pool_stride_x - pool_pad_left;
340 const int in_idx_height =
static_cast<int>(
id.z()) * pool_stride_y - pool_pad_top;
341 const int in_idx_depth =
static_cast<int>(
id[3]) * pool_stride_z - pool_pad_front;
// Start offsets skip positions that would map to a negative input index; the
// intermediate *_t ends are capped at the pool size.
343 const int pool_start_x = std::max(0, -in_idx_width);
344 const int pool_end_x_t = std::min(input_dim_w + pool_pad_left - in_idx_width, pool_size_x);
345 const int pool_start_y = std::max(0, -in_idx_height);
346 const int pool_end_y_t = std::min(input_dim_h + pool_pad_top - in_idx_height, pool_size_y);
348 const int pool_start_z = std::max(0, -in_idx_depth);
349 const int pool_end_z_t = std::min(input_dim_d + pool_pad_front - in_idx_depth, pool_size_z);
// Final ends additionally keep (offset + in_idx_*) below the input extent, so the
// loops below never index past the input on any axis.
352 const int pool_end_x = std::min(pool_end_x_t, input_dim_w - in_idx_width);
353 const int pool_end_y = std::min(pool_end_y_t, input_dim_h - in_idx_height);
354 const int pool_end_z = std::min(pool_end_z_t, input_dim_d - in_idx_depth);
// Base pointer for the slice selected by coordinate 4.
356 const uint8_t *in_ptr_n = in_ptr_start +
id[4] * n_stride;
// Scale computation for this output coordinate.
// NOTE(review): the variable receiving this call's result is not shown here;
// presumably it scales the accumulated sum before the square root - confirm.
360 calculate_avg_scale_pool3d(pool_info.exclude_padding,
id, pool_size_x, pool_size_y, pool_size_z,
361 upper_bound_w, upper_bound_h, upper_bound_d, pool_pad_left, pool_pad_top,
362 pool_pad_front, pool_stride_x, pool_stride_y, pool_stride_z);
364 int x_off = window_start_x;
// Vector loop: runs while a full step of window_step_x elements still fits.
366 for (; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
370 for (
int z = pool_start_z; z < pool_end_z; ++z)
372 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
373 for (
int y = pool_start_y; y < pool_end_y; ++y)
375 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
376 for (
int x = pool_start_x; x < pool_end_x; ++x)
378 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
// Load one vector of T starting x_off elements into the addressed row.
// NOTE(review): the statement accumulating `data` is not shown here.
379 const vector_type data =
wrapper::vloadq(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Scalar tail: handles the element offsets left over after the vector loop.
398 for (; x_off < window_end_x; ++x_off)
402 for (
int z = pool_start_z; z < pool_end_z; ++z)
404 const uint8_t *in_ptr_z = in_ptr_n + (z + in_idx_depth) * w_stride;
405 for (
int y = pool_start_y; y < pool_end_y; ++y)
407 const uint8_t *in_ptr_y = in_ptr_z + (y + in_idx_height) * z_stride;
408 for (
int x = pool_start_x; x < pool_end_x; ++x)
410 const uint8_t *in_ptr_x = in_ptr_y + (x + in_idx_width) * y_stride;
// Read a single element; how it is folded into `res` is not shown here.
411 const T data = *(
reinterpret_cast<const T *
>(in_ptr_x) + x_off);
// Square root of the accumulated value completes the L2 result for this element.
421 res = std::sqrt(res);
// Store the scalar result at the same element offset in the output.
424 *(
reinterpret_cast<T *
>(out.ptr()) + x_off) = res;
// Templated entry point that forwards to the max / avg / l2 floating-point kernels.
// NOTE(review): the function name, its parameter list and the conditions selecting
// between the three calls are on lines not shown in this listing (431 -> 434,
// 437 -> 445); only the visible statements are documented.
431 template <
typename T>
// Range of the window's x dimension.
434 const int window_start_x = window.
x().
start();
435 const int window_end_x = window.
x().
end();
// Number of T elements that fit in 16 bytes.
436 constexpr
int window_step_x = 16 /
sizeof(T);
// Local copy of the window handed to the kernels as window_out.
// NOTE(review): any adjustment made to this copy before the calls is not shown here.
437 Window window_out = window;
// Each call passes the same tensors, pooling info, window and x range; the remaining
// argument(s) continue on lines not shown.
445 max_poolingMxNxD_fp_neon_ndhwc<T>(
src, dst0, pool_info, window_out, window_start_x, window_end_x,
449 avg_poolingMxNxD_fp_neon_ndhwc<T>(
src, dst0, pool_info, window_out, window_start_x, window_end_x,
453 l2_poolingMxNxD_fp_neon_ndhwc<T>(
src, dst0, pool_info, window_out, window_start_x, window_end_x,
// Templated entry point that forwards to the max / avg q8 kernels.
// NOTE(review): the function name, its parameter list and the conditions selecting
// between the two calls are on lines not shown in this listing (461 -> 464,
// 465 -> 473); only the visible statements are documented.
461 template <
typename T>
// Fixed step of 16 passed to both kernels as window_step_x.
464 constexpr
int window_step_x = 16;
// Local copy of the window handed to the kernels as window_out.
// NOTE(review): any adjustment made to this copy before the calls is not shown here.
465 Window window_out = window;
473 max_poolingMxNxD_q8_neon_ndhwc<T>(
src, dst0, pool_info, window_out, window_step_x);
476 avg_poolingMxNxD_q8_neon_ndhwc<T>(
src, dst0, pool_info, window_out, window_step_x);
484 #endif // SRC_CORE_POOLING_3D_LAYER_IMPL_H