// NOTE(review): this chunk is a garbled extraction -- the original file's
// line numbers are fused into the text (e.g. "40", "64") and many lines are
// missing (the function signature, braces, the switch on `op`, and the
// declarations of `res`/`type`). The fragments below appear to belong to a
// templated reduce_operation<T, OT>(ptr, reduce_elements, op, stride, policy)
// value-reduction kernel -- TODO confirm against the complete source file.
40 template <
typename T,
typename OT>
// Integral element types: accumulate in a 32-bit integer (int_res), presumably
// to avoid overflow/precision issues in the narrower element type -- verify.
64 if(std::is_integral<type>::value)
66 auto int_res =
static_cast<int32_t
>(res);
// Walk the reduce_elements values, stride apart, starting at ptr.
67 for(
int i = 0; i < reduce_elements; ++i)
69 auto elem = *(ptr + stride * i);
// Minimum: when the running result is greater than the current element
// (the replacing assignment is on a line not visible in this chunk).
74 if(
static_cast<T
>(int_res) > elem)
// Maximum: symmetric to the minimum case above.
80 if(
static_cast<T
>(int_res) < elem)
// Sum-of-squares accumulation.
86 int_res += elem * elem;
// Mean (integral path): divide by the element count. One preprocessor branch
// rounds via the caller-supplied rounding `policy`; the other (#else, tied to
// an #if defined(__aarch64__) not visible here) uses plain integer division.
104 int_res =
round(
static_cast<float>(int_res) /
static_cast<float>(reduce_elements), policy);
105 #else // defined(__aarch64__)
107 int_res /= reduce_elements;
// Write the 32-bit accumulator back into the result's element type.
110 res =
static_cast<type>(int_res);
// Non-integral (floating-point) path: same strided loop over the elements;
// the per-op accumulation lines are not visible in this chunk.
114 for(
int i = 0; i < reduce_elements; ++i)
116 auto elem = *(ptr + stride * i);
// Mean for the floating-point path.
147 res /= reduce_elements;
153 template <
typename T,
typename OT>
// Returns (as OT) the index of the minimum or maximum element in a strided
// 1-D run of `reduce_elements` values starting at `ptr`, selected by `op`.
// NOTE(review): lines are missing from this chunk -- the initialisation of
// `res` (presumably 0, i.e. first element wins ties) and the dispatch on
// `op` (ARG_IDX_MIN vs ARG_IDX_MAX) are not visible; confirm against the
// complete source file.
154 OT reduce_operation_arg_min_max(
const T *ptr,
int reduce_elements,
ReductionOperation op,
int stride)
157 for(
int i = 0; i < reduce_elements; ++i)
159 auto elem = *(ptr + stride * i);
// Arg-min branch: remember i when the current best element is greater
// than elem (strict >, so the earliest minimum index is kept).
163 if(*(ptr + stride * res) > elem)
165 res =
static_cast<uint32_t
>(i);
// Arg-max branch: symmetric, strict <, earliest maximum index is kept.
169 if(*(ptr + stride * res) < elem)
171 res =
static_cast<uint32_t
>(i);
// The winning index is tracked as uint32_t and cast to the caller's OT.
178 return static_cast<OT
>(res);
183 template <
typename T,
typename OT>
// Reduction driver: reduces the source tensor along `axis`, dispatching each
// strided element run either to the arg-min/max index kernel or to the value
// reduction kernel (is_arg_min_max selects which).
// NOTE(review): this chunk is missing the function signature, the switch on
// `axis` and its case labels, the `dst` declaration, braces, and the return
// statement -- original line numbers are fused into the text; confirm
// against the complete source file.
190 const unsigned int src_width =
src.shape().x();
191 const unsigned int src_height =
src.shape().y();
192 const unsigned int src_depth =
src.shape().z();
193 const unsigned int src_batch =
src.shape()[3];
// Number of elements along the reduced axis.
194 const int reduce_elems =
src.shape()[axis];
// --- axis 0 (width): rows are contiguous, so the element stride is 1 and
// each of the upper dimensions contributes one output value.
200 const unsigned int upper_dims =
src.shape().total_size_upper(1);
201 for(
unsigned int du = 0; du < upper_dims; ++du)
203 const T *src_row_ptr =
src.data() + du * reduce_elems;
204 dst[du] = is_arg_min_max ?
205 reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
206 reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
// --- axis 1 (height): elements of a column are src_width apart, so the
// kernel stride is src_width; iterate every x in every upper slice.
212 const unsigned int upper_dims =
src.shape().total_size_upper(2);
213 for(
unsigned int du = 0; du < upper_dims; ++du)
215 for(
unsigned int x = 0; x < src_width; ++x)
217 const int in_offset = du * src_height * src_width + x;
218 const int out_offset = du * src_width + x;
219 const T *src_row_ptr =
src.data() + in_offset;
220 dst[out_offset] = is_arg_min_max ?
221 reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
222 reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
// --- axis 2 (depth): elements along z are one plane (width*height) apart.
229 const unsigned int upper_dims =
src.shape().total_size_upper(3);
230 for(
unsigned int du = 0; du < upper_dims; ++du)
232 for(
unsigned int x = 0; x < src_width; ++x)
234 for(
unsigned int y = 0; y < src_height; ++y)
236 const int in_offset = du * src_depth * src_height * src_width + y * src_width + x;
237 const int out_offset = du * src_width * src_height + y * src_width + x;
238 const T *src_row_ptr =
src.data() + in_offset;
239 dst[out_offset] = is_arg_min_max ?
240 reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
241 reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
// --- axis 3 (batch): elements along the batch are one volume
// (width*height*depth) apart.
249 const unsigned int upper_dims =
src.shape().total_size_upper(4);
250 for(
unsigned int du = 0; du < upper_dims; ++du)
252 for(
unsigned int z = 0; z < src_depth; ++z)
254 for(
unsigned int y = 0; y < src_height; ++y)
256 for(
unsigned int x = 0; x < src_width; ++x)
258 const int in_offset = du * src_batch * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
259 const int out_offset = du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
260 const T *src_row_ptr =
src.data() + in_offset;
261 dst[out_offset] = is_arg_min_max ?
262 reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
263 reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
277 template <
typename T,
typename OT>
// Type-dispatch wrapper around compute_reduction_operation.
// NOTE(review): the function signatures and the conditionals selecting each
// return below are missing from this chunk (original line numbers are fused
// into the text). The visible returns suggest: a generic <T, OT> path, plus
// quantised uint8_t and int8_t paths that either reduce directly in the
// integer domain or (via a dst_f intermediate not visible here) reduce in
// float and re-quantise with convert_to_asymmetric -- confirm against the
// complete source file.
282 return compute_reduction_operation<T, OT>(
src,
dst_shape, axis, op, output_type, policy);
// Quantised (uint8_t) direct integer-domain reduction.
294 return compute_reduction_operation<uint8_t, uint8_t>(
src,
dst_shape, axis, op, output_type, policy);
// Quantised (uint8_t) float-domain reduction re-quantised to the output's
// quantisation info; dst_f is computed on lines not visible in this chunk.
300 return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
305 return compute_reduction_operation<uint8_t, uint8_t>(
src,
dst_shape, axis, op, output_type, policy);
// Signed quantised (int8_t) direct integer-domain reduction.
318 return compute_reduction_operation<int8_t, int8_t>(
src,
dst_shape, axis, op, output_type, policy);
// Signed quantised (int8_t) float-domain reduction, re-quantised.
324 return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
329 return compute_reduction_operation<int8_t, int8_t>(
src,
dst_shape, axis, op, output_type, policy);