// Constructor member-initializer list (presumably NEMinMaxKernel's default constructor; its signature line is not part of this listing).
// _input starts as a null pointer; _func, _min, _max and _mtx are value-initialized.
47 : _func(), _input(nullptr), _min(), _max(), _mtx()
// Fragment of a configure routine; the enclosing signature and the conditions that pick one assignment are not visible here.
// _func receives one of three type-specific member functions (U8, S16 or F32 variant).
65 _func = &NEMinMaxKernel::minmax_U8;
68 _func = &NEMinMaxKernel::minmax_S16;
71 _func = &NEMinMaxKernel::minmax_F32;
// Pass the window `win` on to the INEKernel base class.
83 INEKernel::configure(win);
// Call the member function currently stored in _func on this object, forwarding `window`.
93 (this->*_func)(window);
// Fragment that seeds the values behind _min and _max (presumably NEMinMaxKernel::reset - TODO confirm; the
// conditions selecting between the three groups below are not visible in this listing).
// First group: storage viewed as int32_t; min starts at UCHAR_MAX and max at 0.
102 *
static_cast<int32_t *
>(_min) = UCHAR_MAX;
103 *
static_cast<int32_t *
>(_max) = 0;
// Second group: storage viewed as int32_t; min starts at SHRT_MAX and max at SHRT_MIN.
106 *
static_cast<int32_t *
>(_min) = SHRT_MAX;
107 *
static_cast<int32_t *
>(_max) = SHRT_MIN;
// Third group: storage viewed as float; min starts at the largest finite float.
// The matching store to _max is not visible in this listing.
110 *
static_cast<float *
>(_min) = std::numeric_limits<float>::max();
// update_min_max<T>(min, max): helper called by the type-specific routines with their local results.
// Only the setup lines are visible here; the lines that compare against or write to the stored values are not.
119 template <
typename T>
120 void NEMinMaxKernel::update_min_max(
const T min,
const T max)
// Storage type behind _min/_max: float when T is float, int32_t for every other T.
124 using type =
typename std::conditional<std::is_same<T, float>::value, float, int32_t>
::type;
// Typed views of the _min and _max members.
126 auto min_ptr =
static_cast<type *
>(_min);
127 auto max_ptr =
static_cast<type *
>(_max);
// minmax_U8: scans uint8_t pixels up to x_end and hands the smallest and largest value found to update_min_max.
// The lines defining `x`, `x_end` and `input` are not part of this listing.
140 void NEMinMaxKernel::minmax_U8(
Window win)
// 8-lane running results, seeded with the identity of each reduction (UCHAR_MAX for min, 0 for max).
142 uint8x8_t carry_min = vdup_n_u8(UCHAR_MAX);
143 uint8x8_t carry_max = vdup_n_u8(0);
// Scalar running results for the pixels handled by the one-at-a-time loop below.
145 uint8_t carry_max_scalar = 0;
146 uint8_t carry_min_scalar = UCHAR_MAX;
// Vector loop: 16 pixels per iteration while at least 16 remain before x_end.
162 for(; x <= x_end - 16; x += 16)
164 const uint8x16_t pixels = vld1q_u8(input.
ptr() + x);
// Fold the upper and lower 8-lane halves together, then merge into the running results.
165 const uint8x8_t tmp_min = vmin_u8(vget_high_u8(pixels), vget_low_u8(pixels));
166 const uint8x8_t tmp_max = vmax_u8(vget_high_u8(pixels), vget_low_u8(pixels));
167 carry_min = vmin_u8(tmp_min, carry_min);
168 carry_max = vmax_u8(tmp_max, carry_max);
// Tail loop: remaining pixels one at a time.
172 for(; x <
x_end; ++x)
174 const uint8_t pixel = input.
ptr()[x];
175 carry_min_scalar = std::min(pixel, carry_min_scalar);
176 carry_max_scalar = std::max(pixel, carry_max_scalar);
// Three pairwise steps reduce the 8 lanes (8 -> 4 -> 2 -> 1 candidates); lane 0 then holds the overall value.
182 carry_min = vpmin_u8(carry_min, carry_min);
183 carry_max = vpmax_u8(carry_max, carry_max);
184 carry_min = vpmin_u8(carry_min, carry_min);
185 carry_max = vpmax_u8(carry_max, carry_max);
186 carry_min = vpmin_u8(carry_min, carry_min);
187 carry_max = vpmax_u8(carry_max, carry_max);
// Combine the vector result (lane 0) with the scalar tail result.
190 const uint8_t min_i = std::min(vget_lane_u8(carry_min, 0), carry_min_scalar);
191 const uint8_t max_i = std::max(vget_lane_u8(carry_max, 0), carry_max_scalar);
// Pass the local results on to update_min_max.
194 update_min_max(min_i, max_i);
// minmax_S16: scans int16_t pixels up to x_end and hands the smallest and largest value found to update_min_max.
// The lines defining `x`, `x_end` and `input` are not part of this listing.
197 void NEMinMaxKernel::minmax_S16(
Window win)
// 4-lane running results, seeded with SHRT_MAX for min and SHRT_MIN for max.
199 int16x4_t carry_min = vdup_n_s16(SHRT_MAX);
200 int16x4_t carry_max = vdup_n_s16(SHRT_MIN);
// Scalar running results for the pixels handled by the one-at-a-time loop below.
202 int16_t carry_max_scalar = SHRT_MIN;
203 int16_t carry_min_scalar = SHRT_MAX;
// The iterator's byte pointer viewed as int16_t elements.
217 const auto in_ptr =
reinterpret_cast<const int16_t *
>(input.
ptr());
// Vector loop: 16 elements per iteration while at least 16 remain before x_end.
220 for(; x <= x_end - 16; x += 16)
// vld2q_s16 reads 16 values into two 8-lane registers; how they are split does not matter for min/max.
222 const int16x8x2_t pixels = vld2q_s16(in_ptr + x);
// Reduce 16 -> 8 lanes across the two registers, then 8 -> 4 by folding the halves, then merge into the running results.
223 const int16x8_t tmp_min1 = vminq_s16(pixels.val[0], pixels.val[1]);
224 const int16x8_t tmp_max1 = vmaxq_s16(pixels.val[0], pixels.val[1]);
225 const int16x4_t tmp_min2 = vmin_s16(vget_high_s16(tmp_min1), vget_low_s16(tmp_min1));
226 const int16x4_t tmp_max2 = vmax_s16(vget_high_s16(tmp_max1), vget_low_s16(tmp_max1));
227 carry_min = vmin_s16(tmp_min2, carry_min);
228 carry_max = vmax_s16(tmp_max2, carry_max);
// Tail loop: remaining elements one at a time.
232 for(; x <
x_end; ++x)
234 const int16_t pixel = in_ptr[x];
235 carry_min_scalar = std::min(pixel, carry_min_scalar);
236 carry_max_scalar = std::max(pixel, carry_max_scalar);
// Two pairwise steps reduce the 4 lanes (4 -> 2 -> 1 candidates); lane 0 then holds the overall value.
243 carry_min = vpmin_s16(carry_min, carry_min);
244 carry_max = vpmax_s16(carry_max, carry_max);
245 carry_min = vpmin_s16(carry_min, carry_min);
246 carry_max = vpmax_s16(carry_max, carry_max);
// Combine the vector result (lane 0) with the scalar tail result.
249 const int16_t min_i = std::min(vget_lane_s16(carry_min, 0), carry_min_scalar);
250 const int16_t max_i = std::max(vget_lane_s16(carry_max, 0), carry_max_scalar);
// Pass the local results on to update_min_max.
253 update_min_max(min_i, max_i);
// minmax_F32: scans float pixels up to x_end and hands the smallest and largest value found to update_min_max.
// The lines defining `x`, `x_end`, `input`, `carry_max` and `carry_max_scalar` are not part of this listing.
256 void NEMinMaxKernel::minmax_F32(
Window win)
// 2-lane running minimum, seeded with the largest finite float.
258 float32x2_t carry_min = vdup_n_f32(std::numeric_limits<float>::max());
// Scalar running minimum for the elements handled by the one-at-a-time loop below.
261 float carry_min_scalar = std::numeric_limits<float>::max();
// The iterator's byte pointer viewed as float elements.
276 const auto in_ptr =
reinterpret_cast<const float *
>(input.
ptr());
// Vector loop: 8 elements per iteration while at least 8 remain before x_end.
279 for(; x <= x_end - 8; x += 8)
// vld2q_f32 reads 8 values into two 4-lane registers; how they are split does not matter for min/max.
281 const float32x4x2_t pixels = vld2q_f32(in_ptr + x);
// Reduce 8 -> 4 lanes across the two registers, then 4 -> 2 by folding the halves, then merge into the running results.
282 const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]);
283 const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]);
284 const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1));
285 const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1));
286 carry_min = vmin_f32(tmp_min2, carry_min);
287 carry_max = vmax_f32(tmp_max2, carry_max);
// Tail loop: remaining elements one at a time.
291 for(; x <
x_end; ++x)
293 const float pixel = in_ptr[x];
294 carry_min_scalar = std::min(pixel, carry_min_scalar);
295 carry_max_scalar = std::max(pixel, carry_max_scalar);
// Pairwise reduction of the 2 lanes into lane 0.
// NOTE(review): on a 2-lane vector the first pair already completes the reduction; the second pair leaves the values unchanged.
302 carry_min = vpmin_f32(carry_min, carry_min);
303 carry_max = vpmax_f32(carry_max, carry_max);
304 carry_min = vpmin_f32(carry_min, carry_min);
305 carry_max = vpmax_f32(carry_max, carry_max);
// Combine the vector result (lane 0) with the scalar tail result.
308 const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar);
309 const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar);
// Pass the local results on to update_min_max.
312 update_min_max(min_i, max_i);
// Constructor member-initializer list (presumably NEMinMaxLocationKernel's default constructor; its signature line is not part of this listing).
// Every listed member starts as a null pointer.
316 : _func(nullptr), _input(nullptr), _min(nullptr), _max(nullptr), _min_count(nullptr), _max_count(nullptr), _min_loc(nullptr), _max_loc(nullptr)
// Partial specialization of create_func_table for an index_sequence<N...>.
// It declares a static array holding one MinMaxLocFunction per index in the pack.
325 template <
class T, std::size_t...
N>
326 struct NEMinMaxLocationKernel::create_func_table<T, utility::index_sequence<N...>>
328 static const std::array<NEMinMaxLocationKernel::MinMaxLocFunction,
sizeof...(N)> func_table;
// Out-of-class definition of that static array.
331 template <
class T, std::size_t...
N>
332 const std::array<NEMinMaxLocationKernel::MinMaxLocFunction,
sizeof...(N)> NEMinMaxLocationKernel::create_func_table<T, utility::index_sequence<N...>>::func_table
// Entry N points at minmax_loc<T, ...> whose four bool template arguments are bits 3, 2, 1 and 0 of N
// (masks 8, 4, 2, 1), in the order count_min, count_max, loc_min, loc_max used by the minmax_loc template.
334 &NEMinMaxLocationKernel::minmax_loc<T, bool(N & 8), bool(N & 4), bool(N & 2), bool(N & 1)>...
// End of a parameter list plus fragments of the body (presumably NEMinMaxLocationKernel::configure - TODO confirm;
// the start of the signature and the conditions choosing the element type are not part of this listing).
339 uint32_t *min_count, uint32_t *max_count)
// Remember where the counts are to be written.
349 _min_count = min_count;
350 _max_count = max_count;
// Each flag is 1 when the corresponding output pointer is non-null, 0 otherwise.
354 unsigned int count_min = (
nullptr != min_count ? 1 : 0);
355 unsigned int count_max = (
nullptr != max_count ? 1 : 0);
356 unsigned int loc_min = (
nullptr != min_loc ? 1 : 0);
357 unsigned int loc_max = (
nullptr != max_loc ? 1 : 0);
// Pack the flags into a 4-bit index: bit 3 = count_min, bit 2 = count_max, bit 1 = loc_min, bit 0 = loc_max.
359 unsigned int table_idx = (count_min << 3) | (count_max << 2) | (loc_min << 1) | loc_max;
// Pick the matching minmax_loc instantiation from a 16-entry table for uint8_t, int16_t or float elements.
364 _func = create_func_table<uint8_t, utility::index_sequence_t<16>>::func_table[table_idx];
367 _func = create_func_table<int16_t, utility::index_sequence_t<16>>::func_table[table_idx];
370 _func = create_func_table<float, utility::index_sequence_t<16>>::func_table[table_idx];
// Pass the window `win` on to the INEKernel base class.
384 INEKernel::configure(win);
// Call the member function currently stored in _func on this object, forwarding `window`.
394 (this->*_func)(window);
// minmax_loc<T, count_min, count_max, loc_min, loc_max>: compares pixels of type T against the stored minimum and
// maximum and, depending on the four compile-time flags, records matches. Braces and several body lines are not
// part of this listing, so the exact extent of each condition below is not visible.
397 template <
class T,
bool count_min,
bool count_max,
bool loc_min,
bool loc_max>
398 void NEMinMaxLocationKernel::minmax_loc(
const Window &win)
// Test whether at least one of the four outputs is requested.
400 if(count_min || count_max || loc_min || loc_max)
// Local counters for matches.
404 size_t min_count = 0;
405 size_t max_count = 0;
// Storage type behind _min/_max: float when T is float, int32_t for every other T.
419 using type =
typename std::conditional<std::is_same<T, float>::value, float, int32_t>
::type;
// Typed views of the _min and _max members.
421 auto min_ptr =
static_cast<type *
>(_min);
422 auto max_ptr =
static_cast<type *
>(_max);
// Current element pointer viewed as T, and the x / y coordinates taken from `id`.
426 auto in_ptr =
reinterpret_cast<const T *
>(input.
ptr());
427 int32_t idx =
id.
x();
428 int32_t idy =
id.y();
430 const T pixel = *in_ptr;
// Minimum side: only evaluated when a min count or min locations are requested; a pixel equal to the stored
// minimum leads to a push_back of `p` onto _min_loc (the lines that build `p` and update the counter are not visible).
433 if(count_min || loc_min)
435 if(*min_ptr == pixel)
444 _min_loc->push_back(p);
// Maximum side: same pattern against the stored maximum and _max_loc.
449 if(count_max || loc_max)
451 if(*max_ptr == pixel)
460 _max_loc->push_back(p);
// Write the local counters to the count outputs.
// NOTE(review): the counters are size_t while the configure fragment assigns uint32_t pointers to _min_count/_max_count,
// so these stores look like narrowing conversions on 64-bit targets - confirm against the member declarations.
469 *_min_count = min_count;
474 *_max_count = max_count;
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
1 channel, 1 U8 per channel
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Describe one of the image's dimensions with a start, end and step.
void configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc=nullptr, ICoordinates2DArray *max_loc=nullptr, uint32_t *min_count=nullptr, uint32_t *max_count=nullptr)
Initialise the kernel's input and outputs.
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Class to describe a number of elements in each dimension.
void configure(const IImage *input, void *min, void *max)
Initialise the kernel's input and outputs.
Implementation of a row access pattern.
void reset()
Resets global minimum and maximum.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
bool is_parallelisable() const override
Indicates whether or not the kernel is parallelisable.
unsigned int num_elems_processed_per_iteration
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each window element.
NEMinMaxLocationKernel()
Default constructor.
constexpr int end() const
Return the end of the dimension.
NEMinMaxKernel()
Default constructor.
Iterator updated by execute_window_loop for each window element.
std::lock_guard< Mutex > lock_guard
Wrapper of lock_guard data-object.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.