56 if(output->tensor_shape().total_size() > 0)
68 std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
79 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
80 AccessWindowHorizontal output_access(output, 0, 2);
84 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
87 return std::make_tuple(err, win);
92 : _input(nullptr), _output(nullptr), _mtx()
104 auto win_config = validate_and_configure_window(input->
info(), output->
info());
108 INEKernel::configure(std::get<1>(win_config));
134 Window window_input(window);
139 Iterator output(_output, window_output);
143 float32x2_t carry_min = vdup_n_f32(std::numeric_limits<float>::max());
146 float carry_min_scalar = std::numeric_limits<float>::max();
152 const auto in_ptr =
reinterpret_cast<const float *
>(input.
ptr() + id_batch[1] * _input->
info()->
strides_in_bytes()[3]);
155 for(; x <= x_end - 8; x += 8)
157 const float32x4x2_t pixels = vld2q_f32(in_ptr + x);
158 const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]);
159 const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]);
160 const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1));
161 const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1));
162 carry_min = vmin_f32(tmp_min2, carry_min);
163 carry_max = vmax_f32(tmp_max2, carry_max);
167 for(; x <
x_end; ++x)
169 const float pixel = in_ptr[x];
170 carry_min_scalar = std::min(pixel, carry_min_scalar);
171 carry_max_scalar = std::max(pixel, carry_max_scalar);
177 carry_min = vpmin_f32(carry_min, carry_min);
178 carry_max = vpmax_f32(carry_max, carry_max);
179 carry_min = vpmin_f32(carry_min, carry_min);
180 carry_max = vpmax_f32(carry_max, carry_max);
183 const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar);
184 const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar);
186 auto out_ptr =
reinterpret_cast<float *
>(output.
ptr());
189 update_min_max(out_ptr, min_i, max_i);
198 float32x2_t reset_values = vdup_n_f32(0.0f);
199 reset_values = vset_lane_f32(std::numeric_limits<float>::max(), reset_values, 0);
206 Iterator output(_output, window_output);
210 vst1_f32(reinterpret_cast<float *>(output.
ptr()), reset_values);
215 void NEMinMaxLayerKernel::update_min_max(
float *out_ptr,
float min,
float max)
219 const float32x2_t old_min = vld1_dup_f32(out_ptr);
220 const float32x2_t old_max = vld1_dup_f32(out_ptr + 1);
221 const float32x2_t new_min = vmin_f32(vdup_n_f32(min), old_min);
222 const float32x2_t new_max = vmax_f32(vdup_n_f32(max), old_max);
224 vst1_f32(out_ptr, vzip_f32(new_min, new_max).val[0]);
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
1 channel, 1 F32 per channel
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
void reset()
Resets global minimum and maximum.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for Neon tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Copyright (c) 2017-2021 Arm Limited.
TensorShape compute_min_max_shape(const ITensorInfo *input)
Calculate the min/max shape output shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's input and outputs.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Information about executing thread and CPU.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr int end() const
Return the end of the dimension.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEMinMaxLayerKernel.
Iterator updated by execute_window_loop for each window element.
std::lock_guard< Mutex > lock_guard
Wrapper of lock_guard data-object.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
NEMinMaxLayerKernel()
Default constructor.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.