Compute Library
 21.02
NEMinMaxLayerKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
27 #include "arm_compute/core/Error.h"
32 #include "arm_compute/core/Types.h"
38 
39 #include <algorithm>
40 #include <arm_neon.h>
41 #include <climits>
42 #include <cstddef>
43 
45 
46 namespace arm_compute
47 {
48 namespace
49 {
// Validates the tensor infos handed to the min/max kernel and reports the outcome as a Status.
// An empty Status{} is returned when none of the visible checks triggers.
// NOTE(review): listing lines 52-53, 58 and 62 are absent from this rendered page, so
// additional checks may exist in the real file that are not visible here.
50 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
51 {
    // The input must have at least three dimensions, otherwise an error Status is returned.
54  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() < 3);
55 
    // The output is only inspected when it already has a non-empty shape.
56  if(output->tensor_shape().total_size() > 0)
57  {
59 
    // Shape the output is expected to have for this input.
    // NOTE(review): the statements that use output_shape are not visible in this listing.
60  TensorShape output_shape = compute_min_max_shape(input);
61 
63  }
64 
65  return Status{};
66 }
67 
68 std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
69 {
70  TensorShape output_shape = compute_min_max_shape(input);
71 
72  // Output auto initialization if not yet initialized
73  auto_init_if_empty(*output, output_shape, 1, input->data_type());
74 
75  constexpr unsigned int num_elems_processed_per_iteration = 1;
76 
77  // Configure kernel window
78  Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
79  AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
80  AccessWindowHorizontal output_access(output, 0, 2);
81 
82  bool window_changed = update_window_and_padding(win, input_access, output_access);
83 
84  output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
85 
86  Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
87  return std::make_tuple(err, win);
88 }
89 } // namespace
90 
// Default constructor: both tensor pointers start as nullptr and the mutex member is default-constructed.
// NOTE(review): the constructor signature (listing line 91) is missing from this rendered page.
92  : _input(nullptr), _output(nullptr), _mtx()
93 {
94 }
95 
// Initialises the kernel: remembers the input and output tensors and configures the execution window.
//   input  - source tensor (must not be null).
//   output - destination tensor (must not be null); its info may be auto-initialised
//            by validate_and_configure_window().
// NOTE(review): listing line 99 is absent from this rendered page.
96 void NEMinMaxLayerKernel::configure(const ITensor *input, ITensor *output)
97 {
98  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
100 
101  _input = input;
102  _output = output;
103 
    // Returns a (Status, Window) tuple.
104  auto win_config = validate_and_configure_window(input->info(), output->info());
105 
    // Element 0 is the Status; presumably this macro raises an error if it is not OK - confirm in Error.h.
106  ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
107 
    // Element 1 is the window, which is handed to the base kernel.
108  INEKernel::configure(std::get<1>(win_config));
109 }
110 
// Body of the static validate(input, output) check; returns an empty Status when no error is reported.
// NOTE(review): the signature (listing line 111) and listing line 113 are missing from this rendered page.
112 {
    // The window configuration is run on clones of the infos; presumably so that the
    // caller's tensor infos are left untouched - confirm.
114  ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
115 
116  return Status{};
117 }
118 
// Body of run(window, info): for every position of the output window, scans the input
// window for the smallest and largest float value and merges them into the output
// through update_min_max().
// NOTE(review): the signature (listing line 119) and listing lines 122-123 are missing
// from this rendered page.
120 {
    // info is not used by this kernel; the macro avoids an unused-variable warning.
121  ARM_COMPUTE_UNUSED(info);
124 
    // X range of the execution window; X is walked manually inside the loops below.
125  const int x_start = window.x().start();
126  const int x_end = window.x().end();
127 
    // Output window: spans the output tensor's dimensions, with X replaced by Dimension(0, 1, 1).
128  Window window_output;
129  window_output.use_tensor_dimensions(_output->info()->tensor_shape());
130  window_output.set(Window::DimX, Window::Dimension(0, 1, 1));
131 
132  // Handle X dimension manually to split into two loops
133  // First one will use vector operations, second one processes the leftover pixels
134  Window window_input(window);
135  window_input.set(Window::DimX, Window::Dimension(0, 1, 1));
    // Dimension 3 is also replaced by Dimension(0, 1, 1); it is addressed through the
    // byte offset computed inside the inner loop.
136  window_input.set(3, Window::Dimension(0, 1, 1));
137 
138  Iterator input(_input, window_input);
139  Iterator output(_output, window_output);
140 
141  execute_window_loop(window_output, [&](const Coordinates & id_batch)
142  {
    // Vector accumulators start at the identity values for min and max.
143  float32x2_t carry_min = vdup_n_f32(std::numeric_limits<float>::max());
144  float32x2_t carry_max = vdup_n_f32(std::numeric_limits<float>::lowest());
145 
    // Separate scalar accumulators for the elements handled by the leftover loop.
146  float carry_min_scalar = std::numeric_limits<float>::max();
147  float carry_max_scalar = std::numeric_limits<float>::lowest();
148 
149  execute_window_loop(window_input, [&](const Coordinates &)
150  {
151  int x = x_start;
    // Row pointer, offset by the outer coordinate id_batch[1] times the byte stride of input dimension 3.
152  const auto in_ptr = reinterpret_cast<const float *>(input.ptr() + id_batch[1] * _input->info()->strides_in_bytes()[3]);
153 
154  // Vector loop
    // Eight floats per iteration: vld2q_f32 loads them into two 4-lane vectors, which are
    // combined lane-wise, folded from four lanes to two, and merged into the carries.
155  for(; x <= x_end - 8; x += 8)
156  {
157  const float32x4x2_t pixels = vld2q_f32(in_ptr + x);
158  const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]);
159  const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]);
160  const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1));
161  const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1));
162  carry_min = vmin_f32(tmp_min2, carry_min);
163  carry_max = vmax_f32(tmp_max2, carry_max);
164  }
165 
166  // Process leftover pixels
167  for(; x < x_end; ++x)
168  {
169  const float pixel = in_ptr[x];
170  carry_min_scalar = std::min(pixel, carry_min_scalar);
171  carry_max_scalar = std::max(pixel, carry_max_scalar);
172  }
173  },
174  input);
175 
176  // Reduce result
    // Pairwise min/max across the two lanes; lane 0 is read below.
177  carry_min = vpmin_f32(carry_min, carry_min);
178  carry_max = vpmax_f32(carry_max, carry_max);
179  carry_min = vpmin_f32(carry_min, carry_min);
180  carry_max = vpmax_f32(carry_max, carry_max);
181 
182  // Extract max/min values
    // Combine the vector result (lane 0) with the scalar leftover result.
183  const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar);
184  const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar);
185 
186  auto out_ptr = reinterpret_cast<float *>(output.ptr());
187 
188  // Perform reduction of local min/max values
189  update_min_max(out_ptr, min_i, max_i);
190  },
191  output);
192 }
193 
// Body of reset(): overwrites every output entry with the pair
// { numeric_limits<float>::max(), numeric_limits<float>::lowest() }, the starting
// values for a running minimum and maximum.
// NOTE(review): the signature (listing line 194) and listing line 196 are missing
// from this rendered page.
195 {
197 
    // Two-lane vector: lane 0 = largest float (initial min), lane 1 = lowest float (initial max).
198  float32x2_t reset_values = vdup_n_f32(0.0f);
199  reset_values = vset_lane_f32(std::numeric_limits<float>::max(), reset_values, 0);
200  reset_values = vset_lane_f32(std::numeric_limits<float>::lowest(), reset_values, 1);
201 
    // Same output window as in run(): output tensor dimensions with X replaced by Dimension(0, 1, 1).
202  Window window_output;
203  window_output.use_tensor_dimensions(_output->info()->tensor_shape());
204  window_output.set(Window::DimX, Window::Dimension(0, 1, 1));
205 
206  Iterator output(_output, window_output);
207 
208  execute_window_loop(window_output, [&](const Coordinates &)
209  {
    // One store writes both floats at the current output position.
210  vst1_f32(reinterpret_cast<float *>(output.ptr()), reset_values);
211  },
212  output);
213 }
214 
// Merges a locally computed min/max pair into the pair stored at out_ptr.
//   out_ptr - points at two consecutive floats: out_ptr[0] is the stored minimum,
//             out_ptr[1] the stored maximum.
//   min     - candidate minimum.
//   max     - candidate maximum.
// NOTE(review): listing line 217 is absent from this rendered page; presumably it takes a
// lock on _mtx before the read-modify-write below - confirm against the real file.
215 void NEMinMaxLayerKernel::update_min_max(float *out_ptr, float min, float max)
216 {
218 
    // Broadcast the stored values into both lanes and combine them with the candidates.
219  const float32x2_t old_min = vld1_dup_f32(out_ptr);
220  const float32x2_t old_max = vld1_dup_f32(out_ptr + 1);
221  const float32x2_t new_min = vmin_f32(vdup_n_f32(min), old_min);
222  const float32x2_t new_max = vmax_f32(vdup_n_f32(max), old_max);
223 
    // val[0] of the zip is { new_min[0], new_max[0] }, so the min and the max are written back together.
224  vst1_f32(out_ptr, vzip_f32(new_min, new_max).val[0]);
225 }
226 } // namespace arm_compute
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
1 channel, 1 F32 per channel
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
Status class.
Definition: Error.h:52
void reset()
Resets global minimum and maximum.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Interface for Neon tensor.
Definition: ITensor.h:36
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:288
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:276
Copyright (c) 2017-2021 Arm Limited.
TensorShape compute_min_max_shape(const ITensorInfo *input)
Calculate the min/max output shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's input and outputs.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Coordinates of an item.
Definition: Coordinates.h:37
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Information about executing thread and CPU.
Definition: CPPTypes.h:235
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Definition: Helpers.inl:77
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:99
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEMinMaxLayerKernel.
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
std::lock_guard< Mutex > lock_guard
Wrapper of lock_guard data-object.
Definition: Mutex.h:37
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:94
Describe a multidimensional execution window.
Definition: Window.h:39
NEMinMaxLayerKernel()
Default constructor.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145