Compute Library
 20.02.1
CLReductionOperation.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 #include "arm_compute/core/Error.h"
38 
39 namespace arm_compute
40 {
// Constructor: moves the given memory manager into the internal memory group and value-initializes
// every other member. The reshape stage starts disabled; configure() sets the real state later.
41 CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
42  : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape_kernel(), _op(), _num_of_stages(), _reduction_axis(), _is_serial(),
43  _is_reshape_required(false)
44 {
45 }
46 
// Static check that the given tensor infos, axis and operation describe a configuration this function can run.
// Returns the first failing Status produced by the checks below, or a default-constructed Status when all pass.
// NOTE(review): this is a rendered listing; the gutter numbers skip 49, 74, 87, 95, 109-110, 115, 120, 125, 130
// and 155, so the statements on those source lines (including the switch's case labels) are not visible here.
47 Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims)
48 {
 // Reject an axis beyond the tensor's dimension limit, and any axis above 3.
50  ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
51  ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
52 
 // Same helpers and inputs as configure(), so both functions derive the same stage count and serial/parallel choice.
53  const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
54  const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis);
55  const bool is_reshape_required = !keep_dims;
56 
 // An already-initialized output must have the shape compute_reduced_shape() yields for keep_dims == false.
57  if(is_reshape_required && output->total_size() != 0)
58  {
59  const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, keep_dims));
60  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
61  }
62 
 // By default the final reduction stage is validated against the caller's output.
63  auto *output_internal = output;
64 
65  TensorInfo output_before_reshape;
66  const auto input_shape = input->tensor_shape();
67  const auto input_data_type = input->data_type();
68  const auto input_num_channles = input->num_channels();
69  const auto input_qinfo = input->quantization_info();
70  const auto output_data_type = output->data_type();
71 
 // Helper that fills a TensorInfo from its arguments; its body (listing line 74) is missing from this extract.
72  auto initialize_tensorinfo = [](TensorInfo & ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo)
73  {
75  };
76 
 // With a reshape, the reduction writes to a temporary whose reduced axis has extent 1; validate against that instead.
 // NOTE(review): this temporary takes output's data type, while configure_intermediate_result_vector() clones
 // input's info for the matching tensor — confirm the two are meant to agree.
77  if(is_reshape_required)
78  {
79  auto shape_before_reshape = input_shape;
80  shape_before_reshape.set(axis, 1);
81  initialize_tensorinfo(output_before_reshape, shape_before_reshape, output_data_type, input_num_channles, input_qinfo);
82  output_internal = &output_before_reshape;
83  }
84 
 // Serial path: the check performed on listing line 87 is not visible in this extract.
85  if(is_serial)
86  {
88  }
89  else
90  {
 // Parallel path: one temporary info per stage except the last, which targets output_internal.
91  // Create temporary tensor infos
92  std::vector<TensorInfo> sums_vector(num_of_stages - 1);
93 
94  // Create intermediate tensor info
96 
 // `shape` is declared on the missing listing line 95. Its x extent is divided by 128 and rounded up
 // (presumably 128 elements handled per work-group — TODO confirm against the kernel).
97  shape.set(0, ceil(shape.x() / 128.f));
98 
99  for(unsigned int i = 0; i < num_of_stages - 1; i++)
100  {
101  initialize_tensorinfo(sums_vector[i], shape, input_data_type, input_num_channles, input_qinfo);
102  }
103 
 // Pick the operation each stage runs. The case labels were dropped from this listing, so only the
 // per-case assignments are visible; which ReductionOperation selects each group cannot be read from here.
104  ReductionOperation first_kernel_op;
105  ReductionOperation intermediate_kernel_op;
106  ReductionOperation last_kernel_op;
107  switch(op)
108  {
111  first_kernel_op = ReductionOperation::SUM;
112  intermediate_kernel_op = ReductionOperation::SUM;
113  last_kernel_op = op;
114  break;
116  first_kernel_op = ReductionOperation::SUM_SQUARE;
117  intermediate_kernel_op = ReductionOperation::SUM;
118  last_kernel_op = ReductionOperation::SUM;
119  break;
121  first_kernel_op = ReductionOperation::PROD;
122  intermediate_kernel_op = ReductionOperation::PROD;
123  last_kernel_op = ReductionOperation::PROD;
124  break;
126  first_kernel_op = ReductionOperation::MIN;
127  intermediate_kernel_op = ReductionOperation::MIN;
128  last_kernel_op = ReductionOperation::MIN;
129  break;
131  first_kernel_op = ReductionOperation::MAX;
132  intermediate_kernel_op = ReductionOperation::MAX;
133  last_kernel_op = ReductionOperation::MAX;
134  break;
135  default:
136  ARM_COMPUTE_ERROR("Not supported");
137  }
138 
139  // Validate ReductionOperation only on first kernel
140  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, &sums_vector[0], axis, first_kernel_op));
141 
142  // Validate ReductionOperation on intermediate stages
143  for(unsigned int i = 1; i < num_of_stages - 1; ++i)
144  {
145  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[i - 1], &sums_vector[i], axis, intermediate_kernel_op));
146  }
147 
 // The last stage reads the final temporary and is additionally given the original input width.
148  // Validate ReductionOperation on the last stage
149  const unsigned int last_stage = num_of_stages - 1;
150  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[last_stage - 1], output_internal, axis, last_kernel_op, input->dimension(0)));
151  }
152 
 // The statement guarded here (listing line 155) is missing from this extract.
153  if(is_reshape_required)
154  {
156  }
157 
158  return Status{};
159 }
160 
// Sizes and initializes _results_vector (the temporaries between stages) from the state configure() has
// already stored, and returns the tensor the final reduction kernel must write to: `output` itself when
// no reshape follows, otherwise the last temporary.
161 ICLTensor *CLReductionOperation::configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output)
162 {
 // A serial reduction without reshape writes straight to output, so no temporaries are set up.
163  if(!_is_reshape_required && _is_serial)
164  {
165  return output;
166  }
167 
 // One temporary for the serial case, otherwise one per stage ...
168  auto intermediate_result_vector_size = _is_serial ? 1 : _num_of_stages;
169 
 // ... minus one when there is no reshape, because the last stage then targets output directly.
170  if(!_is_reshape_required)
171  {
172  --intermediate_result_vector_size;
173  }
174 
175  _results_vector.resize(intermediate_result_vector_size);
176  auto shape = input->info()->tensor_shape();
177 
 // Along the reduction axis: extent 1 for the serial case, otherwise the input's x extent divided by 128,
 // rounded up (presumably 128 elements per work-group — TODO confirm against the kernel).
178  shape.set(_reduction_axis, _is_serial ? 1 : ceil(shape.x() / 128.f));
179 
180  for(auto &v : _results_vector)
181  {
 // The last temporary feeds the reshape kernel, so its reduction axis is set to extent 1.
182  if(&v == &_results_vector.back() && _is_reshape_required)
183  {
184  shape.set(_reduction_axis, 1);
185  }
 // Every temporary starts from a clone of the input's info with only the shape replaced.
186  v.allocator()->init(input->info()->clone()->set_tensor_shape(shape));
187  }
188 
189  return _is_reshape_required ? &_results_vector.back() : output;
190 }
191 
// Stores the reduction parameters, sets up the temporaries, and configures the reduction kernels,
// the border handlers for the parallel path, and the optional reshape kernel used when keep_dims is false.
// NOTE(review): this is a rendered listing; the gutter numbers skip 194, 205, 234-235, 241, 247, 253, 274,
// 285 and 306, so the statements on those source lines (including the case labels) are not visible here.
192 void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
193 {
195  _op = op;
196  _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
197  _reduction_axis = axis;
198  _is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis);
199  _is_reshape_required = !keep_dims;
200 
 // Must run after the members above are set: the helper reads them to size the temporaries and
 // returns the tensor the final reduction stage writes to.
201  auto *output_internal = configure_intermediate_result_vector(input, output);
202 
 // Initialize an empty output from the input's info. `output_shape` is declared on the missing listing line 205.
203  if(_is_reshape_required)
204  {
206  const auto output_data_type = input->info()->data_type();
207  auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
208  }
209 
210  // Configure reduction operation kernels
211  _reduction_kernels_vector.resize(_num_of_stages);
212 
213  // Create temporary tensors
214  if(_is_serial)
215  {
 // Serial path: a single kernel; the only temporary is the pre-reshape result, if a reshape follows.
216  if(_is_reshape_required)
217  {
218  _memory_group.manage(&_results_vector.back());
219  }
220 
221  _reduction_kernels_vector[0].configure(input, output_internal, axis, op, 0);
222  }
223  else
224  {
 // Parallel path: every stage gets a border handler in addition to its reduction kernel.
225  _border_handlers_vector.resize(_num_of_stages);
226  _memory_group.manage(&_results_vector[0]);
227 
 // Besides the per-stage operations, each case picks the constant used to fill the borders:
 // a default PixelValue for the SUM-based groups, 1 for PROD, the type's largest value for MIN
 // and its smallest for MAX. The case labels were dropped from this listing.
228  ReductionOperation first_kernel_op;
229  ReductionOperation intermediate_kernel_op;
230  ReductionOperation last_kernel_op;
231  PixelValue pixelValue;
232  switch(op)
233  {
236  first_kernel_op = ReductionOperation::SUM;
237  intermediate_kernel_op = ReductionOperation::SUM;
238  last_kernel_op = op;
239  pixelValue = PixelValue();
240  break;
242  first_kernel_op = ReductionOperation::SUM_SQUARE;
243  intermediate_kernel_op = ReductionOperation::SUM;
244  last_kernel_op = ReductionOperation::SUM;
245  pixelValue = PixelValue();
246  break;
248  first_kernel_op = ReductionOperation::PROD;
249  intermediate_kernel_op = ReductionOperation::PROD;
250  last_kernel_op = ReductionOperation::PROD;
251  pixelValue = PixelValue(1, input->info()->data_type());
252  break;
254  first_kernel_op = ReductionOperation::MIN;
255  intermediate_kernel_op = ReductionOperation::MIN;
256  last_kernel_op = ReductionOperation::MIN;
257  switch(input->info()->data_type())
258  {
259  case DataType::F32:
260  {
261  pixelValue = PixelValue(std::numeric_limits<float>::max());
262  break;
263  }
264  case DataType::F16:
265  {
 // 65504 is the largest finite half-precision value.
266  pixelValue = PixelValue(static_cast<half>(65504.0f));
267  break;
268  }
269  case DataType::QASYMM8:
270  {
 // Element 1 of get_min_max() is taken here, element 0 in the MAX branch (presumably {min, max} order — confirm).
271  pixelValue = std::get<1>(get_min_max(input->info()->data_type()));
272  break;
273  }
 // Case label missing from the listing (line 274); 127 suggests a signed 8-bit type — TODO confirm.
275  {
276  pixelValue = PixelValue(127, input->info()->data_type(), input->info()->quantization_info());
277  break;
278  }
279  default:
280  {
281  ARM_COMPUTE_ERROR("Unsupported DataType");
282  }
283  }
284  break;
286  first_kernel_op = ReductionOperation::MAX;
287  intermediate_kernel_op = ReductionOperation::MAX;
288  last_kernel_op = ReductionOperation::MAX;
289  switch(input->info()->data_type())
290  {
291  case DataType::F32:
292  {
 // -max() rather than min(): numeric_limits<float>::min() is the smallest positive value, not the most negative.
293  pixelValue = PixelValue(-std::numeric_limits<float>::max());
294  break;
295  }
296  case DataType::F16:
297  {
298  pixelValue = PixelValue(static_cast<half>(-65504.0f));
299  break;
300  }
301  case DataType::QASYMM8:
302  {
303  pixelValue = std::get<0>(get_min_max(input->info()->data_type()));
304  break;
305  }
 // Case label missing from the listing (line 306); -128 suggests a signed 8-bit type — TODO confirm.
307  {
308  pixelValue = PixelValue(-128, input->info()->data_type(), input->info()->quantization_info());
309  break;
310  }
311  default:
312  {
313  ARM_COMPUTE_ERROR("Unsupported DataType");
314  }
315  }
316  break;
317  default:
318  ARM_COMPUTE_ERROR("Not supported");
319  }
320 
 // First stage: reduces the input into the first temporary; its border handler fills the input's border.
321  _reduction_kernels_vector[0].configure(input, &_results_vector[0], axis, first_kernel_op);
322  _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
323 
324  // Apply ReductionOperation on intermediate stages
325  for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
326  {
327  _memory_group.manage(&_results_vector[i]);
328  _reduction_kernels_vector[i].configure(&_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
329  _border_handlers_vector[i].configure(&_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
 // The previous temporary is allocated only once its producer and its consumer are both configured.
330  _results_vector[i - 1].allocator()->allocate();
331  }
332 
333  // Apply ReductionOperation on the last stage
334  const unsigned int last_stage = _num_of_stages - 1;
335  const unsigned int input_width = input->info()->dimension(0);
336 
337  if(_is_reshape_required)
338  {
339  _memory_group.manage(&_results_vector.back());
340  }
341 
 // The last stage writes to output_internal and is additionally given the original input width.
342  _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
343  _border_handlers_vector[last_stage].configure(&_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
344  _results_vector[last_stage - 1].allocator()->allocate();
345  }
346 
 // The reshape kernel copies the last temporary into the caller's output; that temporary is allocated last.
347  if(_is_reshape_required)
348  {
349  _reshape_kernel.configure(&_results_vector.back(), output);
350  _results_vector.back().allocator()->allocate();
351  }
352 }
353 
// NOTE(review): the function signature (listing line 354) is missing from this extract; presumably this is
// the body of CLReductionOperation::run() — confirm against the upstream file.
// Enqueues the configured kernels in order, each with flush == false: the single kernel for the serial
// path, or border handler then reduction kernel per stage, followed by the reshape kernel when one is required.
355 {
 // Scope object bound to _memory_group for the duration of this call.
356  MemoryGroupResourceScope scope_mg(_memory_group);
357 
358  if(_is_serial)
359  {
360  CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
361  }
362  else
363  {
364  for(unsigned int i = 0; i < _num_of_stages; ++i)
365  {
 // The border must be filled before the stage's reduction kernel reads it.
366  CLScheduler::get().enqueue(_border_handlers_vector[i], false);
367  CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
368  }
369  }
370 
371  if(_is_reshape_required)
372  {
373  CLScheduler::get().enqueue(_reshape_kernel, false);
374  }
375 }
376 } // namespace arm_compute
virtual ITensorInfo & set_num_channels(int num_channels)=0
Set the number of channels to the specified value.
bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis)
Check if the given reduction operation should be handled in a serial way.
Definition: Utils.cpp:436
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
Shape of a tensor.
Definition: TensorShape.h:39
ReductionOperation
Available reduction operations.
Definition: Types.h:495
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:99
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
virtual ITensorInfo & set_tensor_shape(const TensorShape &shape)=0
Set the shape of an already initialized tensor.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
Definition: TensorInfo.cpp:319
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Status class.
Definition: Error.h:52
Copyright (c) 2017-2020 ARM Limited.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: Helpers.inl:202
1 channel, 1 F16 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Quantization information.
unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, unsigned int axis)
Calculate number of stages for parallel implementations.
Definition: Utils.cpp:66
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:443
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims=true)
Calculate the reduced shape of a tensor given an axis.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width=0)
Static function to check if given info will lead to a valid configuration of CLReductionOperationKern...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
CLReductionOperation(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default Constructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Static function to check if given info will lead to a valid configuration of CLReductionOperation.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayerKernel.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
void configure(const ICLTensor *input, ICLTensor *output)
Set the input and output of the kernel.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
const QuantizationInfo qinfo
Definition: Im2Col.cpp:150
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
Store the tensor's metadata.
Definition: TensorInfo.h:45
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
quantized, asymmetric fixed-point 8-bit number signed
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
Definition: Dimensions.h:45
DataType
Available data types.
Definition: Types.h:75
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
Definition: Utils.h:558