Compute Library
 19.08
CLReductionOperation Class Reference

Perform reduction operation. More...

#include <CLReductionOperation.h>

Collaboration diagram for CLReductionOperation:
[legend]

Public Member Functions

 CLReductionOperation (std::shared_ptr< IMemoryManager > memory_manager=nullptr)
 Default Constructor. More...
 
void configure (ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op)
 Set the input and output tensors. More...
 
void run () override
 Run the kernels contained in the function. More...
 
- Public Member Functions inherited from IFunction
virtual ~IFunction ()=default
 Destructor. More...
 
virtual void prepare ()
 Prepare the function for executing. More...
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
 Static function to check if given info will lead to a valid configuration of CLReductionOperation. More...
 

Detailed Description

Perform reduction operation.

Definition at line 45 of file CLReductionOperation.h.

Constructor & Destructor Documentation

◆ CLReductionOperation()

CLReductionOperation ( std::shared_ptr< IMemoryManager > memory_manager = nullptr)

Default Constructor.

Parameters
[in] memory_manager: (Optional) Memory manager.

Definition at line 58 of file CLReductionOperation.cpp.

59  : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _num_of_stages(), _reduction_axis(), _is_serial()
60 {
61 }

Member Function Documentation

◆ configure()

void configure ( ICLTensor * input,
ICLTensor * output,
unsigned int  axis,
ReductionOperation  op 
)

Set the input and output tensors.

Parameters
[in] input: Source tensor. Data types supported: QASYMM8/F16/F32.
[out] output: Destination tensor. Data types and data layouts supported: Same as input.
[in] axis: Axis along which to reduce. Supported reduction axis: 0, 1, 2, 3
[in] op: Reduction operation to perform.

Definition at line 139 of file CLReductionOperation.cpp.

140 {
141  _num_of_stages = calculate_number_of_stages(input->info(), axis);
142  _reduction_axis = axis;
143  _is_serial = is_data_type_quantized(input->info()->data_type()) || axis != 0;
144 
145  // Configure reduction operation kernels
146  _reduction_kernels_vector.resize(_num_of_stages);
147 
148  // Create temporary tensors
149  if(_is_serial)
150  {
151  _reduction_kernels_vector[0].configure(input, output, axis, op, 0);
152  }
153  else
154  {
155  _border_handlers_vector.resize(_num_of_stages);
156  _results_vector.resize(_num_of_stages - 1);
157  TensorShape shape{ input->info()->tensor_shape() };
158  for(unsigned int i = 0; i < _num_of_stages - 1; i++)
159  {
160  shape.set(0, ceil(shape.x() / 128.f));
161  _results_vector[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape));
162  }
163 
164  // Apply ReductionOperation only on first kernel
165  _memory_group.manage(&_results_vector[0]);
166 
167  ReductionOperation first_kernel_op;
168  ReductionOperation intermediate_kernel_op;
169  ReductionOperation last_kernel_op;
170  PixelValue pixelValue;
171  switch(op)
172  {
175  first_kernel_op = ReductionOperation::SUM;
176  intermediate_kernel_op = ReductionOperation::SUM;
177  last_kernel_op = op;
178  pixelValue = PixelValue();
179  break;
181  first_kernel_op = ReductionOperation::SUM_SQUARE;
182  intermediate_kernel_op = ReductionOperation::SUM;
183  last_kernel_op = ReductionOperation::SUM;
184  pixelValue = PixelValue();
185  break;
187  first_kernel_op = ReductionOperation::PROD;
188  intermediate_kernel_op = ReductionOperation::PROD;
189  last_kernel_op = ReductionOperation::PROD;
190  pixelValue = PixelValue(1, input->info()->data_type());
191  break;
193  first_kernel_op = ReductionOperation::MIN;
194  intermediate_kernel_op = ReductionOperation::MIN;
195  last_kernel_op = ReductionOperation::MIN;
196  switch(input->info()->data_type())
197  {
198  case DataType::F32:
199  {
200  pixelValue = PixelValue(std::numeric_limits<float>::max());
201  break;
202  }
203  case DataType::F16:
204  {
205  pixelValue = PixelValue(static_cast<half>(65504.0f));
206  break;
207  }
208  case DataType::QASYMM8:
209  {
210  pixelValue = PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
211  break;
212  }
213  default:
214  {
215  ARM_COMPUTE_ERROR("Unsupported DataType");
216  }
217  }
218  break;
220  first_kernel_op = ReductionOperation::MAX;
221  intermediate_kernel_op = ReductionOperation::MAX;
222  last_kernel_op = ReductionOperation::MAX;
223  switch(input->info()->data_type())
224  {
225  case DataType::F32:
226  {
227  pixelValue = PixelValue(-std::numeric_limits<float>::max());
228  break;
229  }
230  case DataType::F16:
231  {
232  pixelValue = PixelValue(static_cast<half>(-65504.0f));
233  break;
234  }
235  case DataType::QASYMM8:
236  {
237  pixelValue = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
238  break;
239  }
240  default:
241  {
242  ARM_COMPUTE_ERROR("Unsupported DataType");
243  }
244  }
245  break;
246  default:
247  ARM_COMPUTE_ERROR("Not supported");
248  }
249 
250  _reduction_kernels_vector[0].configure(input, &_results_vector[0], axis, first_kernel_op);
251  _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
252 
253  // Apply ReductionOperation on intermediate stages
254  for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
255  {
256  _memory_group.manage(&_results_vector[i]);
257  _reduction_kernels_vector[i].configure(&_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
258  _border_handlers_vector[i].configure(&_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
259  _results_vector[i - 1].allocator()->allocate();
260  }
261 
262  // Apply ReductionOperation on the last stage
263  const unsigned int last_stage = _num_of_stages - 1;
264  const unsigned int input_width = input->info()->dimension(0);
265  _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output, axis, last_kernel_op, input_width);
266  _border_handlers_vector[last_stage].configure(&_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
267  _results_vector[last_stage - 1].allocator()->allocate();
268  }
269 }
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:1010
#define ARM_COMPUTE_ERROR(...)
Print the given message then throw an std::runtime_error.
Definition: Error.h:261
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
Shape of a tensor.
Definition: TensorShape.h:39
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
ReductionOperation
Available reduction operations.
Definition: Types.h:485
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
1 channel, 1 F16 per channel
void manage(TensorType *obj)
Sets an object to be managed by the given memory group.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.

References ARM_COMPUTE_ERROR, arm_compute::test::validation::axis, ICloneable< T >::clone(), arm_compute::CONSTANT, ITensorInfo::data_type(), ITensorInfo::dimension(), arm_compute::F16, arm_compute::F32, ITensor::info(), arm_compute::is_data_type_quantized(), MemoryGroupBase< TensorType >::manage(), arm_compute::MAX, arm_compute::MEAN_SUM, arm_compute::MIN, arm_compute::PROD, arm_compute::QASYMM8, ITensorInfo::quantization_info(), arm_compute::test::validation::shape, arm_compute::SUM, arm_compute::SUM_SQUARE, and ITensorInfo::tensor_shape().

Referenced by CLMeanStdDev::configure(), CLL2NormalizeLayer::configure(), and CLFFTConvolutionLayer::configure().

◆ run()

void run ( )
override virtual

Run the kernels contained in the function.

For NEON kernels:

  • Multi-threading is used for the kernels which are parallelisable.
  • By default std::thread::hardware_concurrency() threads are used.
Note
CPPScheduler::set_num_threads() can be used to manually set the number of threads

For OpenCL kernels:

  • All the kernels are enqueued on the queue associated with CLScheduler.
  • The queue is then flushed.
Note
The function will not block until the kernels are executed. It is the user's responsibility to wait.
Will call prepare() on the first run if it hasn't been done already.

Implements IFunction.

Definition at line 271 of file CLReductionOperation.cpp.

272 {
273  MemoryGroupResourceScope scope_mg(_memory_group);
274 
275  if(_is_serial)
276  {
277  CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
278  }
279  else
280  {
281  for(unsigned int i = 0; i < _num_of_stages; ++i)
282  {
283  CLScheduler::get().enqueue(_border_handlers_vector[i], false);
284  CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
285  }
286  }
287 }
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:41
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Definition: CLScheduler.cpp:95
Memory group resources scope handling class.
Definition: IMemoryGroup.h:46

References CLScheduler::enqueue(), and CLScheduler::get().

Referenced by CLL2NormalizeLayer::run(), and CLFFTConvolutionLayer::run().

◆ validate()

Status validate ( const ITensorInfo * input,
const ITensorInfo * output,
unsigned int  axis,
ReductionOperation  op 
)
static

Static function to check if given info will lead to a valid configuration of CLReductionOperation.

Parameters
[in] input: Source tensor info. Data types supported: QASYMM8/F16/F32.
[in] output: Destination tensor info. Data types and data layouts supported: Same as input.
[in] axis: Axis along which to reduce. Supported reduction axis: 0, 1, 2, 3
[in] op: Reduction operation to perform.
Returns
a status

Definition at line 63 of file CLReductionOperation.cpp.

64 {
65  const unsigned int num_of_stages = calculate_number_of_stages(input, axis);
66  bool is_serial = is_data_type_quantized(input->data_type()) || axis != 0;
67  if(is_serial)
68  {
70  }
71  else
72  {
73  // Create temporary tensor infos
74  std::vector<TensorInfo> sums_vector(num_of_stages - 1);
75 
76  // Create intermediate tensor info
77  TensorShape shape{ input->tensor_shape() };
78 
79  for(unsigned int i = 0; i < num_of_stages - 1; i++)
80  {
81  shape.set(0, ceil(shape.x() / 128.f));
82  sums_vector[i].set_data_type(input->data_type());
83  sums_vector[i].set_tensor_shape(shape);
84  sums_vector[i].set_num_channels(input->num_channels());
85  }
86 
87  ReductionOperation first_kernel_op;
88  ReductionOperation intermediate_kernel_op;
89  ReductionOperation last_kernel_op;
90  switch(op)
91  {
94  first_kernel_op = ReductionOperation::SUM;
95  intermediate_kernel_op = ReductionOperation::SUM;
96  last_kernel_op = op;
97  break;
99  first_kernel_op = ReductionOperation::SUM_SQUARE;
100  intermediate_kernel_op = ReductionOperation::SUM;
101  last_kernel_op = ReductionOperation::SUM;
102  break;
104  first_kernel_op = ReductionOperation::PROD;
105  intermediate_kernel_op = ReductionOperation::PROD;
106  last_kernel_op = ReductionOperation::PROD;
107  break;
109  first_kernel_op = ReductionOperation::MIN;
110  intermediate_kernel_op = ReductionOperation::MIN;
111  last_kernel_op = ReductionOperation::MIN;
112  break;
114  first_kernel_op = ReductionOperation::MAX;
115  intermediate_kernel_op = ReductionOperation::MAX;
116  last_kernel_op = ReductionOperation::MAX;
117  break;
118  default:
119  ARM_COMPUTE_ERROR("Not supported");
120  }
121 
122  // Validate ReductionOperation only on first kernel
123  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, &sums_vector[0], axis, first_kernel_op));
124 
125  // Validate ReductionOperation on intermediate stages
126  for(unsigned int i = 1; i < num_of_stages - 1; ++i)
127  {
128  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[i - 1], &sums_vector[i], axis, intermediate_kernel_op));
129  }
130 
131  // Validate ReductionOperation on the last stage
132  const unsigned int last_stage = num_of_stages - 1;
133  ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[last_stage - 1], output, axis, last_kernel_op, input->dimension(0)));
134  }
135 
136  return Status{};
137 }
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:1010
#define ARM_COMPUTE_ERROR(...)
Print the given message then throw an std::runtime_error.
Definition: Error.h:261
Shape of a tensor.
Definition: TensorShape.h:39
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
ReductionOperation
Available reduction operations.
Definition: Types.h:485
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:193
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Status class.
Definition: Error.h:52
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width=0)
Static function to check if given info will lead to a valid configuration of CLReductionOperationKernel.
virtual size_t num_channels() const =0
The number of channels for each tensor element.

References ARM_COMPUTE_ERROR, ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::test::validation::axis, ITensorInfo::data_type(), ITensorInfo::dimension(), arm_compute::is_data_type_quantized(), arm_compute::MAX, arm_compute::MEAN_SUM, arm_compute::MIN, ITensorInfo::num_channels(), arm_compute::PROD, arm_compute::test::validation::shape, arm_compute::SUM, arm_compute::SUM_SQUARE, ITensorInfo::tensor_shape(), and CLReductionOperationKernel::validate().

Referenced by CLMeanStdDev::validate(), and CLL2NormalizeLayer::validate().


The documentation for this class was generated from the following files: