41 : _memory_group(
std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape(), _num_of_stages(), _reduction_axis(), _is_serial(),
42 _is_reshape_required(false)
56 const bool is_reshape_required = !keep_dims;
58 if(is_reshape_required && output->
total_size() != 0)
64 auto *output_internal = output;
68 const auto input_data_type = input->
data_type();
71 const auto output_data_type = output->
data_type();
78 if(is_reshape_required)
81 shape_before_reshape.
set(axis, 1);
82 initialize_tensorinfo(output_before_reshape, shape_before_reshape, output_data_type, input_num_channles, input_qinfo);
83 output_internal = &output_before_reshape;
93 std::vector<TensorInfo> sums_vector(num_of_stages - 1);
98 shape.
set(0, ceil(shape.x() / 128.f));
100 for(
unsigned int i = 0; i < num_of_stages - 1; i++)
102 initialize_tensorinfo(sums_vector[i], shape, input_data_type, input_num_channles, input_qinfo);
144 for(
unsigned int i = 1; i < num_of_stages - 1; ++i)
150 const unsigned int last_stage = num_of_stages - 1;
154 if(is_reshape_required)
164 if(!_is_reshape_required && _is_serial)
169 auto intermediate_result_vector_size = _is_serial ? 1 : _num_of_stages;
171 if(!_is_reshape_required)
173 --intermediate_result_vector_size;
176 _results_vector.resize(intermediate_result_vector_size);
179 shape.set(_reduction_axis, _is_serial ? 1 : ceil(
shape.x() / 128.f));
181 for(
auto &v : _results_vector)
183 if(&v == &_results_vector.back() && _is_reshape_required)
185 shape.set(_reduction_axis, 1);
187 v.allocator()->init(input->
info()->
clone()->set_tensor_shape(
shape));
190 return _is_reshape_required ? &_results_vector.back() : output;
202 _reduction_axis = axis;
204 _is_reshape_required = !keep_dims;
206 auto *output_internal = configure_intermediate_result_vector(input, output);
208 if(_is_reshape_required)
212 auto_init_if_empty(*output->
info(), input->
info()->
clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(
true));
216 _reduction_kernels_vector.reserve(_num_of_stages);
221 if(_is_reshape_required)
223 _memory_group.
manage(&_results_vector.back());
226 _reduction_kernels_vector.emplace_back(std::make_unique<CLReductionOperationKernel>());
227 _reduction_kernels_vector[0]->configure(compile_context, input, output_internal, axis, op, 0);
231 _border_handlers_vector.reserve(_num_of_stages);
232 _memory_group.
manage(&_results_vector[0]);
275 _reduction_kernels_vector.emplace_back(std::make_unique<CLReductionOperationKernel>());
276 _reduction_kernels_vector[0]->configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
278 _border_handlers_vector.emplace_back(std::make_unique<CLFillBorderKernel>());
279 _border_handlers_vector[0]->configure(compile_context, input, _reduction_kernels_vector[0]->border_size(),
BorderMode::CONSTANT, pixelValue);
282 for(
unsigned int i = 1; i < _num_of_stages - 1; ++i)
284 _memory_group.
manage(&_results_vector[i]);
286 _reduction_kernels_vector.emplace_back(std::make_unique<CLReductionOperationKernel>());
287 _reduction_kernels_vector[i]->configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
289 _border_handlers_vector.emplace_back(std::make_unique<CLFillBorderKernel>());
290 _border_handlers_vector[i]->configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i]->border_size(),
BorderMode::CONSTANT, pixelValue);
292 _results_vector[i - 1].allocator()->allocate();
296 const unsigned int last_stage = _num_of_stages - 1;
299 if(_is_reshape_required)
301 _memory_group.
manage(&_results_vector.back());
304 _reduction_kernels_vector.emplace_back(std::make_unique<CLReductionOperationKernel>());
305 _reduction_kernels_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
307 _border_handlers_vector.emplace_back(std::make_unique<CLFillBorderKernel>());
308 _border_handlers_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage]->border_size(),
BorderMode::CONSTANT, pixelValue);
310 _results_vector[last_stage - 1].allocator()->allocate();
313 if(_is_reshape_required)
315 _reshape.
configure(compile_context, &_results_vector.back(), output);
316 _results_vector.back().allocator()->allocate();
330 for(
unsigned int i = 0; i < _num_of_stages; ++i)
337 if(_is_reshape_required)
unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, unsigned int axis)
Calculate number of stages for parallel implementations.
virtual ITensorInfo & set_num_channels(int num_channels)=0
Set the number of channels to the specified value.
bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis)
Check if the given reduction operation should be handled in a serial way.
Class describing the value of a pixel for any image format.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayer.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
ReductionOperation
Available reduction operations.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
virtual ITensorInfo & set_tensor_shape(const TensorShape &shape)=0
Set the shape of an already initialized tensor.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
TensorShape input_shape
Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros we use to check the validity of given arguments.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims=true)
Calculate the reduced shape of a tensor given an axis.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width=0)
Static function to check if given info will lead to a valid configuration of CLReductionOperationKernel.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
CLReductionOperation(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default Constructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Static function to check if given info will lead to a valid configuration of CLReductionOperation.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Memory group resources scope handling class.
Interface for OpenCL tensor.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
const QuantizationInfo qinfo
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and outputs.
~CLReductionOperation()
Default Destructor.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
DataType
Available data types.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
virtual size_t num_channels() const =0
The number of channels for each tensor element.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.