76 (biases !=
nullptr) ? biases->
info() :
nullptr,
81 const ICLTensor *biases_to_use = (append_biases) ? biases :
nullptr;
83 _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups);
104 if((output !=
nullptr) && (output->
total_size() != 0))
119 : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(
std::make_unique<
CLIm2ColKernel>()), _mm_gemm(memory_manager,
120 weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(
std::make_unique<
CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(),
121 _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
140 gemmlowp_output_stage,
155 _mm_gemmlowp.
configure(compile_context, input, weights, biases, output, gemm_info);
164 _mm_gemm.
configure(compile_context, input, weights, biases, output, 1.0f, 1.0f, gemm_info);
179 gemmlowp_output_stage,
191 std::unique_ptr<ITensorInfo> input_qa = input->
clone();
192 std::unique_ptr<ITensorInfo> weights_qa = weights->
clone();
202 return CLGEMM::validate(input, weights, biases, output, 1.0f, 1.0f, gemm_info);
209 configure(
CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
220 biases !=
nullptr ? biases->
info() :
nullptr,
234 const unsigned int kernel_width = weights->
info()->
dimension(idx_width);
235 const unsigned int kernel_height = weights->
info()->
dimension(idx_height);
236 const unsigned int num_kernels = weights->
info()->
dimension(idx_kernels);
241 _is_prepared = weights_info.retain_internal_weights();
242 _original_weights = weights;
244 _skip_im2col = (data_layout ==
DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.
stride().first == 1 && conv_info.
stride().second == 1);
248 _fuse_activation =
true;
258 unsigned int stride_x = 0;
259 unsigned int stride_y = 0;
260 std::tie(stride_x, stride_y) = conv_info.
stride();
263 unsigned int conv_w = 0;
264 unsigned int conv_h = 0;
272 unsigned int mat_weights_cols = num_kernels /
num_groups;
275 bool append_bias =
false;
277 ICLTensor *weights_to_use = &_weights_reshaped;
278 if(num_groups != 1 && biases !=
nullptr)
282 biases_to_use =
nullptr;
287 _reshape_weights_managed.
configure(compile_context, weights, biases, num_groups);
288 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_reshape_weights_managed));
292 _reshape_weights.
configure(compile_context, weights, biases, &_weights_reshaped, num_groups);
299 _reshape_weights_managed.
configure(compile_context, weights,
nullptr, num_groups);
300 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_reshape_weights_managed));
304 _reshape_weights.
configure(compile_context, weights,
nullptr, &_weights_reshaped, num_groups);
311 _memory_group.
manage(&_im2col_output);
314 _im2col_kernel->configure(compile_context, input, &_im2col_output,
Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
321 gemm_input_to_use = &_im2col_output;
331 shape_gemm.
set(0, mat_weights_cols);
332 shape_gemm.
set(1, conv_w * conv_h);
334 TensorInfo info_gemm(shape_gemm, 1, data_type);
337 _memory_group.
manage(&_gemm_output);
340 gemm_output_to_use = &_gemm_output;
350 const auto output_quant_info = (output->
info()->
total_size() == 0) ? iq_info : oq_info;
352 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
371 auto min_activation = min_val.get<int32_t>();
372 auto max_activation = max_val.get<int32_t>();
379 if(act_info.enabled())
381 if(supported_acts.count(act_info.activation()) != 0)
387 _fuse_activation =
false;
399 const unsigned int gemm_3d_depth = (data_layout ==
DataLayout::NHWC) ? conv_h : 0;
401 configure_mm(compile_context, gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, act_info);
411 _col2im_kernel->configure(compile_context, gemm_output_to_use, output,
Size2D(conv_w, conv_h), num_groups);
421 "Output shape does not match the expected one");
423 if(!_fuse_activation)
425 _activationlayer_function.
configure(compile_context, output,
nullptr, act_info);
439 if(!is_quantized_per_channel)
455 const unsigned int kernel_width = weights->
dimension(idx_width);
456 const unsigned int kernel_height = weights->
dimension(idx_height);
457 const unsigned int num_kernels = weights->
dimension(idx_kernels);
466 const bool skip_im2col = (data_layout ==
DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.
stride().first == 1 && conv_info.
stride().second == 1);
468 bool fuse_activation =
true;
474 if(biases !=
nullptr)
494 unsigned int conv_w = 0;
495 unsigned int conv_h = 0;
504 unsigned int mat_weights_cols = num_kernels /
num_groups;
507 bool append_bias =
false;
509 if(num_groups != 1 && biases !=
nullptr)
513 biases_to_use =
nullptr;
525 weights_to_use = &weights_reshaped_info;
529 const Size2D kernel_dims(kernel_width, kernel_height);
537 gemm_input_to_use = &im2col_reshaped_info;
546 shape_gemm.
set(0, mat_weights_cols);
547 shape_gemm.
set(1, conv_w * conv_h);
549 info_gemm =
TensorInfo(shape_gemm, 1, data_type);
551 gemm_output_to_use = &info_gemm;
563 const auto output_quant_info = (output->
total_size() == 0) ? iq_info : oq_info;
564 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
577 int min_activation = 0;
578 int max_activation = 0;
587 if(supported_acts.count(act_info.
activation()) != 0)
593 fuse_activation =
false;
604 const unsigned int gemm_3d_depth = (data_layout ==
DataLayout::NHWC) ? conv_h : 0;
606 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, act_info));
654 if(!_fuse_activation)
656 _activationlayer_function.
run();
667 _weights_manager->
run(_original_weights, &_reshape_weights_managed);
673 _reshape_weights.
run();
679 if(!_weights_reshaped.
is_used())
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Class describing the value of a pixel for any image format.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, unsigned int idx_ofms, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLWeightsReshapeKernel.
void prepare() override
Prepare the function for executing.
void run() override
Run the kernels contained in the function.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static CLScheduler & get()
Access the scheduler singleton.
Interface for the im2col reshape kernel.
float a() const
Get the alpha value.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
bool are_reshaped() const
Flag which specifies if the weights tensor has been reshaped.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const DataLayout data_layout
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLGEMMConvolutionLayer.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLCol2ImKernel.
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
bool is_quantized_per_channel
GEMMLowp quantized per-channel flag.
Convolution Layer Weights Information class.
std::vector< int32_t > gemmlowp_shifts
GEMMLowp output stage shifts used for quantizing to QASYMM8.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLIm2ColKernel.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
CLGEMMConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
const unsigned int num_groups
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
std::vector< int32_t > gemmlowp_multipliers
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
CLConvolutionLayerReshapeWeights()
Constructor.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
Interface for the col2im reshaping kernel.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Num samples, channels, height, width.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), unsigned int num_groups=1)
Set the input and output tensors.
src_info set_data_layout(data_layout)
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLConvolutionLayerReshap...
quantized, symmetric per channel fixed-point 8-bit number
Lower and Upper Bounded Rectifier ( $f(x) = \min(a, \max(b, x))$ )
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
Memory group resources scope handling class.
Interface for OpenCL tensor.
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias=false, unsigned int num_groups=1)
Calculate the reshaped shape of the weights.
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer...
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void free() override
Free allocated OpenCL memory.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
~CLConvolutionLayerReshapeWeights()
Default destructor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
quantized, asymmetric fixed-point 8-bit number signed
void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups=1)
Set the input and output tensors.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
void tune_kernel_static(ICLKernel &kernel)
Tunes OpenCL kernel.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
DataLayout
[DataLayout enum definition]
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
void run() override
Run the kernels contained in the function.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
~CLGEMMConvolutionLayer()
Default destructor.