55 Status construct_gemmlowp_output_stage(
const ITensorInfo &
input,
const ITensorInfo &weights,
const ITensorInfo &output,
56 GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
59 gemmlowp_output_stage.gemmlowp_offset = 0;
60 gemmlowp_output_stage.gemmlowp_multiplier = 0;
61 gemmlowp_output_stage.gemmlowp_shift = 0;
68 const QuantizationInfo oq_info = output.quantization_info();
69 const UniformQuantizationInfo iq_unif = input.quantization_info().uniform();
70 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
71 const UniformQuantizationInfo oq_unif = oq_info.uniform();
73 const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
75 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
76 int output_multiplier = 0;
84 if(activation_info.enabled())
90 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
91 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
92 gemmlowp_output_stage.gemmlowp_shift = output_shift;
93 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
94 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
95 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
96 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
102 Status validate_mm(
const ITensorInfo &input,
const ITensorInfo &weights,
const ITensorInfo *bias,
const ITensorInfo &output,
const FullyConnectedLayerInfo &fc_info)
104 GEMMLowpOutputStageInfo gemmlowp_output_stage;
105 ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage, fc_info.activation_info));
107 const GEMMInfo &gemm_info = GEMMInfo(
false,
112 fc_info.retain_internal_weights,
113 gemmlowp_output_stage,
114 fc_info.fp_mixed_precision,
116 ActivationLayerInfo());
120 const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
121 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
125 const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
126 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
130 &weights.clone()->set_quantization_info(weights_quantization_info),
151 auto k = std::make_unique<CLTransposeKernel>();
152 k->configure(compile_context, input, output);
153 _kernel = std::move(k);
162 : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), _reshape_weights_function(),
163 _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
164 _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
171 construct_gemmlowp_output_stage(*input->
info(), *weights->
info(), *output->
info(), gemmlowp_output_stage, fc_info.
activation_info);
179 gemmlowp_output_stage,
195 _mm_gemmlowp.
configure(compile_context, input, weights, bias, output, gemm_info);
204 _mm_gemm.
configure(compile_context, input, weights, bias, output, 1.f, 1.f, gemm_info);
220 _memory_group.
manage(&_flatten_output);
221 _flatten_layer.
configure(compile_context, input, &_flatten_output);
224 configure_mm(compile_context, &_flatten_output, weights, bias, output, fc_info);
236 configure_mm(compile_context, input, weights, bias, output, fc_info);
253 biases !=
nullptr ? biases->
info() :
nullptr,
257 _are_weights_converted =
true;
259 _is_fc_after_conv =
true;
262 _original_weights = weights;
266 _weights_manager->
manage(weights);
269 const ICLTensor *weights_to_use = weights;
278 const bool is_batched_fc_layer = output->
info()->
dimension(1) > 1;
279 if(is_batched_fc_layer)
291 if(!_are_weights_reshaped)
295 _reshape_weights_managed_function.
configure(compile_context, weights);
296 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_reshape_weights_managed_function));
301 _reshape_weights_function.
configure(compile_context, weights, &_reshape_weights_output);
302 weights_to_use = &_reshape_weights_output;
311 _convert_weights_managed.
configure(compile_context, weights_to_use,
314 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_convert_weights_managed));
319 _convert_weights.
configure(compile_context, weights_to_use,
320 &_converted_weights_output,
324 weights_to_use = &_converted_weights_output;
326 _are_weights_converted =
false;
329 if(_is_fc_after_conv)
332 configure_conv_fc(compile_context, input, weights_to_use, biases, output, fc_info);
337 configure_fc_fc(compile_context, input, weights_to_use, biases, output, fc_info);
352 bool is_fc_after_conv =
true;
368 const bool is_batched_fc_layer = output->
dimension(1) > 1;
369 if(is_batched_fc_layer)
380 if(!weights_reshaped)
384 weights_to_use = &reshaped_weights;
394 weights_to_use = &converted_weights;
404 input_to_use = &flatten_input;
425 if(_is_fc_after_conv)
427 _flatten_layer.
run();
445 if(!_weights_manager)
455 w->allocator()->free();
460 const ICLTensor *cur_weights = _original_weights;
463 if(!_are_weights_reshaped)
467 cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
run(cur_weights, &_reshape_weights_managed_function));
473 _reshape_weights_function.
run();
476 cur_weights = &_reshape_weights_output;
478 _are_weights_reshaped =
true;
482 if(!_are_weights_converted)
486 _weights_manager->
run(cur_weights, &_convert_weights_managed);
491 _convert_weights.
run();
495 _are_weights_converted =
true;
499 release_unused(&_reshape_weights_output);
508 release_unused(&_reshape_weights_output);
509 release_unused(&_converted_weights_output);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFlattenLayer.
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
CLFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
void run() override
Run the kernels contained in the function.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool retain_internal_weights
Retain internal reshaped weights.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void manage(const ITensor *weights, ITransformWeights *parent=nullptr)
Start managing a weights tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void prepare() override
Prepare the function for executing.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiplyCore.
void run() override final
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayer.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
void run() override
Run the kernels contained in the function.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's input and output.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Num samples, channels, height, width.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration of CLConvertFullyConnectedWeights.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
__constant DATA_TYPE16 type_min
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array.
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Memory group resources scope handling class.
Interface for OpenCL tensor.
Upper Bounded Rectifier ( f(x) = min(a, x) )
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout)
Initialize the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool fp_mixed_precision
Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
void configure(const ICLTensor *input, ICLTensor *output)
Set the input and output tensors.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
__constant DATA_TYPE16 type_max
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayerReshapeWeights.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTransposeKernel.
Basic implementation of the OpenCL tensor interface.