55 Status construct_gemmlowp_output_stage(
const ITensorInfo &
input,
const ITensorInfo &weights,
const ITensorInfo &output,
56 GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
59 gemmlowp_output_stage.gemmlowp_offset = 0;
60 gemmlowp_output_stage.gemmlowp_multiplier = 0;
61 gemmlowp_output_stage.gemmlowp_shift = 0;
68 const QuantizationInfo oq_info = output.quantization_info();
69 const UniformQuantizationInfo iq_unif =
input.quantization_info().uniform();
70 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
71 const UniformQuantizationInfo oq_unif = oq_info.uniform();
73 const auto output_quant_info = (output.total_size() == 0) ? iq_unif : oq_unif;
75 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
76 int output_multiplier = 0;
80 PixelValue type_min{};
81 PixelValue type_max{};
84 if(activation_info.enabled())
90 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
91 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
92 gemmlowp_output_stage.gemmlowp_shift = output_shift;
93 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
94 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
95 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
96 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
102 Status validate_mm(
const ITensorInfo &
input,
const ITensorInfo &weights,
const ITensorInfo *bias,
const ITensorInfo &output,
const FullyConnectedLayerInfo &fc_info)
104 GEMMLowpOutputStageInfo gemmlowp_output_stage;
107 const GEMMInfo &gemm_info = GEMMInfo(
false,
112 fc_info.retain_internal_weights,
113 gemmlowp_output_stage,
114 fc_info.fp_mixed_precision,
116 ActivationLayerInfo());
120 const UniformQuantizationInfo iq_info =
input.quantization_info().uniform();
121 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
125 const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
126 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
130 &weights.clone()->set_quantization_info(weights_quantization_info),
145 : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), _reshape_weights_function(),
146 _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
147 _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
163 gemmlowp_output_stage,
179 _mm_gemmlowp.
configure(compile_context,
input, weights, bias, output, gemm_info);
182 input->info()->set_quantization_info(input_quantization_info);
188 _mm_gemm.
configure(compile_context,
input, weights, bias, output, 1.f, 1.f, gemm_info);
192 void CLFullyConnectedLayer::configure_conv_fc(
const CLCompileContext &compile_context,
const ICLTensor *
input,
const ICLTensor *weights,
const ICLTensor *bias, ICLTensor *output,
193 const FullyConnectedLayerInfo &fc_info)
204 _memory_group.
manage(&_flatten_output);
205 _flatten_layer.
configure(compile_context,
input, &_flatten_output);
208 configure_mm(compile_context, &_flatten_output, weights, bias, output, fc_info);
214 void CLFullyConnectedLayer::configure_fc_fc(
const CLCompileContext &compile_context,
const ICLTensor *
input,
const ICLTensor *weights,
const ICLTensor *bias, ICLTensor *output,
215 const FullyConnectedLayerInfo &fc_info)
220 configure_mm(compile_context,
input, weights, bias, output, fc_info);
237 biases !=
nullptr ? biases->
info() :
nullptr,
241 _are_weights_converted =
true;
243 _is_fc_after_conv =
true;
246 _original_weights = weights;
250 _weights_manager->
manage(weights);
253 const ICLTensor *weights_to_use = weights;
262 const bool is_batched_fc_layer = output->
info()->
dimension(1) > 1;
263 if(is_batched_fc_layer)
266 input->info()->tensor_shape().cend(),
271 _is_fc_after_conv =
input->info()->num_dimensions() > 1;
275 if(!_are_weights_reshaped)
279 _reshape_weights_managed_function.
configure(compile_context, weights);
280 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_reshape_weights_managed_function));
285 _reshape_weights_function.
configure(compile_context, weights, &_reshape_weights_output);
286 weights_to_use = &_reshape_weights_output;
295 _convert_weights_managed.
configure(compile_context, weights_to_use,
296 input->info()->tensor_shape(),
298 weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
acquire(weights, &_convert_weights_managed));
303 _convert_weights.
configure(compile_context, weights_to_use,
304 &_converted_weights_output,
305 input->info()->tensor_shape(),
308 weights_to_use = &_converted_weights_output;
310 _are_weights_converted =
false;
313 if(_is_fc_after_conv)
316 configure_conv_fc(compile_context,
input, weights_to_use, biases, output, fc_info);
321 configure_fc_fc(compile_context,
input, weights_to_use, biases, output, fc_info);
336 bool is_fc_after_conv =
true;
352 const bool is_batched_fc_layer = output->
dimension(1) > 1;
353 if(is_batched_fc_layer)
356 input->tensor_shape().cend(),
361 is_fc_after_conv =
input->num_dimensions() > 1;
364 if(!weights_reshaped)
368 weights_to_use = &reshaped_weights;
376 input->tensor_shape(),
378 weights_to_use = &converted_weights;
388 input_to_use = &flatten_input;
409 if(_is_fc_after_conv)
411 _flatten_layer.
run();
429 if(!_weights_manager)
439 w->allocator()->free();
444 const ICLTensor *cur_weights = _original_weights;
447 if(!_are_weights_reshaped)
451 cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->
run(cur_weights, &_reshape_weights_managed_function));
457 _reshape_weights_function.
run();
460 cur_weights = &_reshape_weights_output;
462 _are_weights_reshaped =
true;
466 if(!_are_weights_converted)
470 _weights_manager->
run(cur_weights, &_convert_weights_managed);
475 _convert_weights.
run();
479 _are_weights_converted =
true;
483 release_unused(&_reshape_weights_output);
492 release_unused(&_reshape_weights_output);
493 release_unused(&_converted_weights_output);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFlattenLayer.
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
CLFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
void run() override
Run the kernels contained in the function.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool retain_internal_weights
Retain internal reshaped weights.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void manage(const ITensor *weights, ITransformWeights *parent=nullptr)
Start managing a weights tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void prepare() override
Prepare the function for executing.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiplyCore.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayer.
Quantized, asymmetric fixed-point 8-bit unsigned number.
bool are_weights_reshaped
Reshape the weights tensor if false.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's input and output.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void run() override
Run the kernels contained in the function.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Num samples, channels, height, width.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration of CLConvertFullyConnectedWeights.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array.
Memory group resources scope handling class.
Interface for OpenCL tensor.
Upper Bounded Rectifier ( f(x) = min(a, max(0, x)) )
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout)
Initialize the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool fp_mixed_precision
Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
Quantized, asymmetric fixed-point 8-bit signed number.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
Basic implementation of the OpenCL tensor interface.