// Computes the quantized [min, max] output bounds used to clamp a GEMMLowp
// result, honouring a fused activation when one is enabled.
// NOTE(review): this listing is a doxygen rendering with interior source
// lines elided (original numbering jumps 53 -> 55 -> 56 -> 58 ...): the
// opening brace, the switch's case labels and the no-activation default
// bounds are not visible here. Comments describe only the visible lines.
53 std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(
const QuantizationInfo &q_info,
const ActivationLayerInfo &act_info,
DataType data_type)
// Default-constructed bounds; presumably overwritten with the data type's
// full representable range in the elided lines — TODO confirm.
55 PixelValue type_min{};
56 PixelValue type_max{};
// Per-layer (uniform) quantization parameters of the output tensor.
58 const UniformQuantizationInfo q_unif = q_info.uniform();
60 if(act_info.enabled())
62 switch(act_info.activation())
// Visible case body: lower bound clamps at the zero-point (RELU-like).
65 type_min = PixelValue(q_unif.offset);
// Visible case body: zero-point lower bound plus an upper bound quantized
// from act_info.a() — consistent with an upper-bounded rectifier.
68 type_min = PixelValue(q_unif.offset);
69 type_max = PixelValue(act_info.a(),
data_type, q_info);
// Visible case body: both bounds quantized from act_info.b() / act_info.a()
// — consistent with a lower+upper bounded rectifier.
72 type_min = PixelValue(act_info.b(),
data_type, q_info);
73 type_max = PixelValue(act_info.a(),
data_type, q_info);
81 return std::make_pair(type_min, type_max);
// Builds the GEMMLowp output-stage info (requantization multiplier/shift,
// offset, and clamp bounds) for the quantized fully-connected path.
// NOTE(review): interior lines are elided in this rendering — e.g. the
// calculate_quantized_multiplier(...) call that fills output_multiplier /
// output_shift, and the declarations of output_shift and data_type.
84 Status get_gemmlowp_output_stage_info(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *output,
const ActivationLayerInfo &act,
85 GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
88 const QuantizationInfo oq_info = output->quantization_info();
89 const UniformQuantizationInfo iq_unif =
input->quantization_info().uniform();
90 const UniformQuantizationInfo wq_unif = weights->quantization_info().uniform();
91 const UniformQuantizationInfo oq_unif = oq_info.uniform();
// Effective rescale factor from the int32 accumulator to the output scale.
93 float multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
94 int32_t output_multiplier;
99 PixelValue type_min{};
100 PixelValue type_max{};
// Clamp bounds take the fused activation into account.
101 std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act,
data_type);
// output_multiplier / output_shift are filled by an elided
// calculate_quantized_multiplier(...) call — see the visible declarations
// above and the tooltip documentation below.
103 gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
104 gemmlowp_output_stage_info.gemmlowp_shift = output_shift;
105 gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset;
107 gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get<int32_t>();
108 gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get<int32_t>();
// Validation helper for the matrix-multiply stage.
// NOTE(review): interior lines are elided in this rendering; only the
// quantized-path set-up fragments are visible (the float-path validate and
// the surrounding control flow are not shown).
113 Status validate_mm(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const ActivationLayerInfo &act)
// QuantizationInfo rebuilt with negated offsets — presumably to match the
// GEMMLowp kernel's sign convention for zero-points; TODO confirm against
// the full source.
119 const QuantizationInfo input_quantization_info(
input->quantization_info().uniform().scale, -
input->quantization_info().uniform().offset);
120 const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);
122 GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
// The computed output stage is attached to the GEMMInfo handed to the
// (elided) NEGEMMLowpMatrixMultiplyCore::validate call; gemm_info itself is
// declared in an elided line.
126 gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
// Cloned weights info carries the negated-offset quantization info.
130 &weights->clone()->set_quantization_info(weights_quantization_info),
// NEFullyConnectedLayer constructor initializer list: wires the memory
// manager and weights manager into the member functions and initializes all
// state flags (weights reshaped/converted, quantized, prepared) to their
// pre-configure defaults.
147 : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
148 _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
149 _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
// configure_mm body fragment (the function signature is elided in this
// rendering): sets up either the quantized GEMMLowp path or the float
// NEGEMM path for the fully-connected matrix multiply.
155 if(_is_quantized_asymmetric)
// Derive the output-stage (requantization) parameters from the tensors'
// quantization info and the fused activation.
167 const Status status = get_gemmlowp_output_stage_info(
input->info(), weights->
info(), output->
info(), act, gemmlowp_output_stage_info);
172 gemm_info.set_activation_info(act);
173 _mm_gemmlowp.
configure(
input, weights, biases, output, gemm_info);
// Restore the input's original quantization info; an elided line above
// presumably swapped in a negated-offset variant for the kernel — TODO
// confirm against the full source.
176 input->info()->set_quantization_info(input_quantization_info);
// Float path: plain NEGEMM with alpha = 1.f, beta = 1.0f and the fused
// activation carried in gemm_info.
182 GEMMInfo gemm_info(
false,
false,
true );
183 gemm_info.set_activation_info(act);
184 _mm_gemm.
configure(
input, weights, biases, output, 1.f, 1.0f, gemm_info);
// Configures the conv -> FC transition: flattens the multi-dimensional
// input into 2D before handing it to the matrix multiply.
// NOTE(review): interior lines are elided; shape_flatten is computed in an
// elided line (cf. compute_flatten_shape in the tooltips below).
188 void NEFullyConnectedLayer::configure_conv_fc(
const ITensor *
input,
const ITensor *weights,
const ITensor *biases, ITensor *output,
const ActivationLayerInfo &act)
// Initialise the intermediate flatten tensor with the input's metadata,
// resizable, padding reset, reshaped to the flattened shape.
196 _flatten_output.
allocator()->
init(
input->info()->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(shape_flatten));
// Let the memory group manage the transient flatten buffer's lifetime.
199 _memory_group.
manage(&_flatten_output);
// The matrix multiply consumes the flattened tensor instead of the input.
204 configure_mm(&_flatten_output, weights, biases, output, act);
// Configures the FC -> FC case: the input is already 2D, so the matrix
// multiply is applied to it directly with no flattening step.
210 void NEFullyConnectedLayer::configure_fc_fc(
const ITensor *
input,
const ITensor *weights,
const ITensor *biases, ITensor *output,
const ActivationLayerInfo &act)
215 configure_mm(
input, weights, biases, output, act);
// NEFullyConnectedLayer::configure fragment (signature and many interior
// lines elided in this rendering). Visible responsibilities: validate the
// configuration (biases optional), decide conv-vs-fc input layout, reshape
// (transpose) and/or layout-convert the weights — via the weights manager
// when one is present — then dispatch to configure_conv_fc /
// configure_fc_fc (dispatch site partially visible at the end).
225 biases !=
nullptr ? biases->
info() :
nullptr,
229 _are_weights_converted =
true;
231 _is_fc_after_conv =
true;
// Keep a handle on the caller's unmodified weights for prepare().
233 _original_weights = weights;
237 _weights_manager->
manage(weights);
246 const ITensor *weights_to_use = weights;
// Batched FC is detected from the output's second dimension.
249 const bool is_batched_fc_layer = output->
info()->
dimension(1) > 1;
250 if(is_batched_fc_layer)
// Elided comparison over the input's tensor shape (cend() visible here).
253 input->info()->tensor_shape().cend(),
258 _is_fc_after_conv =
input->info()->num_dimensions() > 1;
// Reshape (transpose) weights if the caller did not pre-reshape them:
// managed (shared/cached via the weights manager) or unmanaged member
// function, depending on availability — branch conditions are elided.
262 if(!_are_weights_reshaped)
266 _reshape_weights_managed_function.
configure(weights);
267 weights_to_use = _weights_manager->
acquire(weights, &_reshape_weights_managed_function);
272 _reshape_weights_function.
configure(weights, &_reshape_weights_output);
273 weights_to_use = &_reshape_weights_output;
// Convert weights from the layout they were trained with when needed;
// again managed or unmanaged depending on _weights_manager.
282 _convert_weights_managed.
configure(weights_to_use,
283 input->info()->tensor_shape(),
285 weights_to_use = _weights_manager->
acquire(weights, &_convert_weights_managed);
290 _convert_weights.
configure(weights_to_use,
291 &_converted_weights_output,
292 input->info()->tensor_shape(),
295 weights_to_use = &_converted_weights_output;
// Conversion is pending until prepare() runs it.
297 _are_weights_converted =
false;
300 if(_is_fc_after_conv)
// NEFullyConnectedLayer::validate fragment (signature and most interior
// lines elided): mirrors configure()'s decisions — conv-vs-fc detection,
// weight reshape/convert validation, input flattening — using local
// TensorInfo clones instead of configuring any kernels.
327 bool is_fc_after_conv =
true;
// Same batched-FC detection as configure(), on ITensorInfo directly.
343 const bool is_batched_fc_layer = output->
dimension(1) > 1;
345 if(is_batched_fc_layer)
348 input->tensor_shape().cend(),
353 is_fc_after_conv =
input->num_dimensions() > 1;
// Validate against the transposed-weights clone when not pre-reshaped.
356 if(!weights_reshaped)
360 weights_to_use = &reshaped_weights;
// Validate against the layout-converted clone when conversion is needed.
368 input->tensor_shape(),
370 weights_to_use = &converted_weights;
// Conv case: the matrix multiply is validated on the flattened input.
380 input_to_use = &flatten_input;
// NEFullyConnectedLayer::run fragment (bodies elided): flatten first when
// the input comes from a conv layer, then run the quantized GEMMLowp or the
// float GEMM function.
400 if(_is_fc_after_conv)
406 if(_is_quantized_asymmetric)
// NEFullyConnectedLayer::prepare fragment: runs the one-off weight
// transformations (reshape/transpose, then layout conversion) lazily before
// the first run, and frees intermediate weight buffers once unused.
// Branches exist in managed (weights-manager) and unmanaged variants; the
// guarding conditions between them are elided in this rendering.
420 if(!_weights_manager)
// Helper: free a transient weights tensor's backing allocation.
425 auto release_unused = [](
Tensor *
w)
429 w->allocator()->free();
434 const ITensor *cur_weights = _original_weights;
// Step 1: reshape (transpose) the weights if still pending.
437 if(!_are_weights_reshaped)
441 cur_weights = _weights_manager->
run(cur_weights, &_reshape_weights_managed_function);
446 if(!_are_weights_reshaped)
450 _reshape_weights_function.
run();
453 cur_weights = &_reshape_weights_output;
455 _are_weights_reshaped =
true;
// Step 2: convert the weights' data layout if still pending.
459 if(!_are_weights_converted)
463 _weights_manager->
run(cur_weights, &_convert_weights_managed);
468 _convert_weights.
run();
472 _are_weights_converted =
true;
// Free intermediates that are no longer needed after preparation.
476 release_unused(&_reshape_weights_output);
479 if(!_is_quantized_asymmetric)
485 release_unused(&_reshape_weights_output);
486 release_unused(&_converted_weights_output);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Quantize using a fixed point multiplication.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool retain_internal_weights
Retain internal reshaped weights.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void manage(const ITensor *weights, ITransformWeights *parent=nullptr)
Start managing a weights tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
Quantization information.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run() override
Run the kernels contained in the function.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's input and output.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
NEFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout)
Initialize the function.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array.
~NEFullyConnectedLayer()
Default destructor.
Memory group resources scope handling class.
Upper Bounded Rectifier ( f(x) = min(a, max(0, x)) )
void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
Sets GEMMLowp output stage.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayer.
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration of NEConvertFullyConnectedWeights.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void prepare() override
Prepare the function for executing.
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEFlattenLayer.
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
void prepare() override
Prepare the function for executing.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
DataType
Available data types.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the mininum and maximum values a data type can take.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETranspose.
ErrorCode error_code() const
Gets error code.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
void run() override
Run the kernels contained in the function.