// Returns the (min, max) output clamp bounds, as PixelValues, for a
// quantized asymmetric output, narrowed by the fused activation when enabled.
// NOTE(review): this listing is a fragmented extract — source lines are
// missing between the numbered rows, so comments describe only what is visible.
54 std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(
const QuantizationInfo &q_info,
const ActivationLayerInfo &act_info,
DataType data_type)
// Per-layer (uniform) quantization parameters of the output tensor.
59 const UniformQuantizationInfo q_unif = q_info.uniform();
// Bounds are only tightened when an activation is fused.
61 if(act_info.enabled())
// Dispatch on the activation type (case labels not visible in this extract).
63 switch(act_info.activation())
// Presumably RELU-style cases: the lower bound becomes the quantized
// zero-point (offset) — TODO confirm against the full source.
66 type_min = PixelValue(q_unif.offset);
69 type_min = PixelValue(q_unif.offset);
// Fills gemmlowp_output_stage_info with the requantization parameters
// (multiplier, shift, offset, clamp bounds) used to convert the int32
// GEMMLowp accumulator back to the quantized output type.
// NOTE(review): fragmented extract — e.g. the calculate_quantized_multiplier
// call that produces output_multiplier/output_shift is not visible here.
85 Status get_gemmlowp_output_stage_info(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *output,
const ActivationLayerInfo &act,
86 GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
88 const auto data_type = input->data_type();
89 const QuantizationInfo oq_info = output->quantization_info();
// Uniform (per-layer) quantization parameters of input, weights and output.
90 const UniformQuantizationInfo iq_unif = input->quantization_info().uniform();
91 const UniformQuantizationInfo wq_unif = weights->quantization_info().uniform();
92 const UniformQuantizationInfo oq_unif = oq_info.uniform();
// Effective rescale factor: (input_scale * weights_scale) / output_scale.
94 float multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
95 int32_t output_multiplier;
// Clamp range derived from the output quantization and the fused activation.
102 std::tie(
type_min,
type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
104 gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
105 gemmlowp_output_stage_info.gemmlowp_shift = output_shift;
// Output zero-point is applied after the fixed-point rescale.
106 gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset;
108 gemmlowp_output_stage_info.gemmlowp_min_bound =
type_min.get<int32_t>();
109 gemmlowp_output_stage_info.gemmlowp_max_bound =
type_max.get<int32_t>();
// Validates the matrix-multiply stage of the fully-connected layer for the
// given tensor infos and fused activation.
// NOTE(review): fragmented extract — the quantized/float branch structure and
// the validate call these cloned infos feed into are not fully visible here.
114 Status validate_mm(
const ITensorInfo *input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const ActivationLayerInfo &act)
// GEMMLowp convention: zero-points are negated so the kernels can add,
// rather than subtract, the offsets during the matrix multiplication.
120 const QuantizationInfo input_quantization_info(input->quantization_info().uniform().scale, -input->quantization_info().uniform().offset);
121 const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);
123 GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
127 gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
// Validation uses a clone of the weights info carrying the negated offset.
131 &weights->clone()->set_quantization_info(weights_quantization_info),
// Weights reshape is implemented as a plain transpose: configure a
// NETransposeKernel and take ownership of it.
147 auto k = std::make_unique<NETransposeKernel>();
148 k->configure(input, output);
149 _kernel = std::move(k);
// NEFullyConnectedLayer constructor initializer list: takes ownership of the
// memory manager, stores the (non-owned) weights manager, default-constructs
// the sub-functions/tensors, and initialises the state flags (weights start
// "converted" but not yet reshaped; function not yet prepared).
160 : _memory_group(
std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
161 _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
162 _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
// configure_mm fragment: selects the quantized (GEMMLowp) or float (GEMM)
// matrix-multiply path for the fully-connected computation.
// NOTE(review): fragmented extract — the temporary offset manipulation that
// typically precedes the GEMMLowp configure is not visible here.
168 if(_is_quantized_asymmetric)
// Quantized path: derive the output-stage requantization parameters from
// the tensor infos and the fused activation.
180 const Status status = get_gemmlowp_output_stage_info(input->
info(), weights->
info(), output->
info(), act, gemmlowp_output_stage_info);
185 gemm_info.set_activation_info(act);
186 _mm_gemmlowp.
configure(input, weights, biases, output, gemm_info);
// Float path: weights are already reshaped, hence the third GEMMInfo flag.
195 GEMMInfo gemm_info(
false,
false,
true );
// alpha = 1, beta = 1 so the bias accumulates into the output.
197 _mm_gemm.
configure(input, weights, biases, output, 1.f, 1.0f, gemm_info);
// configure_conv_fc fragment: when the FC layer follows a convolution, the
// 3D input must first be flattened before the matrix multiplication.
// Initialise the flatten output with the input's metadata, resizable, no
// padding, and the computed flattened shape.
209 _flatten_output.
allocator()->
init(input->
info()->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(shape_flatten));
// Let the memory group manage the intermediate tensor's lifetime.
212 _memory_group.
manage(&_flatten_output);
214 _flatten.
configure(input, &_flatten_output);
// The flattened tensor feeds the matrix multiply.
217 configure_mm(&_flatten_output, weights, biases, output, act);
// configure_fc_fc: FC-after-FC needs no flattening — multiply directly.
228 configure_mm(input, weights, biases, output, act);
// NEFullyConnectedLayer::configure fragment: validates the configuration,
// decides conv-FC vs FC-FC, reshapes/converts the weights (optionally via
// the weights manager), then dispatches to the matching configure helper.
// NOTE(review): fragmented extract — several decision branches (transpose
// flag, data-layout checks) are not visible between the rows below.
biases !=
nullptr ? biases->
info() :
nullptr,
242 _are_weights_converted =
true;
244 _is_fc_after_conv =
true;
// Keep a handle to the user's weights for use in prepare().
246 _original_weights = weights;
// When a weights manager is present it tracks/owns the transformed weights.
250 _weights_manager->
manage(weights);
259 const ITensor *weights_to_use = weights;
// Batched FC is detected from the output's second dimension.
262 const bool is_batched_fc_layer = output->
info()->
dimension(1) > 1;
263 if(is_batched_fc_layer)
// Reshape (transpose) the weights unless the caller already did.
275 if(!_are_weights_reshaped)
// Managed path: the weights manager owns the reshaped tensor.
279 _reshape_weights_managed_function.
configure(weights);
280 weights_to_use = _weights_manager->
acquire(weights, &_reshape_weights_managed_function);
// Unmanaged path: reshape into a member tensor.
285 _reshape_weights_function.
configure(weights, &_reshape_weights_output);
286 weights_to_use = &_reshape_weights_output;
// Convert weights between data layouts (e.g. trained layout vs runtime),
// again via the weights manager when available.
295 _convert_weights_managed.
configure(weights_to_use,
298 weights_to_use = _weights_manager->
acquire(weights, &_convert_weights_managed);
303 _convert_weights.
configure(weights_to_use,
304 &_converted_weights_output,
308 weights_to_use = &_converted_weights_output;
// Conversion deferred to prepare(): mark as not yet converted.
310 _are_weights_converted =
false;
// Dispatch on the detected topology.
313 if(_is_fc_after_conv)
316 configure_conv_fc(input, weights_to_use, biases, output, fc_info.
activation_info);
321 configure_fc_fc(input, weights_to_use, biases, output, fc_info.
activation_info);
// NEFullyConnectedLayer::validate fragment: mirrors configure()'s decisions
// (conv-FC detection, weight reshape/convert, input flattening) on cloned
// TensorInfos without allocating anything.
// NOTE(review): fragmented extract — the validate calls these aliases feed
// into are not visible between the rows below.
340 bool is_fc_after_conv =
true;
// Same batched-FC detection as configure(), on the output info directly.
356 const bool is_batched_fc_layer = output->
dimension(1) > 1;
358 if(is_batched_fc_layer)
// Use a hypothetical reshaped-weights info when reshaping would occur.
369 if(!weights_reshaped)
373 weights_to_use = &reshaped_weights;
// Likewise for layout-converted weights.
383 weights_to_use = &converted_weights;
// Conv-FC path validates against the flattened input info.
393 input_to_use = &flatten_input;
// NEFullyConnectedLayer::run fragment: flatten first when FC follows a
// convolution, then execute the quantized or float matrix multiply.
413 if(_is_fc_after_conv)
419 if(_is_quantized_asymmetric)
// NEFullyConnectedLayer::prepare fragment: one-time weight transformation —
// reshape then (optionally) convert the weights, releasing intermediates
// that are no longer needed. Uses the weights manager when present,
// otherwise member tensors.
433 if(!_weights_manager)
// Helper: free a transformed-weights tensor once nothing uses it.
438 auto release_unused = [](
Tensor *
w)
442 w->allocator()->free();
447 const ITensor *cur_weights = _original_weights;
// Step 1: reshape (transpose) the weights if not already done.
450 if(!_are_weights_reshaped)
// Managed path: run the reshape through the weights manager.
454 cur_weights = _weights_manager->
run(cur_weights, &_reshape_weights_managed_function);
459 if(!_are_weights_reshaped)
// Unmanaged path: run the member reshape function.
463 _reshape_weights_function.
run();
466 cur_weights = &_reshape_weights_output;
468 _are_weights_reshaped =
true;
// Step 2: convert the weights' data layout if still pending.
472 if(!_are_weights_converted)
476 _weights_manager->
run(cur_weights, &_convert_weights_managed);
481 _convert_weights.
run();
485 _are_weights_converted =
true;
// The reshaped tensor is an intermediate once conversion has consumed it.
489 release_unused(&_reshape_weights_output);
// Non-quantized path: all transformed intermediates can be released.
492 if(!_is_quantized_asymmetric)
498 release_unused(&_reshape_weights_output);
499 release_unused(&_converted_weights_output);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Quantize using a fixed point multiplication.
void run() override final
Run the kernels contained in the function.
void set_activation_info(const ActivationLayerInfo &activation_info)
Set activation layer info.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool retain_internal_weights
Retain internal reshaped weights.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void manage(const ITensor *weights, ITransformWeights *parent=nullptr)
Start managing a weights tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayerReshapeWeights.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
bool are_weights_managed(const ITensor *weights)
Check if the weights are managed.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
Quantization information.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's input and output.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
NEFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETransposeKernel.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
Weights manager interface to handle weights transformations.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout)
Initialize the function.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
__constant DATA_TYPE16 type_min
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array...
Lower and Upper Bounded Rectifier ( )
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
~NEFullyConnectedLayer()
Default destructor.
Memory group resources scope handling class.
Upper Bounded Rectifier ( )
void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
Sets GEMMLowp output stage.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayer.
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration of NEConvertFullyConnectedWeights.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void prepare() override
Prepare the function for executing.
__constant DATA_TYPE16 type_max
bool transpose_weights
Transpose weights if true.
void configure(const ITensor *input, ITensor *output)
Set the input and output tensors.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEFlattenLayer.
ITensor * run(const ITensor *weights, ITransformWeights *weights_transform)
Run the reshape function.
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
void prepare() override
Prepare the function for executing.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
DataType
Available data types.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
ErrorCode error_code() const
Gets error code.
ITensor * acquire(const ITensor *weights, ITransformWeights *weights_transform)
Acquire the requested reshape tensor of the selected weights.
void run() override
Run the kernels contained in the function.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.