// NOTE(review): extraction fragment — the embedded numbers (54, 55, ...) are the
// original file's line numbers and several intermediate lines are missing, so
// the comments below are hedged where the dropped lines matter.
//
// Builds the GEMMLowpOutputStageInfo (offset, fixed-point multiplier/shift and
// clamp bounds) applied after a quantized GEMM in a fully-connected layer.
54 Status construct_gemmlowp_output_stage(
const ITensorInfo &
src,
const ITensorInfo &weights,
const ITensorInfo &
dst,
55 GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
// Start from a neutral output stage.
58 gemmlowp_output_stage.gemmlowp_offset = 0;
59 gemmlowp_output_stage.gemmlowp_multiplier = 0;
60 gemmlowp_output_stage.gemmlowp_shift = 0;
// Gather per-tensor uniform quantization parameters.
67 const QuantizationInfo oq_info = dst.quantization_info();
68 const UniformQuantizationInfo iq_unif = src.quantization_info().uniform();
69 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
70 const UniformQuantizationInfo oq_unif = oq_info.uniform();
// If dst carries no allocated info yet (total_size() == 0), fall back to the
// input quantization as the effective output quantization.
72 const auto output_quant_info = (dst.total_size() == 0) ? iq_unif : oq_unif;
// Effective rescale from the int32 accumulator to the output scale; presumably
// converted into (output_multiplier, output_shift) by
// calculate_quantized_multiplier on a line dropped from this extraction — TODO confirm.
74 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
75 int output_multiplier = 0;
// Output clamp bounds; likely narrowed when an activation is fused (the body
// of the activation branch is not visible here).
79 PixelValue type_min{};
80 PixelValue type_max{};
83 if(activation_info.enabled())
// Publish the computed stage parameters (both scalar and per-channel vectors).
89 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
90 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
91 gemmlowp_output_stage.gemmlowp_shift = output_shift;
92 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
93 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
94 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
95 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
// validate_mm fragment: static validation of the matrix-multiply stage of the
// fully-connected layer (extraction has dropped several lines, incl. the final
// validate calls' full argument lists).
101 Status validate_mm(
const ITensorInfo &src,
const ITensorInfo &weights,
const ITensorInfo *bias,
const ITensorInfo &dst,
const FullyConnectedLayerInfo &fc_info)
103 GEMMLowpOutputStageInfo gemmlowp_output_stage;
// GEMMInfo assembled from fc_info; several constructor arguments fall on lines
// missing from this extraction.
106 const GEMMInfo &gemm_info = GEMMInfo(
false,
111 fc_info.retain_internal_weights,
112 gemmlowp_output_stage,
113 fc_info.fp_mixed_precision,
116 ActivationLayerInfo());
// Quantized path: validate with clones whose zero-point offsets are
// sign-flipped — presumably because the GEMMLowp core expects negated
// offsets; TODO confirm against ClGemmLowpMatrixMultiplyCore::validate docs.
120 const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
121 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
125 const QuantizationInfo src_quantization_info(iq_info.scale, -iq_info.offset);
126 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
130 &weights.clone()->set_quantization_info(weights_quantization_info),
// Fragment of the ClFullyConnected constructor's member-initializer list
// (non-consecutive original lines 145/147/149 — other members are not shown).
// Operator pointers start out null and are created lazily in configure().
145 : _convert_weights(nullptr),
147 _reshape_weights(nullptr),
149 _mm_gemmlowp(nullptr),
// configure_mm fragment: builds the GEMMLowp output stage, then creates either
// the quantized (ClGemmLowpMatrixMultiplyCore) or float (ClGemm) operator.
160 construct_gemmlowp_output_stage(*src, *weights, *dst, gemmlowp_output_stage, fc_info.
activation_info);
168 gemmlowp_output_stage,
// Quantized path — configured with src_info/weights_info rather than the raw
// tensors; presumably the offset-adjusted infos built on lines not shown here.
189 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
190 _mm_gemmlowp->configure(compile_context, &src_info, &weights_info, bias, dst, gemm_info);
// Float path — plain GEMM with alpha = beta = 1.
195 _mm_gemm = std::make_unique<ClGemm>();
196 _mm_gemm->configure(compile_context, src, weights, bias, dst, 1.f, 1.f, gemm_info);
// configure_conv_fc fragment: a conv-shaped input is flattened before the MM.
211 _flatten = std::make_unique<ClFlatten>();
212 _flatten->configure(compile_context, src, &_flattened_src);
215 configure_mm(compile_context, &_flattened_src, weights, bias, dst, fc_info);
// configure_fc_fc fragment: FC-after-FC input goes to the MM directly.
224 configure_mm(compile_context, src, weights, bias, dst, fc_info);
// ClFullyConnected::configure fragment (original lines 235-319, with gaps).
// Default assumptions before the shape-based checks below refine them.
235 _are_weights_converted =
true;
237 _is_fc_after_conv =
true;
// Batched FC layer when the output has more than one row; the branch body
// that uses this flag is missing from this extraction.
250 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
251 if(is_batched_fc_layer)
// Lazily transpose the weights when they were not supplied pre-reshaped.
265 if(!_are_weights_reshaped)
268 _reshape_weights = std::make_unique<ClTranspose>();
269 _reshape_weights->configure(compile_context, weights, &_reshaped_weights);
270 weights_used = &_reshaped_weights;
// Convert fully-connected weights (layout conversion for FC-after-conv;
// argument list continues on lines not shown).
278 _convert_weights = std::make_unique<ClConvertFullyConnectedWeights>();
279 _convert_weights->configure(compile_context,
285 weights_used = &_converted_weights;
287 _are_weights_converted =
false;
// Dispatch to the conv-input or fc-input configuration path.
290 if(_is_fc_after_conv)
293 configure_conv_fc(compile_context, src, weights_used, biases, dst, fc_info);
298 configure_fc_fc(compile_context, src, weights_used, biases, dst, fc_info);
301 _weights_to_use = *weights_used;
// Forward the chosen GEMM operator's workspace requirements as our own.
304 auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
305 for(
unsigned int i = 0; i < gemm_mem_req.size(); ++i)
307 _aux_mem[i] = gemm_mem_req[i];
309 if(_aux_mem[1].size > 0 || _aux_mem[2].size > 0)
// Keep transposed/converted weights persistent only when they are the tensors
// actually consumed at run time; otherwise they are freed after prepare().
318 const auto transposed_wei_lft = (_weights_to_use_idx ==
offset_int_vec(TransposedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare;
319 const auto converted_wei_lft = (_weights_to_use_idx ==
offset_int_vec(ConvertedWeights)) ? MemoryLifetime::Persistent : MemoryLifetime::Prepare;
// ClFullyConnected::validate fragment — mirrors configure()'s decisions on
// ITensorInfo only (no allocation); most checks fall on lines not shown here.
339 bool is_fc_after_conv =
true;
// Same batched-output heuristic as in configure().
355 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
356 if(is_batched_fc_layer)
// Validation counterparts of the lazy transpose / convert / flatten steps:
// each branch swaps in the would-be intermediate tensor info.
367 if(!weights_reshaped)
371 weights_to_use = &reshaped_weights;
381 weights_to_use = &converted_weights;
391 src_to_use = &flatten_src;
// ClFullyConnected::run fragment: flatten the conv-shaped input when needed,
// then run the quantized or float GEMM (the if/else around the two ->run
// calls is missing from this extraction).
415 if(_is_fc_after_conv)
418 _flatten->run(flatten_pack);
431 _mm_gemmlowp->run(gemm_pack);
435 _mm_gemm->run(gemm_pack);
// ClFullyConnected::prepare fragment: one-shot weight preparation before the
// first run.
449 const ITensor *cur_weights = weights;
// Transpose the weights once, then flag them reshaped so later calls skip it.
452 if(!_are_weights_reshaped)
456 _reshape_weights->run(transpose_pack);
459 cur_weights = reshaped_weights.
get();
461 _are_weights_reshaped =
true;
// Likewise run the layout conversion once and mark it done.
465 if(!_are_weights_converted)
468 _convert_weights->run(convert_pack);
471 cur_weights = converted_weights.
get();
473 _are_weights_converted =
true;
// Delegate to the underlying GEMM operator's own prepare() (quantized or
// float path; the surrounding branch is not visible in this extraction).
481 _mm_gemm->prepare(tensors);
485 _mm_gemmlowp->prepare(tensors);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration.
Quantize using a fixed point multiplication.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
bool retain_internal_weights
Retain internal reshaped weights.
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Quantization information.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
size_t total_size() const override
Returns the total size of the tensor in bytes.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration.
Num samples, channels, height, width.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
TensorInfo src_info(src_shape, 1, data_type)
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array...
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Upper Bounded Rectifier ( f(x) = min(a, x) )
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool fp_mixed_precision
Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
bool constant_weights
If false, weights can vary between runs.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the mininum and maximum values a data type can take.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.