55 Status construct_gemmlowp_output_stage(
const ITensorInfo &
src,
const ITensorInfo &weights,
const ITensorInfo &
dst,
56 GEMMLowpOutputStageInfo &gemmlowp_output_stage, ActivationLayerInfo activation_info)
59 gemmlowp_output_stage.gemmlowp_offset = 0;
60 gemmlowp_output_stage.gemmlowp_multiplier = 0;
61 gemmlowp_output_stage.gemmlowp_shift = 0;
68 const QuantizationInfo oq_info = dst.quantization_info();
69 const UniformQuantizationInfo iq_unif = src.quantization_info().uniform();
70 const UniformQuantizationInfo wq_unif = weights.quantization_info().uniform();
71 const UniformQuantizationInfo oq_unif = oq_info.uniform();
73 const auto output_quant_info = (dst.total_size() == 0) ? iq_unif : oq_unif;
75 const float multiplier = (iq_unif.scale * wq_unif.scale) / output_quant_info.scale;
76 int output_multiplier = 0;
80 PixelValue type_min{};
81 PixelValue type_max{};
84 if(activation_info.enabled())
90 gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
91 gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
92 gemmlowp_output_stage.gemmlowp_shift = output_shift;
93 gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
94 gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
95 type_min.get(gemmlowp_output_stage.gemmlowp_min_bound);
96 type_max.get(gemmlowp_output_stage.gemmlowp_max_bound);
102 Status validate_mm(
const ITensorInfo &src,
const ITensorInfo &weights,
const ITensorInfo *
bias,
const ITensorInfo &dst,
const FullyConnectedLayerInfo &fc_info)
104 GEMMLowpOutputStageInfo gemmlowp_output_stage;
107 const GEMMInfo &
gemm_info = GEMMInfo(
false,
112 fc_info.retain_internal_weights,
113 gemmlowp_output_stage,
114 fc_info.fp_mixed_precision,
117 ActivationLayerInfo());
121 const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
122 const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
126 const QuantizationInfo src_quantization_info(iq_info.scale, -iq_info.offset);
127 const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
131 &weights.clone()->set_quantization_info(weights_quantization_info),
146 : _convert_weights(nullptr),
148 _reshape_weights(nullptr),
150 _mm_gemmlowp(nullptr),
161 construct_gemmlowp_output_stage(*src, *weights, *dst, gemmlowp_output_stage, fc_info.
activation_info);
169 gemmlowp_output_stage,
189 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
190 _mm_gemmlowp->configure(compile_context, &src_info, &weights_info, bias, dst, gemm_info);
195 _mm_gemm = std::make_unique<ClGemm>();
196 _mm_gemm->configure(compile_context, src, weights, bias, dst, 1.f, 1.f, gemm_info);
211 _flatten = std::make_unique<ClFlatten>();
212 _flatten->configure(compile_context, src, &_flattened_src);
215 configure_mm(compile_context, &_flattened_src, weights, bias, dst, fc_info);
224 configure_mm(compile_context, src, weights, bias, dst, fc_info);
236 _are_weights_converted =
true;
238 _is_fc_after_conv =
true;
251 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
252 if(is_batched_fc_layer)
266 if(!_are_weights_reshaped)
269 _reshape_weights = std::make_unique<ClTranspose>();
270 _reshape_weights->configure(compile_context, weights, &_reshaped_weights);
271 weights_used = &_reshaped_weights;
279 _convert_weights = std::make_unique<ClConvertFullyConnectedWeights>();
280 _convert_weights->configure(compile_context,
286 weights_used = &_converted_weights;
288 _are_weights_converted =
false;
291 if(_is_fc_after_conv)
294 configure_conv_fc(compile_context, src, weights_used, biases, dst, fc_info);
299 configure_fc_fc(compile_context, src, weights_used, biases, dst, fc_info);
302 _weights_to_use = *weights_used;
305 auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
306 for(
unsigned int i = 0; i < gemm_mem_req.size(); ++i)
308 _aux_mem[i] = gemm_mem_req[i];
310 if(_aux_mem[1].size > 0 || _aux_mem[2].size > 0)
340 bool is_fc_after_conv =
true;
355 if(biases !=
nullptr)
369 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
370 if(is_batched_fc_layer)
381 if(!weights_reshaped)
385 weights_to_use = &reshaped_weights;
395 weights_to_use = &converted_weights;
405 src_to_use = &flatten_src;
429 if(_is_fc_after_conv)
432 _flatten->run(flatten_pack);
445 _mm_gemmlowp->run(gemm_pack);
449 _mm_gemm->run(gemm_pack);
463 const ITensor *cur_weights = weights;
466 if(!_are_weights_reshaped)
470 _reshape_weights->run(transpose_pack);
473 cur_weights = reshaped_weights.
get();
475 _are_weights_reshaped =
true;
479 if(!_are_weights_converted)
482 _convert_weights->run(convert_pack);
485 cur_weights = converted_weights.
get();
487 _are_weights_converted =
true;
495 _mm_gemm->prepare(tensors);
499 _mm_gemmlowp->prepare(tensors);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration.
Quantize using a fixed point multiplication.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
bool retain_internal_weights
Retain internal reshaped weights.
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2022 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Quantization information.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
size_t total_size() const override
Returns the total size of the tensor in bytes.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual bool are_values_constant() const =0
Flag indicating whether the values of the tensor are constant, meaning that they can change on kernel...
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration.
Num samples, channels, height, width.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
TensorInfo src_info(src_shape, 1, data_type)
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array...
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Upper Bounded Rectifier ( f(x) = min(a, x) )
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool fp_mixed_precision
Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.