51 std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(
const QuantizationInfo &q_info,
const ActivationLayerInfo &act_info,
DataType data_type)
53 PixelValue type_min{};
54 PixelValue type_max{};
55 std::tie(type_min, type_max) =
get_min_max(data_type);
56 const UniformQuantizationInfo q_unif = q_info.uniform();
58 if(act_info.enabled())
60 switch(act_info.activation())
63 type_min = PixelValue(q_unif.offset);
66 type_min = PixelValue(q_unif.offset);
67 type_max = PixelValue(act_info.a(),
data_type, q_info);
70 type_min = PixelValue(act_info.b(),
data_type, q_info);
71 type_max = PixelValue(act_info.a(),
data_type, q_info);
79 return std::make_pair(type_min, type_max);
82 Status get_gemmlowp_output_stage_info(
const ITensorInfo *
src,
const ITensorInfo *weights,
const ITensorInfo *
dst,
const ActivationLayerInfo &act,
83 GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
85 const auto data_type = src->data_type();
86 const QuantizationInfo oq_info = dst->quantization_info();
87 const UniformQuantizationInfo iq_unif = src->quantization_info().uniform();
88 const UniformQuantizationInfo wq_unif = weights->quantization_info().uniform();
89 const UniformQuantizationInfo oq_unif = oq_info.uniform();
91 float multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
92 int32_t output_multiplier;
97 PixelValue type_min{};
98 PixelValue type_max{};
99 std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
101 gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
102 gemmlowp_output_stage_info.gemmlowp_shift = output_shift;
103 gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset;
105 gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get<int32_t>();
106 gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get<int32_t>();
111 Status validate_mm(
const ITensorInfo *src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *dst,
const ActivationLayerInfo &act)
117 const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale, -src->quantization_info().uniform().offset);
118 const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);
120 GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
124 gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
127 TensorInfo
src_info = src->
clone()->set_quantization_info(src_quantization_info);
128 TensorInfo
weights_info = weights->clone()->set_quantization_info(weights_quantization_info);
146 _convert_weights(nullptr),
147 _transpose_weights(nullptr),
149 _mm_gemmlowp(nullptr),
151 _converted_weights(),
154 _trans_weights_idx(AuxTensorIdx::Count),
156 _needs_weights_conversion(false),
157 _needs_weights_reshape(false),
158 _is_fc_after_conv(false),
159 _is_quantized_asymmetric(false),
169 if(_is_quantized_asymmetric)
176 TensorInfo src_info = src->
clone()->set_quantization_info(src_quantization_info);
177 TensorInfo weights_info = weights->
clone()->set_quantization_info(weights_quantization_info);
181 const Status status = get_gemmlowp_output_stage_info(&src_info, &weights_info, dst, act, gemmlowp_output_stage_info);
186 gemm_info.set_activation_info(act);
187 _mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
188 _mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info);
193 GEMMInfo gemm_info(
false,
false,
true );
195 _mm_gemm = std::make_unique<CpuGemm>();
196 _mm_gemm->configure(src, weights, biases, dst, 1.f, 1.0f, gemm_info);
209 _flatten = std::make_unique<CpuFlatten>();
210 _flatten->configure(src, &_flattened_src);
213 configure_mm(&_flattened_src, weights, biases, dst, act);
221 configure_mm(src, weights, biases, dst, act);
231 biases !=
nullptr ? biases :
nullptr,
235 _needs_weights_conversion =
false;
238 _is_fc_after_conv =
true;
240 _is_prepared =
false;
241 _trans_weights_idx = AuxTensorIdx::Count;
252 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
253 if(is_batched_fc_layer)
265 if(_needs_weights_reshape)
268 _transpose_weights = std::make_unique<kernels::CpuTransposeKernel>();
269 _transpose_weights->configure(weights, &_reshaped_weights);
270 weights_to_use = &_reshaped_weights;
271 _trans_weights_idx = AuxTensorIdx::TransposedWeights;
278 _convert_weights = std::make_unique<CpuConvertFullyConnectedWeights>();
279 _convert_weights->configure(weights_to_use,
284 weights_to_use = &_converted_weights;
285 _needs_weights_conversion =
true;
286 _trans_weights_idx = AuxTensorIdx::ConvertedWeights;
289 if(_is_fc_after_conv)
292 configure_conv_fc(src, weights_to_use, biases, dst, fc_info.
activation_info);
297 configure_fc_fc(src, weights_to_use, biases, dst, fc_info.
activation_info);
301 if(_needs_weights_reshape || _needs_weights_conversion)
303 _trans_weights = *weights_to_use;
307 auto gemm_mem_req = (_is_quantized_asymmetric) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
308 for(
unsigned int i = 0; i < gemm_mem_req.size(); ++i)
310 _aux_mem[i] = gemm_mem_req[i];
313 if(_aux_mem[Pretranspose].size > 0)
321 _aux_mem[TransposedWeights] =
MemoryInfo(
offset_int_vec(TransposedWeights), _needs_weights_conversion ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _reshaped_weights.
total_size());
341 bool is_fc_after_conv =
true;
357 const bool is_batched_fc_layer = dst->
dimension(1) > 1;
359 if(is_batched_fc_layer)
370 if(!weights_reshaped)
374 weights_to_use = &reshaped_weights;
384 weights_to_use = &converted_weights;
394 src_to_use = &flatten_src;
417 if(_is_fc_after_conv)
420 _flatten->run(flatten_pack);
425 if(_needs_weights_reshape || _needs_weights_conversion)
431 if(_is_quantized_asymmetric)
433 _mm_gemmlowp->run(gemm_pack);
437 _mm_gemm->run(gemm_pack);
451 const ITensor *cur_weights = weights;
454 if(_needs_weights_reshape)
461 cur_weights = reshaped_weights.
get();
465 if(_needs_weights_conversion)
468 _convert_weights->run(convert_pack);
471 cur_weights = converted_weights.
get();
478 if(!_is_quantized_asymmetric)
480 _mm_gemm->prepare(gemm_pack);
484 _mm_gemmlowp->prepare(gemm_pack);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
Quantize using a fixed point multiplication.
void set_activation_info(const ActivationLayerInfo &activation_info)
Set activation layer info.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
bool retain_internal_weights
Retain internal reshaped weights.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CpuGemm.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Fully connected layer info.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
CpuFullyConnected()
Constructor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
TensorShape compute_flatten_shape(const ITensorInfo *input)
Calculate the flattened output shape of a tensor.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Quantization information.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
Static function to check if given info will lead to a valid configuration.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
bool are_weights_reshaped
Reshape the weights tensor if false.
size_t total_size() const override
Returns the total size of the tensor in bytes.
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
ActivationLayerInfo activation_info
Fused activation to apply after the matrix multiplication.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
TensorInfo src_info(src_shape, 1, data_type)
std::array< T, num_max_dimensions >::const_iterator cend() const
Returns a read-only (constant) iterator that points one past the last element in the dimension array.
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
std::array< T, num_max_dimensions >::const_iterator cbegin() const
Returns a read-only (constant) iterator that points to the first element in the dimension array...
Upper Bounded Rectifier ( f(x) = min(a, max(0, x)) )
void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
Sets GEMMLowp output stage.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CpuFullyConnected.
DataLayout weights_trained_layout
Layout that the weights have been trained with.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool transpose_weights
Transpose weights if true.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
~CpuFullyConnected()
Destructor.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
DataType
Available data types.
bool constant_weights
If false, weights can vary between runs.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the mininum and maximum values a data type can take.
ErrorCode error_code() const
Gets error code.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *dst, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.