49 const QuantizationInfo
qasymm(1.f / 128.f, 128);
50 const QuantizationInfo
qsymm_3(8.f / 32768.f, 0);
51 const QuantizationInfo
qsymm_4(16.f / 32768.f, 0);
52 const QuantizationInfo qsymm_0(1.f / 32768.f, 0);
57 : _memory_group(std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
58 _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add1(), _add2(), _mul1(), _mul2(), _mul3(),
59 _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(), _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr),
60 _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr), _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr),
61 _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr), _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(),
62 _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(), _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(),
63 _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state1(), _cell_state2(), _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(),
85 const int batch_size =
input->info()->dimension(1);
103 _cell_bias = cell_bias;
116 std::vector<const ITensor *> weights_vector{ &_recurrent_weights, &_input_weights };
119 _transpose_weights.
configure(&_weights, &_weights_transposed);
122 std::vector<const ITensor *> input_vector{
input, output_state_in };
123 _memory_group.
manage(&_input);
137 _memory_group.
manage(&_output_highp);
139 _gemmlowp.
configure(&_input, &_weights_transposed,
nullptr, &_output_highp);
150 int32_t output_multiplier = 0;
151 int32_t output_shift = 0;
154 _memory_group.
manage(&_output_lowp);
155 _output_stage.
configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
162 _memory_group.
manage(&_input_gate_input);
163 _slice_input_tensor.
configure(&_output_lowp, &_input_gate_input, { 0, 0 }, {
output_size, batch_size });
164 _memory_group.
manage(&_forget_gate_input);
166 _memory_group.
manage(&_input_modulation_gate_input);
168 _memory_group.
manage(&_output_gate_input);
174 _memory_group.
manage(&_input_gate_input);
176 _memory_group.
manage(&_forget_gate_input);
178 _memory_group.
manage(&_input_modulation_gate_input);
180 _memory_group.
manage(&_output_gate_input);
186 _memory_group.
manage(&_forget_gate_output);
192 _memory_group.
manage(&_input_gate_output);
198 _memory_group.
manage(&_input_modulation_gate_output);
204 _memory_group.
manage(&_output_gate_output);
210 _memory_group.
manage(&_cell_state1);
215 _memory_group.
manage(&_cell_state2);
226 _memory_group.
manage(&_output_state_tmp);
230 _memory_group.
manage(&_output_state_out_symm);
237 _memory_group.
manage(&_output_state_out_f32);
239 _dequantize.
configure(&_output_state_out_symm, &_output_state_out_f32);
242 _quantize.
configure(&_output_state_out_f32, output_state_out);
255 output_state_in, cell_state_out, output_state_out);
258 const int batch_size =
input->dimension(1);
295 std::vector<const ITensorInfo *> inputs_weights_vector;
305 std::vector<const ITensorInfo *> recurrent_weights_vector;
314 std::vector<const ITensorInfo *> weights_vector;
315 weights_vector.emplace_back(&recurrent_weights);
316 weights_vector.emplace_back(&input_weights);
321 TensorInfo weights_transposed = weights.
clone()->set_is_resizable(
true).set_tensor_shape(weights_transposed_shape);
325 std::vector<const ITensorInfo *> input_vector;
326 input_vector.emplace_back(
input);
327 input_vector.emplace_back(output_state_in);
332 std::vector<const ITensorInfo *> bias_vector;
335 bias_vector.emplace_back(cell_bias);
356 int32_t output_multiplier = 0;
357 int32_t output_shift = 0;
463 _concat_inputs.
run();
470 _slice_input_tensor.
run();
471 _slice_forget_tensor.
run();
472 _slice_cell_tensor.
run();
473 _slice_output_tensor.
run();
477 _sigmoid_forget_gate.
run();
480 _sigmoid_input_gate.
run();
483 _tanh_modulation_gate.
run();
486 _sigmoid_output_gate.
run();
494 _tanh_output_state.
run();
507 _concat_input_weights.
run();
515 _concat_recurrent_weights.
run();
522 _concat_weights.
run();
530 _transpose_weights.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
~NELSTMLayerQuantized()
Default destructor.
void run() override final
Run the kernels contained in the function.
quantized, symmetric fixed-point 16-bit number
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be differen...
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
auto input_to_input_weights
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
QuantizationInfo qweights(1.f/16.f, 16)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
void configure(const ITensor *input, ITensor *output)
Configure the kernel.
1 channel, 1 F32 per channel
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2021 Arm Limited.
auto input_to_cell_weights
QuantizationInfo qsymm_3(8.f/32768.f, 0)
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
NELSTMLayerQuantized(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
auto input_to_output_weights
Quantization information.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
void run() override
Run the kernels contained in the function.
static Status validate(const std::vector< const ITensorInfo * > &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of NEConcatenateLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEDequantizationLayer.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto recurrent_to_input_weights
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_cell_weights
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void prepare() override
Prepare the function for executing.
void free() override
Free allocated CPU memory.
void configure(const ITensor *input, ITensor *output)
Set the input and output tensors.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplicatio...
void run() override
Run the kernels contained in the function.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min=std::numeric_limits< int32_t >::lowest(), int max=std::numeric_limits< int32_t >::max())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
void configure(std::vector< const ITensor * > inputs_vector, ITensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
void configure(const ITensor *input, const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, ITensor *cell_state_in, const ITensor *output_state_in, ITensor *cell_state_out, ITensor *output_state_out)
Initialize function's tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
QuantizationInfo qasymm(1.f/128.f, 128)
Store the tensor's metadata.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiply...
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEQuantizationLayer.
QuantizationInfo qsymm_4(16.f/32768.f, 0)
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of NESlice.
Truncates the least significant values that are lost in operations.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETranspose.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out)
Static function to check if given info will lead to a valid configuration of NELSTMLayer.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min=std::numeric_limits< int32_t >::lowest(), int max=std::numeric_limits< int32_t >::max())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpQuantizeDownIn...