40 const QuantizationInfo
qasymm(1.f / 128.f, 128);
41 const QuantizationInfo
qsymm_3(8.f / 32768.f, 0);
42 const QuantizationInfo
qsymm_4(16.f / 32768.f, 0);
43 const QuantizationInfo qsymm_0(1.f / 32768.f, 0);
48 : _memory_group(
std::move(memory_manager)), _gemmlowp(), _output_stage(), _transpose_weights(), _concat_input_weights(), _concat_recurrent_weights(), _concat_weights(), _concat_inputs(),
49 _concat_bias(), _sigmoid_forget_gate(), _sigmoid_input_gate(), _sigmoid_output_gate(), _tanh_modulation_gate(), _tanh_output_state(), _add1(), _add2(), _mul1(), _mul2(), _mul3(),
50 _slice_input_tensor(), _slice_forget_tensor(), _slice_cell_tensor(), _slice_output_tensor(), _dequantize(), _quantize(), _input_to_input_weights(nullptr), _input_to_forget_weights(nullptr),
51 _input_to_cell_weights(nullptr), _input_to_output_weights(nullptr), _recurrent_to_input_weights(nullptr), _recurrent_to_forget_weights(nullptr), _recurrent_to_cell_weights(nullptr),
52 _recurrent_to_output_weights(nullptr), _input_gate_bias(nullptr), _forget_gate_bias(nullptr), _cell_bias(nullptr), _output_gate_bias(nullptr), _recurrent_weights(), _input_weights(), _weights(),
53 _input(), _weights_transposed(), _output_highp(), _output_lowp(), _bias(), _forget_gate_input(), _input_gate_input(), _output_gate_input(), _input_modulation_gate_input(), _forget_gate_output(),
54 _input_gate_output(), _output_gate_output(), _input_modulation_gate_output(), _cell_state1(), _cell_state2(), _output_state_tmp(), _output_state_out_symm(), _output_state_out_f32(),
67 recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
68 input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
71 input_to_output_weights->
info(),
72 recurrent_to_input_weights->
info(), recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
73 input_gate_bias->
info(), forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(), cell_state_in->
info(), output_state_in->
info(), cell_state_out->
info(), output_state_out->
info()));
94 _cell_bias = cell_bias;
107 std::vector<const ITensor *> weights_vector{ &_recurrent_weights, &_input_weights };
110 _transpose_weights.
configure(&_weights, &_weights_transposed);
113 std::vector<const ITensor *> input_vector{
input, output_state_in };
114 _memory_group.
manage(&_input);
128 _memory_group.
manage(&_output_highp);
130 _gemmlowp.
configure(&_input, &_weights_transposed,
nullptr, &_output_highp);
141 int32_t output_multiplier = 0;
142 int32_t output_shift = 0;
145 _memory_group.
manage(&_output_lowp);
152 _output_stage.
configure(&_output_highp, &_bias, &_output_lowp, info);
159 _memory_group.
manage(&_input_gate_input);
160 _slice_input_tensor.
configure(&_output_lowp, &_input_gate_input, { 0, 0 }, {
output_size, batch_size });
161 _memory_group.
manage(&_forget_gate_input);
163 _memory_group.
manage(&_input_modulation_gate_input);
165 _memory_group.
manage(&_output_gate_input);
171 _memory_group.
manage(&_input_gate_input);
172 _slice_input_tensor.
configure(&_output_lowp, &_input_gate_input, { 0 }, { output_size });
173 _memory_group.
manage(&_forget_gate_input);
174 _slice_forget_tensor.
configure(&_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
175 _memory_group.
manage(&_input_modulation_gate_input);
176 _slice_cell_tensor.
configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
177 _memory_group.
manage(&_output_gate_input);
178 _slice_output_tensor.
configure(&_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
183 _memory_group.
manage(&_forget_gate_output);
189 _memory_group.
manage(&_input_gate_output);
195 _memory_group.
manage(&_input_modulation_gate_output);
201 _memory_group.
manage(&_output_gate_output);
207 _memory_group.
manage(&_cell_state1);
212 _memory_group.
manage(&_cell_state2);
223 _memory_group.
manage(&_output_state_tmp);
227 _memory_group.
manage(&_output_state_out_symm);
234 _memory_group.
manage(&_output_state_out_f32);
236 _dequantize.
configure(&_output_state_out_symm, &_output_state_out_f32);
239 _quantize.
configure(&_output_state_out_f32, output_state_out);
251 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
252 output_state_in, cell_state_out, output_state_out);
255 const int batch_size = input->
dimension(1);
292 std::vector<const ITensorInfo *> inputs_weights_vector;
293 inputs_weights_vector.emplace_back(input_to_input_weights);
294 inputs_weights_vector.emplace_back(input_to_forget_weights);
295 inputs_weights_vector.emplace_back(input_to_cell_weights);
296 inputs_weights_vector.emplace_back(input_to_output_weights);
302 std::vector<const ITensorInfo *> recurrent_weights_vector;
303 recurrent_weights_vector.emplace_back(recurrent_to_input_weights);
304 recurrent_weights_vector.emplace_back(recurrent_to_forget_weights);
305 recurrent_weights_vector.emplace_back(recurrent_to_cell_weights);
306 recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
311 std::vector<const ITensorInfo *> weights_vector;
312 weights_vector.emplace_back(&recurrent_weights);
313 weights_vector.emplace_back(&input_weights);
318 TensorInfo weights_transposed = weights.
clone()->set_is_resizable(
true).set_tensor_shape(weights_transposed_shape);
322 std::vector<const ITensorInfo *> input_vector;
323 input_vector.emplace_back(input);
324 input_vector.emplace_back(output_state_in);
329 std::vector<const ITensorInfo *> bias_vector;
330 bias_vector.emplace_back(input_gate_bias);
331 bias_vector.emplace_back(forget_gate_bias);
332 bias_vector.emplace_back(cell_bias);
333 bias_vector.emplace_back(output_gate_bias);
353 int32_t output_multiplier = 0;
354 int32_t output_shift = 0;
465 _concat_inputs.
run();
472 _slice_input_tensor.
run();
473 _slice_forget_tensor.
run();
474 _slice_cell_tensor.
run();
475 _slice_output_tensor.
run();
479 _sigmoid_forget_gate.
run();
482 _sigmoid_input_gate.
run();
485 _tanh_modulation_gate.
run();
488 _sigmoid_output_gate.
run();
496 _tanh_output_state.
run();
509 _concat_input_weights.
run();
517 _concat_recurrent_weights.
run();
524 _concat_weights.
run();
532 _transpose_weights.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
void run() override
Run the kernels contained in the function.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
void run() override
Run the kernels contained in the function.
~NELSTMLayerQuantized()
Default destructor.
Quantize using a fixed point multiplication.
quantized, symmetric fixed-point 16-bit number
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
void run() override
Run the kernels contained in the function.
auto input_to_input_weights
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
QuantizationInfo qweights(1.f/16.f, 16)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
void configure(const ITensor *input, ITensor *output)
Configure the kernel.
1 channel, 1 F32 per channel
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2021 Arm Limited.
auto input_to_cell_weights
QuantizationInfo qsymm_3(8.f/32768.f, 0)
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void mark_as_unused() const
Marks a tensor as unused.
NELSTMLayerQuantized(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
auto input_to_output_weights
Quantization information.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEDequantizationLayer.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto recurrent_to_input_weights
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_cell_weights
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
void prepare() override
Prepare the function for executing.
void free() override
Free allocated CPU memory.
void configure(const ITensor *input, ITensor *output)
Set the input and output tensors.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplication.
void run() override
Run the kernels contained in the function.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of NEGEMMLowpOutputStage.
void run() override
Run the kernels contained in the function.
void configure(const ITensor *input, const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, ITensor *cell_state_in, const ITensor *output_state_in, ITensor *cell_state_out, ITensor *output_state_out)
Initialize function's tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
QuantizationInfo qasymm(1.f/128.f, 128)
Store the tensor's metadata.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
static Status validate(const std::vector< const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of NEConcatenateLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEQuantizationLayer.
QuantizationInfo qsymm_4(16.f/32768.f, 0)
void configure(std::vector< const ITensor *> inputs_vector, ITensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of NESlice.
Truncates the least significant values that are lost in operations.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NETranspose.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out)
Static function to check if given info will lead to a valid configuration of NELSTMLayerQuantized.
void run() override
Run the kernels contained in the function.