41 : _memory_group(
std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
42 _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
43 _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
44 _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
45 _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
46 _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
47 _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
48 _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
49 _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
50 _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
51 _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
52 _is_layer_norm_lstm(false)
65 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
66 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
67 forget_gate_bias, cell_bias, output_gate_bias,
68 output_state_in, cell_state_in,
69 scratch_buffer, output_state_out, cell_state_out, output);
79 input_to_cell_weights->
info(), input_to_output_weights->
info(),
80 recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
81 forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(),
82 output_state_in->
info(), cell_state_in->
info(),
83 scratch_buffer->
info(), output_state_out->
info(), cell_state_out->
info(), output->
info(),
84 lstm_params_info, activation_info, cell_threshold, projection_threshold));
96 std::vector<const ITensor *> inputs_vector;
97 inputs_vector.emplace_back(input);
98 inputs_vector.emplace_back(output_state_in);
100 _memory_group.
manage(&_forget_gate_out2);
103 std::vector<const ITensor *> weights_vector;
105 weights_vector.emplace_back(input_to_forget_weights);
106 weights_vector.emplace_back(recurrent_to_forget_weights);
110 _memory_group.
manage(&_forget_gate_out5);
111 _fully_connected_forget_gate.
configure(&_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ?
nullptr : forget_gate_bias, &_forget_gate_out5);
112 _memory_group.
manage(&_forget_gate_out1);
113 _memory_group.
manage(&_forget_gate_out3);
116 Tensor *forget_gate_out = &_forget_gate_out5;
121 _run_peephole_opt =
true;
122 _memory_group.
manage(&_forget_gate_out4);
124 _accum_forget_gate1.
configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
127 forget_gate_out = &_forget_gate_out3;
133 if(_is_layer_norm_lstm)
137 _memory_group.
manage(&_forget_layer_norm_out1);
138 _memory_group.
manage(&_forget_layer_norm_out2);
139 _mean_std_norm_forget_gate.
configure(forget_gate_out);
143 _accum_forget_gate_bias.
configure(&_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
145 forget_gate_out = &_forget_layer_norm_out2;
155 Tensor *input_gate_out = &_input_gate_out1;
158 _memory_group.
manage(&_input_gate_out1);
162 _run_cifg_opt =
true;
169 std::vector<const ITensor *> lstm_weights;
175 _memory_group.
manage(&_input_gate_out1);
176 _memory_group.
manage(&_input_gate_out4);
178 _fully_connected_input_gate.
configure(&_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ?
nullptr : lstm_params.
input_gate_bias(), &_input_gate_out3);
180 input_gate_out = &_input_gate_out3;
182 if(_run_peephole_opt)
184 _memory_group.
manage(&_input_gate_out4);
186 _accum_input_gate1.
configure(&_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
189 input_gate_out = &_input_gate_out1;
196 if(_is_layer_norm_lstm)
200 _memory_group.
manage(&_input_layer_norm_out1);
201 _memory_group.
manage(&_input_layer_norm_out2);
202 _mean_std_norm_input_gate.
configure(input_gate_out);
208 input_gate_out = &_input_layer_norm_out2;
222 _memory_group.
manage(&_cell_state_out1);
223 _fully_connected_cell_state.
configure(input, input_to_cell_weights, (_is_layer_norm_lstm) ?
nullptr : cell_bias, &_cell_state_out1);
224 _memory_group.
manage(&_cell_state_out2);
225 _transpose_cell_state.
configure(recurrent_to_cell_weights, &_cell_state_out2);
226 _memory_group.
manage(&_cell_state_out3);
227 _gemm_cell_state1.
configure(output_state_in, &_cell_state_out2,
nullptr, &_cell_state_out3, 1.f, 0.f);
229 _memory_group.
manage(&_cell_state_out4);
231 Tensor *cell_state_out_ptr = &_cell_state_out4;
232 if(_is_layer_norm_lstm)
236 _memory_group.
manage(&_cell_layer_norm_out1);
237 _memory_group.
manage(&_cell_layer_norm_out2);
238 _mean_std_norm_cell_gate.
configure(cell_state_out_ptr);
242 _accum_cell_gate_bias.
configure(&_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
244 cell_state_out_ptr = &_cell_layer_norm_out2;
246 _activation_cell_state.
configure(cell_state_out_ptr,
nullptr, activation_info);
247 _memory_group.
manage(&_cell_state_out5);
255 if(cell_threshold != 0.f)
257 _perform_cell_clipping =
true;
268 std::vector<const ITensor *> in_out_weights;
269 in_out_weights.emplace_back(input_to_output_weights);
270 in_out_weights.emplace_back(recurrent_to_output_weights);
273 _memory_group.
manage(&_output1);
274 _memory_group.
manage(&_output4);
276 _fully_connected_output.
configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ?
nullptr : output_gate_bias, &_output4);
281 Tensor *output_gate_out = &_output4;
286 _memory_group.
manage(&_output3);
288 _accum_output1.
configure(&_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
290 output_gate_out = &_output1;
299 if(_is_layer_norm_lstm)
303 _memory_group.
manage(&_output_layer_norm_out1);
304 _memory_group.
manage(&_output_layer_norm_out2);
305 _mean_std_norm_output_gate.
configure(output_gate_out);
309 _accum_output_gate_bias.
configure(&_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
311 output_gate_out = &_output_layer_norm_out2;
328 _memory_group.
manage(&_cell_state_activation);
329 _activation_output_state.
configure(&_cell_state_out1, &_cell_state_activation, activation_info);
336 _has_projection_weights =
true;
340 if(projection_threshold != 0.f)
342 _perform_projection_clipping =
true;
348 _copy_cell_state.
configure(&_cell_state_out1, cell_state_out);
349 _copy_output.
configure(output_state_out, output);
352 std::vector<const ITensor *> scratch_inputs;
355 scratch_inputs.emplace_back(input_gate_out);
357 scratch_inputs.emplace_back(&_cell_state_out1);
358 scratch_inputs.emplace_back(forget_gate_out);
359 scratch_inputs.emplace_back(output_gate_out);
376 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
377 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
378 forget_gate_bias, cell_bias, output_gate_bias,
379 output_state_in, cell_state_in,
380 scratch_buffer, output_state_out, cell_state_out, output);
385 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
386 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
387 forget_gate_bias, cell_bias, output_gate_bias,
388 output_state_in, cell_state_in,
389 scratch_buffer, output_state_out, cell_state_out, output);
411 const unsigned int num_batches = input->
dimension(1);
412 const unsigned int num_cells = input_to_output_weights->
dimension(1);
457 std::vector<const ITensorInfo *> inputs_vector;
458 inputs_vector.emplace_back(input);
459 inputs_vector.emplace_back(output_state_in);
491 std::vector<const ITensorInfo *> lstm_weights;
535 if(cell_threshold != 0.f)
542 std::vector<const ITensorInfo *> in_out_weights;
543 in_out_weights.emplace_back(input_to_output_weights);
544 in_out_weights.emplace_back(recurrent_to_output_weights);
572 if(projection_threshold != 0.f)
584 std::vector<const ITensorInfo *> inputs_vector_info_raw;
587 inputs_vector_info_raw.push_back(&input_gate);
589 inputs_vector_info_raw.push_back(&cell_state_tmp);
590 inputs_vector_info_raw.push_back(&forget_gate);
591 inputs_vector_info_raw.push_back(&output_gate_tmp);
603 _concat_inputs_forget_gate.
run();
604 _fully_connected_forget_gate.
run();
606 if(_run_peephole_opt)
608 _pixelwise_mul_forget_gate.
run();
609 _accum_forget_gate1.
run();
611 if(_is_layer_norm_lstm)
613 _mean_std_norm_forget_gate.
run();
614 _pixelwise_mul_forget_gate_coeff.
run();
615 _accum_forget_gate_bias.
run();
617 _activation_forget_gate.
run();
629 _subtract_input_gate.
run();
633 _fully_connected_input_gate.
run();
635 if(_run_peephole_opt)
637 _pixelwise_mul_input_gate.
run();
638 _accum_input_gate1.
run();
641 if(_is_layer_norm_lstm)
643 _mean_std_norm_input_gate.
run();
644 _pixelwise_mul_input_gate_coeff.
run();
645 _accum_input_gate_bias.
run();
647 _activation_input_gate.
run();
650 _fully_connected_cell_state.
run();
651 _transpose_cell_state.
run();
652 _gemm_cell_state1.
run();
653 _accum_cell_state1.
run();
654 if(_is_layer_norm_lstm)
656 _mean_std_norm_cell_gate.
run();
657 _pixelwise_mul_cell_gate_coeff.
run();
658 _accum_cell_gate_bias.
run();
660 _activation_cell_state.
run();
661 _pixelwise_mul_cell_state1.
run();
662 _pixelwise_mul_cell_state2.
run();
663 _accum_cell_state2.
run();
665 if(_perform_cell_clipping)
670 _fully_connected_output.
run();
671 if(_run_peephole_opt)
673 _pixelwise_mul_output_state1.
run();
674 _accum_output1.
run();
676 if(_is_layer_norm_lstm)
678 _mean_std_norm_output_gate.
run();
679 _pixelwise_mul_output_gate_coeff.
run();
680 _accum_output_gate_bias.
run();
682 _activation_output.
run();
684 _activation_output_state.
run();
685 _pixelwise_mul_output_state2.
run();
687 if(_has_projection_weights)
689 _fully_connected_output_state.
run();
690 if(_perform_projection_clipping)
692 _projection_clip.
run();
696 _copy_cell_state.
run();
699 _concat_scratch_buffer.
run();
706 _concat_weights_forget_gate.
run();
709 _concat_weights_input_gate.
run();
711 _concat_weights_output.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
const T * projection_weights() const
void run() override
Run the kernels contained in the function.
const T * input_to_input_weights() const
void run() override
Run the kernels contained in the function.
void run() override final
Run the kernels contained in the function.
bool use_layer_norm() const
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Static function to check if given info will lead to a valid configuration of NELSTMLayer.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
bool has_cifg_opt() const
void configure(const ITensor *input, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, const ITensor *output_state_in, const ITensor *cell_state_in, ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output, const LSTMParams< ITensor > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Initialize function's tensors.
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
T * cell_to_input_weights() const
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for CPU tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
auto input_to_cell_weights
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void configure(ITensor *input, ITensor *output=nullptr, float epsilon=1e-8f)
Initialise the function's input and outputs.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
const T * recurrent_to_input_weights() const
void run() override
Run the kernels contained in the function.
auto input_to_output_weights
const T * projection_bias() const
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
void run() override
Run the kernels contained in the function.
T * output_layer_norm_weights() const
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NECopy.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
auto recurrent_to_cell_weights
NELSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
void prepare() override
Prepare the function for executing.
T * cell_to_forget_weights() const
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
bool has_projection() const
T * cell_to_output_weights() const
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplication.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayer.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void run() override
Run the kernels contained in the function.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
const T * input_gate_bias() const
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
~NELSTMLayer()
Default destructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output=nullptr, float epsilon=1e-8f)
Static function to check if given info will lead to a valid configuration of NEMeanStdDevNormalizationLayer.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
T * cell_layer_norm_weights() const
void configure(ITensor *input, ITensor *output)
Initialise the function's source and destination.
static Status validate(const std::vector< const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of NEConcatenateLayer.
void configure(std::vector< const ITensor *> inputs_vector, ITensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
TensorShape calculate_concatenate_shape(const std::vector< T *> &input, size_t axis)
Calculate the concatenate output shape of the concatenate operation along a single axis.
Truncates the least significant values that are lost in operations.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticSubtraction.