51 : _memory_group(
std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
52 _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
53 _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
54 _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
55 _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
56 _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
57 _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
58 _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
59 _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
60 _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
61 _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
62 _is_layer_norm_lstm(false)
75 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
76 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
77 forget_gate_bias, cell_bias, output_gate_bias,
78 output_state_in, cell_state_in,
79 scratch_buffer, output_state_out, cell_state_out, output);
89 input_to_cell_weights->
info(), input_to_output_weights->
info(),
90 recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
91 forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(),
92 output_state_in->
info(), cell_state_in->
info(),
93 scratch_buffer->
info(), output_state_out->
info(), cell_state_out->
info(), output->
info(),
94 lstm_params_info, activation_info, cell_threshold, projection_threshold));
106 std::vector<const ITensor *> inputs_vector;
107 inputs_vector.emplace_back(input);
108 inputs_vector.emplace_back(output_state_in);
110 _memory_group.
manage(&_forget_gate_out2);
113 std::vector<const ITensor *> weights_vector;
115 weights_vector.emplace_back(input_to_forget_weights);
116 weights_vector.emplace_back(recurrent_to_forget_weights);
120 _memory_group.
manage(&_forget_gate_out5);
121 _fully_connected_forget_gate.
configure(&_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ?
nullptr : forget_gate_bias, &_forget_gate_out5);
122 _memory_group.
manage(&_forget_gate_out1);
123 _memory_group.
manage(&_forget_gate_out3);
126 Tensor *forget_gate_out = &_forget_gate_out5;
131 _run_peephole_opt =
true;
132 _memory_group.
manage(&_forget_gate_out4);
134 _accum_forget_gate1.
configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
137 forget_gate_out = &_forget_gate_out3;
143 if(_is_layer_norm_lstm)
147 _memory_group.
manage(&_forget_layer_norm_out1);
148 _memory_group.
manage(&_forget_layer_norm_out2);
149 _mean_std_norm_forget_gate.
configure(forget_gate_out);
153 _accum_forget_gate_bias.
configure(&_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
155 forget_gate_out = &_forget_layer_norm_out2;
165 Tensor *input_gate_out = &_input_gate_out1;
168 _memory_group.
manage(&_input_gate_out1);
172 _run_cifg_opt =
true;
179 std::vector<const ITensor *> lstm_weights;
185 _memory_group.
manage(&_input_gate_out1);
186 _memory_group.
manage(&_input_gate_out4);
188 _fully_connected_input_gate.
configure(&_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ?
nullptr : lstm_params.
input_gate_bias(), &_input_gate_out3);
190 input_gate_out = &_input_gate_out3;
192 if(_run_peephole_opt)
194 _memory_group.
manage(&_input_gate_out4);
196 _accum_input_gate1.
configure(&_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
199 input_gate_out = &_input_gate_out1;
206 if(_is_layer_norm_lstm)
210 _memory_group.
manage(&_input_layer_norm_out1);
211 _memory_group.
manage(&_input_layer_norm_out2);
212 _mean_std_norm_input_gate.
configure(input_gate_out);
218 input_gate_out = &_input_layer_norm_out2;
232 _memory_group.
manage(&_cell_state_out1);
233 _fully_connected_cell_state.
configure(input, input_to_cell_weights, (_is_layer_norm_lstm) ?
nullptr : cell_bias, &_cell_state_out1);
234 _memory_group.
manage(&_cell_state_out2);
235 _transpose_cell_state.
configure(recurrent_to_cell_weights, &_cell_state_out2);
236 _memory_group.
manage(&_cell_state_out3);
237 _gemm_cell_state1.
configure(output_state_in, &_cell_state_out2,
nullptr, &_cell_state_out3, 1.f, 0.f);
239 _memory_group.
manage(&_cell_state_out4);
241 Tensor *cell_state_out_ptr = &_cell_state_out4;
242 if(_is_layer_norm_lstm)
246 _memory_group.
manage(&_cell_layer_norm_out1);
247 _memory_group.
manage(&_cell_layer_norm_out2);
248 _mean_std_norm_cell_gate.
configure(cell_state_out_ptr);
252 _accum_cell_gate_bias.
configure(&_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
254 cell_state_out_ptr = &_cell_layer_norm_out2;
256 _activation_cell_state.
configure(cell_state_out_ptr,
nullptr, activation_info);
257 _memory_group.
manage(&_cell_state_out5);
265 if(cell_threshold != 0.f)
267 _perform_cell_clipping =
true;
278 std::vector<const ITensor *> in_out_weights;
279 in_out_weights.emplace_back(input_to_output_weights);
280 in_out_weights.emplace_back(recurrent_to_output_weights);
283 _memory_group.
manage(&_output1);
284 _memory_group.
manage(&_output4);
286 _fully_connected_output.
configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ?
nullptr : output_gate_bias, &_output4);
291 Tensor *output_gate_out = &_output4;
296 _memory_group.
manage(&_output3);
298 _accum_output1.
configure(&_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
300 output_gate_out = &_output1;
309 if(_is_layer_norm_lstm)
313 _memory_group.
manage(&_output_layer_norm_out1);
314 _memory_group.
manage(&_output_layer_norm_out2);
315 _mean_std_norm_output_gate.
configure(output_gate_out);
319 _accum_output_gate_bias.
configure(&_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
321 output_gate_out = &_output_layer_norm_out2;
338 _memory_group.
manage(&_cell_state_activation);
339 _activation_output_state.
configure(&_cell_state_out1, &_cell_state_activation, activation_info);
346 _has_projection_weights =
true;
350 if(projection_threshold != 0.f)
352 _perform_projection_clipping =
true;
358 _copy_cell_state.
configure(&_cell_state_out1, cell_state_out);
359 _copy_output.
configure(output_state_out, output);
362 std::vector<const ITensor *> scratch_inputs;
365 scratch_inputs.emplace_back(input_gate_out);
367 scratch_inputs.emplace_back(&_cell_state_out1);
368 scratch_inputs.emplace_back(forget_gate_out);
369 scratch_inputs.emplace_back(output_gate_out);
386 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
387 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
388 forget_gate_bias, cell_bias, output_gate_bias,
389 output_state_in, cell_state_in,
390 scratch_buffer, output_state_out, cell_state_out, output);
395 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
396 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
397 forget_gate_bias, cell_bias, output_gate_bias,
398 output_state_in, cell_state_in,
399 scratch_buffer, output_state_out, cell_state_out, output);
421 const unsigned int num_batches = input->
dimension(1);
422 const unsigned int num_cells = input_to_output_weights->
dimension(1);
467 std::vector<const ITensorInfo *> inputs_vector;
468 inputs_vector.emplace_back(input);
469 inputs_vector.emplace_back(output_state_in);
501 std::vector<const ITensorInfo *> lstm_weights;
545 if(cell_threshold != 0.f)
552 std::vector<const ITensorInfo *> in_out_weights;
553 in_out_weights.emplace_back(input_to_output_weights);
554 in_out_weights.emplace_back(recurrent_to_output_weights);
582 if(projection_threshold != 0.f)
594 std::vector<const ITensorInfo *> inputs_vector_info_raw;
597 inputs_vector_info_raw.push_back(&input_gate);
599 inputs_vector_info_raw.push_back(&cell_state_tmp);
600 inputs_vector_info_raw.push_back(&forget_gate);
601 inputs_vector_info_raw.push_back(&output_gate_tmp);
613 _concat_inputs_forget_gate.
run();
614 _fully_connected_forget_gate.
run();
616 if(_run_peephole_opt)
618 _pixelwise_mul_forget_gate.
run();
619 _accum_forget_gate1.
run();
621 if(_is_layer_norm_lstm)
623 _mean_std_norm_forget_gate.
run();
624 _pixelwise_mul_forget_gate_coeff.
run();
625 _accum_forget_gate_bias.
run();
627 _activation_forget_gate.
run();
639 _subtract_input_gate.
run();
643 _fully_connected_input_gate.
run();
645 if(_run_peephole_opt)
647 _pixelwise_mul_input_gate.
run();
648 _accum_input_gate1.
run();
651 if(_is_layer_norm_lstm)
653 _mean_std_norm_input_gate.
run();
654 _pixelwise_mul_input_gate_coeff.
run();
655 _accum_input_gate_bias.
run();
657 _activation_input_gate.
run();
660 _fully_connected_cell_state.
run();
661 _transpose_cell_state.
run();
662 _gemm_cell_state1.
run();
663 _accum_cell_state1.
run();
664 if(_is_layer_norm_lstm)
666 _mean_std_norm_cell_gate.
run();
667 _pixelwise_mul_cell_gate_coeff.
run();
668 _accum_cell_gate_bias.
run();
670 _activation_cell_state.
run();
671 _pixelwise_mul_cell_state1.
run();
672 _pixelwise_mul_cell_state2.
run();
673 _accum_cell_state2.
run();
675 if(_perform_cell_clipping)
680 _fully_connected_output.
run();
681 if(_run_peephole_opt)
683 _pixelwise_mul_output_state1.
run();
684 _accum_output1.
run();
686 if(_is_layer_norm_lstm)
688 _mean_std_norm_output_gate.
run();
689 _pixelwise_mul_output_gate_coeff.
run();
690 _accum_output_gate_bias.
run();
692 _activation_output.
run();
694 _activation_output_state.
run();
695 _pixelwise_mul_output_state2.
run();
697 if(_has_projection_weights)
699 _fully_connected_output_state.
run();
700 if(_perform_projection_clipping)
702 _projection_clip.
run();
706 _copy_cell_state.
run();
709 _concat_scratch_buffer.
run();
716 _concat_weights_forget_gate.
run();
719 _concat_weights_input_gate.
run();
721 _concat_weights_output.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
const T * projection_weights() const
void run() override
Run the kernels contained in the function.
const T * input_to_input_weights() const
void run() override
Run the kernels contained in the function.
void run() override final
Run the kernels contained in the function.
bool use_layer_norm() const
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticAddition.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Static function to check if given info will lead to a valid configuration of NELSTMLayer.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor. ...
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
bool has_cifg_opt() const
void configure(const ITensor *input, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, const ITensor *output_state_in, const ITensor *cell_state_in, ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output, const LSTMParams< ITensor > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Initialize function's tensors.
auto recurrent_to_forget_weights
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
T * cell_to_input_weights() const
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for Neon tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
auto input_to_cell_weights
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void configure(ITensor *input, ITensor *output=nullptr, float epsilon=1e-8f)
Initialise the function's input and outputs.
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
void configure(const ITensor *input, ITensor *output)
Initialise the kernel's inputs and output.
const T * recurrent_to_input_weights() const
void run() override
Run the kernels contained in the function.
auto input_to_output_weights
const T * projection_bias() const
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
void run() override
Run the kernels contained in the function.
T * output_layer_norm_weights() const
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NECopy.
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
auto recurrent_to_cell_weights
NELSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
void prepare() override
Prepare the function for executing.
T * cell_to_forget_weights() const
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
bool has_projection() const
T * cell_to_output_weights() const
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
Memory group resources scope handling class.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEPixelWiseMultiplication.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
T * input_layer_norm_weights() const
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of NEFullyConnectedLayer.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void run() override
Run the kernels contained in the function.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
[NEActivationLayer snippet]
const T * input_gate_bias() const
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
~NELSTMLayer()
Default destructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output=nullptr, float epsilon=1e-8f)
Static function to check if given info will lead to a valid configuration of NEMeanStdDevNormalizationLayer.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
T * cell_layer_norm_weights() const
void configure(ITensor *input, ITensor *output)
Initialise the function's source and destination.
static Status validate(const std::vector< const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of NEConcatenateLayer.
void configure(std::vector< const ITensor *> inputs_vector, ITensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
TensorShape calculate_concatenate_shape(const std::vector< T *> &input, size_t axis)
Calculate the concatenate output shape of the concatenate operation along a single axis...
Truncates the least significant values that are lost in operations.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEArithmeticSubtraction...