Compute Library
 19.08
CLLSTMLayer Class Reference

This function performs a single time step in a Long Short-Term Memory (LSTM) layer. More...

#include <CLLSTMLayer.h>

Collaboration diagram for CLLSTMLayer:

Public Member Functions

 CLLSTMLayer (std::shared_ptr< IMemoryManager > memory_manager=nullptr)
 Default constructor. More...
 
void configure (const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, const ICLTensor *output_state_in, const ICLTensor *cell_state_in, ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output, const LSTMParams< ICLTensor > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
 Initialize function's tensors. More...
 
void run () override
 Run the kernels contained in the function. More...
 
void prepare () override
 Prepare the function for executing. More...
 
- Public Member Functions inherited from IFunction
virtual ~IFunction ()=default
 Destructor. More...
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
 Static function to check if given info will lead to a valid configuration of CLLSTMLayer. More...
 

Detailed Description

This function performs a single time step in a Long Short-Term Memory (LSTM) layer.

Definition at line 55 of file CLLSTMLayer.h.
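As context for the gate computations this function configures, here is a hedged, plain-C++ scalar sketch of one LSTM time step (num_units = input_size = batch_size = 1, no CIFG, peephole or projection). The real class runs the same recurrence over 2D ICLTensor objects through OpenCL kernels; none of the names in the sketch are part of the library API.

```cpp
#include <cmath>

// Illustrative sketch only: a scalar LSTM time step. The struct and the
// weight names (w_*, u_*, b_*) are assumptions for this example, not
// library types.
struct LstmState
{
    double cell;   // cell_state
    double output; // output_state
};

static double sigmoid(double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}

LstmState lstm_step(double x, LstmState s,
                    double w_f, double u_f, double b_f,  // forget gate
                    double w_i, double u_i, double b_i,  // input gate
                    double w_c, double u_c, double b_c,  // cell candidate
                    double w_o, double u_o, double b_o)  // output gate
{
    const double forget_gate = sigmoid(w_f * x + u_f * s.output + b_f);
    const double input_gate  = sigmoid(w_i * x + u_i * s.output + b_i); // with CIFG: 1 - forget_gate
    const double cell_state  = forget_gate * s.cell
                               + input_gate * std::tanh(w_c * x + u_c * s.output + b_c);
    const double output_gate = sigmoid(w_o * x + u_o * s.output + b_o);
    return { cell_state, output_gate * std::tanh(cell_state) };
}
```

With all weights and biases at zero and a previous cell state of 1, each gate evaluates to sigmoid(0) = 0.5, so the new cell state is 0.5 and the output is 0.5 * tanh(0.5).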

Constructor & Destructor Documentation

◆ CLLSTMLayer()

CLLSTMLayer ( std::shared_ptr< IMemoryManager >  memory_manager = nullptr )

Default constructor.

Definition at line 40 of file CLLSTMLayer.cpp.

41  : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
42  _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
43  _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
44  _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
45  _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
46  _ones_memset_kernel(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(),
47  _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(),
48  _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(),
49  _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(),
50  _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(),
51  _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false),
52  _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
53 {
54 }

Member Function Documentation

◆ configure()

void configure ( const ICLTensor *  input,
const ICLTensor *  input_to_forget_weights,
const ICLTensor *  input_to_cell_weights,
const ICLTensor *  input_to_output_weights,
const ICLTensor *  recurrent_to_forget_weights,
const ICLTensor *  recurrent_to_cell_weights,
const ICLTensor *  recurrent_to_output_weights,
const ICLTensor *  forget_gate_bias,
const ICLTensor *  cell_bias,
const ICLTensor *  output_gate_bias,
const ICLTensor *  output_state_in,
const ICLTensor *  cell_state_in,
ICLTensor *  scratch_buffer,
ICLTensor *  output_state_out,
ICLTensor *  cell_state_out,
ICLTensor *  output,
const LSTMParams< ICLTensor > &  lstm_params,
const ActivationLayerInfo &  activation_info,
float  cell_threshold = 0.f,
float  projection_threshold = 0.f 
)

Initialize function's tensors.

Parameters
[in]  input                        Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
[in]  input_to_forget_weights      2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  input_to_cell_weights        2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  input_to_output_weights      2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_forget_weights  2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_cell_weights    2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_output_weights  2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  forget_gate_bias             1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
[in]  cell_bias                    1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
[in]  output_gate_bias             1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
[in]  output_state_in              2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as input.
[in]  cell_state_in                2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as input.
[out] scratch_buffer               2D tensor with dimensions [num_units * 3, batch_size] with CIFG or [num_units * 4, batch_size] without CIFG. Data type supported: Same as input.
[out] output_state_out             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as input.
[out] cell_state_out               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as input.
[out] output                       Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as input.
[in]  lstm_params                  (Optional) Weights tensors used in the peephole, CIFG, projection and layer-normalization optimizations:
      input_to_input_weights          2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as input.
      recurrent_to_input_weights      2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as input.
      cell_to_input_weights           1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as input.
      cell_to_forget_weights          1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      cell_to_output_weights          1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      input_gate_bias                 1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      projection_weights              2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as input.
      projection_bias                 1D weights tensor with dimensions [output_size]. Data type supported: Same as input.
      input_layer_norm_coefficients   1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      forget_layer_norm_coefficients  1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      cell_layer_norm_coefficients    1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
      output_layer_norm_coefficients  1D weights tensor with dimensions [num_units]. Data type supported: Same as input.
[in]  activation_info              Contains activation information described in ActivationLayerInfo.
[in]  cell_threshold               The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0f then clipping is disabled.
[in]  projection_threshold         The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0f then clipping is disabled.

lstm_res = PixelwiseMul(output, Activation(cell_state))

output_state = Clip(lstm_res * projection_weights + projection_bias, projection_threshold), if there is a projection
output_state = lstm_res, otherwise
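The projection branch above can be written out as a small scalar sketch (the function and parameter names are illustrative, not the library API); a projection_threshold of 0 disables clipping, as documented for configure().

```cpp
#include <cmath>

// Illustrative scalar version of the output_state selection. The clip
// mirrors the LU_BOUNDED_RELU activation used internally for clipping.
double output_state(double lstm_res, bool has_projection,
                    double projection_weights, double projection_bias,
                    double projection_threshold)
{
    if (!has_projection)
        return lstm_res;
    double proj = lstm_res * projection_weights + projection_bias;
    if (projection_threshold != 0.0)
        proj = std::fmin(std::fmax(proj, -projection_threshold), projection_threshold);
    return proj;
}
```

For example, with lstm_res = 2, projection_weights = 3 and projection_bias = 0.5, the projected value 6.5 is clipped to 4 when projection_threshold = 4, and passed through unchanged when the threshold is 0.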

Definition at line 56 of file CLLSTMLayer.cpp.

63 {
64  ARM_COMPUTE_ERROR_ON_NULLPTR(input,
65  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
66  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
67  forget_gate_bias, cell_bias, output_gate_bias,
68  output_state_in, cell_state_in,
69  scratch_buffer, output_state_out, cell_state_out, output);
70 
71  _is_layer_norm_lstm = lstm_params.use_layer_norm();
72 
73  // Set lstm parameters
74  LSTMParams<ITensorInfo> lstm_params_info;
75  if(lstm_params.has_peephole_opt())
76  {
77  lstm_params_info.set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info());
78  }
79  if(lstm_params.has_projection())
80  {
81  lstm_params_info.set_projection_params(lstm_params.projection_weights()->info(),
82  lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr);
83  }
84  if(!lstm_params.has_cifg_opt())
85  {
86  const ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr;
87  lstm_params_info.set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(),
88  cell_to_input_weights_info, lstm_params.input_gate_bias()->info());
89  }
90 
91  // Validate
92  ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayer::validate(input->info(), input_to_forget_weights->info(),
93  input_to_cell_weights->info(), input_to_output_weights->info(),
94  recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
95  forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
96  output_state_in->info(), cell_state_in->info(),
97  scratch_buffer->info(), output_state_out->info(), cell_state_out->info(), output->info(),
98  lstm_params_info, activation_info, cell_threshold, projection_threshold));
99 
100  const TensorShape cell_state_shape = cell_state_in->info()->tensor_shape();
101  // Configure block that calculates the forget gate
102  // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
103  // We optimize this as follows:
104  // forget_gate = Activation( (input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias
105  _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
106  _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
107  _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
108 
109  std::vector<const ICLTensor *> inputs_vector;
110  inputs_vector.emplace_back(input);
111  inputs_vector.emplace_back(output_state_in);
112  const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
113  _forget_gate_out2.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type()));
114 
115  _memory_group.manage(&_forget_gate_out2);
116  _concat_inputs_forget_gate.configure(input, output_state_in, &_forget_gate_out2);
117 
118  std::vector<const ICLTensor *> weights_vector;
119 
120  weights_vector.emplace_back(input_to_forget_weights);
121  weights_vector.emplace_back(recurrent_to_forget_weights);
122  const TensorShape weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(weights_vector, 0);
123  _forget_gate_out6.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type()));
124 
125  _concat_weights_forget_gate.configure(input_to_forget_weights, recurrent_to_forget_weights, &_forget_gate_out6);
126 
127  _memory_group.manage(&_forget_gate_out5);
128  _fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
129  _memory_group.manage(&_forget_gate_out1);
130  _memory_group.manage(&_forget_gate_out3);
131  _forget_gate_out6.allocator()->allocate();
132 
133  CLTensor *forget_gate_out = &_forget_gate_out5;
134  if(lstm_params.has_peephole_opt())
135  {
136  _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
137 
138  _run_peephole_opt = true;
139  _memory_group.manage(&_forget_gate_out4);
140  _pixelwise_mul_forget_gate.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
141  _accum_forget_gate1.configure(&_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
142  _forget_gate_out4.allocator()->allocate();
143  _forget_gate_out5.allocator()->allocate();
144  forget_gate_out = &_forget_gate_out3;
145  }
146  else
147  {
148  _forget_gate_out3.allocator()->allocate();
149  }
150  if(_is_layer_norm_lstm)
151  {
152  _forget_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
153  _forget_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
154  _memory_group.manage(&_forget_layer_norm_out1);
155  _memory_group.manage(&_forget_layer_norm_out2);
156  _mean_std_norm_forget_gate.configure(forget_gate_out);
157  _pixelwise_mul_forget_gate_coeff.configure(forget_gate_out, lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
158  // forget_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
159  forget_gate_out->allocator()->allocate();
160  _accum_forget_gate_bias.configure(ArithmeticOperation::ADD, &_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
161  _forget_layer_norm_out1.allocator()->allocate();
162  forget_gate_out = &_forget_layer_norm_out2;
163  }
164  _activation_forget_gate.configure(forget_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
165 
166  // Configure block that calculates the input gate
167  // input_gate = Activation(input * input_to_input_weights + output_state * recurrent_to_input_weights + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
168  // input_gate = 1 - forget_gate, with CIFG
169  // We optimize this as follows:
170  // input_gate = Activation((input,output_state) * (input_to_input_weights,recurrent_to_input_weights) + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
171  _input_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
172  CLTensor *input_gate_out = &_input_gate_out1;
173  if(lstm_params.has_cifg_opt())
174  {
175  _memory_group.manage(&_input_gate_out1);
176  _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
177  _ones_memset_kernel.configure(&_ones, PixelValue(1, _ones.info()->data_type()));
178  _subtract_input_gate.configure(ArithmeticOperation::SUB, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
179  _ones.allocator()->allocate();
180  _run_cifg_opt = true;
181  }
182  else
183  {
184  _input_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
185  _input_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
186 
187  std::vector<const ICLTensor *> lstm_weights;
188  lstm_weights.emplace_back(lstm_params.input_to_input_weights());
189  lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
190  TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
191  _input_gate_out2.allocator()->init(TensorInfo(lstm_weights_concat_shape, 1, input->info()->data_type()));
192 
193  _concat_weights_input_gate.configure(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), &_input_gate_out2);
194 
195  _memory_group.manage(&_input_gate_out1);
196 
197  _memory_group.manage(&_input_gate_out3);
198  _fully_connected_input_gate.configure(&_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3);
199  _input_gate_out2.allocator()->allocate();
200 
201  input_gate_out = &_input_gate_out3;
202  if(_run_peephole_opt)
203  {
204  _memory_group.manage(&_input_gate_out4);
205  _pixelwise_mul_input_gate.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_input_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
206  _accum_input_gate1.configure(&_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
207  _input_gate_out3.allocator()->allocate();
208  _input_gate_out4.allocator()->allocate();
209  input_gate_out = &_input_gate_out1;
210  }
211  else
212  {
213  _input_gate_out1.allocator()->allocate();
214  }
215 
216  if(_is_layer_norm_lstm)
217  {
218  _input_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
219  _input_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
220  _memory_group.manage(&_input_layer_norm_out1);
221  _memory_group.manage(&_input_layer_norm_out2);
222  _mean_std_norm_input_gate.configure(input_gate_out);
223  _pixelwise_mul_input_gate_coeff.configure(input_gate_out, lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
224  // input_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
225  input_gate_out->allocator()->allocate();
226  _accum_input_gate_bias.configure(ArithmeticOperation::ADD, &_input_layer_norm_out1, lstm_params.input_gate_bias(), &_input_layer_norm_out2, ConvertPolicy::SATURATE);
227  _input_layer_norm_out1.allocator()->allocate();
228  input_gate_out = &_input_layer_norm_out2;
229  }
230  _activation_input_gate.configure(input_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
231  }
232 
233  // Configure block that calculates the cell state
234  // cell_state = Clip((PixelwiseMul(input_gate, Activation(input * input_to_cell_weights + output_state_in * recurrent_to_cell_weights + cell_bias)) + PixelwiseMul(forget_gate, cell_state)), cell_threshold)
235  TensorShape cell_state1_shape = compute_transposed_shape(*recurrent_to_output_weights->info());
236  _cell_state_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
237  _cell_state_out2.allocator()->init(TensorInfo(cell_state1_shape, 1, input->info()->data_type()));
238  _cell_state_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
239  _cell_state_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
240  _cell_state_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
241 
242  _memory_group.manage(&_cell_state_out1);
243  _fully_connected_cell_state.configure(input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
244  _memory_group.manage(&_cell_state_out2);
245  _transpose_cell_state.configure(recurrent_to_cell_weights, &_cell_state_out2);
246  _memory_group.manage(&_cell_state_out3);
247  _gemm_cell_state1.configure(output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
248  _cell_state_out2.allocator()->allocate();
249  _memory_group.manage(&_cell_state_out4);
250  _accum_cell_state1.configure(ArithmeticOperation::ADD, &_cell_state_out1, &_cell_state_out3, &_cell_state_out4, ConvertPolicy::SATURATE);
251  CLTensor *cell_state_out_ptr = &_cell_state_out4;
252  if(_is_layer_norm_lstm)
253  {
254  _cell_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
255  _cell_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
256  _memory_group.manage(&_cell_layer_norm_out1);
257  _memory_group.manage(&_cell_layer_norm_out2);
258  _mean_std_norm_cell_gate.configure(cell_state_out_ptr);
259  _pixelwise_mul_cell_gate_coeff.configure(cell_state_out_ptr, lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
260  // cell_state_out_ptr is going to be reassigned, so allocate the tensor that it was assigned to before
261  cell_state_out_ptr->allocator()->allocate();
262  _accum_cell_gate_bias.configure(ArithmeticOperation::ADD, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
263  _cell_layer_norm_out1.allocator()->allocate();
264  cell_state_out_ptr = &_cell_layer_norm_out2;
265  }
266  _activation_cell_state.configure(cell_state_out_ptr, nullptr, activation_info);
267  _memory_group.manage(&_cell_state_out5);
268  _pixelwise_mul_cell_state1.configure(cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
269  cell_state_out_ptr->allocator()->allocate();
270  _pixelwise_mul_cell_state2.configure(forget_gate_out, cell_state_in, &_cell_state_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
271  _accum_cell_state2.configure(ArithmeticOperation::ADD, &_cell_state_out5, &_cell_state_out3, &_cell_state_out1, ConvertPolicy::SATURATE);
272  _cell_state_out3.allocator()->allocate();
273  _cell_state_out5.allocator()->allocate();
274  // Perform clipping
275  if(cell_threshold != 0.f)
276  {
277  _perform_cell_clipping = true;
278  _cell_clip.configure(&_cell_state_out1, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
279  }
280 
281  // Configure block that calculates the output
282  // output_state_out = Activation(input * input_to_output_weights + output_state_in * recurrent_to_output_weights + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
283  // We optimize this as follows:
284  // output_state_out = Activation( (input,output_state_in) * (input_to_output_weights, recurrent_to_output_weights) + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
285  _output1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
286  _output4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
287  std::vector<const ICLTensor *> in_out_weights;
288  in_out_weights.emplace_back(input_to_output_weights);
289  in_out_weights.emplace_back(recurrent_to_output_weights);
290  TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
291  _output2.allocator()->init(TensorInfo(in_out_weights_concat_shape, 1, input->info()->data_type()));
292 
293  _concat_weights_output.configure(input_to_output_weights, recurrent_to_output_weights, &_output2);
294 
295  _memory_group.manage(&_output1);
296  _memory_group.manage(&_output4);
297 
298  _fully_connected_output.configure(&_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);
299 
300  _output2.allocator()->allocate();
301  _forget_gate_out2.allocator()->allocate();
302 
303  CLTensor *output_gate_out = &_output4;
304  if(lstm_params.has_peephole_opt())
305  {
306  _output3.allocator()->init(TensorInfo(_cell_state_out1.info()->tensor_shape(), 1, input->info()->data_type()));
307 
308  _memory_group.manage(&_output3);
309  _pixelwise_mul_output_state1.configure(&_cell_state_out1, lstm_params.cell_to_output_weights(), &_output3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
310  _accum_output1.configure(&_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
311  _output4.allocator()->allocate();
312  output_gate_out = &_output1;
313 
314  // Allocate intermediate buffers
315  _output3.allocator()->allocate();
316  }
317  else
318  {
319  _output1.allocator()->allocate();
320  }
321  if(_is_layer_norm_lstm)
322  {
323  _output_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
324  _output_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
325  _memory_group.manage(&_output_layer_norm_out1);
326  _memory_group.manage(&_output_layer_norm_out2);
327  _mean_std_norm_output_gate.configure(output_gate_out);
328  _pixelwise_mul_output_gate_coeff.configure(output_gate_out, lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
329  // output_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
330  output_gate_out->allocator()->allocate();
331  _accum_output_gate_bias.configure(ArithmeticOperation::ADD, &_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
332  _output_layer_norm_out1.allocator()->allocate();
333  output_gate_out = &_output_layer_norm_out2;
334  }
335  _activation_output.configure(output_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
336 
337  // Configure block that calculates the output state
338  // lstm_res = PixelwiseMul(output, Activation(cell_state))
339  //
340  // output_state = Clip(lstm_res * projection_weights + projection_bias, projection_threshold), if there is a projection
341  // output_state = lstm_res, otherwise
346  ICLTensor *output_state_out_tmp = lstm_params.has_projection() ? &_output_state1 : output_state_out;
347  _cell_state_activation.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
348  _output_state1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
349 
350  _memory_group.manage(&_cell_state_activation);
351  _activation_output_state.configure(&_cell_state_out1, &_cell_state_activation, activation_info);
352  _pixelwise_mul_output_state2.configure(&_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
353  _cell_state_activation.allocator()->allocate();
354 
355  if(lstm_params.has_projection())
356  {
357  _has_projection_weights = true;
358  _fully_connected_output_state.configure(output_state_out_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out);
359  _output_state1.allocator()->allocate();
360  // Perform clipping
361  if(projection_threshold != 0.f)
362  {
363  _perform_projection_clipping = true;
364  _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
365  }
366  }
367 
368  // Copy cell state and output
369  _copy_cell_state.configure(&_cell_state_out1, cell_state_out);
370  _copy_output.configure(output_state_out, output);
371 
372  // Vector for holding the tensors to store in scratch buffer
373  std::vector<ICLTensor *> scratch_inputs;
374  if(!lstm_params.has_cifg_opt())
375  {
376  scratch_inputs.emplace_back(input_gate_out);
377  }
378  scratch_inputs.emplace_back(&_cell_state_out1);
379  scratch_inputs.emplace_back(forget_gate_out);
380  scratch_inputs.emplace_back(output_gate_out);
381  _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX);
382  input_gate_out->allocator()->allocate();
383  _cell_state_out1.allocator()->allocate();
384  forget_gate_out->allocator()->allocate();
385  output_gate_out->allocator()->allocate();
386 }
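The scratch-buffer concatenation at the end of configure() (lines 373-385) stacks the input gate (skipped under CIFG), cell state, forget gate and output gate along dimension 0, which determines the scratch-buffer width. A minimal sketch (the helper name is an assumption, not a library function):

```cpp
#include <cstddef>

// Width of the scratch buffer along dimension 0: one num_units-wide slot
// per concatenated gate tensor. With CIFG the input gate is derived as
// 1 - forget_gate and is not stored.
std::size_t scratch_buffer_width(std::size_t num_units, bool has_cifg_opt)
{
    const std::size_t num_gates = has_cifg_opt ? 3 : 4;
    return num_units * num_gates;
}
```

For num_units = 16 this gives a width of 48 with CIFG and 64 without, matching the scratch_buffer dimensions documented for configure().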
Initialise the kernel's inputs and output.
Definition: CLGEMM.cpp:470
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's input and output.
const T * output_layer_norm_weights() const
Definition: LSTMParams.h:175
const T * input_gate_bias() const
Definition: LSTMParams.h:135
void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy)
Static function to check if given info will lead to a valid configuration of CLSaturatedArithmeticOpe...
Store the tensor's metadata.
Definition: TensorInfo.h:45
void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window=nullptr)
Initialise the kernel's tensor and filling value.
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy)
Initialise the kernel's input, output and border mode.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
Definition: TensorInfo.h:252
const T * forget_layer_norm_weights() const
Definition: LSTMParams.h:165
Basic implementation of the OpenCL tensor interface.
Definition: CLTensor.h:40

References arm_compute::ADD, CLTensorAllocator::allocate(), CLTensor::allocator(), ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, arm_compute::misc::shape_calculator::calculate_concatenate_shape(), LSTMParams< T >::cell_layer_norm_weights(), LSTMParams< T >::cell_to_forget_weights(), LSTMParams< T >::cell_to_input_weights(), LSTMParams< T >::cell_to_output_weights(), arm_compute::misc::shape_calculator::compute_transposed_shape(), CLMeanStdDevNormalizationLayer::configure(), CLTransposeKernel::configure(), CLArithmeticAddition::configure(), CLCopyKernel::configure(), CLActivationLayerKernel::configure(), CLMemsetKernel::configure(), CLWidthConcatenate2TensorsKernel::configure(), CLPixelWiseMultiplicationKernel::configure(), CLConcatenateLayer::configure(), CLGEMM::configure(), CLFullyConnectedLayer::configure(), CLSaturatedArithmeticOperationKernel::configure(), ITensorInfo::data_type(), TensorInfo::data_type(), Window::DimX, LSTMParams< T >::forget_layer_norm_weights(), LSTMParams< T >::has_cifg_opt(), LSTMParams< T >::has_peephole_opt(), LSTMParams< T >::has_projection(), ITensor::info(), CLTensor::info(), ITensorAllocator::init(), LSTMParams< T >::input_gate_bias(), LSTMParams< T >::input_layer_norm_weights(), LSTMParams< T >::input_to_input_weights(), ActivationLayerInfo::LOGISTIC, ActivationLayerInfo::LU_BOUNDED_RELU, MemoryGroupBase< TensorType >::manage(), LSTMParams< T >::output_layer_norm_weights(), LSTMParams< T >::projection_bias(), LSTMParams< T >::projection_weights(), LSTMParams< T >::recurrent_to_input_weights(), arm_compute::SATURATE, LSTMParams< T >::set_cifg_params(), LSTMParams< T >::set_peephole_params(), LSTMParams< T >::set_projection_params(), arm_compute::SUB, ITensorInfo::tensor_shape(), TensorInfo::tensor_shape(), arm_compute::TO_NEAREST_EVEN, LSTMParams< T >::use_layer_norm(), and CLLSTMLayer::validate().

◆ prepare()

void prepare ( )
override virtual

Prepare the function for executing.

Any one-off pre-processing step required by the function is handled here.

Note
The prepare stage might not require all of the function's buffers' backing memory to be available in order to execute.

Reimplemented from IFunction.

Definition at line 719 of file CLLSTMLayer.cpp.

720 {
721  if(!_is_prepared)
722  {
723  CLScheduler::get().enqueue(_concat_weights_forget_gate);
724  if(!_run_cifg_opt)
725  {
726  CLScheduler::get().enqueue(_concat_weights_input_gate);
727  }
728  CLScheduler::get().enqueue(_concat_weights_output);
729  _is_prepared = true;
730  }
731 }

References CLScheduler::enqueue(), and CLScheduler::get().

Referenced by CLLSTMLayer::run().

◆ run()

void run ( )
override virtual

Run the kernels contained in the function.

For NEON kernels:

  • Multi-threading is used for the kernels which are parallelisable.
  • By default std::thread::hardware_concurrency() threads are used.
Note
CPPScheduler::set_num_threads() can be used to manually set the number of threads

For OpenCL kernels:

  • All the kernels are enqueued on the queue associated with CLScheduler.
  • The queue is then flushed.
Note
The function does not block until the kernels have executed; it is the user's responsibility to wait for completion.
prepare() is called automatically on the first run if it has not already been done.

Implements IFunction.

Definition at line 619 of file CLLSTMLayer.cpp.

620 {
621  prepare();
622 
623  MemoryGroupResourceScope scope_mg(_memory_group);
624 
625  CLScheduler::get().enqueue(_concat_inputs_forget_gate);
626 
627  _fully_connected_forget_gate.run();
628 
629  if(_run_peephole_opt)
630  {
631  CLScheduler::get().enqueue(_pixelwise_mul_forget_gate);
632  _accum_forget_gate1.run();
633  }
634  if(_is_layer_norm_lstm)
635  {
636  _mean_std_norm_forget_gate.run();
637  CLScheduler::get().enqueue(_pixelwise_mul_forget_gate_coeff);
638  CLScheduler::get().enqueue(_accum_forget_gate_bias);
639  }
640  CLScheduler::get().enqueue(_activation_forget_gate);
641 
642  if(_run_cifg_opt)
643  {
644  CLScheduler::get().enqueue(_ones_memset_kernel);
645  CLScheduler::get().enqueue(_subtract_input_gate);
646  }
647  else
648  {
649  _fully_connected_input_gate.run();
650 
651  if(_run_peephole_opt)
652  {
653  CLScheduler::get().enqueue(_pixelwise_mul_input_gate);
654  _accum_input_gate1.run();
655  }
656 
657  if(_is_layer_norm_lstm)
658  {
659  _mean_std_norm_input_gate.run();
660  CLScheduler::get().enqueue(_pixelwise_mul_input_gate_coeff);
661  CLScheduler::get().enqueue(_accum_input_gate_bias);
662  }
663  CLScheduler::get().enqueue(_activation_input_gate);
664  }
665 
666  _fully_connected_cell_state.run();
667  CLScheduler::get().enqueue(_transpose_cell_state);
668  _gemm_cell_state1.run();
669  CLScheduler::get().enqueue(_accum_cell_state1);
670  if(_is_layer_norm_lstm)
671  {
672  _mean_std_norm_cell_gate.run();
673  CLScheduler::get().enqueue(_pixelwise_mul_cell_gate_coeff);
674  CLScheduler::get().enqueue(_accum_cell_gate_bias);
675  }
676  CLScheduler::get().enqueue(_activation_cell_state);
677  CLScheduler::get().enqueue(_pixelwise_mul_cell_state1);
678  CLScheduler::get().enqueue(_pixelwise_mul_cell_state2);
679  CLScheduler::get().enqueue(_accum_cell_state2);
680 
681  if(_perform_cell_clipping)
682  {
683  CLScheduler::get().enqueue(_cell_clip);
684  }
685 
686  _fully_connected_output.run();
687 
688  if(_run_peephole_opt)
689  {
690  CLScheduler::get().enqueue(_pixelwise_mul_output_state1);
691  _accum_output1.run();
692  }
693  if(_is_layer_norm_lstm)
694  {
695  _mean_std_norm_output_gate.run();
696  CLScheduler::get().enqueue(_pixelwise_mul_output_gate_coeff);
697  CLScheduler::get().enqueue(_accum_output_gate_bias);
698  }
699  CLScheduler::get().enqueue(_activation_output);
700 
701  CLScheduler::get().enqueue(_activation_output_state);
702  CLScheduler::get().enqueue(_pixelwise_mul_output_state2);
703 
704  if(_has_projection_weights)
705  {
706  _fully_connected_output_state.run();
707  if(_perform_projection_clipping)
708  {
709  CLScheduler::get().enqueue(_projection_clip);
710  }
711  }
712 
713  CLScheduler::get().enqueue(_copy_cell_state);
714  CLScheduler::get().enqueue(_copy_output);
715 
716  _concat_scratch_buffer.run();
717 }

References CLScheduler::enqueue(), CLScheduler::get(), CLLSTMLayer::prepare(), ICLSimpleFunction::run(), CLConcatenateLayer::run(), CLGEMM::run(), and CLFullyConnectedLayer::run().

◆ validate()

Status validate ( const ITensorInfo *  input,
const ITensorInfo *  input_to_forget_weights,
const ITensorInfo *  input_to_cell_weights,
const ITensorInfo *  input_to_output_weights,
const ITensorInfo *  recurrent_to_forget_weights,
const ITensorInfo *  recurrent_to_cell_weights,
const ITensorInfo *  recurrent_to_output_weights,
const ITensorInfo *  forget_gate_bias,
const ITensorInfo *  cell_bias,
const ITensorInfo *  output_gate_bias,
const ITensorInfo *  output_state_in,
const ITensorInfo *  cell_state_in,
const ITensorInfo *  scratch_buffer,
const ITensorInfo *  output_state_out,
const ITensorInfo *  cell_state_out,
const ITensorInfo *  output,
const LSTMParams< ITensorInfo > &  lstm_params,
const ActivationLayerInfo &  activation_info,
float  cell_threshold = 0.f,
float  projection_threshold = 0.f 
)
static

Static function to check if given info will lead to a valid configuration of CLLSTMLayer.

Parameters
[in]  input                        Source tensor info. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
[in]  input_to_forget_weights      2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  input_to_cell_weights        2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  input_to_output_weights      2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_forget_weights  2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_cell_weights    2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  recurrent_to_output_weights  2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as input.
[in]  forget_gate_bias             1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
[in]  cell_bias                    1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
[in]  output_gate_bias             1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
[in]  output_state_in              2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as input.
[in]  cell_state_in                2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as input.
[in]  scratch_buffer               2D tensor info with dimensions [num_units * 4, batch_size] without CIFG or [num_units * 3, batch_size] with CIFG. Data type supported: Same as input.
[in]  output_state_out             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as input.
[in]  cell_state_out               2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as input.
[in]  output                       Destination tensor info. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as input.
[in]  lstm_params                  (Optional) Weights tensors info used in the peephole, CIFG, projection and layer-normalization optimizations:
        input_to_input_weights          2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as input.
        recurrent_to_input_weights      2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as input.
        cell_to_input_weights           1D weights tensor info with dimensions [num_units]. Can be nullptr. Data type supported: Same as input.
        cell_to_forget_weights          1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        cell_to_output_weights          1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        input_gate_bias                 1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        projection_weights              2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as input.
        projection_bias                 1D weights tensor info with dimensions [output_size]. Data type supported: Same as input.
        input_layer_norm_coefficients   1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        forget_layer_norm_coefficients  1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        cell_layer_norm_coefficients    1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
        output_layer_norm_coefficients  1D weights tensor info with dimensions [num_units]. Data type supported: Same as input.
[in]  activation_info              Contains activation information described in ActivationLayerInfo.
[in]  cell_threshold               The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0f then clipping is disabled.
[in]  projection_threshold         The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip]. If set to 0.0f then clipping is disabled.
Returns
a status

Definition at line 388 of file CLLSTMLayer.cpp.

395 {
397  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
398  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
399  forget_gate_bias, cell_bias, output_gate_bias,
400  output_state_in, cell_state_in,
401  scratch_buffer, output_state_out, cell_state_out, output);
402 
403  // Check data types
406  input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
407  recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
408  forget_gate_bias, cell_bias, output_gate_bias,
409  output_state_in, cell_state_in,
410  scratch_buffer, output_state_out, cell_state_out, output);
411 
412  // Check dimensions
414  ARM_COMPUTE_RETURN_ERROR_ON(input_to_forget_weights->num_dimensions() > 2);
415  ARM_COMPUTE_RETURN_ERROR_ON(input_to_cell_weights->num_dimensions() > 2);
416  ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() > 2);
417  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_forget_weights->num_dimensions() > 2);
418  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_cell_weights->num_dimensions() > 2);
419  ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() > 2);
420  ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() > 1);
422  ARM_COMPUTE_RETURN_ERROR_ON(output_gate_bias->num_dimensions() > 1);
423  ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
424  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() > 2);
425  ARM_COMPUTE_RETURN_ERROR_ON(scratch_buffer->num_dimensions() > 2);
426  ARM_COMPUTE_RETURN_ERROR_ON(output_state_out->num_dimensions() > 2);
427  ARM_COMPUTE_RETURN_ERROR_ON(cell_state_out->num_dimensions() > 2);
429  ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->dimension(0) * 4 != scratch_buffer->dimension(0)
430  && cell_bias->dimension(0) * 3 != scratch_buffer->dimension(0));
431 
432  const unsigned int num_batches = input->dimension(1);
433  const unsigned int num_cells = input_to_output_weights->dimension(1);
434 
435  if(lstm_params.use_layer_norm())
436  {
437  // If CIFG is used, input layer normalization weights tensor is omitted
438  if(lstm_params.has_cifg_opt())
439  {
440  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights() != nullptr);
441  }
442  else
443  {
446  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->dimension(0) != num_batches);
448  }
449 
455  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->dimension(0) != num_batches);
456  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->dimension(0) != num_batches);
457  ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->dimension(0) != num_batches);
458  }
459 
460  // Check peephole optimization
461  if(lstm_params.has_peephole_opt())
462  {
466  }
467 
468  TensorShape units_out_transposed_shape = compute_transposed_shape(*recurrent_to_output_weights);
469  TensorShape num_units_transposed_shape = compute_transposed_shape(*forget_gate_bias);
470  const TensorInfo units_out_transposed_info = TensorInfo(units_out_transposed_shape, 1, input->data_type());
471  const TensorInfo num_units_transposed_info = TensorInfo(num_units_transposed_shape, 1, input->data_type());
472 
473  TensorInfo input_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
474  TensorInfo forget_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
475  TensorInfo output_gate_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
476  TensorInfo cell_state_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
477 
478  // Validate forget gate
479  ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_forget_weights, (lstm_params.use_layer_norm()) ? nullptr : forget_gate_bias, &forget_gate));
480 
481  std::vector<const ITensorInfo *> inputs_vector;
482  inputs_vector.emplace_back(input);
483  inputs_vector.emplace_back(output_state_in);
485  TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type());
486 
487  ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(input, output_state_in, &forget_gate_concat));
488 
489  if(lstm_params.has_peephole_opt())
490  {
492  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
493  }
494  if(lstm_params.use_layer_norm())
495  {
499  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE));
500  }
502 
503  // Validate input gate
504  if(!lstm_params.has_cifg_opt())
505  {
507  lstm_params.recurrent_to_input_weights(),
508  lstm_params.input_gate_bias());
512 
513  std::vector<const ITensorInfo *> lstm_weights;
514  lstm_weights.emplace_back(lstm_params.input_to_input_weights());
515  lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
516  TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
517  TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type());
519 
520  ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), (lstm_params.use_layer_norm()) ? nullptr : lstm_params.input_gate_bias(), &input_gate));
521 
522  if(lstm_params.has_peephole_opt())
523  {
528  }
529 
530  if(lstm_params.use_layer_norm())
531  {
535  }
537  }
538  else
539  {
541  }
542 
543  // Validate cell state
544  ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_cell_weights, (lstm_params.use_layer_norm()) ? nullptr : cell_bias, &cell_state_tmp));
545  ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(output_state_in, &units_out_transposed_info, nullptr, &cell_state_tmp, 1.f, 0.f, GEMMInfo()));
546  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
547  if(lstm_params.use_layer_norm())
548  {
552  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE));
553  }
554  ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, activation_info));
557  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
558  if(cell_threshold != 0.f)
559  {
561  cell_threshold)));
562  }
563 
564  std::vector<const ITensorInfo *> in_out_weights;
565  in_out_weights.emplace_back(input_to_output_weights);
566  in_out_weights.emplace_back(recurrent_to_output_weights);
567  TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
568  TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type());
569  ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(input_to_output_weights, recurrent_to_output_weights, &in_out_gate_concat));
570  // Validate output gate tmp
571  ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_output_weights, (lstm_params.use_layer_norm()) ? nullptr : output_gate_bias, &output_gate_tmp));
572 
573  if(lstm_params.has_peephole_opt())
574  {
577  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, &output_gate_tmp, &output_gate_tmp, ConvertPolicy::SATURATE));
578  }
579  if(lstm_params.use_layer_norm())
580  {
584  ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE));
585  }
587 
588  // Validate output state
589  ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
591  if(lstm_params.has_projection())
592  {
593  ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out));
594  if(projection_threshold != 0.f)
595  {
596  ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(output_state_out, output_state_out,
597  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)));
598  }
599  }
600 
601  // Validate copy kernel
602  ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(&cell_state_tmp, cell_state_out));
603  ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(output_state_out, output));
604 
605  // Validate scratch concatenation
606  std::vector<ITensorInfo *> inputs_vector_info_raw;
607  if(!lstm_params.has_cifg_opt())
608  {
609  inputs_vector_info_raw.push_back(&input_gate);
610  }
611  inputs_vector_info_raw.push_back(&cell_state_tmp);
612  inputs_vector_info_raw.push_back(&forget_gate);
613  inputs_vector_info_raw.push_back(&output_gate_tmp);
614 
615  ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
616  return Status{};
617 }

References ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES, ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR, ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::misc::shape_calculator::calculate_concatenate_shape(), LSTMParams< T >::cell_layer_norm_weights(), LSTMParams< T >::cell_to_forget_weights(), LSTMParams< T >::cell_to_input_weights(), LSTMParams< T >::cell_to_output_weights(), arm_compute::misc::shape_calculator::compute_transposed_shape(), ITensorInfo::data_type(), ITensorInfo::dimension(), Window::DimX, arm_compute::F16, arm_compute::F32, LSTMParams< T >::forget_layer_norm_weights(), LSTMParams< T >::has_cifg_opt(), LSTMParams< T >::has_peephole_opt(), LSTMParams< T >::has_projection(), LSTMParams< T >::input_gate_bias(), LSTMParams< T >::input_layer_norm_weights(), LSTMParams< T >::input_to_input_weights(), ActivationLayerInfo::LOGISTIC, ActivationLayerInfo::LU_BOUNDED_RELU, ITensorInfo::num_dimensions(), LSTMParams< T >::output_layer_norm_weights(), LSTMParams< T >::projection_bias(), LSTMParams< T >::projection_weights(), LSTMParams< T >::recurrent_to_input_weights(), arm_compute::SATURATE, arm_compute::SUB, arm_compute::TO_NEAREST_EVEN, LSTMParams< T >::use_layer_norm(), CLMeanStdDevNormalizationLayer::validate(), CLArithmeticAddition::validate(), CLCopyKernel::validate(), CLWidthConcatenate2TensorsKernel::validate(), CLActivationLayerKernel::validate(), CLPixelWiseMultiplicationKernel::validate(), CLConcatenateLayer::validate(), CLGEMM::validate(), CLFullyConnectedLayer::validate(), and CLSaturatedArithmeticOperationKernel::validate().

Referenced by CLLSTMLayer::configure().


The documentation for this class was generated from the following files: