52 : _memory_group(
std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
53 _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(),
54 _transpose_cell_state(
std::make_unique<
CLTransposeKernel>()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(),
55 _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(),
56 _fully_connected_output_state(), _projection_clip(), _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(),
57 _concat_weights_input_gate(), _concat_weights_output(), _ones_fill(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(),
58 _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(),
59 _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(),
60 _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(),
61 _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(),
62 _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false),
63 _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
77 configure(
CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
78 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out, output, lstm_params, activation_info,
79 cell_threshold, projection_threshold);
91 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
92 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
93 forget_gate_bias, cell_bias, output_gate_bias,
94 output_state_in, cell_state_in,
95 scratch_buffer, output_state_out, cell_state_out, output);
105 input_to_cell_weights->
info(), input_to_output_weights->
info(),
106 recurrent_to_forget_weights->
info(), recurrent_to_cell_weights->
info(), recurrent_to_output_weights->
info(),
107 forget_gate_bias->
info(), cell_bias->
info(), output_gate_bias->
info(),
108 output_state_in->
info(), cell_state_in->
info(),
109 scratch_buffer->
info(), output_state_out->
info(), cell_state_out->
info(), output->
info(),
110 lstm_params_info, activation_info, cell_threshold, projection_threshold));
121 std::vector<const ICLTensor *> inputs_vector;
122 inputs_vector.emplace_back(input);
123 inputs_vector.emplace_back(output_state_in);
127 _memory_group.
manage(&_forget_gate_out2);
128 _concat_inputs_forget_gate.
configure(compile_context, inputs_vector, &_forget_gate_out2,
Window::DimX);
130 std::vector<const ICLTensor *> weights_vector;
132 weights_vector.emplace_back(input_to_forget_weights);
133 weights_vector.emplace_back(recurrent_to_forget_weights);
137 _concat_weights_forget_gate.
configure(compile_context, weights_vector, &_forget_gate_out6,
Window::DimX);
139 _memory_group.
manage(&_forget_gate_out5);
140 _fully_connected_forget_gate.
configure(compile_context, &_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ?
nullptr : forget_gate_bias, &_forget_gate_out5);
141 _memory_group.
manage(&_forget_gate_out1);
142 _memory_group.
manage(&_forget_gate_out3);
145 CLTensor *forget_gate_out = &_forget_gate_out5;
150 _run_peephole_opt =
true;
151 _memory_group.
manage(&_forget_gate_out4);
153 _accum_forget_gate1.
configure(compile_context, &_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
156 forget_gate_out = &_forget_gate_out3;
162 if(_is_layer_norm_lstm)
166 _memory_group.
manage(&_forget_layer_norm_out1);
167 _memory_group.
manage(&_forget_layer_norm_out2);
168 _mean_std_norm_forget_gate.
configure(compile_context, forget_gate_out);
173 _accum_forget_gate_bias.
configure(compile_context, &_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
175 forget_gate_out = &_forget_layer_norm_out2;
185 CLTensor *input_gate_out = &_input_gate_out1;
188 _memory_group.
manage(&_input_gate_out1);
193 _run_cifg_opt =
true;
200 std::vector<const ICLTensor *> lstm_weights;
206 _concat_weights_input_gate.
configure(compile_context, lstm_weights, &_input_gate_out2,
Window::DimX);
208 _memory_group.
manage(&_input_gate_out1);
210 _memory_group.
manage(&_input_gate_out3);
211 _fully_connected_input_gate.
configure(compile_context, &_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ?
nullptr : lstm_params.
input_gate_bias(), &_input_gate_out3);
214 input_gate_out = &_input_gate_out3;
215 if(_run_peephole_opt)
217 _memory_group.
manage(&_input_gate_out4);
219 _accum_input_gate1.
configure(compile_context, &_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
222 input_gate_out = &_input_gate_out1;
229 if(_is_layer_norm_lstm)
233 _memory_group.
manage(&_input_layer_norm_out1);
234 _memory_group.
manage(&_input_layer_norm_out2);
235 _mean_std_norm_input_gate.
configure(compile_context, input_gate_out);
242 input_gate_out = &_input_layer_norm_out2;
256 _memory_group.
manage(&_cell_state_out1);
257 _fully_connected_cell_state.
configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ?
nullptr : cell_bias, &_cell_state_out1);
258 _memory_group.
manage(&_cell_state_out2);
259 _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
260 _memory_group.
manage(&_cell_state_out3);
261 _gemm_cell_state1.
configure(compile_context, output_state_in, &_cell_state_out2,
nullptr, &_cell_state_out3, 1.f, 0.f);
263 _memory_group.
manage(&_cell_state_out4);
265 CLTensor *cell_state_out_ptr = &_cell_state_out4;
266 if(_is_layer_norm_lstm)
270 _memory_group.
manage(&_cell_layer_norm_out1);
271 _memory_group.
manage(&_cell_layer_norm_out2);
272 _mean_std_norm_cell_gate.
configure(compile_context, cell_state_out_ptr);
277 _accum_cell_gate_bias.
configure(compile_context, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
279 cell_state_out_ptr = &_cell_layer_norm_out2;
281 _activation_cell_state.
configure(compile_context, cell_state_out_ptr,
nullptr, activation_info);
282 _memory_group.
manage(&_cell_state_out5);
290 if(cell_threshold != 0.f)
292 _perform_cell_clipping =
true;
302 std::vector<const ICLTensor *> in_out_weights;
303 in_out_weights.emplace_back(input_to_output_weights);
304 in_out_weights.emplace_back(recurrent_to_output_weights);
310 _memory_group.
manage(&_output1);
311 _memory_group.
manage(&_output4);
313 _fully_connected_output.
configure(compile_context, &_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ?
nullptr : output_gate_bias, &_output4);
318 CLTensor *output_gate_out = &_output4;
323 _memory_group.
manage(&_output3);
325 _accum_output1.
configure(compile_context, &_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
327 output_gate_out = &_output1;
336 if(_is_layer_norm_lstm)
340 _memory_group.
manage(&_output_layer_norm_out1);
341 _memory_group.
manage(&_output_layer_norm_out2);
342 _mean_std_norm_output_gate.
configure(compile_context, output_gate_out);
347 _accum_output_gate_bias.
configure(compile_context, &_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
349 output_gate_out = &_output_layer_norm_out2;
366 _memory_group.
manage(&_cell_state_activation);
367 _activation_output_state.
configure(compile_context, &_cell_state_out1, &_cell_state_activation, activation_info);
373 _has_projection_weights =
true;
377 if(projection_threshold != 0.f)
379 _perform_projection_clipping =
true;
385 _copy_cell_state.
configure(compile_context, &_cell_state_out1, cell_state_out);
386 _copy_output.
configure(compile_context, output_state_out, output);
389 std::vector<const ICLTensor *> scratch_inputs;
392 scratch_inputs.emplace_back(input_gate_out);
394 scratch_inputs.emplace_back(&_cell_state_out1);
395 scratch_inputs.emplace_back(forget_gate_out);
396 scratch_inputs.emplace_back(output_gate_out);
413 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
414 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
415 forget_gate_bias, cell_bias, output_gate_bias,
416 output_state_in, cell_state_in,
417 scratch_buffer, output_state_out, cell_state_out, output);
422 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
423 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
424 forget_gate_bias, cell_bias, output_gate_bias,
425 output_state_in, cell_state_in,
426 scratch_buffer, output_state_out, cell_state_out, output);
448 const unsigned int num_batches = input->
dimension(1);
449 const unsigned int num_cells = input_to_output_weights->
dimension(1);
497 std::vector<const ITensorInfo *> inputs_vector;
498 inputs_vector.emplace_back(input);
499 inputs_vector.emplace_back(output_state_in);
529 std::vector<const ITensorInfo *> lstm_weights;
574 if(cell_threshold != 0.f)
580 std::vector<const ITensorInfo *> in_out_weights;
581 in_out_weights.emplace_back(input_to_output_weights);
582 in_out_weights.emplace_back(recurrent_to_output_weights);
610 if(projection_threshold != 0.f)
622 std::vector<const ITensorInfo *> inputs_vector_info_raw;
625 inputs_vector_info_raw.push_back(&input_gate);
627 inputs_vector_info_raw.push_back(&cell_state_tmp);
628 inputs_vector_info_raw.push_back(&forget_gate);
629 inputs_vector_info_raw.push_back(&output_gate_tmp);
641 _concat_inputs_forget_gate.
run();
643 _fully_connected_forget_gate.
run();
645 if(_run_peephole_opt)
647 _pixelwise_mul_forget_gate.
run();
648 _accum_forget_gate1.
run();
650 if(_is_layer_norm_lstm)
652 _mean_std_norm_forget_gate.
run();
653 _pixelwise_mul_forget_gate_coeff.
run();
654 _accum_forget_gate_bias.
run();
656 _activation_forget_gate.
run();
661 _subtract_input_gate.
run();
665 _fully_connected_input_gate.
run();
667 if(_run_peephole_opt)
669 _pixelwise_mul_input_gate.
run();
670 _accum_input_gate1.
run();
673 if(_is_layer_norm_lstm)
675 _mean_std_norm_input_gate.
run();
676 _pixelwise_mul_input_gate_coeff.
run();
677 _accum_input_gate_bias.
run();
679 _activation_input_gate.
run();
682 _fully_connected_cell_state.
run();
684 _gemm_cell_state1.
run();
685 _accum_cell_state1.
run();
686 if(_is_layer_norm_lstm)
688 _mean_std_norm_cell_gate.
run();
689 _pixelwise_mul_cell_gate_coeff.
run();
690 _accum_cell_gate_bias.
run();
692 _activation_cell_state.
run();
693 _pixelwise_mul_cell_state1.
run();
694 _pixelwise_mul_cell_state2.
run();
695 _accum_cell_state2.
run();
697 if(_perform_cell_clipping)
702 _fully_connected_output.
run();
704 if(_run_peephole_opt)
706 _pixelwise_mul_output_state1.
run();
707 _accum_output1.
run();
709 if(_is_layer_norm_lstm)
711 _mean_std_norm_output_gate.
run();
712 _pixelwise_mul_output_gate_coeff.
run();
713 _accum_output_gate_bias.
run();
715 _activation_output.
run();
717 _activation_output_state.
run();
718 _pixelwise_mul_output_state2.
run();
720 if(_has_projection_weights)
722 _fully_connected_output_state.
run();
723 if(_perform_projection_clipping)
725 _projection_clip.
run();
729 _copy_cell_state.
run();
732 _concat_scratch_buffer.
run();
739 _concat_weights_forget_gate.
run();
742 _concat_weights_input_gate.
run();
744 _concat_weights_output.
run();
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Class describing the value of a pixel for any image format.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CLActivationLayer.
const T * projection_weights() const
const T * input_to_input_weights() const
void configure(const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, const ICLTensor *output_state_in, ICLTensor *cell_state_in, ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output, const LSTMParams< ICLTensor > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Initialize function's tensors.
bool use_layer_norm() const
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *output=nullptr, float epsilon=1e-8f)
Static function to check if given info will lead to a valid configuration of CLMeanStdDevNormalizationLayer.
void run() override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
bool has_peephole_opt() const
static CLScheduler & get()
Access the scheduler singleton.
T * forget_layer_norm_weights() const
void build_lstm_params_tensor_info(const LSTMParams< T > &lstm_params, LSTMParams< ITensorInfo > *lstm_params_info)
Build LSTMParams<ITensorInfo> object by extracting the metadata from each tensor. ...
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
bool has_cifg_opt() const
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
auto recurrent_to_forget_weights
Store the tensor's metadata.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void configure(ICLTensor *input, ICLTensor *output=nullptr, float epsilon=1e-8f)
Initialise the function's input and outputs.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
CLLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
T * cell_to_input_weights() const
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
void configure(std::vector< const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
Initialise the kernel's inputs vector and output.
void run() override
Run the kernels contained in the function.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
~CLLSTMLayer()
Default destructor.
1 channel, 1 F16 per channel
auto input_to_cell_weights
TensorShape compute_transposed_shape(const ITensorInfo &input)
Calculate the transposed shape of a tensor.
DataType data_type() const override
Data type used for each element of the tensor.
auto recurrent_to_output_weights
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
const T * recurrent_to_input_weights() const
auto input_to_output_weights
const T * projection_bias() const
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
void run() override
Run the kernels contained in the function.
T * output_layer_norm_weights() const
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void run() override final
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void configure(ICLTensor *input, ICLTensor *output, Window *dst_window=nullptr)
Initialise the function's source and destination.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayer.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
auto recurrent_to_cell_weights
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in, const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold=0.f, float projection_threshold=0.f)
Static function to check if given info will lead to a valid configuration of CLLSTMLayer.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClSatur...
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window=nullptr)
Initialize the kernel's tensor and filling value.
T * cell_to_forget_weights() const
static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *dst_window=nullptr)
Static function to check if given info will lead to a valid configuration of CLCopy.
Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ )
bool has_projection() const
static Status validate(const std::vector< const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
Static function to check if given info will lead to a valid configuration of CLConcatenateLayer.
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's inputs, output and conversion policy.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
T * cell_to_output_weights() const
Rounds to nearest value; half rounds to nearest even.
Memory group resources scope handling class.
Interface for OpenCL tensor.
T * input_layer_norm_weights() const
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
const T * input_gate_bias() const
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
T * cell_layer_norm_weights() const
OpenCL kernel which transposes the elements of a matrix.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
void run() override
Run the kernels contained in the function.
TensorShape calculate_concatenate_shape(const std::vector< T *> &input, size_t axis)
Calculate the concatenate output shape of the concatenate operation along a single axis...
void run() override
Run the kernels contained in the function.
auto input_to_forget_weights
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLPixelWiseMultiplication.
void prepare() override
Prepare the function for executing.
Basic implementation of the OpenCL tensor interface.