// Include guard for this header, followed by forward declarations of classes
// that the header refers to by name.
24 #ifndef ARM_COMPUTE_CLQLSTMLAYER_H 25 #define ARM_COMPUTE_CLQLSTMLAYER_H 41 class CLCompileContext;
43 class CLGEMMLowpMatrixAReductionKernel;
44 class CLQLSTMLayerNormalizationKernel;
// Default constructor.
// memory_manager: shared pointer to an IMemoryManager; defaults to nullptr
// when the caller omits it. How the manager is used is not visible in this
// listing.
64 CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager =
nullptr);
168 const ICLTensor *recurrent_to_forget_weights,
const ICLTensor *recurrent_to_cell_weights,
const ICLTensor *recurrent_to_output_weights,
229 enum class LayerNormGate : uint8_t
// Number of layer-normalization gates, obtained by casting
// LayerNormGate::Count to uint8_t. It is the fixed size of the per-gate
// std::array members declared in this class.
237 static constexpr uint8_t _layer_norm_count =
static_cast<uint8_t
>(LayerNormGate::Count);
// Compile-time dimension index, fixed at 0.
// NOTE(review): presumably the tensor dimension that holds the output size of
// the output state - confirm against the implementation file.
238 static constexpr uint32_t _out_state_output_size_dimension_idx = 0;
257 CLTensor *outstage_res,
float gemmlowp_scale,
263 class TensorCopyKernel
265 static constexpr uint32_t max_dimension_supported = 2;
// Value-initialized CLTranspose functions, one per recurrent weight matrix
// (forget, cell, output, input).
// NOTE(review): presumably these produce the matching
// _recurrent_to_*_weights_transposed tensors declared further down - confirm.
296 CLTranspose _transpose_recurrent_to_forget_weights{};
297 CLTranspose _transpose_recurrent_to_cell_weights{};
298 CLTranspose _transpose_recurrent_to_output_weights{};
299 CLTranspose _transpose_recurrent_to_input_weights{};
// Matrix-A reduction kernels held through std::unique_ptr: one for the
// input-to-gate and one for the recurrent-to-gate weights of each of the four
// gates (input, forget, cell, output), plus one for the projection.
// NOTE(review): where these are created and what consumes their results is not
// visible in this listing.
301 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
302 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
303 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
304 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
305 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
306 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
307 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
308 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
309 std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
// Layer-normalization kernels, one slot per gate (_layer_norm_count entries),
// each held through std::unique_ptr. Slots are addressed with getGateIndex().
356 std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
// Value-initialized TensorCopyKernel helpers (the class is declared earlier in
// this header).
// NOTE(review): judging by the names, three serve the projection path and one
// copies the hidden state to the output - confirm in the implementation.
359 TensorCopyKernel _projection_bias_copy{};
360 TensorCopyKernel _projection_output_to_accumulate_copy{};
361 TensorCopyKernel _projection_accumulate_to_output_copy{};
362 TensorCopyKernel _hidden_to_output_copy{};
// Pointers to const ICLTensor objects for the weight and bias tensors, all
// initialized to nullptr.
// NOTE(review): presumably these record caller-provided tensors during
// configure() and are not owned by this class - confirm.
369 const ICLTensor *_recurrent_to_input_weights{
nullptr };
370 const ICLTensor *_projection_bias{
nullptr };
371 const ICLTensor *_input_to_forget_weights{
nullptr };
372 const ICLTensor *_input_to_cell_weights{
nullptr };
373 const ICLTensor *_input_to_output_weights{
nullptr };
374 const ICLTensor *_recurrent_to_forget_weights{
nullptr };
375 const ICLTensor *_recurrent_to_cell_weights{
nullptr };
376 const ICLTensor *_recurrent_to_output_weights{
nullptr };
377 const ICLTensor *_projection_weights{
nullptr };
// Per-gate layer-normalization weight and bias tensor pointers. The arrays are
// value-initialized, so every entry starts as a null pointer. They are read and
// written through the get_/set_layer_norm_weight/bias accessors.
378 std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} };
379 std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} };
// Converts a LayerNormGate enumerator to its LayerNormIndexType value with a
// static_cast. The result is used to index the per-gate arrays of this class.
382 inline LayerNormIndexType getGateIndex(LayerNormGate g)
384 return static_cast<LayerNormIndexType
>(g);
// Stores the layer-normalization weight tensor pointer for gate g.
// t: pointer to store (the pointer is kept, not the tensor contents).
// g: gate whose slot in _layer_norm_weights is overwritten.
387 inline void set_layer_norm_weight(
const ICLTensor *
t, LayerNormGate g)
389 _layer_norm_weights[getGateIndex(g)] =
t;
// Stores the layer-normalization bias tensor pointer for gate g.
// t: pointer to store (the pointer is kept, not the tensor contents).
// g: gate whose slot in _layer_norm_bias is overwritten.
392 inline void set_layer_norm_bias(
const ICLTensor *t, LayerNormGate g)
394 _layer_norm_bias[getGateIndex(g)] =
t;
// Returns the layer-normalization weight tensor pointer stored for gate g.
// The slot holds a null pointer until set_layer_norm_weight() has been called
// for that gate.
397 inline const ICLTensor *get_layer_norm_weight(LayerNormGate g)
399 return _layer_norm_weights[getGateIndex(g)];
// Returns the layer-normalization bias tensor pointer stored for gate g.
// The slot holds a null pointer until set_layer_norm_bias() has been called
// for that gate.
402 inline const ICLTensor *get_layer_norm_bias(LayerNormGate g)
404 return _layer_norm_bias[getGateIndex(g)];
409 return *_layer_norms[getGateIndex(g)];
// Declaration only; the definition is not part of this listing.
// g: gate to configure layer normalization for.
// in: input tensor for that gate's normalization.
// NOTE(review): presumably sets up the kernel in _layer_norms[getGateIndex(g)]
// - confirm in the implementation file.
412 inline void configure_layer_norm(LayerNormGate g,
const ICLTensor *in);
// CLTensor members named after the transposed input-to-gate, recurrent-to-gate
// and projection weight matrices. Each is brace-initialized with nullptr; the
// meaning of that argument is defined by CLTensor's constructor, which is not
// shown in this listing.
416 CLTensor _input_to_forget_weights_transposed{
nullptr };
417 CLTensor _input_to_cell_weights_transposed{
nullptr };
418 CLTensor _input_to_output_weights_transposed{
nullptr };
419 CLTensor _input_to_input_weights_transposed{
nullptr };
420 CLTensor _recurrent_to_forget_weights_transposed{
nullptr };
421 CLTensor _recurrent_to_cell_weights_transposed{
nullptr };
422 CLTensor _recurrent_to_output_weights_transposed{
nullptr };
423 CLTensor _recurrent_to_input_weights_transposed{
nullptr };
424 CLTensor _projection_weights_transposed{
nullptr };
// CLTensor members named as "effective bias" tensors: one for the input-to-gate
// and one for the recurrent-to-gate path of each gate, plus the projection
// reduction result and the projection effective bias. Each is brace-initialized
// with nullptr.
// NOTE(review): presumably filled from the *_reduction kernels declared earlier
// - confirm in the implementation file.
425 CLTensor _input_to_input_eff_bias{
nullptr };
426 CLTensor _recurrent_to_input_eff_bias{
nullptr };
427 CLTensor _input_to_forget_eff_bias{
nullptr };
428 CLTensor _recurrent_to_forget_eff_bias{
nullptr };
429 CLTensor _input_to_cell_eff_bias{
nullptr };
430 CLTensor _recurrent_to_cell_eff_bias{
nullptr };
431 CLTensor _input_to_output_eff_bias{
nullptr };
432 CLTensor _recurrent_to_output_eff_bias{
nullptr };
433 CLTensor _projection_reduction_res{
nullptr };
434 CLTensor _projection_eff_bias{
nullptr };
// Intermediate result tensors, each brace-initialized with nullptr. The section
// comments below group them by the gate or stage their names refer to. The
// "_mm_", "_mul_" and "_outstage_" name parts presumably denote matrix-multiply,
// element-wise multiply and output-stage results - confirm in the
// implementation file.

// Tensors whose names refer to the forget gate.
435 CLTensor _mm_input_to_forget_res{
nullptr };
436 CLTensor _mm_recurrent_to_forget_res{
nullptr };
437 CLTensor _mul_cell_to_forget_res{
nullptr };
438 CLTensor _input_to_forget_outstage_res{
nullptr };
439 CLTensor _cell_to_forget_outstage_res{
nullptr };
440 CLTensor _recurrent_to_forget_outstage_res{
nullptr };
// Tensors whose names refer to the cell gate.
442 CLTensor _mm_input_to_cell_res{
nullptr };
443 CLTensor _input_to_cell_outstage_res{
nullptr };
444 CLTensor _mm_recurrent_to_cell_res{
nullptr };
445 CLTensor _recurrent_to_cell_outstage_res{
nullptr };
// Tensors whose names refer to the input gate.
447 CLTensor _mul_input_cell_res{
nullptr };
448 CLTensor _mm_input_to_input_res{
nullptr };
449 CLTensor _input_to_input_outstage_res{
nullptr };
450 CLTensor _mm_recurrent_to_input_res{
nullptr };
451 CLTensor _mul_cell_to_input_res{
nullptr };
452 CLTensor _cell_to_input_outstage_res{
nullptr };
453 CLTensor _recurrent_to_input_outstage_res{
nullptr };
// Tensors whose names refer to the output gate.
455 CLTensor _mm_input_to_output_res{
nullptr };
456 CLTensor _input_to_output_outstage_res{
nullptr };
457 CLTensor _mm_recurrent_to_output_res{
nullptr };
458 CLTensor _mul_cell_to_output_res{
nullptr };
459 CLTensor _cell_to_output_outstage_res{
nullptr };
460 CLTensor _recurrent_to_output_outstage_res{
nullptr };
// Tensor whose name refers to the hidden state.
462 CLTensor _hidden_mul_res{
nullptr };
// Tensors whose names refer to the projection stage.
464 CLTensor _mm_projection_res{
nullptr };
465 CLTensor _projection_outstage_res{
nullptr };
466 CLTensor _projection_out_res{
nullptr };
467 CLTensor _projection_accumulate_res{
nullptr };
// One CLTensor per layer-normalization gate (_layer_norm_count entries),
// value-initialized. Accessed through get_layer_norm_output().
469 std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} };
// Returns a mutable reference to the layer-normalization output tensor stored
// for gate g, i.e. the _layer_norm_output entry at getGateIndex(g).
471 inline CLTensor &get_layer_norm_output(LayerNormGate g)
473 return _layer_norm_output[getGateIndex(g)];
// Boolean state flags, all initialized to false.
// NOTE(review): presumably _is_prepared is set by prepare() and the _has_*
// flags are set in configure() from the supplied LSTM parameters; neither
// assignment is visible in this listing - confirm in the implementation file.
476 bool _is_prepared{
false };
477 bool _has_cifg{
false };
478 bool _has_cell_clipping{
false };
479 bool _has_projection{
false };
480 bool _has_projection_clipping{
false };
481 bool _has_peephole{
false };
482 bool _has_layer_norm{
false };
483 bool _projection_tensor_copy_required{
false };
Base class for all functions.
Basic function to run opencl::kernels::ClCopyKernel.
Basic function to execute GEMMLowpQuantizeDown kernels on CL.
void run() override
Run the kernels contained in the function.
Basic function to run opencl::kernels::ClSaturatedArithmeticKernel for addition.
auto recurrent_to_forget_weights
Store the tensor's metadata.
Basic function to run opencl::kernels::ClActivationKernel.
CLQLSTMLayer & operator=(const CLQLSTMLayer &)=delete
Prevent instances of this class from being copied (as this class contains pointers).
CLQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
decltype(strategy::transforms) typedef type
SimpleTensor< float > src
void configure(const ICLTensor *input, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, ICLTensor *cell_state_in, ICLTensor *output_state_in, ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output, const LSTMParams< ICLTensor > &lstm_params)
Initialize function's tensors.
Copyright (c) 2017-2021 Arm Limited.
auto input_to_cell_weights
auto recurrent_to_output_weights
void prepare() override
Prepare the function for executing.
auto input_to_output_weights
GEMMLowp output stage info.
auto recurrent_to_cell_weights
Interface for the kernel to do layer normalization.
Basic function to run CLPixelWiseMultiplicationKernel.
Interface for OpenCL tensor.
Basic function to transpose a matrix on OpenCL.
Basic function to run CLQLSTMLayer.
Store the tensor's metadata.
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
~CLQLSTMLayer()
Default destructor.
Basic function to run opencl::kernels::ClSaturatedArithmeticKernel for subtraction.
auto input_to_forget_weights
Describe a multidimensional execution window.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of CLQLSTMLayer.
Basic implementation of the OpenCL tensor interface.