24 #ifndef ARM_COMPUTE_NEQLSTMLAYER_H 25 #define ARM_COMPUTE_NEQLSTMLAYER_H 45 class NEQLSTMLayerNormalizationKernel;
46 class NEGEMMLowpMatrixAReductionKernel;
/** Default constructor.
 *
 * @param[in] memory_manager (Optional) Memory manager; defaults to nullptr (no managed memory).
 */
NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
181 enum class LayerNormGate : uint8_t
189 static constexpr uint8_t _layer_norm_count =
static_cast<uint8_t
>(LayerNormGate::Count);
190 static constexpr uint32_t _out_state_output_size_dimension_idx = 0;
208 Tensor *outstage_res,
float gemmlowp_scale,
214 class TensorCopyKernel
216 static constexpr uint32_t max_dimension_supported = 2;
// Transpose functions that pre-arrange the recurrent weight matrices.
NETranspose _transpose_recurrent_to_forget_weights;
NETranspose _transpose_recurrent_to_output_weights;
// Matrix-A reduction kernels, one per weight tensor plus projection.
// NOTE(review): presumably row-sum reductions feeding the GEMMLowp offset
// handling — inferred from the kernel name; confirm in the implementation.
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction;
// Internal tensor-copy helpers for the projection and hidden paths.
TensorCopyKernel _projection_bias_copy;
TensorCopyKernel _projection_output_to_accumulate_copy;
TensorCopyKernel _projection_accumulate_to_output_copy;
TensorCopyKernel _hidden_to_output_copy;
// One layer-normalization kernel per gate, indexed via getGateIndex().
std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
// Non-owning pointers to caller-provided weight/bias tensors.
// NOTE(review): presumably cached at configure() time for use by prepare() —
// confirm against the implementation file.
const ITensor *_input_to_input_weights{nullptr};
const ITensor *_recurrent_to_input_weights{nullptr};
const ITensor *_projection_bias{nullptr};
const ITensor *_input_to_forget_weights{nullptr};
const ITensor *_input_to_cell_weights{nullptr};
const ITensor *_input_to_output_weights{nullptr};
const ITensor *_recurrent_to_forget_weights{nullptr};
const ITensor *_recurrent_to_cell_weights{nullptr};
const ITensor *_recurrent_to_output_weights{nullptr};
const ITensor *_projection_weights{nullptr};
// Per-gate layer-normalization weights/biases, indexed via getGateIndex().
std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{};
std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{};
/** Convert a LayerNormGate enumerator into the index type used by the
 *  per-gate _layer_norm_* arrays. */
inline LayerNormIndexType getGateIndex(LayerNormGate g)
{
    return static_cast<LayerNormIndexType>(g);
}
342 inline void set_layer_norm_weight(
const ITensor *
t, LayerNormGate g)
344 _layer_norm_weights[getGateIndex(g)] =
t;
347 inline void set_layer_norm_bias(
const ITensor *t, LayerNormGate g)
349 _layer_norm_bias[getGateIndex(g)] =
t;
352 inline const ITensor *get_layer_norm_weight(LayerNormGate g)
354 return _layer_norm_weights[getGateIndex(g)];
357 inline const ITensor *get_layer_norm_bias(LayerNormGate g)
359 return _layer_norm_bias[getGateIndex(g)];
/** Access the layer-normalization kernel slot for gate @p g (may hold nullptr
 *  until the kernel is configured). */
inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g)
{
    return _layer_norms[getGateIndex(g)];
}
/** Configure the layer-normalization kernel for gate @p g on input @p in.
 *  Defined in the implementation file. */
void configure_layer_norm(LayerNormGate g, const ITensor *in);
// Transposed copies of the weight matrices.
Tensor _input_to_forget_weights_transposed{nullptr};
Tensor _input_to_cell_weights_transposed{nullptr};
Tensor _input_to_output_weights_transposed{nullptr};
Tensor _input_to_input_weights_transposed{nullptr};
Tensor _recurrent_to_forget_weights_transposed{nullptr};
Tensor _recurrent_to_cell_weights_transposed{nullptr};
Tensor _recurrent_to_output_weights_transposed{nullptr};
Tensor _recurrent_to_input_weights_transposed{nullptr};
Tensor _projection_weights_transposed{nullptr};
// Effective bias tensors. NOTE(review): presumably the biases folded with the
// reduction-kernel results above — confirm in the implementation.
Tensor _input_to_input_eff_bias{nullptr};
Tensor _recurrent_to_input_eff_bias{nullptr};
Tensor _input_to_forget_eff_bias{nullptr};
Tensor _recurrent_to_forget_eff_bias{nullptr};
Tensor _input_to_cell_eff_bias{nullptr};
Tensor _recurrent_to_cell_eff_bias{nullptr};
Tensor _input_to_output_eff_bias{nullptr};
Tensor _recurrent_to_output_eff_bias{nullptr};
Tensor _projection_reduction_res{nullptr};
Tensor _projection_eff_bias{nullptr};
// Forget-gate intermediates (matmul, peephole mul, output-stage results).
Tensor _mm_input_to_forget_res{nullptr};
Tensor _mm_recurrent_to_forget_res{nullptr};
Tensor _mul_cell_to_forget_res{nullptr};
Tensor _input_to_forget_outstage_res{nullptr};
Tensor _cell_to_forget_outstage_res{nullptr};
Tensor _recurrent_to_forget_outstage_res{nullptr};
Tensor _forget_gate{nullptr};
// Cell-gate intermediates.
Tensor _mm_input_to_cell_res{nullptr};
Tensor _input_to_cell_outstage_res{nullptr};
Tensor _mm_recurrent_to_cell_res{nullptr};
Tensor _recurrent_to_cell_outstage_res{nullptr};
Tensor _cell_gate{nullptr};
Tensor _mul_input_cell_res{nullptr};
// Input-gate intermediates.
Tensor _mm_input_to_input_res{nullptr};
Tensor _input_to_input_outstage_res{nullptr};
Tensor _mm_recurrent_to_input_res{nullptr};
Tensor _mul_cell_to_input_res{nullptr};
Tensor _cell_to_input_outstage_res{nullptr};
Tensor _recurrent_to_input_outstage_res{nullptr};
Tensor _input_gate{nullptr};
// Output-gate intermediates.
Tensor _mm_input_to_output_res{nullptr};
Tensor _input_to_output_outstage_res{nullptr};
Tensor _mm_recurrent_to_output_res{nullptr};
Tensor _mul_cell_to_output_res{nullptr};
Tensor _cell_to_output_outstage_res{nullptr};
Tensor _recurrent_to_output_outstage_res{nullptr};
Tensor _output_gate{nullptr};
// Hidden-state intermediates.
Tensor _hidden_mul_res{nullptr};
Tensor _hidden_gate{nullptr};
// Projection intermediates.
Tensor _mm_projection_res{nullptr};
Tensor _projection_outstage_res{nullptr};
Tensor _projection_out_res{nullptr};
Tensor _projection_accumulate_res{nullptr};
// Layer-normalization results, one per gate (see get_layer_norm_output()).
std::array<Tensor, _layer_norm_count> _layer_norm_output{};
426 inline Tensor &get_layer_norm_output(LayerNormGate g)
428 return _layer_norm_output[getGateIndex(g)];
// State/configuration flags.
// NOTE(review): presumably set during configure()/prepare() — confirm in the
// implementation file; semantics below are inferred from the names.
bool _is_prepared{false};                     // set once prepare() has run — TODO confirm
bool _has_cifg{false};                        // CIFG (coupled input-forget gate) variant — TODO confirm
bool _has_cell_clipping{false};               // cell-state clipping enabled — TODO confirm
bool _has_projection{false};                  // projection layer present — TODO confirm
bool _has_projection_clipping{false};         // projection clipping enabled — TODO confirm
bool _has_peephole{false};                    // peephole connections present — TODO confirm
bool _has_layer_norm{false};                  // per-gate layer normalization enabled — TODO confirm
bool _projection_tensor_copy_required{false}; // whether the TensorCopyKernel projection copies run — TODO confirm
NEQLSTMLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
Base class for all functions.
Basic function to run cpu::kernels::CpuAddKernel.
auto recurrent_to_forget_weights
Store the tensor's metadata.
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
Basic function to run cpu::kernels::CpuSubKernel.
static Status validate(const ITensorInfo *input, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output, const LSTMParams< ITensorInfo > &lstm_params)
Static function to check if given info will lead to a valid configuration of NEQLSTMLayer.
auto input_to_cell_weights
auto recurrent_to_output_weights
auto input_to_output_weights
~NEQLSTMLayer()
Default destructor.
GEMMLowp output stage info.
Basic implementation of the tensor interface.
Basic function to transpose a matrix on Neon.
void configure(const ITensor *input, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, const ITensor *cell_state_in, ITensor *output_state_in, ITensor *cell_state_out, ITensor *output_state_out, ITensor *output, const LSTMParams< ITensor > &lstm_params)
Initialize function's tensors.
auto recurrent_to_cell_weights
Basic function to run NEQLSTMLayer.
Basic function to run cpu::kernels::CpuActivationKernel.
Basic function to run NEPixelWiseMultiplicationKernel.
void run() override
Run the kernels contained in the function.
Basic function to execute GEMMLowpQuantizeDown kernels on Neon.
Store the tensor's metadata.
NEQLSTMLayer & operator=(const NEQLSTMLayer &)=delete
Prevent instances of this class from being copied (as this class contains pointers).
void prepare() override
Prepare the function for executing.
auto input_to_forget_weights
Basic function to execute GEMMLowpMatrixMultiplyCore on Neon.
Describe a multidimensional execution window.
Basic function to run cpu::kernels::CpuCopyKernel.