armnn/latest/_neon_unidirectional_sequence_lstm_float_workload_8cpp_source.html

 //

 // Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.

 // SPDX-License-Identifier: MIT

 //


 #include "NeonUnidirectionalSequenceLstmFloatWorkload.hpp"

 #include "NeonWorkloadUtils.hpp"


 #include <aclCommon/ArmComputeUtils.hpp>

 #include <aclCommon/ArmComputeTensorUtils.hpp>


 #include <armnn/utility/NumericCast.hpp>

 #include <armnnUtils/Permute.hpp>

 #include <neon/test/NeonWorkloadFactoryHelper.hpp>

 #include <backendsCommon/WorkloadUtils.hpp>


 #include "neon/NeonTensorHandle.hpp"


 namespace
 {

 // Mirrors an axis index within a tensor of numDimensions dimensions, i.e. returns
 // (numDimensions - 1) - axis. The callers in this file use the result as the axis
 // argument of the Arm Compute Library split / concatenate functions.
 //
 // numDimensions: total number of dimensions of the tensor.
 // axis:          axis index to mirror.
 // The arithmetic is unsigned, so an axis >= numDimensions wraps around rather than failing.
 unsigned int CalcAclAxis(unsigned int numDimensions, unsigned int axis)
 {
     const unsigned int lastDimensionIndex = numDimensions - 1;
     return lastDimensionIndex - axis;
 }

 } //namespace


 namespace armnn

 {

 using namespace armcomputetensorutils;


 // Constructs the workload and configures every Arm Compute Library (ACL) function it will run.
 //
 // The functions configured here are, in the order Execute() runs them:
 //   1. m_Permute1 - batch-major input only: permutes the input into m_PermuteFirstOut.
 //   2. m_Splitter - only when maxTime != 1: splits the time-major input into m_SplitterOutputs.
 //   3. m_Layers   - one NELSTMLayer per time step, all sharing the same weights and state tensors.
 //   4. m_Concat   - only when maxTime != 1: concatenates the per-step results (m_ConcatInputs).
 //   5. m_Permute2 - batch-major input only: permutes the result back into the output tensor.
 //
 // descriptor: queue descriptor holding the layer parameters, constant weights and tensor handles.
 // info:       tensor infos of the workload inputs and outputs.
 //
 // Handles used: inputs 0/1/2 are the sequence input, output state and cell state;
 // output 2 is the sequence output.
 NeonUnidirectionalSequenceLstmFloatWorkload::NeonUnidirectionalSequenceLstmFloatWorkload
     (const UnidirectionalSequenceLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
     : FloatWorkload<UnidirectionalSequenceLstmQueueDescriptor>(descriptor, info)
 {
     // Report Profiling Details
     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonUnidirectionalSequenceLstmFloatWorkload_Construct",
                                          descriptor.m_Parameters,
                                          info,
                                          GetGuid());

     const arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();

     TensorInfo inputInfo = info.m_InputTensorInfos[0];
     // The sequence output lives in output slot 2 (same slot as the tensor handle and the
     // shape read below), so its TensorInfo must come from slot 2 as well.
     TensorInfo outputInfo = info.m_OutputTensorInfos[2];

     arm_compute::DataType armComputeDataType = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetDataType();
     armnn::DataType armnnDataType = GetArmNNDataType(armComputeDataType);

     TensorShape inputLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetShape();
     TensorShape cellStateLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[2])->GetShape();
     TensorShape outputLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[2])->GetShape();

     // Time-major layout is [maxTime, batchSize, size]; batch-major swaps the first two dimensions.
     unsigned int maxTime = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
     unsigned int batchSize = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
     unsigned int inputSize = inputLayerShape[2];
     unsigned int outputSize = outputLayerShape[2];
     unsigned int numUnits = cellStateLayerShape[1];

     const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
     const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});

     //
     // Permute: performed if Unidirectional Sequence Layer inputs/outputs are in batch major format.
     //
     if (!m_Data.m_Parameters.m_TimeMajor)
     {
         auto layer = std::make_unique<arm_compute::NEPermute>();

         TensorInfo permuteOutInfo = inputInfo;
         permuteOutInfo.SetShape(timeMajorShapeInput);
         BuildArmComputeTensor(m_PermuteFirstOut, permuteOutInfo);
         armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermuteFirstOut);

         // Permute to time major format.
         layer->configure(&input, &m_PermuteFirstOut, arm_compute::PermutationVector(0U,2U,1U));
         m_Permute1 = std::move(layer);
     }

     //
     // Split and Concat Tensors
     //
     // One 2D [batchSize, inputSize] splitter output and one 2D [batchSize, outputSize]
     // concat input is created per time step.
     m_SplitterOutputsTensors.reserve(maxTime);
     m_ConcatInputsTensors.reserve(maxTime);
     for (unsigned int i = 0; i < maxTime; ++i)
     {
         arm_compute::Tensor splitter_out;
         arm_compute::Tensor concat_in;

         auto splitterTensorInfo = inputInfo;
         auto concatTensorInfo = outputInfo;
         splitterTensorInfo.SetShape({batchSize, inputSize});
         concatTensorInfo.SetShape({batchSize, outputSize});
         BuildArmComputeTensor(splitter_out, splitterTensorInfo);
         BuildArmComputeTensor(concat_in, concatTensorInfo);

         armcomputetensorutils::InitialiseArmComputeTensorEmpty(splitter_out);
         armcomputetensorutils::InitialiseArmComputeTensorEmpty(concat_in);

         // append to std::vector<arm_compute::Tensor>
         m_SplitterOutputsTensors.push_back(std::move(splitter_out));
         m_ConcatInputsTensors.push_back(std::move(concat_in));
     }

     // The element addresses are collected in a second pass, once both tensor vectors have
     // reached their final size, so later growth cannot invalidate the stored pointers.
     m_SplitterOutputs.reserve(maxTime);
     m_ConcatInputs.reserve(maxTime);
     for (unsigned int i = 0; i < maxTime; ++i)
     {
         // append to std::vector<arm_compute::ITensor*>
         m_SplitterOutputs.push_back(&m_SplitterOutputsTensors[i]);
         m_ConcatInputs.push_back(&m_ConcatInputsTensors[i]);
     }

     //
     // Split
     //
     unsigned int numberDimensions = 3;
     unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)

     if (maxTime != 1) // ACL split does not work with only one element to split.
     {
         // Describe maxTime views of shape [1, batchSize, inputSize], stacked along dimension 0.
         ViewsDescriptor splitterDesc(maxTime, numberDimensions);
         unsigned int splitterDimSizes[3] = {1, batchSize, inputSize};
         for (unsigned int outputIdx = 0u; outputIdx < maxTime; ++outputIdx)
         {
             splitterDesc.SetViewOriginCoord(outputIdx, dimension, splitterDimSizes[dimension] * outputIdx);
             for (unsigned int dimIdx = 0u; dimIdx < numberDimensions; ++dimIdx)
             {
                 splitterDesc.SetViewSize(outputIdx, dimIdx, splitterDimSizes[dimIdx]);
             }
         }

         std::set<unsigned int> splitAxis = ComputeSplitAxis(splitterDesc, timeMajorShapeInput);

         auto split_layer = std::make_unique<arm_compute::NESplit>();
         unsigned int aclAxisSplit = CalcAclAxis(splitterDesc.GetNumDimensions(),
                                                 *splitAxis.begin());
         if (!m_Data.m_Parameters.m_TimeMajor)
         {
             // Batch-major: split the already permuted (time-major) copy of the input.
             split_layer->configure(&m_PermuteFirstOut, m_SplitterOutputs, aclAxisSplit);
         }
         else
         {
             split_layer->configure(&input, m_SplitterOutputs, aclAxisSplit);
         }

         split_layer->prepare();
         m_Splitter = std::move(split_layer);
     }

     //
     // Lstm
     //
     arm_compute::LSTMParams<arm_compute::ITensor> lstm_param;

     // Mandatory weights and biases: allocate the ACL tensor descriptors now; the data is
     // copied in further down, after the LSTM layers have been configured.
     m_InputToForgetWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo());

     m_InputToCellWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo());

     m_InputToOutputWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo());

     m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo());

     m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo());

     m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo());

     m_ForgetGateBiasTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo());

     m_CellBiasTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo());

     m_OutputGateBiasTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo());

     // for future reference: check the AndroidNN API for the logic here
     if (!m_Data.m_Parameters.m_CifgEnabled)
     {
         m_InputToInputWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo());

         m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo());

         // The cell-to-input weights are optional; the tensor object is always created but
         // only built (and only handed to ACL) when the weights are present.
         m_CellToInputWeightsTensor = std::make_unique<arm_compute::Tensor>();
         if (m_Data.m_CellToInputWeights != nullptr)
         {
             BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo());
         }

         m_InputGateBiasTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo());

         lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(),
                                    m_RecurrentToInputWeightsTensor.get(),
                                    m_Data.m_CellToInputWeights ? m_CellToInputWeightsTensor.get() : nullptr,
                                    m_InputGateBiasTensor.get());
     }

     if (m_Data.m_Parameters.m_ProjectionEnabled)
     {
         m_ProjectionWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo());

         // The projection bias is optional, handled like the cell-to-input weights above.
         m_ProjectionBiasTensor = std::make_unique<arm_compute::Tensor>();
         if (m_Data.m_ProjectionBias != nullptr)
         {
             BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo());
         }

         lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(),
                                          m_Data.m_ProjectionBias ? m_ProjectionBiasTensor.get() : nullptr);
     }

     if (m_Data.m_Parameters.m_PeepholeEnabled)
     {
         m_CellToForgetWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());

         m_CellToOutputWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());

         lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
     }

     if (m_Data.m_Parameters.m_LayerNormEnabled)
     {
         // Input layer-norm weights only exist when CIFG is disabled.
         m_InputLayerNormWeightsTensor = std::make_unique<arm_compute::Tensor>();
         if (!m_Data.m_Parameters.m_CifgEnabled)
         {
             BuildArmComputeTensor(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights->GetTensorInfo());
         }

         m_ForgetLayerNormWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights->GetTensorInfo());

         m_CellLayerNormWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights->GetTensorInfo());

         m_OutputLayerNormWeightsTensor = std::make_unique<arm_compute::Tensor>();
         BuildArmComputeTensor(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights->GetTensorInfo());

         auto inputNormWeightTensor = m_Data.m_Parameters.m_CifgEnabled ? nullptr : m_InputLayerNormWeightsTensor.get();
         lstm_param.set_layer_normalization_params(inputNormWeightTensor,
                                                   m_ForgetLayerNormWeightsTensor.get(),
                                                   m_CellLayerNormWeightsTensor.get(),
                                                   m_OutputLayerNormWeightsTensor.get());
     }

     arm_compute::ITensor& output_state_in = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& cell_state_in   = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();

     // The "out" state tensors are bound to the same handles as the "in" state tensors
     // (inputs 1 and 2), so every LSTM step writes its state where the following step reads it.
     // NOTE(review): presumably deliberate to carry the state across time steps; as a result
     // output handles 0 and 1 are never written by this constructor's pipeline - confirm.
     arm_compute::ITensor& output_state_out = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& cell_state_out = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();

     // Scratch buffer shape is { batchSize, numUnits * 3 } with CIFG enabled and
     // { batchSize, numUnits * 4 } otherwise.
     m_ScratchBuffer = std::make_unique<arm_compute::Tensor>();
     const unsigned int scratchBufferFactor = m_Data.m_Parameters.m_CifgEnabled ? 3u : 4u;
     BuildArmComputeTensor(*m_ScratchBuffer,
                           TensorInfo({batchSize, numUnits * scratchBufferFactor}, armnnDataType));

     // Clipping thresholds are forwarded to ACL exactly as stored in the descriptor.
     float cell_threshold       = m_Data.m_Parameters.m_ClippingThresCell;
     float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;

     // Translate the descriptor's activation function code into an ACL ActivationLayerInfo.
     arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertLstmActivationFuncToAclLayerInfo(m_Data.m_Parameters.m_ActivationFunc);

     for (unsigned int i = 0; i != maxTime; ++i)
     {
         // Set LSTM input and output ITensors depending on:
         // input format (timeMajor) & number of LSTM batches (maxTime).
         arm_compute::ITensor* outputLSTM;
         arm_compute::ITensor* inputLSTM;

         // If there is only one LSTM time major batch, we will not concat OR permute.
         // Set input of LSTM to be first input ITensor.
         // Set output of LSTM to be final output ITensor.
         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
         if (maxTime == 1 && m_Data.m_Parameters.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(input.info()->tensor_shape(), 1U);
             TensorShape outputShape = GetTensorShape(output.info()->tensor_shape(), 1U);

             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             TensorShape outputShapeShrink({outputShape[1], outputShape[2]});

             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);

             input.info()->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = const_cast<arm_compute::ITensor*>(&input);

             output.info()->set_tensor_shape(acl_output_shape_shrink);
             outputLSTM = &output;
         }
         // If there is only one LSTM batch major batch, we will not concat, only permute.
         // Set input of LSTM to be output of initial permute.
         // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
         // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
         else if (maxTime == 1 && !m_Data.m_Parameters.m_TimeMajor)
         {
             TensorShape inputShape = GetTensorShape(m_PermuteFirstOut.info()->tensor_shape(), 1U);
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             m_PermuteFirstOut.info()->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = &m_PermuteFirstOut;

             outputLSTM = const_cast<arm_compute::ITensor*>(m_ConcatInputs[i]);
         }
         // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
         else
         {
             inputLSTM = m_SplitterOutputs[i];
             outputLSTM = const_cast<arm_compute::ITensor*>(m_ConcatInputs[i]);
         }

         auto lstm_layer = std::make_unique<arm_compute::NELSTMLayer>();
         lstm_layer->configure(inputLSTM,
                               m_InputToForgetWeightsTensor.get(),
                               m_InputToCellWeightsTensor.get(),
                               m_InputToOutputWeightsTensor.get(),
                               m_RecurrentToForgetWeightsTensor.get(),
                               m_RecurrentToCellWeightsTensor.get(),
                               m_RecurrentToOutputWeightsTensor.get(),
                               m_ForgetGateBiasTensor.get(),
                               m_CellBiasTensor.get(),
                               m_OutputGateBiasTensor.get(),
                               &output_state_in,
                               &cell_state_in,
                               m_ScratchBuffer.get(),
                               &output_state_out,
                               &cell_state_out,
                               outputLSTM,
                               lstm_param,
                               activationLayerInfo,
                               cell_threshold,
                               projection_threshold);

         m_Layers.emplace_back(std::move(lstm_layer));
     }

     armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);

     // Copy the constant weight / bias data into the ACL tensors built above.
     InitializeArmComputeTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
     InitializeArmComputeTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights);
     InitializeArmComputeTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights);
     InitializeArmComputeTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights);
     InitializeArmComputeTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights);
     InitializeArmComputeTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights);
     InitializeArmComputeTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias);
     InitializeArmComputeTensorData(*m_CellBiasTensor, m_Data.m_CellBias);
     InitializeArmComputeTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias);

     if (!m_Data.m_Parameters.m_CifgEnabled)
     {
         InitializeArmComputeTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
         InitializeArmComputeTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights);
         if (m_Data.m_CellToInputWeights != nullptr)
         {
             InitializeArmComputeTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights);
         }
         InitializeArmComputeTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias);
     }

     if (m_Data.m_Parameters.m_ProjectionEnabled)
     {
         InitializeArmComputeTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights);
         if (m_Data.m_ProjectionBias != nullptr)
         {
             InitializeArmComputeTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias);
         }
     }

     if (m_Data.m_Parameters.m_PeepholeEnabled)
     {
         InitializeArmComputeTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights);
         InitializeArmComputeTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights);
     }

     if (m_Data.m_Parameters.m_LayerNormEnabled)
     {
         if (!m_Data.m_Parameters.m_CifgEnabled)
         {
             InitializeArmComputeTensorData(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights);
         }
         InitializeArmComputeTensorData(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights);
         InitializeArmComputeTensorData(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights);
         InitializeArmComputeTensorData(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights);
     }

     // Force Compute Library to perform the necessary copying and reshaping.
     // The tensors that are no longer needed afterwards are released by the
     // FreeUnusedTensors() call at the end of this constructor.
     for (auto& lstmLayer : m_Layers)
     {
         lstmLayer->prepare();
     }

     //
     // Concat
     //

     // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
     TensorShape shape = GetTensorShape(m_ConcatInputs[0]->info()->tensor_shape(), 1U);
     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});

     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
     {
         for (unsigned int i = 0; i < maxTime; ++i)
         {
             m_ConcatInputs[i]->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }

         ConcatDescriptor  concatDescriptor(maxTime, numberDimensions);  // maxTime = num inputs (aka. number of views).
         for (unsigned int inputIdx = 0u; inputIdx < maxTime; ++inputIdx)
         {
             concatDescriptor.SetViewOriginCoord(inputIdx, dimension, inputIdx);
             concatDescriptor.SetConcatAxis(dimension);
         }

         m_Concat = std::make_unique<arm_compute::NEConcatenateLayer>();
         unsigned int aclAxisConcat = CalcAclAxis(concatDescriptor.GetNumDimensions(), concatDescriptor.GetConcatAxis());
         if (!m_Data.m_Parameters.m_TimeMajor)
         {
             // Batch-major: concatenate into the intermediate time-major tensor concat_out,
             // which the final permute below turns back into batch-major.
             TensorInfo concatOutputTensorInfo = outputInfo;
             concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
             BuildArmComputeTensor(concat_out, concatOutputTensorInfo);
             armcomputetensorutils::InitialiseArmComputeTensorEmpty(concat_out);

             m_Concat->configure(m_ConcatInputs, &concat_out, aclAxisConcat);
         }
         else
         {
             m_Concat->configure(m_ConcatInputs, &output, aclAxisConcat);
         }

         m_Concat->prepare();
     }
     // If only one LSTM batch, we do not concat and/or permute.
     // Must ensure final output info is expanded to correct batch major dimensions.
     else
     {
         if (!m_Data.m_Parameters.m_TimeMajor)
         {
             output.info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandBatchMajor));
         }
         else
         {
             output.info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }
     }

     //
     // Permute: only done if input/output are in batch major format.
     //
     if (!m_Data.m_Parameters.m_TimeMajor)
     {
         // Output now time major. Permute output back to batch major.
         auto layer = std::make_unique<arm_compute::NEPermute>();
         if (maxTime != 1)
         {
             layer->configure(&concat_out, &output, arm_compute::PermutationVector(0U, 2U, 1U));
         }
         else
         {
             // Single time step: no concat was configured, so permute the lone LSTM output directly.
             layer->configure(m_ConcatInputs[0], &output, arm_compute::PermutationVector(0U, 2U, 1U));
         }
         m_Permute2 = std::move(layer);
     }

     FreeUnusedTensors();
 }


 // Runs the functions configured by the constructor, in pipeline order.
 // A stage whose function pointer is null is skipped.
 void NeonUnidirectionalSequenceLstmFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonUnidirectionalSequenceLstmFloatWorkload_Execute");

     // Stage 1: first permute.
     if (m_Permute1 != nullptr)
     {
         m_Permute1->run();
     }

     // Stage 2: split.
     if (m_Splitter != nullptr)
     {
         m_Splitter->run();
     }

     // Stage 3: every LSTM layer, in the order they were added to m_Layers.
     for (const auto& lstmStep : m_Layers)
     {
         lstmStep->run();
     }

     // Stage 4: concatenation.
     if (m_Concat != nullptr)
     {
         m_Concat->run();
     }

     // Stage 5: second permute.
     if (m_Permute2 != nullptr)
     {
         m_Permute2->run();
     }
 }


 arm_compute::Status

 NeonUnidirectionalSequenceLstmFloatWorkloadValidate(const TensorInfo& input,

                                                     const TensorInfo& outputStateIn,

                                                     const TensorInfo& cellStateIn,

                                                     const TensorInfo& outputStateOut,

                                                     const TensorInfo& cellStateOut,

                                                     const TensorInfo& output,

                                                     const UnidirectionalSequenceLstmDescriptor& descriptor,

                                                     const LstmInputParamsInfo& paramsInfo)

 {

     TensorShape inputLayerShape = input.GetShape();

     TensorShape outputLayerShape = output.GetShape();


     if (inputLayerShape.GetNumDimensions() != 3)

     {

         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,

                                    "Unidirectional Sequence LSTM layer validate status failed.");

     }


     unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];

     unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];

     unsigned int inputSize = inputLayerShape[2];

     unsigned int outputSize = outputLayerShape[2];


     const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});

     const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});


     arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,

                                                              "Permute1 status");

     arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,

                                                           "Split status");

     arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,

                                                          "LSTM status");

     arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,

                                                            "Concat status");

     arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,

                                                              "Permute2 status");


     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);

     const arm_compute::TensorInfo aclOutputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(output);


     //

     // Permute validate

     //

     TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });

     arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);

     if (!descriptor.m_TimeMajor)

     {

         statusPermute1 =  arm_compute::NEPermute::validate(&aclInputInfo,

                                                            &aclPermuteOutInfo,

                                                            arm_compute::PermutationVector(0U, 2U, 1U));

     }


     //

     // Split and Concat Tensors validate

     //

     std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;

     std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;

     std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;

     std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;

     splitterOutputsTensorInfos.reserve(maxTime);

     concatInputsTensorInfos.reserve(maxTime);

     for (unsigned int i = 0; i < maxTime; ++i)

     {

         arm_compute::TensorInfo splitter_out;

         arm_compute::TensorInfo concat_in;


         auto splitterTensorInfo = TensorInfo(input);

         auto concatTensorInfo   = TensorInfo(output);

         splitterTensorInfo.SetShape({batchSize, inputSize});

         concatTensorInfo.SetShape({batchSize, outputSize});


         arm_compute::TensorInfo aclSplitterTensorInfo

             = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);

         arm_compute::TensorInfo aclConcatTensorInfo

             = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);


         splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);

         concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);

         splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);

         concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);

     }


     //

     // Split validate

     //

     unsigned int numberDimensions = 3;

     unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)

     unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);


     if (maxTime != 1) // ACL split does not work with only one element to split.

     {

         if (!descriptor.m_TimeMajor)

         {

             statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,

                                                          splitterOutputsTensorInfosPtr,

                                                          aclAxisSplit);

         }

         else

         {

             statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);

         }

     }


     //

     // LSTM validate

     //


     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;


     unsigned int numUnits = cellStateIn.GetShape()[1];

     unsigned int scratchBufferFactor = 4;


     if (descriptor.m_CifgEnabled)

     {

         // scratchBuffer = { batchSize, numUnits * 3 } with CIFG

        scratchBufferFactor = 3;

     }


     const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());


     // The inputs and outputs

     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);

     const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);

     const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);

     const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);

     const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);


     // Basic parameters

     const arm_compute::TensorInfo aclInputToForgetWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());

     const arm_compute::TensorInfo aclInputToCellWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());

     const arm_compute::TensorInfo aclInputToOutputWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());

     const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());

     const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());

     const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());

     const arm_compute::TensorInfo aclForgetGateBiasInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());

     const arm_compute::TensorInfo aclCellBiasInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());

     const arm_compute::TensorInfo aclOutputGateBiasInfo

                                       = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());


     arm_compute::TensorInfo aclInputToInputWeightsInfo;

     arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;

     arm_compute::TensorInfo aclCellToInputWeightsInfo;

     arm_compute::TensorInfo aclInputGateBiasInfo;

     arm_compute::TensorInfo aclProjectionWeightsInfo;

     arm_compute::TensorInfo aclProjectionBiasInfo;

     arm_compute::TensorInfo aclCellToForgetWeightsInfo;

     arm_compute::TensorInfo aclCellToOutputWeightsInfo;


     arm_compute::TensorInfo aclInputLayerNormWeightsInfo;

     arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;

     arm_compute::TensorInfo aclCellLayerNormWeightsInfo;

     arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;


     if (!descriptor.m_CifgEnabled)

     {

         if (descriptor.m_PeepholeEnabled)

         {

             aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());

         }

         aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());

         aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());

         aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());


         lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,

                                          &aclRecurrentToInputWeightsInfo,

                                          descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,

                                          &aclInputGateBiasInfo);

     }


     if (descriptor.m_ProjectionEnabled)

     {

         if (paramsInfo.m_ProjectionBias != nullptr)

         {

             aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());

         }

         aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());


         lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,

                                                paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);

     }


     if (descriptor.m_PeepholeEnabled)

     {

         aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());

         aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());


         lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);

     }


     if (descriptor.m_LayerNormEnabled)

     {

         if (!descriptor.m_CifgEnabled)

         {

             aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());

         }

         aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());

         aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());

         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());


         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :

                                                         &aclInputLayerNormWeightsInfo,

                                                         &aclForgetLayerNormWeightsInfo,

                                                         &aclCellLayerNormWeightsInfo,

                                                         &aclOutputLayerNormWeightsInfo);

     }


     // Need to be set at negative threshold to be compatible for ACL

     float cell_threshold = descriptor.m_ClippingThresCell;

     float projection_threshold = descriptor.m_ClippingThresProj;


     arm_compute::ActivationLayerInfo activationLayerInfo =

         ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);


     for (unsigned int i = 0; i != maxTime; ++i)

     {


         // Set LSTM input and output ITensors depending on:

         // input format (timeMajor) & number of LSTM batches (maxTime).

         arm_compute::ITensorInfo* outputLSTM;

         arm_compute::ITensorInfo* inputLSTM;


         // If there is only one LSTM time major batch, we will not concat OR permute.

         // Set input of LSTM to be first input ITensor.

         // Set output of LSTM to be final output ITensor.

         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.

         if (maxTime == 1 && descriptor.m_TimeMajor)

         {

             TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);

             TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);


             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});

             TensorShape outputShapeShrink({outputShape[1], outputShape[2]});


             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);

             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);


             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);

             inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);


             const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);

             outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);

         }

         // If there is only one LSTM batch major batch, we will not concat, only permute.

         // Set input of LSTM to be output of initial permute.

         // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.

         // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.

         else if (maxTime == 1 && !descriptor.m_TimeMajor)

         {

             TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);

             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});

             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);

             aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);

             inputLSTM = &aclPermuteOutInfo;


             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);

         }

         // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.

         else

         {

             inputLSTM = splitterOutputsTensorInfosPtr[i];

             outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);

         }


         statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,

                                                         &aclInputToForgetWeightsInfo,

                                                         &aclInputToCellWeightsInfo,

                                                         &aclInputToOutputWeightsInfo,

                                                         &aclRecurrentToForgetWeightsInfo,

                                                         &aclRecurrentToCellWeightsInfo,

                                                         &aclRecurrentToOutputWeightsInfo,

                                                         &aclForgetGateBiasInfo,

                                                         &aclCellBiasInfo,

                                                         &aclOutputGateBiasInfo,

                                                         &aclOutputStateInInfo,

                                                         &aclCellStateInInfo,

                                                         &aclScratchBufferInfo,

                                                         &aclOutputStateOutInfo,

                                                         &aclCellStateOutInfo,

                                                         outputLSTM,

                                                         lstm_params_info,

                                                         activationLayerInfo,

                                                         cell_threshold,

                                                         projection_threshold);


         if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)

         {

             break;

         }

     }


     //

     // Concat validate

     //


     // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.

     TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);

     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});

     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});


     TensorInfo concatOutputTensorInfo = TensorInfo(output);

     concatOutputTensorInfo.SetShape(timeMajorShapeOutput);

     arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);


     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.

     {

         for (unsigned int i = 0; i < maxTime; ++i)

         {

             auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);

             concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);

         }


         unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);

         if (!descriptor.m_TimeMajor)

         {

             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,

                                                                      &aclConcatOutputTensorInfo,

                                                                      aclAxisConcat);

         }

         else

         {

             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,

                                                                      &aclOutputInfo,

                                                                      aclAxisConcat);

         }

     }

     // If only one LSTM batch, we do not concat and/or permute.

     // Must ensure final output info is expanded to correct batch major dimensions.

     else

     {

         if (!descriptor.m_TimeMajor)

         {

             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(

                 BuildArmComputeTensorShape(shapeExpandBatchMajor));

         }

         else

         {

             const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(

                 BuildArmComputeTensorShape(shapeExpandTimeMajor));

         }

     }


     //

     // Permute validate

     //

     if (!descriptor.m_TimeMajor)

     {

         // Output now time major. Permute output back to batch major.

         if (maxTime != 1)

         {

             statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,

                                                               &aclOutputInfo,

                                                               arm_compute::PermutationVector(0U, 2U, 1U));

         }

         else

         {

             statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],

                                                               &aclOutputInfo,

                                                               arm_compute::PermutationVector(0U, 2U, 1U));

         }

     }


     auto okCode = arm_compute::ErrorCode::OK;

     if (statusPermute1.error_code() == okCode &&

         statusSplit.error_code()    == okCode &&

         statusLSTM .error_code()    == okCode &&

         statusConcat.error_code()   == okCode &&

         statusPermute2.error_code() == okCode)

     {

         return arm_compute::Status(arm_compute::ErrorCode::OK,

                                    "All Unidirectional Sequence LSTM layer validate status OK.");

     }

     else

     {

         return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,

                                    "Unidirectional Sequence LSTM layer validate status failed.");

     }

 }


 // Passes every weight, bias, layer-normalisation and scratch tensor member of this
 // workload to FreeTensorIfUnused, one at a time, in the order listed below.
 void NeonUnidirectionalSequenceLstmFloatWorkload::FreeUnusedTensors()
 {
     // Calls FreeTensorIfUnused on each argument in turn.
     auto freeEach = [](auto&... tensors)
     {
         // Elements of a braced-init-list are evaluated strictly left to right,
         // so the calls happen in argument order. The leading 0 keeps the array non-empty.
         const int sequenced[] = { 0, (FreeTensorIfUnused(tensors), 0)... };
         static_cast<void>(sequenced);
     };

     // Input-to-gate weights
     freeEach(m_InputToInputWeightsTensor,
              m_InputToForgetWeightsTensor,
              m_InputToCellWeightsTensor,
              m_InputToOutputWeightsTensor);

     // Recurrent-to-gate weights
     freeEach(m_RecurrentToInputWeightsTensor,
              m_RecurrentToForgetWeightsTensor,
              m_RecurrentToCellWeightsTensor,
              m_RecurrentToOutputWeightsTensor);

     // Cell-to-gate weights
     freeEach(m_CellToInputWeightsTensor,
              m_CellToForgetWeightsTensor,
              m_CellToOutputWeightsTensor);

     // Gate biases
     freeEach(m_InputGateBiasTensor,
              m_ForgetGateBiasTensor,
              m_CellBiasTensor,
              m_OutputGateBiasTensor);

     // Projection weights and bias
     freeEach(m_ProjectionWeightsTensor,
              m_ProjectionBiasTensor);

     // Layer-normalisation weights
     freeEach(m_InputLayerNormWeightsTensor,
              m_ForgetLayerNormWeightsTensor,
              m_CellLayerNormWeightsTensor,
              m_OutputLayerNormWeightsTensor);

     // Scratch buffer
     freeEach(m_ScratchBuffer);
 }


 } //namespace armnn

ArmComputeTensorUtils.hpp

ArmComputeUtils.hpp

NeonTensorHandle.hpp

NeonUnidirectionalSequenceLstmFloatWorkload.hpp

NeonWorkloadUtils.hpp

ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID(label)
Creates a profiling event that uses GetGuid() and GetName() from the calling class.
Definition: NeonWorkloadUtils.hpp:33

NumericCast.hpp

Permute.hpp

ARMNN_REPORT_PROFILING_WORKLOAD_DESC
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227

WorkloadUtils.hpp

armnn::BaseWorkload::GetGuid
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:52

armnn::BaseWorkload::m_Data
QueueDescriptor m_Data
Definition: Workload.hpp:74

armnn::IAclTensorHandle
Definition: ArmComputeTensorHandle.hpp:17

armnn::NeonUnidirectionalSequenceLstmFloatWorkload::NeonUnidirectionalSequenceLstmFloatWorkload
NeonUnidirectionalSequenceLstmFloatWorkload(const UnidirectionalSequenceLstmQueueDescriptor &descriptor, const WorkloadInfo &info)
Definition: NeonUnidirectionalSequenceLstmFloatWorkload.cpp:32

armnn::NeonUnidirectionalSequenceLstmFloatWorkload::Execute
virtual void Execute() const override
Definition: NeonUnidirectionalSequenceLstmFloatWorkload.cpp:484

armnn::TensorInfo
Definition: Tensor.hpp:153

armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193

armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195

armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition: Tensor.hpp:200

armnn::TensorShape
Definition: Tensor.hpp:21

armnn::TensorShape::GetNumDimensions
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174

armnn::TypedWorkload
Definition: Workload.hpp:82

armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:7

armnn::ComputeSplitAxis
std::set< unsigned int > ComputeSplitAxis(const armnn::SplitterDescriptor &desc, const TensorShape &input)
Calculates the axis values for split operation.
Definition: WorkloadUtils.cpp:377

armnn::BoostLogSeverityMapping::info
@ info

armnn::InitializeArmComputeTensorData
void InitializeArmComputeTensorData(arm_compute::Tensor &tensor, TensorInfo tensorInfo, const ITensorHandle *handle)
Definition: NeonWorkloadUtils.hpp:69

armnn::Status
Status
enumeration
Definition: Types.hpp:43

armnn::NeonUnidirectionalSequenceLstmFloatWorkloadValidate
arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate(const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
Definition: NeonUnidirectionalSequenceLstmFloatWorkload.cpp:510

armnn::ConvertLstmActivationFuncToAclLayerInfo
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Definition: ArmComputeUtils.hpp:118

armnn::DataType
DataType
Definition: Types.hpp:49

armnnUtils::GetTensorShape
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:21

armnnUtils::Permuted
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:125

armnn::LstmDescriptor
An LstmDescriptor for the LstmLayer.
Definition: Descriptors.hpp:1103

armnn::LstmDescriptor::m_PeepholeEnabled
bool m_PeepholeEnabled
Enable/disable peephole.
Definition: Descriptors.hpp:1148

armnn::LstmDescriptor::m_TimeMajor
bool m_TimeMajor
Enable/disable time major.
Definition: Descriptors.hpp:1154

armnn::LstmDescriptor::m_LayerNormEnabled
bool m_LayerNormEnabled
Enable/disable layer normalization.
Definition: Descriptors.hpp:1152

armnn::LstmDescriptor::m_ClippingThresCell
float m_ClippingThresCell
Clipping threshold value for the cell state.
Definition: Descriptors.hpp:1142

armnn::LstmDescriptor::m_ProjectionEnabled
bool m_ProjectionEnabled
Enable/disable the projection layer.
Definition: Descriptors.hpp:1150

armnn::LstmDescriptor::m_ClippingThresProj
float m_ClippingThresProj
Clipping threshold value for the projection.
Definition: Descriptors.hpp:1144

armnn::LstmDescriptor::m_CifgEnabled
bool m_CifgEnabled
Enable/disable cifg (coupled input & forget gate).
Definition: Descriptors.hpp:1146

armnn::LstmDescriptor::m_ActivationFunc
uint32_t m_ActivationFunc
The activation function to use.
Definition: Descriptors.hpp:1140

armnn::LstmInputParamsInfo
Definition: LstmParams.hpp:64

armnn::LstmInputParamsInfo::GetOutputLayerNormWeights
const TensorInfo & GetOutputLayerNormWeights() const
Definition: LstmParams.hpp:201

armnn::LstmInputParamsInfo::GetCellToForgetWeights
const TensorInfo & GetCellToForgetWeights() const
Definition: LstmParams.hpp:157

armnn::LstmInputParamsInfo::GetProjectionWeights
const TensorInfo & GetProjectionWeights() const
Definition: LstmParams.hpp:181

armnn::LstmInputParamsInfo::GetCellToOutputWeights
const TensorInfo & GetCellToOutputWeights() const
Definition: LstmParams.hpp:161

armnn::LstmInputParamsInfo::GetCellToInputWeights
const TensorInfo & GetCellToInputWeights() const
Definition: LstmParams.hpp:153

armnn::LstmInputParamsInfo::GetInputToCellWeights
const TensorInfo & GetInputToCellWeights() const
Definition: LstmParams.hpp:129

armnn::LstmInputParamsInfo::GetInputLayerNormWeights
const TensorInfo & GetInputLayerNormWeights() const
Definition: LstmParams.hpp:189

armnn::LstmInputParamsInfo::GetRecurrentToForgetWeights
const TensorInfo & GetRecurrentToForgetWeights() const
Definition: LstmParams.hpp:141

armnn::LstmInputParamsInfo::GetInputToForgetWeights
const TensorInfo & GetInputToForgetWeights() const
Definition: LstmParams.hpp:125

armnn::LstmInputParamsInfo::GetInputToOutputWeights
const TensorInfo & GetInputToOutputWeights() const
Definition: LstmParams.hpp:133

armnn::LstmInputParamsInfo::GetProjectionBias
const TensorInfo & GetProjectionBias() const
Definition: LstmParams.hpp:185

armnn::LstmInputParamsInfo::GetCellLayerNormWeights
const TensorInfo & GetCellLayerNormWeights() const
Definition: LstmParams.hpp:197

armnn::LstmInputParamsInfo::GetForgetLayerNormWeights
const TensorInfo & GetForgetLayerNormWeights() const
Definition: LstmParams.hpp:193

armnn::LstmInputParamsInfo::GetForgetGateBias
const TensorInfo & GetForgetGateBias() const
Definition: LstmParams.hpp:169

armnn::LstmInputParamsInfo::GetRecurrentToInputWeights
const TensorInfo & GetRecurrentToInputWeights() const
Definition: LstmParams.hpp:137

armnn::LstmInputParamsInfo::GetCellBias
const TensorInfo & GetCellBias() const
Definition: LstmParams.hpp:173

armnn::LstmInputParamsInfo::GetOutputGateBias
const TensorInfo & GetOutputGateBias() const
Definition: LstmParams.hpp:177

armnn::LstmInputParamsInfo::GetInputGateBias
const TensorInfo & GetInputGateBias() const
Definition: LstmParams.hpp:165

armnn::LstmInputParamsInfo::m_ProjectionBias
const TensorInfo * m_ProjectionBias
Definition: LstmParams.hpp:105

armnn::LstmInputParamsInfo::GetRecurrentToCellWeights
const TensorInfo & GetRecurrentToCellWeights() const
Definition: LstmParams.hpp:145

armnn::LstmInputParamsInfo::GetInputToInputWeights
const TensorInfo & GetInputToInputWeights() const
Definition: LstmParams.hpp:121

armnn::LstmInputParamsInfo::GetRecurrentToOutputWeights
const TensorInfo & GetRecurrentToOutputWeights() const
Definition: LstmParams.hpp:149

armnn::OriginsDescriptor
An OriginsDescriptor for the ConcatLayer.
Definition: Descriptors.hpp:202

armnn::OriginsDescriptor::SetViewOriginCoord
Status SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value)
Set the view origin coordinates.
Definition: Descriptors.cpp:167

armnn::OriginsDescriptor::GetConcatAxis
unsigned int GetConcatAxis() const
Get the concatenation axis value.
Definition: Descriptors.cpp:162

armnn::OriginsDescriptor::SetConcatAxis
void SetConcatAxis(unsigned int concatAxis)
Set the concatenation axis value.
Definition: Descriptors.cpp:158

armnn::OriginsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:192

armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26

armnn::QueueDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkloadData.hpp:27

armnn::QueueDescriptorWithParameters::m_Parameters
LayerDescriptor m_Parameters
Definition: WorkloadData.hpp:66

armnn::UnidirectionalSequenceLstmQueueDescriptor
Definition: WorkloadData.hpp:697

armnn::ViewsDescriptor
A ViewsDescriptor for the SplitterLayer.
Definition: Descriptors.hpp:245

armnn::ViewsDescriptor::SetViewOriginCoord
Status SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value)
Set the view origin coordinates.
Definition: Descriptors.cpp:317

armnn::ViewsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:307

armnn::ViewsDescriptor::SetViewSize
Status SetViewSize(uint32_t view, uint32_t coord, uint32_t value)
Set the size of the views.
Definition: Descriptors.cpp:322

armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:17