ArmNN
 24.08
ClUnidirectionalSequenceLstmFloatWorkload.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
7 #include "ClWorkloadUtils.hpp"
8 
11 
13 #include <armnnUtils/Permute.hpp>
14 #include <cl/test/ClWorkloadFactoryHelper.hpp>
16 
17 #include "cl/ClTensorHandle.hpp"
18 
namespace
{
/// Map a zero-based ArmNN axis (counted from the outermost dimension) onto the
/// Arm Compute Library convention, which counts axes from the innermost
/// dimension instead.
unsigned int CalcAclAxis(unsigned int numDimensions, unsigned int axis)
{
    const unsigned int reversedAxis = numDimensions - axis - 1u;
    return reversedAxis;
}
} //namespace
26 
27 namespace armnn
28 {
29 using namespace armcomputetensorutils;
30 
    const WorkloadInfo& info,
    const arm_compute::CLCompileContext& clCompileContext)
{
    // Report Profiling Details
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClUnidirectionalSequenceLstmFloatWorkload_Construct",
                                         descriptor.m_Parameters,
                                         info,
                                         GetGuid());

    // Input 0 is the 3D input sequence; output 2 is the 3D output sequence.
    // Inputs 1 and 2 (initial output state / cell state) are read further below.
    const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();

    TensorInfo inputInfo = info.m_InputTensorInfos[0];
    TensorInfo outputInfo = info.m_OutputTensorInfos[2];

    arm_compute::DataType armComputeDataType = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetDataType();
    armnn::DataType armnnDataType = GetArmNNDataType(armComputeDataType);

    TensorShape inputLayerShape = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetShape();
    TensorShape cellStateLayerShape = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetShape();
    TensorShape outputLayerShape = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetShape();

    // Dimension order depends on the layout: time major is [maxTime, batch, ...],
    // batch major is [batch, maxTime, ...]; the innermost dimension is the feature size.
    unsigned int maxTime = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
    unsigned int batchSize = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
    unsigned int inputSize = inputLayerShape[2];
    unsigned int outputSize = outputLayerShape[2];
    unsigned int numUnits = cellStateLayerShape[1];

    const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
    const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});

    //
    // Permute: performed if Unidirectional Sequence Layer inputs/outputs are in batch major format.
    //
    if (!m_Data.m_Parameters.m_TimeMajor)
    {
        std::unique_ptr<arm_compute::CLPermute> layer(new arm_compute::CLPermute());

        TensorInfo permuteOutInfo = inputInfo;
        permuteOutInfo.SetShape(timeMajorShapeInput);
        BuildArmComputeTensor(m_PermuteFirstOut, permuteOutInfo);
        armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermuteFirstOut);

        // Permute to time major format.
        layer->configure(clCompileContext, &input, &m_PermuteFirstOut, arm_compute::PermutationVector(0U,2U,1U));
        m_Permute1.reset(layer.release());
    }

    //
    // Split and Concat Tensors
    //
    // Build one 2D [batch, feature] intermediate tensor per time step: the
    // splitter outputs feed the per-step LSTM layers, the concat inputs collect
    // their results.
    for (unsigned int i = 0; i < maxTime; ++i)
    {
        arm_compute::CLTensor splitter_out;
        arm_compute::CLTensor concat_in;

        auto splitterTensorInfo = inputInfo;
        auto concatTensorInfo = outputInfo;
        splitterTensorInfo.SetShape({batchSize, inputSize});
        concatTensorInfo.SetShape({batchSize, outputSize});
        BuildArmComputeTensor(splitter_out, splitterTensorInfo);
        BuildArmComputeTensor(concat_in, concatTensorInfo);

        armcomputetensorutils::InitialiseArmComputeTensorEmpty(splitter_out);
        armcomputetensorutils::InitialiseArmComputeTensorEmpty(concat_in);

        // append to std::vector<arm_compute::CLTensor>
        m_SplitterOutputsTensors.push_back(std::move(splitter_out));
        m_ConcatInputsTensors.push_back(std::move(concat_in));
    }

    // Pointers are taken only after all push_backs above, so vector
    // reallocation cannot invalidate them.
    for (unsigned int i = 0; i < maxTime; ++i)
    {
        // append to std::vector<arm_compute::ICLTensor*>
        m_SplitterOutputs.push_back(&m_SplitterOutputsTensors[i]);
        m_ConcatInputs.push_back(&m_ConcatInputsTensors[i]);
    }

    //
    // Split
    //
    unsigned int numberDimensions = 3;
    unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)

    if (maxTime != 1) // ACL split does not work with only one element to split.
    {
        // Describe maxTime views of size [1, batch, inputSize] along dimension 0.
        ViewsDescriptor splitterDesc(maxTime, numberDimensions);
        unsigned int splitterDimSizes[3] = {1, batchSize, inputSize};
        for (unsigned int outputIdx = 0u; outputIdx < maxTime; ++outputIdx)
        {
            splitterDesc.SetViewOriginCoord(outputIdx, dimension, splitterDimSizes[dimension] * outputIdx);
            for (unsigned int dimIdx = 0u; dimIdx < numberDimensions; ++dimIdx)
            {
                splitterDesc.SetViewSize(outputIdx, dimIdx, splitterDimSizes[dimIdx]);
            }
        }

        std::set<unsigned int> splitAxis = ComputeSplitAxis(splitterDesc, timeMajorShapeInput);

        std::unique_ptr<arm_compute::CLSplit> split_layer(new arm_compute::CLSplit());
        unsigned int aclAxisSplit = CalcAclAxis(splitterDesc.GetNumDimensions(), *splitAxis.begin());
        // Split the (possibly permuted) time-major input into per-step slices.
        if (!m_Data.m_Parameters.m_TimeMajor)
        {
            split_layer->configure(&m_PermuteFirstOut, m_SplitterOutputs, aclAxisSplit);
        }
        else
        {
            split_layer->configure(&input, m_SplitterOutputs, aclAxisSplit);
        }

        split_layer->prepare();
        m_Splitter.reset(split_layer.release());
    }

    //
    // Lstm
    //
    arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;

    // Build ACL-side tensors for the mandatory weights/biases; their data is
    // uploaded further below via InitializeArmComputeClTensorData.
    m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo());

    m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo());

    m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo());

    m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo());

    m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo());

    m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo());

    m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo());

    m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo());

    m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo());

    // for future reference: check the AndroidNN API for the logic here
    if (!m_Data.m_Parameters.m_CifgEnabled)
    {
        m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo());

        m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo());

        // CellToInputWeights are optional even when CIFG is disabled.
        m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        if (m_Data.m_CellToInputWeights != nullptr)
        {
            BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo());
        }

        m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo());

        lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(),
                                   m_RecurrentToInputWeightsTensor.get(),
                                   m_Data.m_CellToInputWeights ? m_CellToInputWeightsTensor.get() : nullptr,
                                   m_InputGateBiasTensor.get());
    }

    if (m_Data.m_Parameters.m_ProjectionEnabled)
    {
        m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo());

        // Projection bias is optional.
        m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>();
        if (m_Data.m_ProjectionBias != nullptr)
        {
            BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo());
        }

        lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(),
                                         m_Data.m_ProjectionBias ? m_ProjectionBiasTensor.get() : nullptr);
    }

    if (m_Data.m_Parameters.m_PeepholeEnabled)
    {
        m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());

        m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());

        lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
    }

    if (m_Data.m_Parameters.m_LayerNormEnabled)
    {
        // Input layer-norm weights only exist when CIFG is disabled.
        m_InputLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        if (!m_Data.m_Parameters.m_CifgEnabled)
        {
            BuildArmComputeTensor(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights->GetTensorInfo());
        }

        m_ForgetLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights->GetTensorInfo());

        m_CellLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights->GetTensorInfo());

        m_OutputLayerNormWeightsTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights->GetTensorInfo());

        auto inputNormWeightTensor = m_Data.m_Parameters.m_CifgEnabled ? nullptr : m_InputLayerNormWeightsTensor.get();
        lstm_param.set_layer_normalization_params(inputNormWeightTensor,
                                                  m_ForgetLayerNormWeightsTensor.get(),
                                                  m_CellLayerNormWeightsTensor.get(),
                                                  m_OutputLayerNormWeightsTensor.get());
    }

    arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();

    // NOTE(review): the "out" state tensors are taken from m_Inputs[1]/[2] as
    // well, i.e. the state is updated in place in the input handles rather than
    // written to dedicated output handles — confirm this matches the layer's
    // intended handle layout.
    arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();

    m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>();
    if (m_Data.m_Parameters.m_CifgEnabled)
    {
        // scratch_buffer [num_units * 3, batch_size] with CIFG
        BuildArmComputeTensor(*m_ScratchBuffer, TensorInfo({batchSize, numUnits * 3}, armnnDataType));
    }
    else
    {
        // scratch_buffer [num_units * 4, batch_size] without CIFG
        BuildArmComputeTensor(*m_ScratchBuffer, TensorInfo({batchSize, numUnits * 4}, armnnDataType));
    }

    // Need to be set at negative threshold to be compatible for ACL
    float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell;
    float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;

    // For preparing the object for the class ActivationLayerInfo, consider 5 situations
    arm_compute::ActivationLayerInfo activationLayerInfo =
        ConvertLstmActivationFuncToAclLayerInfo(m_Data.m_Parameters.m_ActivationFunc);

    // Configure one CLLSTMLayer per time step.
    for (unsigned int i = 0; i != maxTime; ++i)
    {
        // Set LSTM input and output ITensors depending on:
        // input format (timeMajor) & number of LSTM batches (maxTime).
        arm_compute::ICLTensor* outputLSTM;
        arm_compute::ICLTensor* inputLSTM;
        // If there is only one LSTM time major batch, we will not concat OR permute.
        // Set input of LSTM to be first input ITensor.
        // Set output of LSTM to be final output ITensor.
        // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
        if (maxTime == 1 && m_Data.m_Parameters.m_TimeMajor)
        {
            TensorShape inputShape = GetTensorShape((&input)->info()->tensor_shape(), 1U);
            TensorShape outputShape = GetTensorShape((&output)->info()->tensor_shape(), 1U);
            TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
            TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
            auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
            auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
            // Shrink the ACL tensor infos in place; const_cast is needed because
            // the LSTM layer takes non-const ITensor pointers.
            (&input)->info()->set_tensor_shape(acl_input_shape_shrink);
            inputLSTM = const_cast<arm_compute::ICLTensor*>(&input);
            (&output)->info()->set_tensor_shape(acl_output_shape_shrink);
            outputLSTM = &output;
        }
        // If there is only one LSTM batch major batch, we will not concat, only permute.
        // Set input of LSTM to be output of initial permute.
        // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
        // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
        else if (maxTime == 1 && !m_Data.m_Parameters.m_TimeMajor)
        {
            TensorShape inputShape = GetTensorShape(m_PermuteFirstOut.info()->tensor_shape(), 1U);
            TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
            auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
            m_PermuteFirstOut.info()->set_tensor_shape(acl_input_shape_shrink);
            inputLSTM = &m_PermuteFirstOut;
            outputLSTM = const_cast<arm_compute::ICLTensor*>(m_ConcatInputs[i]);
        }
        // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
        else
        {
            inputLSTM = m_SplitterOutputs[i];
            outputLSTM = const_cast<arm_compute::ICLTensor*>(m_ConcatInputs[i]);
        }

        std::unique_ptr<arm_compute::CLLSTMLayer> lstm_layer(new arm_compute::CLLSTMLayer());
        lstm_layer->configure(clCompileContext,
                              inputLSTM,
                              m_InputToForgetWeightsTensor.get(),
                              m_InputToCellWeightsTensor.get(),
                              m_InputToOutputWeightsTensor.get(),
                              m_RecurrentToForgetWeightsTensor.get(),
                              m_RecurrentToCellWeightsTensor.get(),
                              m_RecurrentToOutputWeightsTensor.get(),
                              m_ForgetGateBiasTensor.get(),
                              m_CellBiasTensor.get(),
                              m_OutputGateBiasTensor.get(),
                              &output_state_in,
                              &cell_state_in,
                              m_ScratchBuffer.get(),
                              &output_state_out,
                              &cell_state_out,
                              outputLSTM,
                              lstm_param,
                              activationLayerInfo,
                              cell_threshold,
                              projection_threshold);

        m_Layers.emplace_back(std::move(lstm_layer));
    }

    armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);

    // Upload the constant weight/bias data into the ACL tensors built above.
    InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
    InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights);
    InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights);
    InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights);
    InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights);
    InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights);
    InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias);
    InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias);
    InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias);

    if (!m_Data.m_Parameters.m_CifgEnabled)
    {
        InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
        InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights);
        if (m_Data.m_CellToInputWeights != nullptr)
        {
            InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights);
        }
        InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias);
    }

    if (m_Data.m_Parameters.m_ProjectionEnabled)
    {
        InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights);
        if (m_Data.m_ProjectionBias != nullptr)
        {
            InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias);
        }
    }

    if (m_Data.m_Parameters.m_PeepholeEnabled)
    {
        InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights);
        InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights);
    }

    if (m_Data.m_Parameters.m_LayerNormEnabled)
    {
        if (!m_Data.m_Parameters.m_CifgEnabled)
        {
            InitializeArmComputeClTensorData(*m_InputLayerNormWeightsTensor, m_Data.m_InputLayerNormWeights);
        }
        InitializeArmComputeClTensorData(*m_ForgetLayerNormWeightsTensor, m_Data.m_ForgetLayerNormWeights);
        InitializeArmComputeClTensorData(*m_CellLayerNormWeightsTensor, m_Data.m_CellLayerNormWeights);
        InitializeArmComputeClTensorData(*m_OutputLayerNormWeightsTensor, m_Data.m_OutputLayerNormWeights);
    }

    // Force Compute Library to perform the necessary copying and reshaping.
    // After which delete all the input tensors that will no longer be needed.
    for (uint32_t i = 0; i < m_Layers.size(); ++i)
    {
        m_Layers[i]->prepare();
    }

    //
    // Concat
    //

    // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
    TensorShape shape = GetTensorShape(m_ConcatInputs[0]->info()->tensor_shape(), 1U);
    TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
    TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});

    if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
    {
        for (unsigned int i = 0; i < maxTime; ++i)
        {
            m_ConcatInputs[i]->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
        }

        ConcatDescriptor concatDescriptor(maxTime, numberDimensions); // maxTime = num inputs (aka. number of views).
        for (unsigned int inputIdx = 0u; inputIdx < maxTime; ++inputIdx)
        {
            concatDescriptor.SetViewOriginCoord(inputIdx, dimension, inputIdx);
            concatDescriptor.SetConcatAxis(dimension);
        }

        m_Concat.reset(new arm_compute::CLConcatenateLayer());
        unsigned int aclAxisConcat = CalcAclAxis(concatDescriptor.GetNumDimensions(),
                                                 concatDescriptor.GetConcatAxis());
        if (!m_Data.m_Parameters.m_TimeMajor)
        {
            // Batch major: concatenate into an intermediate time-major tensor;
            // the second permute below writes the final batch-major output.
            TensorInfo concatOuputTensorInfo = outputInfo;
            concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
            BuildArmComputeTensor(concat_out, concatOuputTensorInfo);
            armcomputetensorutils::InitialiseArmComputeTensorEmpty(concat_out);

            m_Concat->configure(m_ConcatInputs, &concat_out, aclAxisConcat);
        }
        else
        {
            m_Concat->configure(m_ConcatInputs, &output, aclAxisConcat);
        }

        m_Concat->prepare();
    }
    // If only one LSTM batch, we do not concat and/or permute.
    // Must ensure final output info is expanded to correct batch major dimensions.
    else
    {
        if (!m_Data.m_Parameters.m_TimeMajor)
        {
            (&output)->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandBatchMajor));
        }
        else
        {
            (&output)->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
        }
    }

    //
    // Permute: only done if input/output are in batch major format.
    //
    if (!m_Data.m_Parameters.m_TimeMajor)
    {
        // Output now time major. Permute output back to batch major.
        std::unique_ptr<arm_compute::CLPermute> layer(new arm_compute::CLPermute());
        if (maxTime != 1)
        {
            layer->configure(clCompileContext, &concat_out, &output, arm_compute::PermutationVector(0U, 2U, 1U));
        }
        else
        {
            // Single step: the lone LSTM output feeds the permute directly.
            layer->configure(clCompileContext, m_ConcatInputs[0], &output, arm_compute::PermutationVector(0U, 2U, 1U));
        }
        m_Permute2.reset(layer.release());
    }

    FreeUnusedTensors();
}
481 
{
    ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClUnidirectionalSequenceLstmFloatWorkload_Execute");
    // Run the pipeline configured in the constructor, in order. The permute,
    // splitter and concat stages are optional: their pointers are only set when
    // the layout (batch major) or sequence length (maxTime > 1) requires them.
    if (m_Permute1)
    {
        m_Permute1->run();
    }
    if (m_Splitter)
    {
        m_Splitter->run();
    }
    // One configured CLLSTMLayer per time step.
    for (uint32_t i = 0; i < m_Layers.size(); ++i)
    {
        m_Layers[i]->run();
    }
    if (m_Concat)
    {
        m_Concat->run();
    }
    if (m_Permute2)
    {
        m_Permute2->run();
    }
}
506 
509  const TensorInfo& outputStateIn,
510  const TensorInfo& cellStateIn,
511  const TensorInfo& outputStateOut,
512  const TensorInfo& cellStateOut,
513  const TensorInfo& output,
514  const UnidirectionalSequenceLstmDescriptor& descriptor,
515  const LstmInputParamsInfo& paramsInfo)
516 {
517  TensorShape inputLayerShape = input.GetShape();
518  TensorShape outputLayerShape = output.GetShape();
519 
520  if (inputLayerShape.GetNumDimensions() != 3)
521  {
522  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
523  "Unidirectional Sequence LSTM layer validate status failed.");
524  }
525 
526  unsigned int maxTime = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
527  unsigned int batchSize = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
528  unsigned int inputSize = inputLayerShape[2];
529  unsigned int outputSize = outputLayerShape[2];
530 
531  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
532  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
533 
534  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "Permute1 status");
536  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Split status");
538  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "LSTM status");
540  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
541  "Concat status");
542  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
543  "Permute2 status");
544 
545  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
546  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
547 
548  //
549  // Permute validate
550  //
551  TensorInfo permuteOutInfo = armnnUtils::Permuted(input, { 1U, 0U, 2U });
552  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
553  if (!descriptor.m_TimeMajor)
554  {
555  statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
556  &aclPermuteOutInfo,
557  arm_compute::PermutationVector(0U, 2U, 1U));
558  }
559 
560  //
561  // Split and Concat Tensors validate
562  //
563  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
564  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
565  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
566  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
567  splitterOutputsTensorInfos.reserve(maxTime);
568  concatInputsTensorInfos.reserve(maxTime);
569  for (unsigned int i = 0; i < maxTime; ++i)
570  {
571  arm_compute::TensorInfo splitter_out;
572  arm_compute::TensorInfo concat_in;
573 
574  auto splitterTensorInfo = TensorInfo(input);
575  auto concatTensorInfo = TensorInfo(output);
576  splitterTensorInfo.SetShape({batchSize, inputSize});
577  concatTensorInfo.SetShape({batchSize, outputSize});
578 
579  arm_compute::TensorInfo aclSplitterTensorInfo
580  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
581  arm_compute::TensorInfo aclConcatTensorInfo
582  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
583 
584  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
585  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
586  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
587  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
588  }
589 
590  //
591  // Split validate
592  //
593  unsigned int numberDimensions = 3;
594  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
595  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
596 
597  if (maxTime != 1) // ACL split does not work with only one element to split.
598  {
599  if (!descriptor.m_TimeMajor)
600  {
601  statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
602  splitterOutputsTensorInfosPtr,
603  aclAxisSplit);
604  }
605  else
606  {
607  statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
608  }
609  }
610 
611  //
612  // LSTM validate
613  //
614 
615  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
616 
617  unsigned int numUnits = cellStateIn.GetShape()[1];
618  unsigned int scratchBufferFactor = 4;
619 
620  if (descriptor.m_CifgEnabled)
621  {
622  // scratchBuffer = { batchSize, numUnits * 3 } with CIFG
623  scratchBufferFactor = 3;
624  }
625 
626  const TensorInfo& scratchBuffer = TensorInfo({ batchSize, numUnits * scratchBufferFactor }, input.GetDataType());
627 
628  // The inputs and outputs
629  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
630  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
631  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
632  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
633  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
634 
635  // Basic parameters
636  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
637  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
638  const arm_compute::TensorInfo aclInputToCellWeightsInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
640  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
641  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
642  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
643  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
644  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
645  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
646  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
647  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
648  const arm_compute::TensorInfo aclForgetGateBiasInfo
649  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
650  const arm_compute::TensorInfo aclCellBiasInfo
651  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
652  const arm_compute::TensorInfo aclOutputGateBiasInfo
653  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
654 
655  arm_compute::TensorInfo aclInputToInputWeightsInfo;
656  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
657  arm_compute::TensorInfo aclCellToInputWeightsInfo;
658  arm_compute::TensorInfo aclInputGateBiasInfo;
659  arm_compute::TensorInfo aclProjectionWeightsInfo;
660  arm_compute::TensorInfo aclProjectionBiasInfo;
661  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
662  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
663 
664  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
665  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
666  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
667  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
668 
669 
670  if (!descriptor.m_CifgEnabled)
671  {
672  if (descriptor.m_PeepholeEnabled)
673  {
674  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
675  }
676  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
677  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
678  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
679 
680  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
681  &aclRecurrentToInputWeightsInfo,
682  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
683  &aclInputGateBiasInfo);
684  }
685 
686  if (descriptor.m_ProjectionEnabled)
687  {
688  if (paramsInfo.m_ProjectionBias != nullptr)
689  {
690  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
691  }
692  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
693 
694  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
695  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
696  }
697 
698  if (descriptor.m_PeepholeEnabled)
699  {
700  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
701  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
702 
703  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
704  }
705 
706  if (descriptor.m_LayerNormEnabled)
707  {
708  if (!descriptor.m_CifgEnabled)
709  {
710  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
711  }
712  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
713  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
714  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
715 
716  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
717  &aclInputLayerNormWeightsInfo,
718  &aclForgetLayerNormWeightsInfo,
719  &aclCellLayerNormWeightsInfo,
720  &aclOutputLayerNormWeightsInfo);
721  }
722 
723  // Need to be set at negative threshold to be compatible for ACL
724  float cell_threshold = descriptor.m_ClippingThresCell;
725  float projection_threshold = descriptor.m_ClippingThresProj;
726 
727  arm_compute::ActivationLayerInfo activationLayerInfo =
729 
730  for (unsigned int i = 0; i != maxTime; ++i)
731  {
732 
733  // Set LSTM input and output ITensors depending on:
734  // input format (timeMajor) & number of LSTM batches (maxTime).
735  arm_compute::ITensorInfo* outputLSTM;
736  arm_compute::ITensorInfo* inputLSTM;
737  // If there is only one LSTM time major batch, we will not concat OR permute.
738  // Set input of LSTM to be first input ITensor.
739  // Set output of LSTM to be final output ITensor.
740  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
741  if (maxTime == 1 && !descriptor.m_TimeMajor)
742  {
743  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
744  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
745  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
746  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
747  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
748  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
749  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
750  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
751  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
752  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
753  }
754  // If there is only one LSTM batch major batch, we will not concat, only permute.
755  // Set input of LSTM to be output of initial permute.
756  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
757  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
758  else if (maxTime == 1 && !descriptor.m_TimeMajor)
759  {
760  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
761  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
762  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
763  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
764  inputLSTM = &aclPermuteOutInfo;
765  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
766  }
767  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
768  else
769  {
770  inputLSTM = splitterOutputsTensorInfosPtr[i];
771  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
772  }
773 
774  statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
775  &aclInputToForgetWeightsInfo,
776  &aclInputToCellWeightsInfo,
777  &aclInputToOutputWeightsInfo,
778  &aclRecurrentToForgetWeightsInfo,
779  &aclRecurrentToCellWeightsInfo,
780  &aclRecurrentToOutputWeightsInfo,
781  &aclForgetGateBiasInfo,
782  &aclCellBiasInfo,
783  &aclOutputGateBiasInfo,
784  &aclOutputStateInInfo,
785  &aclCellStateInInfo,
786  &aclScratchBufferInfo,
787  &aclOutputStateOutInfo,
788  &aclCellStateOutInfo,
789  outputLSTM,
790  lstm_params_info,
791  activationLayerInfo,
792  cell_threshold,
793  projection_threshold);
794 
795  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
796  {
797  break;
798  }
799  }
800 
801  //
802  // Concat validate
803  //
804 
805  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
806  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
807  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
808  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
809 
810  TensorInfo concatOuputTensorInfo = TensorInfo(output);
811  concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
812  arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
813 
814  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
815  {
816  for (unsigned int i = 0; i < maxTime; ++i)
817  {
818  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
819  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
820  }
821 
822  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
823  if (!descriptor.m_TimeMajor)
824  {
825  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
826  &aclConcatOuputTensorInfo,
827  aclAxisConcat);
828  }
829  else
830  {
831  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
832  &aclOutputInfo,
833  aclAxisConcat);
834  }
835  }
836  // If only one LSTM batch, we do not concat and/or permute.
837  // Must ensure final output info is expanded to correct batch major dimensions.
838  else
839  {
840  if (!descriptor.m_TimeMajor)
841  {
842  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
843  BuildArmComputeTensorShape(shapeExpandBatchMajor));
844  }
845  else
846  {
847  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
848  BuildArmComputeTensorShape(shapeExpandTimeMajor));
849  }
850  }
851  //
852  // Permute validate
853  //
854  if (!descriptor.m_TimeMajor)
855  {
856  // Output now time major. Permute output back to batch major.
857  if (maxTime != 1)
858  {
859  statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
860  &aclOutputInfo,
861  arm_compute::PermutationVector(0U, 2U, 1U));
862  }
863  else
864  {
865  statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
866  &aclOutputInfo,
867  arm_compute::PermutationVector(0U, 2U, 1U));
868  }
869  }
870 
871  auto okCode = arm_compute::ErrorCode::OK;
872  if (statusPermute1.error_code() == okCode &&
873  statusSplit.error_code() == okCode &&
874  statusLSTM .error_code() == okCode &&
875  statusConcat.error_code() == okCode &&
876  statusPermute2.error_code() == okCode)
877  {
878  return arm_compute::Status(arm_compute::ErrorCode::OK,
879  "All Unidirectional Sequence LSTM layer validate status OK.");
880  }
881  else
882  {
883  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
884  "Unidirectional Sequence LSTM layer validate status failed.");
885  }
886 }
887 
// Releases the backing memory of every constant-parameter CLTensor owned by this
// workload once ACL has finished importing them. FreeTensorIfUnused is expected
// to be a no-op for tensors that were never allocated (e.g. the optional CIFG,
// peephole, projection and layer-normalization weights when the corresponding
// descriptor feature is disabled) — TODO confirm against ClWorkloadUtils.
void ClUnidirectionalSequenceLstmFloatWorkload::FreeUnusedTensors()
{
    // Input-to-gate weight matrices.
    FreeTensorIfUnused(m_InputToInputWeightsTensor);
    FreeTensorIfUnused(m_InputToForgetWeightsTensor);
    FreeTensorIfUnused(m_InputToCellWeightsTensor);
    FreeTensorIfUnused(m_InputToOutputWeightsTensor);
    // Recurrent (hidden-state) weight matrices.
    FreeTensorIfUnused(m_RecurrentToInputWeightsTensor);
    FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor);
    FreeTensorIfUnused(m_RecurrentToCellWeightsTensor);
    FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor);
    // Peephole weight vectors (only present when m_PeepholeEnabled).
    FreeTensorIfUnused(m_CellToInputWeightsTensor);
    FreeTensorIfUnused(m_CellToForgetWeightsTensor);
    FreeTensorIfUnused(m_CellToOutputWeightsTensor);
    // Gate bias vectors.
    FreeTensorIfUnused(m_InputGateBiasTensor);
    FreeTensorIfUnused(m_ForgetGateBiasTensor);
    FreeTensorIfUnused(m_CellBiasTensor);
    FreeTensorIfUnused(m_OutputGateBiasTensor);
    // Projection layer parameters (only present when m_ProjectionEnabled).
    FreeTensorIfUnused(m_ProjectionWeightsTensor);
    FreeTensorIfUnused(m_ProjectionBiasTensor);
    // Layer-normalization weights (only present when m_LayerNormEnabled).
    FreeTensorIfUnused(m_InputLayerNormWeightsTensor);
    FreeTensorIfUnused(m_ForgetLayerNormWeightsTensor);
    FreeTensorIfUnused(m_CellLayerNormWeightsTensor);
    FreeTensorIfUnused(m_OutputLayerNormWeightsTensor);
    // Intermediate scratch buffer used by the LSTM computation.
    FreeTensorIfUnused(m_ScratchBuffer);
}
913 
914 } //namespace armnn
armnn::OriginsDescriptor::GetConcatAxis
unsigned int GetConcatAxis() const
Get the concatenation axis value.
Definition: Descriptors.cpp:162
armnn::ViewsDescriptor
A ViewsDescriptor for the SplitterLayer.
Definition: Descriptors.hpp:244
armnn::LstmInputParamsInfo::GetCellBias
const TensorInfo & GetCellBias() const
Definition: LstmParams.hpp:173
armnn::InitializeArmComputeClTensorData
void InitializeArmComputeClTensorData(arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)
Definition: ClWorkloadUtils.hpp:124
armnn::LstmDescriptor::m_TimeMajor
bool m_TimeMajor
Enable/disable time major.
Definition: Descriptors.hpp:1154
armnn::LstmInputParamsInfo::GetInputToCellWeights
const TensorInfo & GetInputToCellWeights() const
Definition: LstmParams.hpp:129
WorkloadUtils.hpp
armnn::TensorInfo
Definition: Tensor.hpp:152
ClUnidirectionalSequenceLstmFloatWorkload.hpp
armnn::ClUnidirectionalSequenceLstmFloatWorkloadValidate
arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate(const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
Definition: ClUnidirectionalSequenceLstmFloatWorkload.cpp:508
armnn::OriginsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:192
armnn::LstmInputParamsInfo::GetProjectionBias
const TensorInfo & GetProjectionBias() const
Definition: LstmParams.hpp:185
armnn::LstmInputParamsInfo::GetInputGateBias
const TensorInfo & GetInputGateBias() const
Definition: LstmParams.hpp:165
armnn::LstmInputParamsInfo::GetRecurrentToInputWeights
const TensorInfo & GetRecurrentToInputWeights() const
Definition: LstmParams.hpp:137
armnn::LstmInputParamsInfo::GetRecurrentToForgetWeights
const TensorInfo & GetRecurrentToForgetWeights() const
Definition: LstmParams.hpp:141
armnnUtils::Permuted
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:125
armnn::ViewsDescriptor::SetViewSize
Status SetViewSize(uint32_t view, uint32_t coord, uint32_t value)
Set the size of the views.
Definition: Descriptors.cpp:322
armnn::TypedWorkload
Definition: Workload.hpp:101
armnn::LstmInputParamsInfo::GetRecurrentToCellWeights
const TensorInfo & GetRecurrentToCellWeights() const
Definition: LstmParams.hpp:145
NumericCast.hpp
armnn::ClUnidirectionalSequenceLstmFloatWorkload::ClUnidirectionalSequenceLstmFloatWorkload
ClUnidirectionalSequenceLstmFloatWorkload(const UnidirectionalSequenceLstmQueueDescriptor &descriptor, const WorkloadInfo &info, const arm_compute::CLCompileContext &clCompileContext)
Definition: ClUnidirectionalSequenceLstmFloatWorkload.cpp:32
armnn::ViewsDescriptor::SetViewOriginCoord
Status SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value)
@brief Set the view origin coordinates.
Definition: Descriptors.cpp:317
armnn::LstmInputParamsInfo::GetInputLayerNormWeights
const TensorInfo & GetInputLayerNormWeights() const
Definition: LstmParams.hpp:189
armnn::LstmDescriptor::m_PeepholeEnabled
bool m_PeepholeEnabled
Enable/disable peephole.
Definition: Descriptors.hpp:1148
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::LstmDescriptor::m_ClippingThresProj
float m_ClippingThresProj
Clipping threshold value for the projection.
Definition: Descriptors.hpp:1144
armnn::QueueDescriptorWithParameters::m_Parameters
LayerDescriptor m_Parameters
Definition: WorkloadData.hpp:66
armnn::LstmInputParamsInfo::GetCellToInputWeights
const TensorInfo & GetCellToInputWeights() const
Definition: LstmParams.hpp:153
armnn::TensorShape::GetNumDimensions
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
armnn::LstmInputParamsInfo::GetRecurrentToOutputWeights
const TensorInfo & GetRecurrentToOutputWeights() const
Definition: LstmParams.hpp:149
armnn::LstmInputParamsInfo::GetInputToInputWeights
const TensorInfo & GetInputToInputWeights() const
Definition: LstmParams.hpp:121
ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID
#define ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID(label)
Creates a profiling event that uses GetGuid() and GetName() from the calling class.
Definition: ClWorkloadUtils.hpp:36
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
armnn::DataType
DataType
Definition: Types.hpp:48
armnn::LstmInputParamsInfo::GetForgetGateBias
const TensorInfo & GetForgetGateBias() const
Definition: LstmParams.hpp:169
ClWorkloadUtils.hpp
armnn::ConvertLstmActivationFuncToAclLayerInfo
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Definition: ArmComputeUtils.hpp:118
armnn::LstmInputParamsInfo::GetCellToForgetWeights
const TensorInfo & GetCellToForgetWeights() const
Definition: LstmParams.hpp:157
ArmComputeUtils.hpp
Permute.hpp
armnn::BoostLogSeverityMapping::info
@ info
armnn::OriginsDescriptor::SetConcatAxis
void SetConcatAxis(unsigned int concatAxis)
Set the concatenation axis value.
Definition: Descriptors.cpp:158
armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition: Tensor.hpp:200
ARMNN_REPORT_PROFILING_WORKLOAD_DESC
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
armnn::IClTensorHandle
Definition: IClTensorHandle.hpp:13
armnn::LstmDescriptor
An LstmDescriptor for the LstmLayer.
Definition: Descriptors.hpp:1102
armnn::Status
Status
Definition: Types.hpp:42
ClTensorHandle.hpp
armnn::LstmInputParamsInfo::GetInputToOutputWeights
const TensorInfo & GetInputToOutputWeights() const
Definition: LstmParams.hpp:133
armnn::LstmDescriptor::m_CifgEnabled
bool m_CifgEnabled
Enable/disable cifg (coupled input & forget gate).
Definition: Descriptors.hpp:1146
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::LstmInputParamsInfo::GetOutputGateBias
const TensorInfo & GetOutputGateBias() const
Definition: LstmParams.hpp:177
armnn::LstmDescriptor::m_LayerNormEnabled
bool m_LayerNormEnabled
Enable/disable layer normalization.
Definition: Descriptors.hpp:1152
armnn::LstmInputParamsInfo::GetCellToOutputWeights
const TensorInfo & GetCellToOutputWeights() const
Definition: LstmParams.hpp:161
armnn::ViewsDescriptor::GetNumDimensions
uint32_t GetNumDimensions() const
Get the number of dimensions.
Definition: Descriptors.cpp:307
armnn::OriginsDescriptor
An OriginsDescriptor for the ConcatLayer.
Definition: Descriptors.hpp:201
armnn::LstmInputParamsInfo::GetOutputLayerNormWeights
const TensorInfo & GetOutputLayerNormWeights() const
Definition: LstmParams.hpp:201
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::OriginsDescriptor::SetViewOriginCoord
Status SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value)
@brief Set the view origin coordinates.
Definition: Descriptors.cpp:167
armnn::ComputeSplitAxis
std::set< unsigned int > ComputeSplitAxis(const armnn::SplitterDescriptor &desc, const TensorShape &input)
Calculates the axis values for split operation.
Definition: WorkloadUtils.cpp:377
armnn::LstmInputParamsInfo
Definition: LstmParams.hpp:63
ArmComputeTensorUtils.hpp
armnn::LstmDescriptor::m_ProjectionEnabled
bool m_ProjectionEnabled
Enable/disable the projection layer.
Definition: Descriptors.hpp:1150
armnn::UnidirectionalSequenceLstmQueueDescriptor
Definition: WorkloadData.hpp:696
armnn::LstmInputParamsInfo::GetProjectionWeights
const TensorInfo & GetProjectionWeights() const
Definition: LstmParams.hpp:181
armnn::ClUnidirectionalSequenceLstmFloatWorkload::Execute
virtual void Execute() const override
Definition: ClUnidirectionalSequenceLstmFloatWorkload.cpp:482
armnn::LstmDescriptor::m_ActivationFunc
uint32_t m_ActivationFunc
The activation function to use.
Definition: Descriptors.hpp:1140
armnn::LstmDescriptor::m_ClippingThresCell
float m_ClippingThresCell
Clipping threshold value for the cell state.
Definition: Descriptors.hpp:1142
armnn::LstmInputParamsInfo::m_ProjectionBias
const TensorInfo * m_ProjectionBias
Definition: LstmParams.hpp:105
armnnUtils::GetTensorShape
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:21
armnn::LstmInputParamsInfo::GetForgetLayerNormWeights
const TensorInfo & GetForgetLayerNormWeights() const
Definition: LstmParams.hpp:193
armnn::LstmInputParamsInfo::GetCellLayerNormWeights
const TensorInfo & GetCellLayerNormWeights() const
Definition: LstmParams.hpp:197
armnn::LstmInputParamsInfo::GetInputToForgetWeights
const TensorInfo & GetInputToForgetWeights() const
Definition: LstmParams.hpp:125