const uint32_t numBatches = inputShape[0];
const uint32_t inputSize  = inputShape[1];
const uint32_t outputSize = outputStateInShape[1];
const uint32_t numUnits   = cellStateInShape[1];
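// Flags for the optional QLSTM features configured on this workload.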
const bool cifgEnabled       = m_Data.m_Parameters.m_CifgEnabled;
const bool peepholeEnabled   = m_Data.m_Parameters.m_PeepholeEnabled;
const bool projectionEnabled = m_Data.m_Parameters.m_ProjectionEnabled;
const bool layerNormEnabled  = m_Data.m_Parameters.m_LayerNormEnabled;
std::unique_ptr<Decoder<float>> inputDecoder =
        MakeDecoder<float>(inputInfo, inputs[0]->Map());
std::unique_ptr<Decoder<float>> outputStateInDecoder =
        MakeDecoder<float>(outputStateInInfo, inputs[1]->Map());
std::unique_ptr<Decoder<float>> cellStateInDecoder =
        MakeDecoder<float>(cellStateInInfo, inputs[2]->Map());
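// The output tensors are both read and written during the calculation, so each gets a decoder and an encoder.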
std::unique_ptr<Decoder<float>> outputStateOutDecoder =
        MakeDecoder<float>(outputStateOutInfo, outputs[0]->Map());
std::unique_ptr<Decoder<float>> cellStateOutDecoder =
        MakeDecoder<float>(cellStateOutInfo, outputs[1]->Map());
std::unique_ptr<Decoder<float>> outputDecoder =
        MakeDecoder<float>(outputInfo, outputs[2]->Map());
std::unique_ptr<Encoder<float>> outputStateOutEncoder =
        MakeEncoder<float>(outputStateOutInfo, outputs[0]->Map());
std::unique_ptr<Encoder<float>> cellStateOutEncoder =
        MakeEncoder<float>(cellStateOutInfo, outputs[1]->Map());
std::unique_ptr<Encoder<float>> outputEncoder =
        MakeEncoder<float>(outputInfo, outputs[2]->Map());
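// Decoders for the mandatory weight tensors (input-to-gate and recurrent-to-gate).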
std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor<void>());
std::unique_ptr<Decoder<float>> inputToCellWeightsDecoder = MakeDecoder<float>(
        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor<void>());
std::unique_ptr<Decoder<float>> inputToOutputWeightsDecoder = MakeDecoder<float>(
        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor<void>());
std::unique_ptr<Decoder<float>> recurrentToForgetWeightsDecoder = MakeDecoder<float>(
        m_RecurrentToForgetWeightsTensor->GetTensorInfo(),
        m_RecurrentToForgetWeightsTensor->GetConstTensor<void>());
std::unique_ptr<Decoder<float>> recurrentToCellWeightsDecoder = MakeDecoder<float>(
        m_RecurrentToCellWeightsTensor->GetTensorInfo(),
        m_RecurrentToCellWeightsTensor->GetConstTensor<void>());
std::unique_ptr<Decoder<float>> recurrentToOutputWeightsDecoder = MakeDecoder<float>(
        m_RecurrentToOutputWeightsTensor->GetTensorInfo(),
        m_RecurrentToOutputWeightsTensor->GetConstTensor<void>());
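// Decoders for the optional weights and biases; they stay empty unless the matching feature is enabled below.
// Optional CIFG parameters.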
std::unique_ptr<Decoder<float>> inputToInputWeightsDecoder;
std::unique_ptr<Decoder<float>> recurrentToInputWeightsDecoder;
std::unique_ptr<Decoder<float>> inputGateBiasDecoder;
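// Optional peephole parameters.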
std::unique_ptr<Decoder<float>> cellToInputWeightsDecoder;
std::unique_ptr<Decoder<float>> cellToForgetWeightsDecoder;
std::unique_ptr<Decoder<float>> cellToOutputWeightsDecoder;
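// Optional projection parameters.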
std::unique_ptr<Decoder<float>> projectionWeightsDecoder;
std::unique_ptr<Decoder<float>> projectionBiasDecoder;
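// Optional layer normalisation parameters.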
std::unique_ptr<Decoder<float>> inputLayerNormWeightsDecoder;
std::unique_ptr<Decoder<float>> forgetLayerNormWeightsDecoder;
std::unique_ptr<Decoder<float>> cellLayerNormWeightsDecoder;
std::unique_ptr<Decoder<float>> outputLayerNormWeightsDecoder;
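// Gate bias decoders; the biases are only applied when layer normalisation is enabled (see below).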
std::unique_ptr<Decoder<float>> forgetGateBiasDecoder;
std::unique_ptr<Decoder<float>> cellGateBiasDecoder;
std::unique_ptr<Decoder<float>> outputGateBiasDecoder;
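// Quantised scratch buffers for the gate activations, the hidden state and the pre-projection output.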
const uint32_t stateTensorSize = numBatches * numUnits;
std::vector<int16_t> inputGateData(stateTensorSize);
std::vector<int16_t> cellGateData(stateTensorSize);
std::vector<int16_t> forgetGateData(stateTensorSize);
std::vector<int16_t> outputGateData(stateTensorSize);
std::vector<int32_t> hiddenStateData(stateTensorSize);
std::vector<int16_t> outputInt16Data(numBatches * outputSize);
// TensorInfo describing the quantised hidden-state scratch buffer. Its scale and zero point come from
// the workload parameters; the shape and data type written here are assumptions.
armnn::TensorInfo hiddenStateInfo({numBatches, numUnits},
                                  armnn::DataType::QAsymmS8,
                                  m_Data.m_Parameters.m_HiddenStateScale,
                                  m_Data.m_Parameters.m_HiddenStateZeroPoint);
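// Decoders/encoders over the scratch buffers; inputGateInfo, cellGateInfo, forgetGateInfo, outputGateInfo
// and outputInt16Info are the corresponding intermediate TensorInfos.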
std::unique_ptr<Decoder<float>> inputGateDecoder =
        MakeDecoder<float>(inputGateInfo, inputGateData.data());
std::unique_ptr<Decoder<float>> cellGateDecoder =
        MakeDecoder<float>(cellGateInfo, cellGateData.data());
std::unique_ptr<Decoder<float>> forgetGateDecoder =
        MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
std::unique_ptr<Decoder<float>> outputGateDecoder =
        MakeDecoder<float>(outputGateInfo, outputGateData.data());
std::unique_ptr<Decoder<float>> hiddenStateDecoder =
        MakeDecoder<float>(hiddenStateInfo, hiddenStateData.data());
std::unique_ptr<Encoder<float>> inputGateEncoder =
        MakeEncoder<float>(inputGateInfo, inputGateData.data());
std::unique_ptr<Encoder<float>> cellGateEncoder =
        MakeEncoder<float>(cellGateInfo, cellGateData.data());
std::unique_ptr<Encoder<float>> forgetGateEncoder =
        MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
std::unique_ptr<Encoder<float>> outputGateEncoder =
        MakeEncoder<float>(outputGateInfo, outputGateData.data());
std::unique_ptr<Encoder<float>> hiddenStateEncoder =
        MakeEncoder<float>(hiddenStateInfo, hiddenStateData.data());
std::unique_ptr<Decoder<float>> outputInt16Decoder =
        MakeDecoder<float>(outputInt16Info, outputInt16Data.data());
std::unique_ptr<Encoder<float>> outputInt16Encoder =
        MakeEncoder<float>(outputInt16Info, outputInt16Data.data());
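// Create decoders for whichever optional parameters are present (CIFG input-gate weights first).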
inputToInputWeightsDecoder = MakeDecoder<float>(
        m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor<void>());
recurrentToInputWeightsDecoder = MakeDecoder<float>(m_RecurrentToInputWeightsTensor->GetTensorInfo(),
                                                    m_RecurrentToInputWeightsTensor->GetConstTensor<void>());
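// Peephole weights connect the cell state directly to each gate.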
cellToInputWeightsDecoder = MakeDecoder<float>(
        m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor<void>());
cellToForgetWeightsDecoder = MakeDecoder<float>(
        m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor<void>());
cellToOutputWeightsDecoder = MakeDecoder<float>(
        m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor<void>());
if (projectionEnabled)
{
    projectionWeightsDecoder = MakeDecoder<float>(
            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor<void>());
    if (m_ProjectionBiasTensor)
    {
        projectionBiasDecoder = MakeDecoder<float>(
                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor<void>());
    }
}
if (layerNormEnabled)
{
    inputLayerNormWeightsDecoder = MakeDecoder<float>(m_InputLayerNormWeightsTensor->GetTensorInfo(),
                                                      m_InputLayerNormWeightsTensor->GetConstTensor<void>());

    // The gate biases are only applied when layer normalisation is enabled; each bias TensorInfo uses
    // scale = (corresponding layer-norm weights scale / 1024). Shape and data type here are assumptions.
    armnn::TensorInfo inputGateBiasTensorInfo({numUnits}, armnn::DataType::Signed32,
            m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
    inputGateBiasDecoder = MakeDecoder<float>(
            inputGateBiasTensorInfo, m_InputGateBiasTensor->GetConstTensor<void>());

    forgetLayerNormWeightsDecoder = MakeDecoder<float>(
            m_ForgetLayerNormWeightsTensor->GetTensorInfo(),
            m_ForgetLayerNormWeightsTensor->GetConstTensor<void>());
    cellLayerNormWeightsDecoder = MakeDecoder<float>(
            m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetConstTensor<void>());
    outputLayerNormWeightsDecoder = MakeDecoder<float>(
            m_OutputLayerNormWeightsTensor->GetTensorInfo(),
            m_OutputLayerNormWeightsTensor->GetConstTensor<void>());

    armnn::TensorInfo forgetGateBiasTensorInfo({numUnits}, armnn::DataType::Signed32,
            m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
    forgetGateBiasDecoder = MakeDecoder<float>(
            forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetConstTensor<void>());

    armnn::TensorInfo cellGateBiasTensorInfo({numUnits}, armnn::DataType::Signed32,
            m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
    cellGateBiasDecoder = MakeDecoder<float>(
            cellGateBiasTensorInfo, m_CellBiasTensor->GetConstTensor<void>());

    armnn::TensorInfo outputGateBiasTensorInfo({numUnits}, armnn::DataType::Signed32,
            m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
    outputGateBiasDecoder = MakeDecoder<float>(
            outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetConstTensor<void>());
}
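// Zero the gate and hidden-state scratch buffers before accumulating into them.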
ZeroVector(*inputGateEncoder, stateTensorSize);
ZeroVector(*forgetGateEncoder, stateTensorSize);
ZeroVector(*cellGateEncoder, stateTensorSize);
ZeroVector(*outputGateEncoder, stateTensorSize);
ZeroVector(*hiddenStateEncoder, stateTensorSize);
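// Input contribution: gate += inputToGateWeights * input. Where calls below are truncated, the call
// heads, rescale factors and activation arguments are assumptions completed from the LstmUtils
// signatures and standard LSTM gate math.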
MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsDecoder,
                                    numUnits, inputSize, *inputDecoder, numBatches, *inputGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsDecoder,
                                    numUnits, inputSize, *inputDecoder, numBatches, *forgetGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsDecoder,
                                    numUnits, inputSize, *inputDecoder, numBatches, *cellGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsDecoder,
                                    numUnits, inputSize, *inputDecoder, numBatches, *outputGateEncoder);
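// Recurrent contribution: gate += recurrentToGateWeights * outputStateIn.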
MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsDecoder,
                                    numUnits, outputSize, *outputStateInDecoder, numBatches, *inputGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsDecoder,
                                    numUnits, outputSize, *outputStateInDecoder, numBatches, *forgetGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsDecoder,
                                    numUnits, outputSize, *outputStateInDecoder, numBatches, *cellGateEncoder);
MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsDecoder,
                                    numUnits, outputSize, *outputStateInDecoder, numBatches, *outputGateEncoder);
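// Peephole contribution to the input gate (applies when peephole is enabled and CIFG is not):
// inputGate += cellToInputWeights .* cellStateIn.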
VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsDecoder,
                                        numUnits, *cellStateInDecoder, numBatches, *inputGateEncoder);
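// Input gate: optional layer normalisation and bias, then a sigmoid activation.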
if (layerNormEnabled)
{
    inputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
                                       m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
                                       1024);
    inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
    MeanStddevNormalization(*inputGateDecoder,
                            *inputGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
    inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
    VectorBatchVectorCwiseProduct(*inputLayerNormWeightsDecoder,
                                  numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
    inputGateInfo.SetQuantizationScale(1.f / 4096);
    inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
    VectorBatchVectorAdd(*inputGateBiasDecoder,
                         numUnits, *inputGateDecoder, numBatches, *inputGateEncoder);
    inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
}
inputGateEncoder = MakeEncoder<float>(inputGateInfo, inputGateData.data());
// Input gate sigmoid.
Activation(*inputGateDecoder, *inputGateEncoder,
           TensorInfo({numUnits, numBatches}, internalType),
           ActivationFunction::Sigmoid, 0, 0);
inputGateDecoder = MakeDecoder<float>(inputGateInfo, inputGateData.data());
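// Peephole contribution to the forget gate: forgetGate += cellToForgetWeights .* cellStateIn.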
VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsDecoder, numUnits,
                                        *cellStateInDecoder, numBatches, *forgetGateEncoder);
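// Forget gate: optional layer normalisation and bias, then a sigmoid activation.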
if (layerNormEnabled)
{
    forgetGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
                                        m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
                                        1024);
    forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
    MeanStddevNormalization(*forgetGateDecoder,
                            *forgetGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
    forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
    VectorBatchVectorCwiseProduct(*forgetLayerNormWeightsDecoder,
                                  numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
    forgetGateInfo.SetQuantizationScale(1.f / 4096);
    forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
    VectorBatchVectorAdd(*forgetGateBiasDecoder,
                         numUnits, *forgetGateDecoder, numBatches, *forgetGateEncoder);
    forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
}
forgetGateEncoder = MakeEncoder<float>(forgetGateInfo, forgetGateData.data());
// Forget gate sigmoid.
Activation(*forgetGateDecoder, *forgetGateEncoder,
           TensorInfo({numUnits, numBatches}, internalType),
           ActivationFunction::Sigmoid, 0, 0);
forgetGateDecoder = MakeDecoder<float>(forgetGateInfo, forgetGateData.data());
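// Cell gate (candidate cell state): optional layer normalisation and bias, then a tanh activation.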
if (layerNormEnabled)
{
    cellGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
                                      m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
                                      1024);
    cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
    MeanStddevNormalization(*cellGateDecoder,
                            *cellGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
    cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
    VectorBatchVectorCwiseProduct(*cellLayerNormWeightsDecoder,
                                  numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
    cellGateInfo.SetQuantizationScale(1.f / 4096);
    cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
    VectorBatchVectorAdd(*cellGateBiasDecoder,
                         numUnits, *cellGateDecoder, numBatches, *cellGateEncoder);
    cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
}
cellGateEncoder = MakeEncoder<float>(cellGateInfo, cellGateData.data());
// Cell gate tanh.
Activation(*cellGateDecoder, *cellGateEncoder,
           TensorInfo({numUnits, numBatches}, internalType),
           ActivationFunction::TanH, 1.0f, 1.0f);
cellGateDecoder = MakeDecoder<float>(cellGateInfo, cellGateData.data());
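// Cell state update: cellStateOut = forgetGate .* cellStateIn + inputGate .* cellGate.
// With CIFG the input gate is derived as (1 - forgetGate) via Sub1Vector.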
VectorVectorCwiseProduct(*forgetGateDecoder, *cellStateInDecoder, stateTensorSize, *cellStateOutEncoder);
if (cifgEnabled)
{
    Sub1Vector(*forgetGateDecoder, stateTensorSize, *forgetGateEncoder);
    VectorVectorCwiseProductAccumulate(*cellGateDecoder, *forgetGateDecoder, stateTensorSize, *cellStateOutEncoder);
}
else
{
    VectorVectorCwiseProductAccumulate(*cellGateDecoder, *inputGateDecoder, stateTensorSize, *cellStateOutEncoder);
}
if (m_Data.m_Parameters.m_CellClip > 0.0)
{
    ClipVector(*cellStateOutDecoder, stateTensorSize, m_Data.m_Parameters.m_CellClip, *cellStateOutEncoder);
}
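// Output gate: peephole contribution from the updated cell state, optional layer normalisation
// and bias, then a sigmoid activation.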
VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsDecoder,
                                        numUnits, *cellStateOutDecoder, numBatches, *outputGateEncoder);
if (layerNormEnabled)
{
    outputGateInfo.SetQuantizationScale(inputInfo.GetQuantizationScale() *
                                        m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() *
                                        1024);
    outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
    MeanStddevNormalization(*outputGateDecoder,
                            *outputGateEncoder, numUnits, numBatches, m_LayerNormEpsilon);
    outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
    VectorBatchVectorCwiseProduct(*outputLayerNormWeightsDecoder, numUnits, *outputGateDecoder,
                                  numBatches, *outputGateEncoder);
    outputGateInfo.SetQuantizationScale(1.f / 4096);
    outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
    VectorBatchVectorAdd(*outputGateBiasDecoder, numUnits, *outputGateDecoder, numBatches, *outputGateEncoder);
    outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
}
outputGateEncoder = MakeEncoder<float>(outputGateInfo, outputGateData.data());
// Output gate sigmoid.
Activation(*outputGateDecoder, *outputGateEncoder,
           TensorInfo({numUnits, numBatches}, internalType),
           ActivationFunction::Sigmoid, 0, 0);
outputGateDecoder = MakeDecoder<float>(outputGateInfo, outputGateData.data());
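// Hidden state: hiddenState = outputGate .* tanh(cellStateOut). The tanh of the cell state is written
// through cellGateEncoder here; its product with the output gate fills the hidden-state buffer read below.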
Activation(*cellStateOutDecoder, *cellGateEncoder,
           TensorInfo({numUnits, numBatches}, internalType),
           ActivationFunction::TanH, 1.0f, 1.0f);
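// Projection (when enabled) maps the hidden state to outputSize with optional clipping; otherwise the
// hidden state is copied straight to the output. Truncated call heads here are likewise assumptions
// completed from the LstmUtils signatures.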
if (m_Data.m_Parameters.m_ProjectionEnabled)
{
    if (m_ProjectionBiasTensor)
    {
        // Assumed: seed the int16 projection buffer with the projection bias for every batch.
        VectorBatchVectorAssign(*projectionBiasDecoder, outputSize, numBatches, *outputInt16Encoder);
    }
    MatrixBatchVectorMultiplyAccumulate(*projectionWeightsDecoder, outputSize, numUnits, *hiddenStateDecoder,
                                        numBatches, *outputInt16Encoder);
    CopyVector(*outputInt16Decoder, numBatches * outputSize, *outputEncoder);
    if (m_Data.m_Parameters.m_ProjectionClip > 0.0)
    {
        ClipVector(*outputDecoder, numBatches * outputSize, m_Data.m_Parameters.m_ProjectionClip, *outputEncoder);
    }
}
else
{
    CopyVector(*hiddenStateDecoder, numBatches * outputSize, *outputEncoder);
}
// The output state for the next step is the (possibly projected and clipped) output.
CopyVector(*outputDecoder, numBatches * outputSize, *outputStateOutEncoder);
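// For reference, the gate sequence above implements the standard QLSTM cell update per batch
// (peephole, CIFG, layer-norm and projection terms apply only when the matching flag is enabled):
//   i_t = sigmoid(W_i x_t + R_i h_{t-1} + p_i .* c_{t-1})      (or 1 - f_t when CIFG is enabled)
//   f_t = sigmoid(W_f x_t + R_f h_{t-1} + p_f .* c_{t-1})
//   g_t = tanh   (W_g x_t + R_g h_{t-1})
//   c_t = f_t .* c_{t-1} + i_t .* g_t                          (clipped to +/- m_CellClip if set)
//   o_t = sigmoid(W_o x_t + R_o h_{t-1} + p_o .* c_t)
//   h_t = o_t .* tanh(c_t), optionally projected to outputSize and clipped to +/- m_ProjectionClip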
// LstmUtils helpers referenced above:
void ZeroVector(armnn::Encoder<float>& vector, uint32_t vSize);
void CopyVector(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Encoder<float>& outResult);
void ClipVector(armnn::Decoder<float>& vector, uint32_t vSize, float absLimit, armnn::Encoder<float>& outResult);
void Sub1Vector(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Encoder<float>& result);
void MeanStddevNormalization(armnn::Decoder<float>& input_vector, armnn::Encoder<float>& output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon);
void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1, armnn::Decoder<float>& vector2, uint32_t vSize, armnn::Encoder<float>& outResult);
void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1, armnn::Decoder<float>& vector2, uint32_t vSize, armnn::Encoder<float>& outResult);
void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Decoder<float>& batchVector, uint32_t nBatch, armnn::Encoder<float>& outResult);
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Decoder<float>& batchVector, uint32_t nBatch, armnn::Encoder<float>& outResult);
void VectorBatchVectorAdd(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Decoder<float>& batchVector, uint32_t nBatch, armnn::Encoder<float>& outResult);
void VectorBatchVectorAssign(armnn::Decoder<float>& vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder<float>& outBatchVector);
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder<float>& vector, uint32_t nBatch, armnn::Encoder<float>& outResult);
std::unique_ptr<armnn::ScopedTensorHandle> AssignScopedTensorHandle(const armnn::ConstTensorHandle* ptr);
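// A minimal sketch of how one of these helpers can be written over the Decoder/Encoder iterators,
// assuming only Get(), Set() and operator++ as used throughout this reference backend:
//
//   void CopyVector(armnn::Decoder<float>& vector, uint32_t vSize, armnn::Encoder<float>& outResult)
//   {
//       for (uint32_t i = 0; i < vSize; ++i)
//       {
//           outResult.Set(vector.Get());   // dequantise from the source, requantise into the destination
//           ++vector;
//           ++outResult;
//       }
//   }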