21 #include <doctest/doctest.h>
25 using namespace armnn;
// Helper used by every test below: asks the layer to create its workload via the
// given factory, verifies with CHECK_MESSAGE that the IWorkload can be downcast to
// the requested concrete Workload type, then transfers ownership to the caller.
// NOTE(review): interior lines (e.g. the factory parameter and any support check
// using reasonIfUnsupported) are missing from this excerpt -- do not edit without
// the full definition.
33 template<
typename Workload>
34 std::unique_ptr<Workload> MakeAndCheckWorkload(
Layer& layer,
38 std::unique_ptr<IWorkload> workload = layer.
CreateWorkload(factory);
39 CHECK_MESSAGE(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
40 "Cannot convert to derived class");
41 std::string reasonIfUnsupported;
44 return std::unique_ptr<Workload>(
// Downcast is safe here: checked above via PolymorphicDowncast.
static_cast<Workload*
>(workload.release()));
// Builds a graph with a single activation layer (descriptor m_B = -10.0f visible),
// wires input->layer->output, creates tensor handles, and checks the generated
// workload's queue descriptor has one input and one output.
// NOTE(review): layer/graph construction lines are missing from this excerpt.
65 template <
typename ActivationWorkload, armnn::DataType DataType>
73 layerDesc.
m_B = -10.0f;
84 Connect(input, layer, tensorInfo);
85 Connect(layer, output, tensorInfo);
87 CreateTensorHandles(graph, factory);
90 auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, factory);
93 CHECK(queueDescriptor.
m_Inputs.size() == 1);
94 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Two-input elementwise workload test: connects input1/input2 to layer slots 0/1,
// creates tensor handles, and verifies the queue descriptor has 2 inputs, 1 output.
// NOTE(review): layer creation and remaining template parameters are not visible
// in this excerpt.
103 template <
typename WorkloadType,
104 typename DescriptorType,
120 Connect(input1, layer, tensorInfo, 0, 0);
121 Connect(input2, layer, tensorInfo, 0, 1);
122 Connect(layer, output, tensorInfo);
123 CreateTensorHandles(graph, factory);
126 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
128 auto queueDescriptor = workload->GetData();
129 CHECK(queueDescriptor.
m_Inputs.size() == 2);
130 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Variant of the two-input elementwise test parameterised only on workload type and
// data type; same wiring and 2-in/1-out queue-descriptor checks as above.
// NOTE(review): layer construction lines are missing from this excerpt.
136 template <
typename WorkloadType, armnn::DataType DataType>
154 Connect(input1, layer, tensorInfo, 0, 0);
155 Connect(input2, layer, tensorInfo, 0, 1);
156 Connect(layer, output, tensorInfo);
157 CreateTensorHandles(graph, factory);
160 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
162 auto queueDescriptor = workload->GetData();
163 CHECK(queueDescriptor.
m_Inputs.size() == 2);
164 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Two-input elementwise test with a fused-activation "additional information" blob:
// attaches an ActivationDescriptor (m_A=10, m_B=5) to the layer, asserts the stored
// values survive the round trip, then checks the queue descriptor exposes the blob
// via GetAdditionalInformation and has 2 inputs / 1 output.
// NOTE(review): the lines attaching the blob to the layer are missing here.
170 template<
typename WorkloadType,
171 typename DescriptorType,
179 auto activationDesc = std::make_shared<ActivationDescriptor>();
180 activationDesc->m_A = 10.0f;
181 activationDesc->m_B = 5.0f;
193 Connect(input1, layer, tensorInfo, 0, 0);
194 Connect(input2, layer, tensorInfo, 0, 1);
195 Connect(layer, output, tensorInfo);
196 CreateTensorHandles(graph, factory);
199 std::shared_ptr<ActivationDescriptor>
202 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
203 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
209 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
211 DescriptorType queueDescriptor = workload->GetData();
214 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
222 CHECK(queueDescriptor.m_Inputs.size() == 2);
223 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Second fused-activation-blob elementwise variant; identical structure to the one
// above (activation m_A=10/m_B=5, 2-in/1-out checks, blob fetched from the queue
// descriptor after the size checks).
// NOTE(review): interior lines are missing from this excerpt.
229 template<
typename WorkloadType,
230 typename DescriptorType,
238 auto activationDesc = std::make_shared<ActivationDescriptor>();
239 activationDesc->m_A = 10.0f;
240 activationDesc->m_B = 5.0f;
252 Connect(input1, layer, tensorInfo, 0, 0);
253 Connect(input2, layer, tensorInfo, 0, 1);
254 Connect(layer, output, tensorInfo);
255 CreateTensorHandles(graph, factory);
258 std::shared_ptr<ActivationDescriptor>
261 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
262 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
268 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
270 DescriptorType queueDescriptor = workload->GetData();
271 CHECK(queueDescriptor.m_Inputs.size() == 2);
272 CHECK(queueDescriptor.m_Outputs.size() == 1);
274 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
// Third fused-activation-blob elementwise variant. Here the blob is read back from
// the layer itself (layer->template GetAdditionalInformation<ActivationDescriptor>())
// before the workload is created, then re-fetched from the queue descriptor.
// NOTE(review): interior lines are missing from this excerpt.
285 template<
typename WorkloadType,
286 typename DescriptorType,
294 auto activationDesc = std::make_shared<ActivationDescriptor>();
295 activationDesc->m_A = 10.0f;
296 activationDesc->m_B = 5.0f;
308 Connect(input1, layer, tensorInfo, 0, 0);
309 Connect(input2, layer, tensorInfo, 0, 1);
310 Connect(layer, output, tensorInfo);
311 CreateTensorHandles(graph, factory);
314 std::shared_ptr<ActivationDescriptor>
315 activationDescPtr = layer->template GetAdditionalInformation<ActivationDescriptor>();
317 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
318 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
324 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
326 DescriptorType queueDescriptor = workload->GetData();
328 queueDescriptor.template GetAdditionalInformation<ActivationDescriptor>();
330 CHECK(queueDescriptor.m_Inputs.size() == 2);
331 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Single-input (unary) workload test: input->layer->output wiring and a 1-in/1-out
// queue-descriptor check.
// NOTE(review): layer creation lines are missing from this excerpt.
341 template <
typename WorkloadType,
342 typename DescriptorType,
355 Connect(input, layer, tensorInfo, 0, 0);
356 Connect(layer, output, tensorInfo, 0, 0);
357 CreateTensorHandles(graph, factory);
359 auto workload = MakeAndCheckWorkload<WorkloadType>(*layer, factory);
360 DescriptorType queueDescriptor = workload->GetData();
362 CHECK(queueDescriptor.m_Inputs.size() == 1);
363 CHECK(queueDescriptor.m_Outputs.size() == 1);
// Batch-normalization workload test: picks an NHWC {2,4,4,3} or NCHW {2,3,4,4}
// tensor shape (presumably keyed on a data-layout parameter -- not visible here),
// sets m_Eps, fills the layer's Mean/Variance/Beta/Gamma ScopedTensorHandles,
// allocates (at least) Mean and Beta, then checks a 1-in/1-out queue descriptor.
// NOTE(review): Variance/Gamma Allocate() calls and the layout switch are among
// the lines missing from this excerpt.
368 template <
typename BatchNormalizationWorkloadType, armnn::DataType DataType>
369 std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWorkloadTest(
376 tensorShape = { 2, 4, 4, 3 };
380 tensorShape = { 2, 3, 4, 4 };
385 layerDesc.
m_Eps = 0.05f;
391 layer->
m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
392 layer->
m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
393 layer->
m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
394 layer->
m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
395 layer->
m_Mean->Allocate();
397 layer->
m_Beta->Allocate();
406 Connect(input, layer, tensorInfo);
407 Connect(layer, output, tensorInfo);
408 CreateTensorHandles(graph, factory);
411 auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
414 CHECK(queueDescriptor.
m_Inputs.size() == 1);
415 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Batch-normalization test with a fused-activation blob: same setup as the plain
// batch-norm test above, plus an ActivationDescriptor (m_A=10, m_B=5) whose stored
// values are asserted after attachment.
// NOTE(review): the blob-attachment line and several Allocate() calls are missing
// from this excerpt.
426 template <
typename BatchNormalizationWorkloadType, armnn::DataType DataType>
427 std::unique_ptr<BatchNormalizationWorkloadType> CreateBatchNormalizationWithBlobWorkloadTest(
434 tensorShape = { 2, 4, 4, 3 };
438 tensorShape = { 2, 3, 4, 4 };
443 layerDesc.
m_Eps = 0.05f;
449 layer->
m_Mean = std::make_unique<ScopedTensorHandle>(weightInfo);
450 layer->
m_Variance = std::make_unique<ScopedTensorHandle>(weightInfo);
451 layer->
m_Beta = std::make_unique<ScopedTensorHandle>(weightInfo);
452 layer->
m_Gamma = std::make_unique<ScopedTensorHandle>(weightInfo);
453 layer->
m_Mean->Allocate();
455 layer->
m_Beta->Allocate();
458 auto activationDesc = std::make_shared<ActivationDescriptor>();
459 activationDesc->m_A = 10.0f;
460 activationDesc->m_B = 5.0f;
467 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
468 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
479 Connect(input, layer, tensorInfo);
480 Connect(layer, output, tensorInfo);
481 CreateTensorHandles(graph, factory);
484 auto workload = MakeAndCheckWorkload<BatchNormalizationWorkloadType>(*layer, factory);
495 CHECK(queueDescriptor.
m_Inputs.size() == 1);
496 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Convolution2d workload test: constant weights feed layer input slot 1; workload
// is created with modelOptions and the queue descriptor must show 2 inputs / 1 output
// (input tensor + weights).
// NOTE(review): descriptor setup and bias handling are missing from this excerpt.
507 template <
typename Convolution2dWorkload, armnn::DataType DataType>
524 float inputsQScale = 1.0f;
534 weightsTensorInfo.SetConstant();
541 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
542 weights->m_LayerOutput->Allocate();
546 Connect(weights, layer, weightsTensorInfo, 0, 1);
548 CreateTensorHandles(graph, factory);
551 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
563 CHECK(queueDescriptor.
m_Inputs.size() == 2);
564 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Convolution2d test with a fused-activation blob: constant weights and bias feed
// layer slots 1 and 2, the activation blob (m_A=10, m_B=5) is asserted, and the
// queue descriptor must show 3 inputs (data, weights, bias) and 1 output.
// NOTE(review): the outputs check precedes the inputs check here, unlike the other
// tests; blob-attachment and descriptor-setup lines are missing from this excerpt.
570 template<
typename Convolution2dWorkload, armnn::DataType DataType>
571 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dFusedActivationWithBlobWorkloadTest(
588 float inputsQScale = 1.0f;
598 weightsTensorInfo.SetConstant();
602 auto activationDesc = std::make_shared<ActivationDescriptor>();
603 activationDesc->m_A = 10.0f;
604 activationDesc->m_B = 5.0f;
612 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
613 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
624 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
625 weights->m_LayerOutput->Allocate();
626 bias->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
631 Connect(weights, layer, weightsTensorInfo, 0, 1);
632 Connect(bias, layer, biasTensorInfo, 0, 2);
634 CreateTensorHandles(graph, factory);
637 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
657 CHECK(queueDescriptor.
m_Outputs.size() == 1);
658 CHECK(queueDescriptor.
m_Inputs.size() == 3);
// Convolution2d "fast math" test: constant weights and bias feed layer slots 1/2,
// workload created with modelOptions (presumably carrying the FastMathEnabled
// backend option -- not visible here); checks 3 inputs / 1 output.
664 template <
typename Convolution2dWorkload, armnn::DataType DataType>
665 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadFastMathTest(
armnn::IWorkloadFactory& factory,
681 float inputsQScale = 1.0f;
692 weightsTensorInfo.SetConstant();
694 biasTensorInfo.SetConstant();
704 Connect(weights, layer, weightsTensorInfo, 0, 1);
705 Connect(bias, layer, biasTensorInfo, 0, 2);
707 CreateTensorHandles(graph, factory);
710 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
721 CHECK(queueDescriptor.
m_Inputs.size() == 3);
722 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// LSTM workload test: batch=2, inputSize=2, numUnits=4, outputSize=4. The scratch
// buffer's unit multiplier depends on CIFG (3 when m_CifgEnabled, else 4). Wires
// 3 inputs (input, cellStateIn, outputStateIn) and 4 outputs (scratch,
// outputStateOut, cellStateOut, output) and checks those counts.
// NOTE(review): descriptor/weight setup and the peephole branch body are missing
// from this excerpt.
728 template <
typename LstmWorkload>
741 unsigned int batchSize = 2;
742 unsigned int inputSize = 2;
743 unsigned int numUnits = 4;
744 unsigned int outputSize = 4;
776 if (layerDesc.m_PeepholeEnabled)
799 armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits * (layerDesc.m_CifgEnabled ? 3 : 4) },
801 Connect(input, layer, lstmTensorInfo1, 0, 0);
802 Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
803 Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
804 Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
805 Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
806 Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
807 Connect(layer, output, lstmTensorInfo3, 3, 0);
809 CreateTensorHandles(graph, factory);
812 auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, factory);
817 CHECK(queueDescriptor.
m_Inputs.size() == 3);
818 CHECK(queueDescriptor.
m_Outputs.size() == 4);
// QuantizedLstm workload test: fixed quantization parameters for input/output,
// cell state, weights and biases; populates all 8 weight tensors and 4 bias
// tensors of m_QuantizedLstmParameters, allocates them, wires 3 inputs and
// 2 outputs, and checks those counts on the queue descriptor.
// NOTE(review): tensor-info construction and layer creation are missing from
// this excerpt.
828 template <
typename QuantizedLstmWorkload>
833 unsigned int numBatches = 2;
834 unsigned int inputSize = 2;
835 unsigned int outputSize = 4;
// Scale/offset constants mirror the reference QuantizedLstm quantization scheme.
838 float inputOutputScale = 0.0078125f;
839 int32_t inputOutputOffset = 128;
841 float cellStateScale = 0.00048828125f;
842 int32_t cellStateOffset = 0;
844 float weightsScale = 0.00408021f;
845 int32_t weightsOffset = 100;
847 float biasScale = 3.1876640625e-05f;
848 int32_t biasOffset = 0;
867 layer->m_QuantizedLstmParameters.m_InputToInputWeights =
868 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
869 layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
870 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
871 layer->m_QuantizedLstmParameters.m_InputToCellWeights =
872 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
873 layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
874 std::make_unique<ScopedTensorHandle>(inputWeightsInfo);
876 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
877 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
878 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
879 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
880 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
881 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
882 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
883 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
885 layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
886 layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
887 layer->m_QuantizedLstmParameters.m_CellBias = std::make_unique<ScopedTensorHandle>(biasInfo);
888 layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<ScopedTensorHandle>(biasInfo);
891 layer->m_QuantizedLstmParameters.m_InputToInputWeights->Allocate();
892 layer->m_QuantizedLstmParameters.m_InputToForgetWeights->Allocate();
893 layer->m_QuantizedLstmParameters.m_InputToCellWeights->Allocate();
894 layer->m_QuantizedLstmParameters.m_InputToOutputWeights->Allocate();
896 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->Allocate();
897 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Allocate();
898 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->Allocate();
899 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Allocate();
901 layer->m_QuantizedLstmParameters.m_InputGateBias->Allocate();
902 layer->m_QuantizedLstmParameters.m_ForgetGateBias->Allocate();
903 layer->m_QuantizedLstmParameters.m_CellBias->Allocate();
904 layer->m_QuantizedLstmParameters.m_OutputGateBias->Allocate();
931 Connect(input, layer, inputInfo, 0, 0);
932 Connect(cellStateIn, layer, cellStateInfo, 0, 1);
933 Connect(outputStateIn, layer, outputStateInfo, 0, 2);
935 Connect(layer, cellStateOut, cellStateInfo, 0, 0);
936 Connect(layer, outputStateOut, outputStateInfo, 1, 0);
938 CreateTensorHandles(graph, factory);
941 auto workload = MakeAndCheckWorkload<QuantizedLstmWorkload>(*layer, factory);
945 CHECK(queueDescriptor.
m_Inputs.size() == 3);
946 CHECK(queueDescriptor.
m_Outputs.size() == 2);
// QLstm workload test: batch=2, inputSize/numUnits/outputSize=4; output quantization
// is taken from the descriptor's hidden-state scale/zero-point, bias scale is derived
// as layerNormScale / 1024. Wires 3 inputs and 3 outputs and checks those counts.
// NOTE(review): the weight/layer-norm assignments here show only the RHS lines; the
// target members are missing from this excerpt.
967 template <
typename QLstmWorkload>
990 unsigned int numBatches = 2;
991 unsigned int inputSize = 4;
992 unsigned int numUnits = 4;
993 unsigned int outputSize = 4;
996 float inputScale = 0.0078125f;
997 int32_t inputOffset = 0;
1000 float outputScale = layerDesc.m_HiddenStateScale;
1001 int32_t outputOffset = layerDesc.m_HiddenStateZeroPoint;
1003 float cellStateScale = 3.05176e-05f;
1004 int32_t cellStateOffset = 0;
1006 float weightsScale = 0.00784314f;
1007 int32_t weightsOffset = 0;
1009 float layerNormScale = 3.05182e-05f;
1010 int32_t layerNormOffset = 0;
1012 float biasScale = layerNormScale / 1024;
1013 int32_t biasOffset = 0;
1036 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1038 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1040 std::make_unique<ScopedTensorHandle>(recurrentWeightsInfo);
1047 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1049 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1051 std::make_unique<ScopedTensorHandle>(layerNormWeightsInfo);
1095 Connect(input, layer, inputInfo, 0, 0);
1096 Connect(outputStateIn, layer, outputStateInfo, 0, 1);
1097 Connect(cellStateIn, layer, cellStateInfo, 0, 2);
1099 Connect(layer, outputStateOut, outputStateInfo, 0, 0);
1100 Connect(layer, cellStateOut, cellStateInfo, 1, 0);
1101 Connect(layer, output, outputStateInfo, 2, 0);
1103 CreateTensorHandles(graph, factory);
1106 auto workload = MakeAndCheckWorkload<QLstmWorkload>(*layer, factory);
1110 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1111 CHECK(queueDescriptor.
m_Outputs.size() == 3);
// "Direct" convolution2d test (no model options passed to MakeAndCheckWorkload):
// constant weights and bias feed layer slots 1/2; checks 3 inputs / 1 output.
// NOTE(review): descriptor setup and layer creation are missing from this excerpt.
1128 template<
typename Convolution2dWorkload, armnn::DataType DataType>
1129 std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(
armnn::IWorkloadFactory& factory,
1144 float inputsQScale = 1.0f;
1150 weightsTensorInfo.SetConstant();
1152 biasTensorInfo.SetConstant();
1160 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1161 weights->m_LayerOutput->Allocate();
1162 bias->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasTensorInfo);
1167 Connect(weights, layer, weightsTensorInfo, 0, 1);
1168 Connect(bias, layer, biasTensorInfo, 0, 2);
1170 CreateTensorHandles(graph, factory);
1173 auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
1184 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1185 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Depthwise convolution2d test; checks the queue descriptor has 2 inputs
// (input + weights) and 1 output.
// NOTE(review): almost all construction/wiring lines are missing from this excerpt.
1191 template <
typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
1192 std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
1206 float inputsQScale = 1.0f;
1227 CreateTensorHandles(graph, factory);
1230 auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, factory);
1242 CHECK(queueDescriptor.
m_Inputs.size() == 2);
1243 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Fully-connected test with constant weights on layer slot 1; checks 2 inputs
// (input + weights) and 1 output on the queue descriptor.
// NOTE(review): descriptor and layer creation lines are missing from this excerpt.
1249 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1260 float inputsQScale = 1.0f;
1271 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1272 weights->m_LayerOutput->Allocate();
1276 Connect(weights, layer, weightsTensorInfo, 0, 1);
1278 CreateTensorHandles(graph, factory);
1281 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1286 CHECK(queueDescriptor.
m_Inputs.size() == 2);
1287 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Fully-connected test with fused-activation blob (m_A=10, m_B=5) plus constant
// weights and biases on layer slots 1/2; asserts the blob values and checks
// 3 inputs / 1 output on the queue descriptor.
// NOTE(review): blob attachment and layer creation are missing from this excerpt.
1293 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1294 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWithBlobWorkloadTest
1305 float inputsQScale = 1.0f;
1311 biasesTensorInfo.SetConstant();
1313 auto activationDesc = std::make_shared<ActivationDescriptor>();
1314 activationDesc->m_A = 10.0f;
1315 activationDesc->m_B = 5.0f;
1322 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_A) == 10.0f);
1323 ARMNN_ASSERT(
static_cast<float>(activationDescPtr->m_B) == 5.0f);
1333 weights->m_LayerOutput = std::make_unique<ScopedTensorHandle>(weightsTensorInfo);
1334 weights->m_LayerOutput->Allocate();
1335 biases->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(biasesTensorInfo);
1340 Connect(weights, layer, weightsTensorInfo, 0, 1);
1341 Connect(biases, layer, biasesTensorInfo, 0, 2);
1343 CreateTensorHandles(graph, factory);
1346 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1361 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1362 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Fully-connected test where weights and biases arrive as graph inputs rather than
// constant layer members; checks 3 inputs / 1 output on the queue descriptor.
// NOTE(review): all wiring lines are missing from this excerpt.
1368 template <
typename FullyConnectedWorkload, armnn::DataType DataType>
1369 std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest
1381 float inputsQScale = 1.0f;
1395 CreateTensorHandles(graph, factory);
1398 auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, factory);
1405 CHECK(queueDescriptor.
m_Inputs.size() == 3);
1406 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Normalization workload test: descriptor sets m_Beta = -1.0f and m_K = 0.2f;
// standard 1-in/1-out wiring and queue-descriptor checks.
// NOTE(review): remaining descriptor fields and layer creation are missing from
// this excerpt.
1413 template <
typename NormalizationWorkload, armnn::DataType DataType>
1424 layerDesc.
m_Beta = -1.0f;
1425 layerDesc.
m_K = 0.2f;
1442 Connect(input, layer, inputTensorInfo);
1443 Connect(layer, output, outputTensorInfo);
1444 CreateTensorHandles(graph, factory);
1447 auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, factory);
1458 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1459 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Pooling2d workload test; 1-in/1-out queue-descriptor checks.
// NOTE(review): descriptor setup and wiring are missing from this excerpt.
1465 template <
typename Pooling2dWorkload, armnn::DataType DataType>
1496 CreateTensorHandles(graph, factory);
1499 auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, factory);
1514 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1515 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Softmax workload test: m_Axis = -1; for quantized data types the tensor info gets
// scale 1/256 (and offset -128 on one branch -- presumably the signed-asymmetric
// case; the branch conditions are not visible here). 1-in/1-out checks.
1521 template <
typename SoftmaxWorkload, armnn::DataType DataType>
1530 softmaxDescriptor.
m_Axis = -1;
1543 tensorInfo.SetQuantizationScale(1.f / 256);
1547 tensorInfo.SetQuantizationOffset(-128);
1548 tensorInfo.SetQuantizationScale(1.f / 256);
1551 Connect(input, layer, tensorInfo);
1552 Connect(layer, output, tensorInfo);
1553 CreateTensorHandles(graph, factory);
1556 auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, factory);
1559 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1560 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Splitter workload test: three views with origin coordinates 0, 1 and 3 along
// dimension 0; one input and three outputs are wired and the queue descriptor is
// checked for 1 input / 3 outputs.
// NOTE(review): view-size setup and layer creation are missing from this excerpt.
1566 template<
typename SplitterWorkload, armnn::DataType DataType>
1567 std::unique_ptr<SplitterWorkload>
1578 layerDesc.SetViewOriginCoord(0, 0, 0);
1579 layerDesc.SetViewOriginCoord(1, 0, 1);
1580 layerDesc.SetViewOriginCoord(2, 0, 3);
1592 Connect(input, layer, tensorInfo);
1598 Connect(layer, output0, output0Info, 0, 0);
1599 Connect(layer, output1, output1Info, 1, 0);
1600 Connect(layer, output2, output2Info, 2, 0);
1602 CreateTensorHandles(graph, factory);
1605 auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, factory);
1608 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1609 CHECK(queueDescriptor.
m_Outputs.size() == 3);
// Splitter-then-concat round trip: the splitter cuts the input along dim 1 at
// offset 1, the concat stitches the two halves back together with the view order
// deliberately swapped (splitter output 0 feeds concat input 1 and vice versa).
// Returns both workloads as a pair.
// NOTE(review): view-size setup and layer creation are missing from this excerpt.
1627 template<
typename SplitterWorkload,
typename ConcatWorkload, armnn::DataType DataType>
1628 std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<ConcatWorkload>>
1640 splitterViews.SetViewOriginCoord(0, 0, 0);
1641 splitterViews.SetViewOriginCoord(0, 1, 0);
1642 splitterViews.SetViewOriginCoord(0, 2, 0);
1643 splitterViews.SetViewOriginCoord(0, 3, 0);
1645 splitterViews.SetViewOriginCoord(1, 0, 0);
1646 splitterViews.SetViewOriginCoord(1, 1, 1);
1647 splitterViews.SetViewOriginCoord(1, 2, 0);
1648 splitterViews.SetViewOriginCoord(1, 3, 0);
1655 concatViews.SetViewOriginCoord(0, 0, 0);
1656 concatViews.SetViewOriginCoord(0, 1, 1);
1657 concatViews.SetViewOriginCoord(0, 2, 0);
1658 concatViews.SetViewOriginCoord(0, 3, 0);
1660 concatViews.SetViewOriginCoord(1, 0, 0);
1661 concatViews.SetViewOriginCoord(1, 1, 0);
1662 concatViews.SetViewOriginCoord(1, 2, 0);
1663 concatViews.SetViewOriginCoord(1, 3, 0);
1673 Connect(input, splitter, inputTensorInfo, 0, 0);
// Cross-wiring: splitter out 0 -> concat in 1, splitter out 1 -> concat in 0.
1675 Connect(splitter, concat, splitTensorInfo1, 0, 1);
1677 Connect(splitter, concat, splitTensorInfo2, 1, 0);
1679 Connect(concat, output, inputTensorInfo, 0, 0);
1682 CreateTensorHandles(graph, factory);
1685 auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1686 CHECK(workloadSplitter);
1688 auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
1689 CHECK(workloadConcat);
1691 return {std::move(workloadSplitter), std::move(workloadConcat)};
// Splitter feeding four activation layers (two per splitter output); each splitter
// output fans out to two activations whose results go to four graph outputs. The
// five created workloads are handed back through the reference out-parameters.
// NOTE(review): layer creation and split-view size setup are missing from this
// excerpt.
1697 template<
typename SplitterWorkload,
typename ActivationWorkload, armnn::DataType DataType>
1699 std::unique_ptr<SplitterWorkload>& wlSplitter,
1700 std::unique_ptr<ActivationWorkload>& wlActiv0_0,
1701 std::unique_ptr<ActivationWorkload>& wlActiv0_1,
1702 std::unique_ptr<ActivationWorkload>& wlActiv1_0,
1703 std::unique_ptr<ActivationWorkload>& wlActiv1_1)
1714 splitterViews.SetViewOriginCoord(0, 0, 0);
1715 splitterViews.SetViewOriginCoord(0, 1, 0);
1716 splitterViews.SetViewOriginCoord(0, 2, 0);
1717 splitterViews.SetViewOriginCoord(0, 3, 0);
1719 splitterViews.SetViewOriginCoord(1, 0, 0);
1720 splitterViews.SetViewOriginCoord(1, 1, 1);
1721 splitterViews.SetViewOriginCoord(1, 2, 0);
1722 splitterViews.SetViewOriginCoord(1, 3, 0);
1739 Connect(input, splitter, inputTensorInfo, 0, 0);
1740 Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
1741 Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
1743 Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
1744 Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
1746 Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
1747 Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
1748 Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
1749 Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
1751 CreateTensorHandles(graph, factory);
1753 auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, factory);
1754 auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, factory);
1755 auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, factory);
1756 auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, factory);
1757 auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, factory);
1759 wlSplitter = std::move(workloadSplitter);
1760 wlActiv0_0 = std::move(workloadActiv0_0);
1761 wlActiv0_1 = std::move(workloadActiv0_1);
1762 wlActiv1_0 = std::move(workloadActiv1_0);
1763 wlActiv1_1 = std::move(workloadActiv1_1);
// Resize workload test: shapes depend on data layout (NHWC {2,4,4,3}->{2,2,2,3},
// NCHW {2,3,4,4}->{2,3,2,2}); also verifies the queue descriptor records the
// requested data layout in m_Parameters.
1766 template <
typename ResizeWorkload, armnn::DataType DataType>
1774 switch (dataLayout) {
1776 inputShape = { 2, 4, 4, 3 };
1777 outputShape = { 2, 2, 2, 3 };
1781 inputShape = { 2, 3, 4, 4 };
1782 outputShape = { 2, 3, 2, 2 };
1801 Connect(input, layer, inputTensorInfo);
1802 Connect(layer, output, outputTensorInfo);
1803 CreateTensorHandles(graph, factory);
1806 auto workload = MakeAndCheckWorkload<ResizeWorkload>(*layer, factory);
1808 auto queueDescriptor = workload->GetData();
1809 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1810 CHECK(queueDescriptor.
m_Outputs.size() == 1);
1811 CHECK(queueDescriptor.
m_Parameters.m_DataLayout == dataLayout);
// BatchToSpaceNd workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): descriptor setup and layer creation are missing from this excerpt.
1817 template <
typename BatchToSpaceNdWorkload, armnn::DataType DataType>
1831 Connect(input, layer, tensorInfo);
1832 Connect(layer, output, tensorInfo);
1834 CreateTensorHandles(graph, factory);
1837 auto workload = MakeAndCheckWorkload<BatchToSpaceNdWorkload>(*layer, factory);
1840 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1841 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// LogSoftmax workload test: m_Axis = -1; standard 1-in/1-out wiring and checks.
1846 template <
typename LogSoftmaxWorkload, armnn::DataType DataType>
1855 logSoftmaxDescriptor.
m_Axis = -1;
1866 Connect(input, layer, tensorInfo);
1867 Connect(layer, output, tensorInfo);
1868 CreateTensorHandles(graph, factory);
1871 auto workload = MakeAndCheckWorkload<LogSoftmaxWorkload>(*layer, factory);
1874 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1875 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// L2Normalization workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): tensor-info and layer creation are missing from this excerpt.
1881 template <
typename L2NormalizationWorkload, armnn::DataType DataType>
1903 Connect(input, layer, inputTensorInfo);
1904 Connect(layer, output, outputTensorInfo);
1905 CreateTensorHandles(graph, factory);
1908 auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, factory);
1912 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1913 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Reshape workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): target-shape setup and layer creation are missing from this excerpt.
1919 template <
typename ReshapeWorkload, armnn::DataType DataType>
1936 Connect(input, layer, inputTensorInfo);
1937 Connect(layer, output, outputTensorInfo);
1938 CreateTensorHandles(graph, factory);
1941 auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, factory);
1944 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1945 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// ConvertFp16ToFp32 workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): tensor-info and layer creation are missing from this excerpt.
1951 template <
typename ConvertFp16ToFp32Float32Workload>
1952 std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
1965 Connect(input, layer, inputTensorInfo);
1966 Connect(layer, output, outputTensorInfo);
1967 CreateTensorHandles(graph, factory);
1970 auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, factory);
1973 CHECK(queueDescriptor.
m_Inputs.size() == 1);
1974 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// ConvertFp32ToFp16 workload test; mirror of the Fp16->Fp32 test above.
// NOTE(review): tensor-info and layer creation are missing from this excerpt.
1980 template <
typename ConvertFp32ToFp16Float16Workload>
1981 std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
1994 Connect(input, layer, inputTensorInfo);
1995 Connect(layer, output, outputTensorInfo);
1996 CreateTensorHandles(graph, factory);
1999 auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, factory);
2002 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2003 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Mean workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): axis/keep-dims descriptor setup is missing from this excerpt.
2009 template <
typename MeanWorkload, armnn::DataType DataType>
2025 Connect(input, layer, inputTensorInfo);
2026 Connect(layer, output, outputTensorInfo);
2027 CreateTensorHandles(graph, factory);
2030 auto workload = MakeAndCheckWorkload<MeanWorkload>(*layer, factory);
2035 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2036 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Concat workload test: two identically-shaped {2,3,2,5} inputs concatenated along
// the caller-supplied concatAxis; returns the concat workload after a non-null check.
// NOTE(review): output-shape computation and layer creation are missing from this
// excerpt.
2042 template<
typename ConcatWorkload, armnn::DataType DataType>
2046 unsigned int concatAxis)
2056 std::vector<armnn::TensorShape> inputShapes{{ 2, 3, 2, 5 }, { 2, 3, 2, 5 }};
2070 Connect(input0, concat, inputTensorInfo, 0, 0);
2072 Connect(input1, concat, inputTensorInfo, 0, 1);
2074 Connect(concat, output, outputTensorInfo, 0, 0);
2077 CreateTensorHandles(graph, factory);
2080 auto workloadConcat = MakeAndCheckWorkload<ConcatWorkload>(*concat, factory);
2081 CHECK(workloadConcat);
2083 return workloadConcat;
// Pre-compiled workload test: builds a small conv network with generated weight
// data (convWeightsData[i] = i) and zero biases, optimizes it for the factory's
// backend, locates the resulting PreCompiled layer in the optimized graph, creates
// its workload, and returns the optimized network together with the workload (the
// network must outlive the workload).
// NOTE(review): network construction and the optimize call are missing from this
// excerpt.
2086 template <
typename PreCompiledWorkload, armnn::DataType dataType>
2087 std::pair<armnn::IOptimizedNetworkPtr, std::unique_ptr<PreCompiledWorkload>> CreatePreCompiledWorkloadTest(
2090 bool biasEnabled =
false)
2105 unsigned int weightsLength = weightsTensorInfo.GetNumElements();
2108 std::vector<WeightType> convWeightsData(weightsLength);
2109 for (
unsigned int i = 0; i < weightsLength; ++i)
2111 convWeightsData[i] =
static_cast<WeightType
>(i);
2124 const std::string convLayerName(
"conv layer");
2138 unsigned int biasLength = biasTensorInfo.GetNumElements();
2141 std::vector<BiasType> biasData(biasLength);
2142 std::fill(biasData.begin(), biasData.end(),
static_cast<BiasType
>(0));
2162 inputTensorInfo.SetQuantizationOffset(0);
2163 inputTensorInfo.SetQuantizationScale(0.9f);
2169 outputTensorInfo.SetQuantizationOffset(0);
2170 outputTensorInfo.SetQuantizationScale(0.9f);
2181 std::vector<armnn::BackendId> backends = {factory.
GetBackendId()};
2187 CHECK(optimizedNet !=
nullptr);
2191 Layer* preCompiledLayer =
nullptr;
2192 for (
auto& layer : optimisedGraph)
2196 preCompiledLayer = layer;
2199 CHECK(preCompiledLayer !=
nullptr);
2202 CreateTensorHandles(optimisedGraph, factory);
2205 auto workload = MakeAndCheckWorkload<PreCompiledWorkload>(*preCompiledLayer, factory);
2208 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2209 CHECK(queueDescriptor.
m_Outputs.size() == 1);
2214 return std::make_pair(std::move(optimizedNet), std::move(workload));
// Constant workload test: the constant layer's m_LayerOutput is backed by a
// ScopedTensorHandle and connected straight to the graph output.
// NOTE(review): layer creation and tensor-info lines are missing from this excerpt.
2217 template<
typename ConstantWorkload, armnn::DataType DataType>
2227 constant->m_LayerOutput = std::make_unique<ScopedTensorHandle>(outputTensorInfo);
2233 Connect(constant, output, outputTensorInfo, 0, 0);
2236 CreateTensorHandles(graph, factory);
2239 auto workloadConstant = MakeAndCheckWorkload<ConstantWorkload>(*constant, factory);
2240 CHECK(workloadConstant);
2242 return workloadConstant;
// Prelu workload test: input and alpha feed layer slots 0/1; all four layer
// pointers are null-checked before wiring; queue descriptor must show 2 inputs
// and 1 output.
// NOTE(review): layer/tensor-info creation lines are missing from this excerpt.
2245 template <
typename PreluWorkload>
2255 CHECK(layer !=
nullptr);
2261 CHECK(input !=
nullptr);
2262 CHECK(alpha !=
nullptr);
2263 CHECK(output !=
nullptr);
2269 Connect(input, layer, inputTensorInfo, 0, 0);
2270 Connect(alpha, layer, alphaTensorInfo, 0, 1);
2271 Connect(layer, output, outputTensorInfo, 0, 0);
2272 CreateTensorHandles(graph, factory);
2275 auto workload = MakeAndCheckWorkload<PreluWorkload>(*layer, factory);
2278 CHECK(queueDescriptor.
m_Inputs.size() == 2);
2279 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// SpaceToDepth workload test; standard 1-in/1-out wiring and checks.
// NOTE(review): block-size descriptor setup is missing from this excerpt.
2285 template <
typename SpaceToDepthWorkload, armnn::DataType DataType>
2301 Connect(input, layer, inputTensorInfo);
2302 Connect(layer, output, outputTensorInfo);
2304 CreateTensorHandles(graph, factory);
2307 auto workload = MakeAndCheckWorkload<SpaceToDepthWorkload>(*layer, factory);
2310 CHECK(queueDescriptor.
m_Inputs.size() == 1);
2311 CHECK(queueDescriptor.
m_Outputs.size() == 1);
// Stack workload test: creates numInputs input layers named "input0", "input1", ...
// (each null-checked), connects every one to the stack layer's corresponding slot,
// wires the single output, and checks the queue descriptor has numInputs inputs
// and 1 output.
// NOTE(review): the input-layer construction call around the static_cast<int>(i)
// argument is split across missing lines in this excerpt.
2316 template <
typename StackWorkload, armnn::DataType DataType>
2322 unsigned int numInputs)
2330 CHECK(stackLayer !=
nullptr);
2333 std::vector<Layer*> inputs;
2334 for (
unsigned int i=0; i<numInputs; ++i)
2337 static_cast<int>(i),
2338 (
"input" + std::to_string(i)).c_str()
2340 CHECK(inputs[i] !=
nullptr);
2343 CHECK(output !=
nullptr);
2346 for (
unsigned int i=0; i<numInputs; ++i)
2348 Connect(inputs[i], stackLayer, inputTensorInfo, 0, i);
2350 Connect(stackLayer, output, outputTensorInfo, 0, 0);
2352 CreateTensorHandles(graph, factory);
2354 auto stackWorkload = MakeAndCheckWorkload<StackWorkload>(*stackLayer, factory);
2356 CHECK(queueDescriptor.
m_Inputs.size() == numInputs);
2357 CHECK(queueDescriptor.
m_Outputs.size() == 1);
2359 return stackWorkload;