14 #include <doctest/doctest.h> 19 using namespace armnn;
25 std::vector<T> GetVector(
unsigned int size,
float initial,
float increment)
27 std::vector<float> typeVector(size, initial);
28 std::vector<T> vector(size);
32 for (
unsigned int i = 0; i < size; ++i)
34 vector[i] = T(initial + (increment * static_cast<float>(i)));
40 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
41 struct Convolution2dTest
44 static const bool isElementWise =
false;
45 static const bool isConstTensorAsInputSupported =
false;
51 constexpr
static const unsigned int inputSize = 48;
52 constexpr
static const unsigned int outputSize = 36;
64 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
65 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
66 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
67 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
68 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
69 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
76 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
87 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
88 struct DWConvolution2dTest
92 static const bool isElementWise =
false;
93 static const bool isConstTensorAsInputSupported =
false;
99 constexpr
static const unsigned int inputSize = 48;
100 constexpr
static const unsigned int outputSize = 108;
113 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
114 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
115 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
116 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
117 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
118 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
125 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
136 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
141 static const bool isElementWise =
false;
142 static const bool isConstTensorAsInputSupported =
true;
148 constexpr
static const unsigned int inputSize = 10;
149 constexpr
static const unsigned int outputSize = 6;
165 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
169 std::vector<float> weightsData = { 1, 2, 3, 4, 5,
172 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
173 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
179 std::vector<IConnectableLayer*> layers = { weightsLayer };
184 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
189 static const bool isElementWise =
false;
190 static const bool isConstTensorAsInputSupported =
false;
195 constexpr
static const unsigned int inputSize = 48;
196 constexpr
static const unsigned int outputSize = 48;
209 std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
210 std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
211 std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
212 std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
214 const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
223 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
234 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
238 static const bool isElementWise =
true;
239 static const bool isConstTensorAsInputSupported =
false;
244 constexpr
static const unsigned int inputSize = 48;
245 constexpr
static const unsigned int outputSize = 48;
258 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
269 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
273 static const bool isElementWise =
true;
274 static const bool isConstTensorAsInputSupported =
false;
279 constexpr
static const unsigned int inputSize = 48;
280 constexpr
static const unsigned int outputSize = 48;
293 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
304 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
308 static const bool isElementWise =
true;
309 static const bool isConstTensorAsInputSupported =
false;
314 constexpr
static const unsigned int inputSize = 48;
315 constexpr
static const unsigned int outputSize = 48;
328 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
339 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
343 static const bool isElementWise =
true;
344 static const bool isConstTensorAsInputSupported =
false;
349 constexpr
static const unsigned int inputSize = 48;
350 constexpr
static const unsigned int outputSize = 48;
363 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
374 template<
typename LayerTest,
377 float scale, int32_t offset)
389 IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
393 IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):
nullptr;
396 if(LayerTest::isConstTensorAsInputSupported)
398 std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
403 for (
unsigned int i = 0; i < constantLayers.size(); ++i)
410 TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
411 TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
416 activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
421 activationLayer->GetOutputSlot(0).Connect(outputLayer->
GetInputSlot(0));
423 if (LayerTest::isElementWise)
435 template<
typename LayerTest,
440 float scale = 1.f, int32_t offset=0)
444 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
454 auto checkFusedConv2d = [](
const Layer*
const layer)->
bool {
455 return IsLayerOfType<LayerType>(layer) &&
456 (layer->GetNameStr() ==
"fused-activation-into-receiverLayer");
460 if(LayerTest::isConstTensorAsInputSupported)
462 CHECK(4 == graphFused.GetNumLayers());
465 &IsLayerOfType<InputLayer>,
466 &IsLayerOfType<ConstantLayer>,
468 &IsLayerOfType<OutputLayer>));
472 CHECK(fusedReceiverLayer);
477 CHECK(3 == graphFused.GetNumLayers());
480 &IsLayerOfType<InputLayer>,
482 &IsLayerOfType<OutputLayer>));
487 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
490 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
491 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
492 std::vector<T> outputDataFused(LayerTest::outputSize);
495 {0,
ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
497 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
500 CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) ==
Status::Success);
504 INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
true, scale, offset);
515 if(LayerTest::isConstTensorAsInputSupported)
517 CHECK(6 == graphNotFused.GetNumLayers());
519 graphNotFused.cend(),
520 &IsLayerOfType<InputLayer>,
521 &IsLayerOfType<ConstantLayer>,
522 &IsLayerOfType<LayerType>,
523 &IsLayerOfType<ActivationLayer>,
524 &IsLayerOfType<OutputLayer>,
525 &IsLayerOfType<OutputLayer>));
529 CHECK(5 == graphNotFused.GetNumLayers());
531 graphNotFused.cend(),
532 &IsLayerOfType<InputLayer>,
533 &IsLayerOfType<LayerType>,
534 &IsLayerOfType<ActivationLayer>,
535 &IsLayerOfType<OutputLayer>,
536 &IsLayerOfType<OutputLayer>));
541 CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) ==
Status::Success);
544 std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
545 std::vector<T> outputDataNotFused(LayerTest::outputSize);
546 std::vector<T> outputData2NotFused(LayerTest::outputSize);
549 {0,
ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
551 {0,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
552 {1,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
555 CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
559 for (
unsigned int n = 0; n < outputDataFused.size(); ++n)
561 auto outputNotFused =
static_cast<float>(outputDataNotFused[n]);
562 CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
566 template<
typename LayerTest,
571 float scale = 1.f, int32_t offset = 0)
577 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
587 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
590 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
591 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
592 std::vector<T> outputDataFused(LayerTest::outputSize);
595 {0,
ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
597 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
600 run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
604 catch (
const std::exception& e)
606 std::cerr << e.what() << std::endl;
615 #if defined(ARMCOMPUTENEON_ENABLED) 619 TEST_CASE(
"FuseReLUIntoConvFloat32CpuAccTest")
624 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
627 TEST_CASE(
"FuseReLUIntoDWConvFloat32CpuAccTest")
632 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
635 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32CpuAccTest")
640 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
643 TEST_CASE(
"FuseReLUIntoBatchNormFloat32CpuAccTest")
648 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
653 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32CpuAccTest")
657 activationDescriptor.
m_A = 1.0f;
658 activationDescriptor.
m_B = -1.0f;
660 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
663 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
667 activationDescriptor.
m_A = 1.0f;
668 activationDescriptor.
m_B = -1.0f;
670 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > ,
DataType::Float32 >
673 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
677 activationDescriptor.
m_A = 1.0f;
678 activationDescriptor.
m_B = -1.0f;
680 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
683 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
687 activationDescriptor.
m_A = 1.0f;
688 activationDescriptor.
m_B = -1.0f;
690 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
695 TEST_CASE(
"FuseReLUIntoConvQAsymmU8CpuAccTest")
700 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
703 TEST_CASE(
"FuseReLUIntoDWConvQAsymmU8CpuAccTest")
708 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
711 TEST_CASE(
"FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
716 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
721 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
725 activationDescriptor.
m_A = 6.0f;
726 activationDescriptor.
m_B = 0.0f;
728 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
731 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
735 activationDescriptor.
m_A = 6.0f;
736 activationDescriptor.
m_B = 0.0f;
738 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
741 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
745 activationDescriptor.
m_A = 6.0f;
746 activationDescriptor.
m_B = 0.0f;
748 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
753 TEST_CASE(
"FuseTanHIntoConvFloat32CpuAccTest")
758 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
763 TEST_CASE(
"FuseHardSwishIntoConvFloat32CpuAccTest")
768 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
773 TEST_CASE(
"LayerFollowedByActivationFloat32CpuAccTest")
776 for (
int i = 0; i != 12; ++i)
779 activationDescriptor.
m_A = 1.0f;
780 activationDescriptor.
m_B = -1.0f;
781 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
782 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
783 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
784 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
785 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
786 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
787 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
788 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
791 TEST_CASE(
"LayerFollowedByActivationFloat16CpuAccTest")
794 for (
int i = 0; i != 12; ++i)
797 activationDescriptor.
m_A = 1.0f;
798 activationDescriptor.
m_B = -1.0f;
799 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
800 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
801 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
802 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
803 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
804 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
805 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
806 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
809 TEST_CASE(
"LayerFollowedByActivationQAsymmU8CpuAccTest")
814 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
815 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
816 static_cast<int>(activationDescriptor.
m_Function));
817 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
818 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
819 static_cast<int>(activationDescriptor.
m_Function));
822 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
823 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
824 static_cast<int>(activationDescriptor.
m_Function));
825 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
826 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
827 static_cast<int>(activationDescriptor.
m_Function));
830 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
831 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
832 static_cast<int>(activationDescriptor.
m_Function));
833 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
834 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
835 static_cast<int>(activationDescriptor.
m_Function));
838 activationDescriptor.
m_A = 1.0f;
839 activationDescriptor.
m_B = -1.0f;
840 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
841 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
842 static_cast<int>(activationDescriptor.
m_Function));
843 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
844 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
845 static_cast<int>(activationDescriptor.
m_Function));
848 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
849 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
850 static_cast<int>(activationDescriptor.
m_Function));
851 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
852 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
853 static_cast<int>(activationDescriptor.
m_Function));
858 #if defined(ARMCOMPUTECL_ENABLED) 862 TEST_CASE(
"FuseReLUIntoConvFloat32GpuAccTest")
867 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
870 TEST_CASE(
"FuseReLUIntoDWConvFloat32GpuAccTest")
875 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
878 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32GpuAccTest")
883 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
886 TEST_CASE(
"FuseReLUIntoBatchNormFloat32GpuAccTest")
891 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
894 TEST_CASE(
"FuseReLUIntoMulFloat32GpuAccTest")
899 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
902 TEST_CASE(
"FuseReLUIntoAddFloat32GpuAccTest")
907 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
910 TEST_CASE(
"FuseReLUIntoSubFloat32GpuAccTest")
915 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
918 TEST_CASE(
"FuseReLUIntoDivFloat32GpuAccTest")
923 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
928 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32GpuAccTest")
932 activationDescriptor.
m_A = 1.0f;
933 activationDescriptor.
m_B = -1.0f;
935 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
938 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
942 activationDescriptor.
m_A = 1.0f;
943 activationDescriptor.
m_B = -1.0f;
945 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
948 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
952 activationDescriptor.
m_A = 1.0f;
953 activationDescriptor.
m_B = -1.0f;
955 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
958 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
962 activationDescriptor.
m_A = 1.0f;
963 activationDescriptor.
m_B = -1.0f;
965 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
968 TEST_CASE(
"FuseBoundedReLUIntoMulFloat32GpuAccTest")
972 activationDescriptor.
m_A = 1.0f;
973 activationDescriptor.
m_B = -1.0f;
975 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
978 TEST_CASE(
"FuseBoundedReLUIntoAddFloat32GpuAccTest")
982 activationDescriptor.
m_A = 1.0f;
983 activationDescriptor.
m_B = -1.0f;
985 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
988 TEST_CASE(
"FuseBoundedReLUIntoSubFloat32GpuAccTest")
992 activationDescriptor.
m_A = 1.0f;
993 activationDescriptor.
m_B = -1.0f;
995 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
998 TEST_CASE(
"FuseBoundedReLUIntoDivFloat32GpuAccTest")
1002 activationDescriptor.
m_A = 1.0f;
1003 activationDescriptor.
m_B = -1.0f;
1005 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1010 TEST_CASE(
"FuseReLUIntoConvFloat16GpuAccTest")
1015 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1018 TEST_CASE(
"FuseReLUIntoDWConvFloat16GpuAccTest")
1023 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1026 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1031 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1034 TEST_CASE(
"FuseReLUIntoBatchNormFloat16GpuAccTest")
1039 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1042 TEST_CASE(
"FuseReLUIntoMulFloat16GpuAccTest")
1047 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1050 TEST_CASE(
"FuseReLUIntoAddFloat16GpuAccTest")
1055 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1058 TEST_CASE(
"FuseReLUIntoSubFloat16GpuAccTest")
1063 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1066 TEST_CASE(
"FuseReLUIntoDivFloat16GpuAccTest")
1071 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1076 TEST_CASE(
"FuseReLUQIntoConvAsymmU8GpuAccTest")
1081 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1084 TEST_CASE(
"FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1089 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1092 TEST_CASE(
"FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1097 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1102 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1106 activationDescriptor.
m_A = 6.0f;
1107 activationDescriptor.
m_B = 0.0f;
1109 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1112 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1116 activationDescriptor.
m_A = 6.0f;
1117 activationDescriptor.
m_B = 0.0f;
1119 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
1122 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1126 activationDescriptor.
m_A = 6.0f;
1127 activationDescriptor.
m_B = 0.0f;
1129 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1134 TEST_CASE(
"FuseTanHIntoConvFloat32GpuAccTest")
1139 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1142 TEST_CASE(
"FuseTanHIntoMulFloat32GpuAccTest")
1147 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1150 TEST_CASE(
"FuseTanHIntoAddFloat32GpuAccTest")
1155 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1158 TEST_CASE(
"FuseTanHIntoSubFloat32GpuAccTest")
1163 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1166 TEST_CASE(
"FuseTanHIntoDivFloat32GpuAccTest")
1171 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1176 TEST_CASE(
"FuseHardSwishIntoConvFloat32GpuAccTest")
1181 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1184 TEST_CASE(
"FuseHardSwishIntoMulFloat32GpuAccTest")
1189 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1192 TEST_CASE(
"FuseHardSwishIntoAddFloat32GpuAccTest")
1197 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1200 TEST_CASE(
"FuseHardSwishIntoSubFloat32GpuAccTest")
1205 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1208 TEST_CASE(
"FuseHardSwishIntoDivFloat32GpuAccTest")
1213 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1218 TEST_CASE(
"LayerFollowedByActivationFloat32GpuAccTest")
1221 for (
int i = 0; i != 12; ++i)
1224 activationDescriptor.
m_A = 1.0f;
1225 activationDescriptor.
m_B = -1.0f;
1228 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1229 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1230 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
1231 (activationDescriptor,
Compute::GpuAcc)),
"DepthwiseConvolution + Activation function " << i);
1232 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
1233 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1234 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
1235 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1236 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1237 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1238 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1239 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1240 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1241 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1242 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1243 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1247 TEST_CASE(
"LayerFollowedByActivationFloat16GpuAccTest")
1250 for (
int i = 0; i != 12; ++i)
1253 activationDescriptor.
m_A = 1.0f;
1254 activationDescriptor.
m_B = -1.0f;
1257 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1258 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1259 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1260 (activationDescriptor,
Compute::GpuAcc)),
"Depthwise + Activation function " << i);
1261 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1262 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1263 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1264 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1265 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1266 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1267 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1268 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1269 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1270 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1271 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1272 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1276 TEST_CASE(
"LayerFollowedByActivationQAsymmU8GpuAccTest")
1281 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1282 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
1283 static_cast<int>(activationDescriptor.
m_Function));
1284 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1285 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
1286 static_cast<int>(activationDescriptor.
m_Function));
1289 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1290 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
1291 static_cast<int>(activationDescriptor.
m_Function));
1292 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1293 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
1294 static_cast<int>(activationDescriptor.
m_Function));
1297 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1298 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1299 static_cast<int>(activationDescriptor.
m_Function));
1300 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1301 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1302 static_cast<int>(activationDescriptor.
m_Function));
1305 activationDescriptor.
m_A = 1.0f;
1306 activationDescriptor.
m_B = -1.0f;
1307 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1308 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1309 static_cast<int>(activationDescriptor.
m_Function));
1310 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1311 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1312 static_cast<int>(activationDescriptor.
m_Function));
1315 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1316 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1317 static_cast<int>(activationDescriptor.
m_Function));
1318 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1319 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1320 static_cast<int>(activationDescriptor.
m_Function));
TEST_SUITE("TestConstTensorLayerVisitor")
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
bool m_BiasEnabled
Enable/disable bias.
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Main network class which provides the interface for building up a neural network. ...
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D convolution layer to the network.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, bool constantWeights)
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is deprecated and is being replaced by BackendId.
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
min(a, max(b, input)) ReLu1 & ReLu6.
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...