19 using namespace armnn;
// NOTE(review): this file is a sparse line-sampled excerpt — the leading
// integers are stale line numbers from the original paste and most interior
// lines (signatures, braces, assert bodies) are missing. Comments only;
// code left byte-identical.
//
// Fragment 1: gathers the TensorShape of every input TensorInfo into a
// reserved vector (enclosing function signature not visible here).
27 const std::vector<TensorInfo> & inputTensorInfos,
28 unsigned int concatDim)
30 std::vector<TensorShape> shapes;
31 shapes.reserve(inputTensorInfos.size());
34 shapes.push_back(it.GetShape());
// Fragment 2: scans all inputs, records each shape's dimension count and —
// per the visible message string — requires every input to have the same
// number of dimensions.
47 const std::vector<TensorInfo> & inputTensorInfos,
48 unsigned int concatDim)
52 unsigned int nDimensions = 0;
56 for (
auto && tensorInfo : inputTensorInfos)
60 nDimensions = tensorInfo.GetShape().GetNumDimensions();
65 "Input shapes must have the same number of dimensions");
// Predicate result: true for rank < 3, or for rank 3 when the concat axis
// (counted from the back) is neither the innermost-compatible case nor the
// last axis. Presumably "do we need a permute before concat" — the exact
// contract cannot be confirmed from this fragment.
69 return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
// NOTE(review): fragmentary excerpt (interior lines missing); comments only,
// code untouched. Stale line numbers from the original paste are preserved.
//
// Fragment 1: expands a lower-rank shape to rank 3 by left-padding with 1s —
// newDims starts as {1,1,1} and the original dimensions are copied into the
// trailing positions.
81 std::vector<unsigned int> newDims(
size_t(3), 1u);
82 unsigned int expandedBy = 3 - numDims;
83 for (
unsigned int i=0; i<numDims; ++i)
85 newDims[expandedBy+i] = inputShape[i];
// Fragment 2: computes a forward/reverse PermutationVector pair keyed on the
// concat axis after rank-3 expansion (only ranks 1-3 supported per the
// visible message). The actual permutation constants are in lines not
// visible in this excerpt.
91 unsigned int numDimensions,
92 unsigned int & concatDim,
93 std::pair<PermutationVector, PermutationVector> & permutations)
96 "Only dimensions 1,2 and 3 are supported by this helper");
97 unsigned int expandedBy = 3 - numDimensions;
98 unsigned int expandedConcatAxis = concatDim + expandedBy;
100 if (expandedConcatAxis == 2)
105 permutations = std::make_pair(forwardPermutation, reversePermutation);
107 else if (expandedConcatAxis == 1)
112 permutations = std::make_pair(forwardPermutation, reversePermutation);
// NOTE(review): fragment of PermuteTensorData<T> (named by its call sites
// later in this excerpt); interior lines missing, comments only.
// Builds and runs a Permute workload: creates input/output tensor handles,
// wires them into the queue descriptor, allocates, configures and executes
// (execution lines not visible), then reports the permuted info back via
// the in/out inputTensorInfo parameter.
128 std::vector<T>& outputData)
// Guard against a null source buffer (the handling branch is not visible).
132 if (inputData ==
nullptr)
141 std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.
CreateTensorHandle(inputTensorInfo);
142 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
147 AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
148 AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
150 std::unique_ptr<IWorkload> workload = workloadFactory.
CreatePermute(queueDescriptor, workloadInfo);
152 inputHandle->Allocate();
153 outputHandle->Allocate();
157 workload->PostAllocationConfigure();
// Caller's tensor info is replaced by the permuted-output info.
162 inputTensorInfo = outputTensorInfo;
// NOTE(review): fragment of PermuteInputsForConcat<T> (named at its call
// site below, line "316"); interior lines missing, comments only.
// Permutes every input tensor prior to concatenation, updating the infos,
// the data pointers, and the concat dimension in place. The permuted copies
// are kept alive in inputDataStorage, which the caller owns.
175 std::vector<TensorInfo> & inputTensorInfos,
176 std::vector<T *> & inputData,
177 std::vector<std::vector<T>> & inputDataStorage,
179 unsigned int & concatDim,
184 "Expecting more than one tensor to be concatenated here");
186 unsigned int numDims = 0;
187 unsigned int nthInput = 0;
190 std::pair<PermutationVector, PermutationVector> permutations =
191 std::make_pair(identity, identity);
193 inputDataStorage.resize(inputData.size());
195 for (
auto && tensorInfo : inputTensorInfos)
// First iteration presumably derives the permutation pair; subsequent ones
// reuse permutations.second — the branching lines are not visible.
199 numDims = tensorInfo.GetShape().GetNumDimensions();
203 permuteVector = permutations.second;
205 "Test logic error, we don't need permutation, so we shouldn't arrive here");
210 "All inputs must have the same number of dimensions");
216 PermuteTensorData<T>(workloadFactory,
222 inputDataStorage[nthInput]);
224 inputData[nthInput] = inputDataStorage[nthInput].data();
225 inputTensorInfos[nthInput] = newTensorInfo;
233 permutations.first));
// NOTE(review): fragment of PermuteOutputForConcat<T> (named at its call
// site below, line "403"); interior lines missing, comments only.
// Applies the reverse permutation to the concatenated result via
// PermuteTensorData and memcpy's the restored layout into the caller's
// destination buffer.
247 std::unique_ptr<ITensorHandle> && inputDataHandle,
261 std::vector<T> outputData;
265 PermuteTensorData<T>(workloadFactory,
273 ::memcpy(data, &outputData[0],
sizeof(T)*outputData.size());
// NOTE(review): fragment of the Concatenate<T> driver (named by the many
// call sites below); interior lines missing, comments only.
// Orchestrates a full concat run: validates the output pointer, takes
// mutable copies of the caller's infos/pointers, optionally pre-permutes the
// inputs, creates tensor handles (sub-tensor handles when the backend
// supports them), builds and executes a Concat workload, and optionally
// permutes the result back.
280 std::initializer_list<const TensorInfo> inputTensorInfosOrig,
281 std::initializer_list<T *> inputsOrig,
284 unsigned int concatDim,
288 if (output ==
nullptr)
// Local, mutable copies — the permute path rewrites these in place.
297 std::vector<TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
298 std::vector<T *> inputs = inputsOrig;
299 TensorInfo outputTensorInfo = outputTensorInfoOrig;
304 std::vector<std::vector<T>> tmpInputDataStorage;
306 const size_t inputCount = inputTensorInfos.size();
310 if (needPermuteForConcat)
316 PermuteInputsForConcat<T>(workloadFactory,
329 std::vector<std::unique_ptr<ITensorHandle>> inputHandles;
330 inputHandles.reserve(inputCount);
332 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
// Iterates the views of the concat descriptor (body not visible).
341 for (
unsigned int i = 0; i < viewsDescriptor.
GetNumViews(); ++i)
// Sub-tensor path: each input handle is carved out of the output handle
// when subTensorsSupported; the alternative branch is not visible.
350 for (
unsigned int i = 0; i < inputCount; ++i)
352 const TensorInfo& inputTensorInfo = inputTensorInfos[i];
354 std::unique_ptr<ITensorHandle> inputHandle =
355 subTensorsSupported ?
361 inputHandles.emplace_back(std::move(inputHandle));
// Fallback path: plain per-input tensor handles.
368 for (
unsigned int i = 0; i < inputCount; ++i)
370 std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.
CreateTensorHandle(inputTensorInfos[i]);
371 inputHandles.emplace_back(std::move(inputHandle));
375 for (
unsigned int i = 0; i < inputCount; ++i)
377 AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].
get());
380 AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
382 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(queueDescriptor, workloadInfo);
384 for (
auto& inputHandle : inputHandles)
386 inputHandle->Allocate();
389 outputHandle->Allocate();
// Copies each input buffer into its handle (copy lines not visible).
391 unsigned int nextInputId = 0;
392 for (
auto& inputHandle : inputHandles)
398 workload->PostAllocationConfigure();
// Undo the pre-permutation so the caller sees the original layout.
401 if (needPermuteForConcat)
403 PermuteOutputForConcat<T>(workloadFactory,
408 std::move(outputHandle),
// NOTE(review): fragment of Concat1dTestImpl (named by its wrapper call
// below, line "2242"); interior lines missing, comments only.
// 1-D concat test: three 3-element inputs {1..3},{4..6},{7..9} concatenated
// into a 9-element output; expected result is 1..9 in order, quantized with
// the given qScale/qOffset.
421 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
429 TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);
431 auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 1.0f, 2.0f, 3.0f }, qScale, qOffset));
432 auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 4.0f, 5.0f, 6.0f }, qScale, qOffset));
433 auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>({ 7.0f, 8.0f, 9.0f }, qScale, qOffset));
435 TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);
439 std::vector<T> output;
440 output.resize(outputTensorInfo.GetNumElements());
441 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
442 { inputTensorInfo, inputTensorInfo, inputTensorInfo },
443 { input0.data(), input1.data(), input2.data() },
449 result.output = MakeTensor<T, 1>(outputTensorInfo, output);
450 result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(
452 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
// NOTE(review): fragments of the 2-D concat test impls (Concat2dTestImpl,
// Concat2dDim0TestImpl, Concat2dDim1TestImpl, Concat2dDim0DiffInputDimsTestImpl,
// Concat2dDim1DiffInputDimsTestImpl — named by the wrapper calls at the end
// of this excerpt). Interior lines missing; comments only.
//
// Generic 2-D impl: three { 2, 3 } inputs concatenated along `dimension`
// into a caller-supplied outputTensorInfo.
459 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
465 unsigned int dimension,
467 const int32_t qOffset)
469 TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
471 auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
481 auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
491 auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(
503 std::vector<T> output;
505 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
506 { inputTensorInfo, inputTensorInfo, inputTensorInfo },
507 { input0.data(), input1.data(), input2.data() },
513 result.
output = MakeTensor<T, 2>(outputTensorInfo, output);
// Dim-0 variant: output { 6, 3 }, delegates to the generic impl with axis 0.
517 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
525 TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
528 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0, qScale, qOffset);
530 result.
outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
// Dim-1 variant: output { 2, 9 }, axis 1; expected rows are the inputs'
// rows laid side by side.
555 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
563 TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
566 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1, qScale, qOffset);
568 result.
outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
571 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
574 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
// Dim-0 with differing input heights: { 2, 3 } + { 3, 3 } + { 1, 3 } -> { 6, 3 }.
581 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
589 TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
590 auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(
600 TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
601 auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(
614 TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
615 auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(
622 TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
625 std::vector<T> output;
626 output.resize(outputTensorInfo.GetNumElements());
627 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
628 { input0TensorInfo, input1TensorInfo, input2TensorInfo },
629 { input0.data(), input1.data(), input2.data() },
635 result.output = MakeTensor<T, 2>(outputTensorInfo, output);
636 result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
// Dim-1 with differing input widths: { 2, 3 } + { 2, 5 } + { 2, 1 } -> { 2, 9 }.
661 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
669 TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
670 auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(
680 TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
681 auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(
684 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
687 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
691 TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
692 auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(
702 TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
705 std::vector<T> output;
706 output.resize(outputTensorInfo.GetNumElements());
707 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
708 { input0TensorInfo, input1TensorInfo, input2TensorInfo },
709 { input0.data(), input1.data(), input2.data() },
715 result.output = MakeTensor<T, 2>(outputTensorInfo, output);
716 result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(
719 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
722 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
// NOTE(review): fragments of the 3-D concat test impls (generic
// Concat3dTestImpl plus Dim0/Dim1/Dim2 and the DiffInputDims variants —
// named by the wrapper calls at the end of this excerpt). Interior lines
// missing; comments only.
//
// Generic 3-D impl: three { 2, 3, 2 } inputs concatenated along `dimension`.
729 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
735 unsigned int dimension,
740 TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
742 auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
764 auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
786 auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(
810 std::vector<T> output;
812 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
813 { inputTensorInfo, inputTensorInfo, inputTensorInfo },
814 { input0.data(), input1.data(), input2.data() },
820 result.
output = MakeTensor<T, 3>(outputTensorInfo, output);
// Dim-0 variant: output { 6, 3, 2 }, axis 0.
824 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
832 TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
835 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0,
true, qScale, qOffset);
837 result.
outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
// Dim-1 variant: output { 2, 9, 2 }, axis 1.
898 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
906 TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);
909 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1,
true, qScale, qOffset);
911 result.
outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
// Dim-2 variant: output { 2, 3, 6 }, axis 2; forwards the caller's
// useSubtensor flag.
972 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
981 TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
984 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2, useSubtensor, qScale, qOffset);
986 result.
outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
989 1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
992 3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
995 5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
998 19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
1001 21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
1004 23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
// Dim-0 DiffInputDims: { 2, 3, 2 } + { 1, 3, 2 } + { 3, 3, 2 } -> { 6, 3, 2 }
// (note: no qScale/qOffset on these TensorInfos in the visible lines).
1011 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1019 TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType);
1020 auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
1042 TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType);
1043 auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
1056 TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType);
1057 auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
1088 TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType);
1091 std::vector<T> output;
1092 output.resize(outputTensorInfo.GetNumElements());
1093 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
1094 { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1095 { input0.data(), input1.data(), input2.data() },
1101 result.output = MakeTensor<T, 3>(outputTensorInfo, output);
1102 result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
// Dim-1 DiffInputDims: { 2, 3, 2 } + { 2, 4, 2 } + { 2, 1, 2 } -> { 2, 8, 2 }.
1163 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1171 TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
1172 auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
1194 TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
1195 auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
1223 TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
1224 auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
1234 TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
1237 std::vector<T> output;
1238 output.resize(outputTensorInfo.GetNumElements());
1239 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
1240 { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1241 { input0.data(), input1.data(), input2.data() },
1247 result.output = MakeTensor<T, 3>(outputTensorInfo, output);
1248 result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
// Dim-2 DiffInputDims: { 2, 3, 2 } + { 2, 3, 1 } + { 2, 3, 3 } -> { 2, 3, 6 }.
1303 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1312 TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
1313 auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(
1335 TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
1336 auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(
1358 TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
1359 auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(
1362 13.0f, 14.0f, 50.0f,
1365 15.0f, 16.0f, 51.0f,
1368 17.0f, 18.0f, 52.0f,
1371 31.0f, 32.0f, 53.0f,
1374 33.0f, 34.0f, 54.0f,
1377 35.0f, 36.0f, 55.0f,
1381 TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
1384 std::vector<T> output;
1385 output.resize(outputTensorInfo.GetNumElements());
1386 Concatenate<T>(workloadFactory, memoryManager, tensorHandleFactory,
1387 { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1388 { input0.data(), input1.data(), input2.data() },
1394 result.output = MakeTensor<T, 3>(outputTensorInfo, output);
1395 result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(
1398 1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
1401 3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
1404 5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
1407 19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
1410 21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
1413 23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
// NOTE(review): fragments of the 4-D concat test impls (generic
// Concat4dTestImpl, Dim0/Dim1/Dim2/Dim3, and the DiffShape variants —
// named by the wrapper calls at the end of this excerpt). Interior lines
// missing; comments only.
//
// Generic 4-D impl: three { 1, 3, 2, 2 } inputs concatenated along
// `dimension`.
1420 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1426 unsigned int dimension,
1431 TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1433 auto input0 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
1444 auto input1 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
1455 auto input2 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
1468 std::vector<T> output;
1471 Concatenate<T>(workloadFactory,
1473 tensorHandleFactory,
1474 {inputTensorInfo, inputTensorInfo, inputTensorInfo},
1475 {input0.data(), input1.data(), input2.data()},
1481 result.
output = MakeTensor<T, 4>(outputTensorInfo, output);
// Dim-0 variant: output { 3, 3, 2, 2 }.
1485 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1493 TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1496 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 0,
true, qScale, qOffset);
1498 result.
outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// Dim-1 variant: output { 1, 9, 2, 2 }.
1526 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1534 TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);
1537 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 1,
true, qScale, qOffset);
1539 result.
outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// Dim-2 variant: output { 1, 3, 6, 2 }.
1567 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1575 TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);
1578 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 2,
true, qScale, qOffset);
1580 result.
outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// Dim-3 variant: output { 1, 3, 2, 6 }; forwards useSubtensor.
1608 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1617 TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);
1620 workloadFactory, memoryManager, tensorHandleFactory, outputTensorInfo, 3, useSubtensor, qScale, qOffset);
1622 result.
outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// DiffShape Dim-0: { 1, 3, 2, 2 } + { 2, 3, 2, 2 } -> { 3, 3, 2, 2 }.
1650 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1658 constexpr
unsigned int dimension = 0u;
1660 TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1661 auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
1672 TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1674 auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
1692 TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1696 std::vector<T> output;
1697 output.resize(outputTensorInfo.GetNumElements());
1698 Concatenate<T>(workloadFactory,
1700 tensorHandleFactory,
1701 {inputTensorInfo0, inputTensorInfo1},
1702 {input0.data(), input1.data()},
1708 result.output = MakeTensor<T, 4>(outputTensorInfo, output);
1709 result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// DiffShape Dim-1: { 1, 3, 2, 2 } + { 1, 2, 2, 2 } -> { 1, 5, 2, 2 }.
1737 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1745 constexpr
unsigned int dimension = 1u;
1747 TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1748 auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
1759 TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
1761 auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
1770 TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
1774 std::vector<T> output;
1775 output.resize(outputTensorInfo.GetNumElements());
1776 Concatenate<T>(workloadFactory,
1778 tensorHandleFactory,
1779 {inputTensorInfo0, inputTensorInfo1},
1780 {input0.data(), input1.data()},
1786 result.output = MakeTensor<T, 4>(outputTensorInfo, output);
1787 result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// DiffShape Dim-2: { 1, 3, 2, 2 } + { 1, 3, 3, 2 } -> { 1, 3, 5, 2 }.
1805 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1813 constexpr
unsigned int dimension = 2u;
1815 TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1816 auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
1827 TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
1828 auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
1842 TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
1845 std::vector<T> output;
1846 output.resize(outputTensorInfo.GetNumElements());
1847 Concatenate<T>(workloadFactory,
1849 tensorHandleFactory,
1850 {inputTensorInfo0, inputTensorInfo1},
1851 {input0.data(), input1.data()},
1857 result.output = MakeTensor<T, 4>(outputTensorInfo, output);
1858 result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
// DiffShape Dim-3: { 1, 3, 2, 2 } + { 1, 3, 2, 3 } -> { 1, 3, 2, 5 };
// expected rows interleave the two inputs' innermost elements.
1883 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
1892 constexpr
unsigned int dimension = 3u;
1894 TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
1895 auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(
1906 TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
1907 auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(
1909 11.0f, 12.0f, 13.0f,
1910 14.0f, 15.0f, 16.0f,
1912 17.0f, 18.0f, 19.0f,
1913 20.0f, 21.0f, 22.0f,
1915 23.0f, 24.0f, 25.0f,
1920 TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
1924 std::vector<T> output;
1925 output.resize(outputTensorInfo.GetNumElements());
1926 Concatenate<T>(workloadFactory,
1928 tensorHandleFactory,
1929 {inputTensorInfo0, inputTensorInfo1},
1930 {input0.data(), input1.data()},
1936 result.output = MakeTensor<T, 4>(outputTensorInfo, output);
1937 result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
1939 1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
1940 3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
1941 5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
1942 7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
1943 9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
1944 11.0f, 12.0f, 26.0f, 27.0f, 28.0f
// NOTE(review): fragment of ConcatDifferentInputOutputQParamTest (named by
// the two explicit uses at the end of this block); interior lines missing,
// comments only.
// Concatenates two inputs with DIFFERENT quantization scale/offset into an
// output with yet another scale/offset, building the workload manually
// (sub-tensor handles when supported) instead of going through Concatenate<T>.
1951 template<DataType ArmnnType,
typename T>
1961 TensorInfo outputTensorInfo({ 3, 6, 3 }, ArmnnType);
1962 TensorInfo inputTensorInfo1({ 3, 6, 2 }, ArmnnType);
1963 TensorInfo inputTensorInfo2({ 3, 6, 1 }, ArmnnType);
1965 std::vector<TensorShape> inputTensorShapes({inputTensorInfo1.GetShape(), inputTensorInfo2.GetShape()});
// Per-input quantization parameters (deliberately different from each other
// and from the output's).
1968 const float inputScale1 = 0.5f;
1969 const int32_t inputOffset1 = 5;
1971 auto input1 = MakeTensor<T, 3>(inputTensorInfo1, std::vector<T>(
1989 const float inputScale2 = 0.2f;
1990 const int32_t inputOffset2 = 10;
1992 auto input2 = MakeTensor<T, 3>(inputTensorInfo2, std::vector<T>(
2003 const float outputScale = 0.1f;
2004 const int32_t outputOffset = 20;
2008 ret.
outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(
2032 outputTensorInfo.SetQuantizationScale(outputScale);
2033 outputTensorInfo.SetQuantizationOffset(outputOffset);
2034 inputTensorInfo1.SetQuantizationScale(inputScale1);
2035 inputTensorInfo1.SetQuantizationOffset(inputOffset1);
2036 inputTensorInfo2.SetQuantizationScale(inputScale2);
2037 inputTensorInfo2.SetQuantizationOffset(inputOffset2);
// View origins: input2's window starts at offset 2 along the last axis.
2039 std::vector<unsigned int> wOrigin1 = { 0, 0, 0 };
2042 std::vector<unsigned int> wOrigin2 = { 0, 0, 2 };
2045 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
2049 std::unique_ptr<ITensorHandle> inputHandle1 =
2050 subTensorsSupported ?
2051 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2054 std::unique_ptr<ITensorHandle> inputHandle2 =
2055 subTensorsSupported ?
2056 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2061 inputTensorShapes.begin(),inputTensorShapes.end(), 2);
2065 AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2066 AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2067 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2072 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(data, info);
2074 inputHandle1->Allocate();
2075 inputHandle2->Allocate();
2076 outputHandle->Allocate();
2081 workload->PostAllocationConfigure();
2082 workload->Execute();
// Explicit uses for the asymmetric-uint8 and symmetric-int16 quantized types.
2094 ConcatDifferentInputOutputQParamTest<DataType::QAsymmU8>(
2101 ConcatDifferentInputOutputQParamTest<DataType::QSymmS16>(
// NOTE(review): fragment of the float Concat test (interior lines missing;
// comments only). Two float inputs — { 2, 6, 3 } holding 1..36 and
// { 1, 6, 3 } holding 37..54, per the visible data — are concatenated along
// the channel axis into a { 3, 6, 3 } output (shape construction lines not
// visible), with the workload built manually using sub-tensor handles when
// supported.
2118 unsigned int outputWidth = 3;
2119 unsigned int outputHeight = 6;
2120 unsigned int outputChannels = 3;
2122 unsigned int inputWidth1 = 3;
2123 unsigned int inputHeight1 = 6;
2124 unsigned int inputChannels1 = 2;
2126 unsigned int inputWidth2 = 3;
2127 unsigned int inputHeight2 = 6;
2128 unsigned int inputChannels2 = 1;
// Expected output is simply both inputs back-to-back: 1..54.
2137 ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
2142 10.0f, 11.0f, 12.0f,
2143 13.0f, 14.0f, 15.0f,
2144 16.0f, 17.0f, 18.0f,
2146 19.0f, 20.0f, 21.0f,
2147 22.0f, 23.0f, 24.0f,
2148 25.0f, 26.0f, 27.0f,
2149 28.0f, 29.0f, 30.0f,
2150 31.0f, 32.0f, 33.0f,
2151 34.0f, 35.0f, 36.0f,
2153 37.0f, 38.0f, 39.0f,
2154 40.0f, 41.0f, 42.0f,
2155 43.0f, 44.0f, 45.0f,
2156 46.0f, 47.0f, 48.0f,
2157 49.0f, 50.0f, 51.0f,
2158 52.0f, 53.0f, 54.0f,
2162 auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
2167 10.0f, 11.0f, 12.0f,
2168 13.0f, 14.0f, 15.0f,
2169 16.0f, 17.0f, 18.0f,
2171 19.0f, 20.0f, 21.0f,
2172 22.0f, 23.0f, 24.0f,
2173 25.0f, 26.0f, 27.0f,
2174 28.0f, 29.0f, 30.0f,
2175 31.0f, 32.0f, 33.0f,
2176 34.0f, 35.0f, 36.0f,
2180 auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
2182 37.0f, 38.0f, 39.0f,
2183 40.0f, 41.0f, 42.0f,
2184 43.0f, 44.0f, 45.0f,
2185 46.0f, 47.0f, 48.0f,
2186 49.0f, 50.0f, 51.0f,
2187 52.0f, 53.0f, 54.0f,
// View origins: input2's window starts at channel 2 of the output.
2191 std::vector<unsigned int> wOrigin1 = {0, 0, 0};
2194 std::vector<unsigned int> wOrigin2 = {2, 0, 0};
2197 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
2201 std::unique_ptr<ITensorHandle> inputHandle1 =
2202 subTensorsSupported ?
2203 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2206 std::unique_ptr<ITensorHandle> inputHandle2 =
2207 subTensorsSupported ?
2208 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2213 AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2214 AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2215 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2220 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(data, info);
2222 inputHandle1->Allocate();
2223 inputHandle2->Allocate();
2224 outputHandle->Allocate();
2229 workload->PostAllocationConfigure();
2230 workload->Execute();
// NOTE(review): fragments of the thin public wrapper tests — each one simply
// forwards to the corresponding *TestImpl with a fixed data type and
// qScale=0.0f / qOffset=0 (signatures are in lines not visible). Comments
// only; code untouched.
2242 return Concat1dTestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2250 return Concat2dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2258 return Concat2dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2266 return Concat2dDim0DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
2267 tensorHandleFactory, 0.0f, 0);
2275 return Concat2dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory,
2277 tensorHandleFactory,
2287 return Concat3dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2295 return Concat3dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2304 return Concat3dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory,
2305 useSubtensor, 0.0f, 0);
2313 return Concat3dDim0DiffInputDimsTestImpl<DataType::Float32>(
2314 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2322 return Concat3dDim1DiffInputDimsTestImpl<DataType::Float32>(workloadFactory, memoryManager,
2323 tensorHandleFactory, 0.0f, 0);
2332 return Concat3dDim2DiffInputDimsTestImpl<DataType::Float32>(
2333 workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.0f, 0);
2341 return Concat4dDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2349 return Concat4dDim1TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2357 return Concat4dDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2366 return Concat4dDim3TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2367 tensorHandleFactory, 0.0f, 0, useSubtensor);
2375 return Concat4dDiffShapeDim0TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2376 tensorHandleFactory, 0.0f, 0);
2384 return Concat4dDiffShapeDim1TestImpl<DataType::Float32>(
2385 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2393 return Concat4dDiffShapeDim2TestImpl<DataType::Float32>(workloadFactory, memoryManager,
2394 tensorHandleFactory, 0.0f, 0);
2403 return Concat4dDiffShapeDim3TestImpl<DataType::Float32>(
2404 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, useSubtensor);
// Half-precision and bfloat16 variants of the 3-D dim-1 test.
2412 return Concat3dDim1TestImpl<DataType::Float16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
2420 return Concat3dDim1TestImpl<DataType::BFloat16>(workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0);
// NOTE(review): fragment of the uint8 different-quantization-params concat
// test (interior lines missing; comments only). Same { 2, 6, 3 } + { 1, 6, 3 }
// -> { 3, 6, 3 } channel-concat layout as the float test above, but with
// distinct scale/offset on each input and on the output.
2430 unsigned int outputWidth = 3;
2431 unsigned int outputHeight = 6;
2432 unsigned int outputChannels = 3;
2434 unsigned int inputWidth1 = 3;
2435 unsigned int inputHeight1 = 6;
2436 unsigned int inputChannels1 = 2;
2438 unsigned int inputWidth2 = 3;
2439 unsigned int inputHeight2 = 6;
2440 unsigned int inputChannels2 = 1;
2448 const float inputScale1 = 0.015686f;
2449 const int32_t inputOffset1 = 192;
2451 auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
2470 const float inputScale2 = 0.019608f;
2471 const int32_t inputOffset2 = 50;
2473 auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
2486 const float outputScale = 0.015686f;
2487 const int32_t outputOffset = 192;
2491 ret.
outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
2516 outputTensorInfo.SetQuantizationScale(outputScale);
2517 outputTensorInfo.SetQuantizationOffset(outputOffset);
2518 inputTensorInfo1.SetQuantizationScale(inputScale1);
2519 inputTensorInfo1.SetQuantizationOffset(inputOffset1);
2520 inputTensorInfo2.SetQuantizationScale(inputScale2);
2521 inputTensorInfo2.SetQuantizationOffset(inputOffset2);
// Second view starts at channel 2 of the output.
2523 std::vector<unsigned int> wOrigin1 = { 0, 0, 0 };
2526 std::vector<unsigned int> wOrigin2 = { 2, 0, 0 };
2529 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
2533 std::unique_ptr<ITensorHandle> inputHandle1 =
2534 subTensorsSupported ?
2535 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2538 std::unique_ptr<ITensorHandle> inputHandle2 =
2539 subTensorsSupported ?
2540 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2545 AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2546 AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2547 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2552 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(data, info);
2554 inputHandle1->Allocate();
2555 inputHandle2->Allocate();
2556 outputHandle->Allocate();
2561 workload->PostAllocationConfigure();
2562 workload->Execute();
// NOTE(review): fragment of the uint8 concat test where ALL tensors share
// one scale/offset (interior lines missing; comments only). Same
// channel-concat layout as the tests above.
2576 unsigned int outputWidth = 3;
2577 unsigned int outputHeight = 6;
2578 unsigned int outputChannels = 3;
2580 unsigned int inputWidth1 = 3;
2581 unsigned int inputHeight1 = 6;
2582 unsigned int inputChannels1 = 2;
2584 unsigned int inputWidth2 = 3;
2585 unsigned int inputHeight2 = 6;
2586 unsigned int inputChannels2 = 1;
// One shared quantization parameter set across inputs and output.
2594 const float scale = 0.13497836f;
2595 const int32_t offset = -7;
2598 outputTensorInfo.SetQuantizationOffset(offset);
2599 inputTensorInfo1.SetQuantizationScale(scale);
2600 inputTensorInfo1.SetQuantizationOffset(offset);
2601 inputTensorInfo2.SetQuantizationScale(scale);
2602 inputTensorInfo2.SetQuantizationOffset(offset);
2606 ret.
outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
2631 auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
2649 auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
2660 std::vector<unsigned int> wOrigin1 = { 0, 0, 0 };
2663 std::vector<unsigned int> wOrigin2 = { 2, 0, 0 };
2666 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
2670 std::unique_ptr<ITensorHandle> inputHandle1 =
2671 subTensorsSupported ?
2672 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2675 std::unique_ptr<ITensorHandle> inputHandle2 =
2676 subTensorsSupported ?
2677 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2683 AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2684 AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2685 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2690 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(data, info);
2692 inputHandle1->Allocate();
2693 inputHandle2->Allocate();
2694 outputHandle->Allocate();
2699 workload->PostAllocationConfigure();
2700 workload->Execute();
2714 unsigned int outputWidth = 3;
2715 unsigned int outputHeight = 6;
2716 unsigned int outputChannels = 3;
2718 unsigned int inputWidth1 = 3;
2719 unsigned int inputHeight1 = 6;
2720 unsigned int inputChannels1 = 2;
2722 unsigned int inputWidth2 = 3;
2723 unsigned int inputHeight2 = 6;
2724 unsigned int inputChannels2 = 1;
2732 const float scale = 0.13497836f;
2733 const int32_t offset = -7;
2736 outputTensorInfo.SetQuantizationOffset(offset);
2737 inputTensorInfo1.SetQuantizationScale(scale);
2738 inputTensorInfo1.SetQuantizationOffset(offset);
2739 inputTensorInfo2.SetQuantizationScale(scale);
2740 inputTensorInfo2.SetQuantizationOffset(offset);
2744 ret.
outputExpected = MakeTensor<uint16_t, 3>(outputTensorInfo, std::vector<uint16_t>(
2768 auto input1 = MakeTensor<uint16_t, 3>(inputTensorInfo1, std::vector<uint16_t>(
2785 auto input2 = MakeTensor<uint16_t, 3>(inputTensorInfo2, std::vector<uint16_t>(
2795 std::vector<unsigned int> wOrigin1 = { 0, 0, 0 };
2798 std::vector<unsigned int> wOrigin2 = { 2, 0, 0 };
2802 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.
CreateTensorHandle(outputTensorInfo);
2806 std::unique_ptr<ITensorHandle> inputHandle1 =
2807 subTensorsSupported ?
2808 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2811 std::unique_ptr<ITensorHandle> inputHandle2 =
2812 subTensorsSupported ?
2813 tensorHandleFactory.
CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2819 AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2820 AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2821 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2826 std::unique_ptr<IWorkload> workload = workloadFactory.
CreateConcat(data, info);
2828 inputHandle1->Allocate();
2829 inputHandle2->Allocate();
2830 outputHandle->Allocate();
2835 workload->PostAllocationConfigure();
2836 workload->Execute();
2848 return Concat1dTestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2856 return Concat2dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2864 return Concat2dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2872 return Concat2dDim0DiffInputDimsTestImpl<DataType::QAsymmU8>(
2873 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2881 return Concat2dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
2882 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2890 return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2898 return Concat3dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2907 return Concat3dDim2TestImpl<DataType::QAsymmU8>(
2908 workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
2916 return Concat3dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2924 return Concat3dDim1DiffInputDimsTestImpl<DataType::QAsymmU8>(
2925 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2934 return Concat3dDim2DiffInputDimsTestImpl<DataType::QAsymmU8>(
2935 workloadFactory, memoryManager, tensorHandleFactory, useSubtensor, 0.5f, -1);
2943 return Concat4dDim0TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2951 return Concat4dDim1TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2959 return Concat4dDim2TestImpl<DataType::QAsymmU8>(workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2967 return Concat4dDim3TestImpl<DataType::QAsymmU8>(
2968 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
2976 return Concat4dDiffShapeDim0TestImpl<DataType::QAsymmU8>(
2977 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2985 return Concat4dDiffShapeDim1TestImpl<DataType::QAsymmU8>(
2986 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
2994 return Concat4dDiffShapeDim2TestImpl<DataType::QAsymmU8>(
2995 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1);
3004 return Concat4dDiffShapeDim3TestImpl<DataType::QAsymmU8>(
3005 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, -1, useSubtensor);
LayerTestResult< uint16_t, 3 > ConcatUint16Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 4 > Concat4dDiffShapeDim3Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< float, 3 > Concat3dDim0Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 2 > Concat2dDim0Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 2 > Concat2dDim1TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< float, 4 > Concat4dDim0Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 3 > ConcatTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 4 > Concat4dDim2TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 3 > Concat3dDim0DiffInputDimsUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
const TensorShape & GetShape() const
LayerTestResult< uint8_t, 4 > Concat4dDiffShapeDim1Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 2 > Concat2dDim0DiffInputDimsTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 2 > Concat2dDim0DiffInputDimsUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 3 > Concat3dDim1DiffInputDimsUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 3 > Concat3dDim2DiffInputDimsUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
boost::multi_array< T, n > outputExpected
virtual std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const =0
LayerTestResult< float, 2 > Concat2dDim1Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 4 > Concat4dDim1Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 2 > Concat2dDim0Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< BFloat16, 3 > ConcatBFloat16Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 4 > Concat4dDim2Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 2 > Concat2dDim1DiffInputDimsUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 4 > Concat4dDiffShapeDim1Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 3 > Concat3dDim1DiffInputDimsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
LayerTestResult< float, 3 > Concat3dDim2DiffInputDimsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerDescriptor m_Parameters
void Generate3dPermuteVectorForConcat(unsigned int numDimensions, unsigned int &concatDim, std::pair< PermutationVector, PermutationVector > &permutations)
LayerTestResult< T, 4 > Concat4dDiffShapeDim2TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 4 > Concat4dDim3Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< uint8_t, 2 > Concat2dDim1Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
virtual std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &descriptor, const WorkloadInfo &info) const
void SetShape(const TensorShape &newShape)
LayerTestResult< T, 2 > Concat2dTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, const TensorInfo &outputTensorInfo, unsigned int dimension, const float qScale, const int32_t qOffset)
LayerTestResult< T, 1 > Concat1dTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
const uint32_t * GetViewOrigin(uint32_t idx) const
Return the view origin at the int value idx.
LayerTestResult< float, 2 > Concat2dDim0DiffInputDimsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::vector< ViewOrigin > m_ViewOrigins
#define ARMNN_ASSERT_MSG(COND, MSG)
LayerTestResult< float, 3 > Concat3dDim1Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
LayerTestResult< T, 3 > Concat3dDim0DiffInputDimsTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
void Concatenate(const ConcatQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
An OriginsDescriptor for the ConcatLayer.
LayerTestResult< float, 4 > Concat4dDiffShapeDim0Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 4 > Concat4dDiffShapeDim1TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 3 > ConcatUint8DifferentQParamsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
void SetQuantizationScale(float scale)
#define ARMNN_ASSERT(COND)
LayerTestResult< uint8_t, 4 > Concat4dDiffShapeDim0Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 4 > Concat4dTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, const TensorInfo &outputTensorInfo, unsigned int dimension, bool useSubtensor, float qScale, int32_t qOffset)
LayerTestResult< Half, 3 > ConcatFloat16Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
OriginsDescriptor CreateDescriptorForConcat(const std::vector< TensorInfo > &inputTensorInfos, unsigned int concatDim)
void CopyDataFromITensorHandle(void *memory, const armnn::ITensorHandle *tensorHandle)
LayerTestResult< uint8_t, 3 > Concat3dDim1Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
virtual std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &descriptor, const WorkloadInfo &info) const
void PermuteInputsForConcat(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, std::vector< TensorInfo > &inputTensorInfos, std::vector< T *> &inputData, std::vector< std::vector< T >> &inputDataStorage, PermutationVector &permuteVector, unsigned int &concatDim, TensorInfo &outputTensorInfo)
LayerTestResult< T, 3 > Concat3dDim0TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< float, 3 > Concat3dDim2Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< T, 3 > Concat3dDim1TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< float, 4 > Concat4dDiffShapeDim2Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
TensorShape ExpandTensorShapeTo3dForPermute(const TensorShape &inputShape)
LayerTestResult< T, 2 > Concat2dDim1DiffInputDimsTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
boost::multi_array< T, n > output
LayerTestResult< T, 2 > Concat2dDim0TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
bool IsEqual(const PermutationVector &other) const
LayerTestResult< float, 4 > Concat4dDim1Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 4 > Concat4dDiffShapeDim2Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 3 > Concat3dDim2TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor, float qScale, int32_t qOffset)
LayerTestResult< float, 3 > Concat3dDim0DiffInputDimsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 4 > Concat4dDim1TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< T, 4 > Concat4dDiffShapeDim3TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset, bool useSubtensor)
LayerTestResult< float, 4 > Concat4dDim3Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
LayerTestResult< uint8_t, 3 > Concat3dDim0Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
uint32_t GetNumDimensions() const
Get the number of dimensions.
LayerTestResult< uint8_t, 3 > ConcatUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< T, 3 > Concat3dTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, const TensorInfo &outputTensorInfo, unsigned int dimension, bool useSubtensor, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 1 > Concat1dUint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 2 > Concat2dDim1DiffInputDimsTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
void PermuteTensorData(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, const PermutationVector &mappings, TensorInfo &inputTensorInfo, const T *inputData, std::vector< T > &outputData)
Contains information about inputs and outputs to a layer.
LayerTestResult< T, 3 > Concat3dDim2DiffInputDimsTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor, float qScale, int32_t qOffset)
OriginsDescriptor CreateDescriptorForConcatenation(TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing...
uint32_t GetNumViews() const
Get the number of views.
LayerTestResult< T, 4 > Concat4dDim0TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
LayerTestResult< uint8_t, 4 > Concat4dDim2Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< uint8_t, 3 > Concat3dDim2Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< T, 3 > ConcatDifferentInputOutputQParamTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< T, 4 > Concat4dDim3TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset, bool useSubtensor)
LayerTestResult< T, 3 > Concat3dDim1DiffInputDimsTestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
LayerTestResult< float, 4 > Concat4dDiffShapeDim3Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool useSubtensor)
LayerTestResult< uint8_t, 4 > Concat4dDim0Uint8Test(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 1 > Concat1dTest(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
unsigned int GetNumElements() const
bool NeedPermuteForConcat(const std::vector< TensorInfo > &inputTensorInfos, unsigned int concatDim)
void PermuteOutputForConcat(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, const TensorInfo &tensorInfo, const PermutationVector &permuteVector, std::unique_ptr< ITensorHandle > &&inputDataHandle, T *data)
A PermuteDescriptor for the PermuteLayer.
void CopyDataToITensorHandle(armnn::ITensorHandle *tensorHandle, const void *memory)
LayerTestResult< T, 4 > Concat4dDiffShapeDim0TestImpl(IWorkloadFactory &workloadFactory, const IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, float qScale, int32_t qOffset)
virtual bool SupportsSubTensors() const =0