// armnn/latest/_cl_backend_8cpp_source.html

 //

 // Copyright © 2017-2024 Arm Ltd. All rights reserved.

 // SPDX-License-Identifier: MIT

 //


 #include "ClBackend.hpp"

 #include "ClBackendContext.hpp"

 #include "ClBackendDefaultAllocator.hpp"

 #include "ClBackendId.hpp"

 #include "ClBackendModelContext.hpp"

 #include "ClImportTensorHandleFactory.hpp"

 #include "ClLayerSupport.hpp"

 #include "ClTensorHandleFactory.hpp"

 #include "ClWorkloadFactory.hpp"


 #include <armnn/BackendRegistry.hpp>

 #include <armnn/Descriptors.hpp>


 #include <aclCommon/ArmComputeSubgraphUtils.hpp>

 #include <aclCommon/ArmComputeUtils.hpp>


 #include <armnn/backends/IBackendContext.hpp>

 #include <armnn/backends/IMemoryManager.hpp>

 #include <armnn/utility/PolymorphicDowncast.hpp>


 #include "workloads/ClAdditionWorkload.hpp"

 #include "workloads/ClBatchNormalizationFloatWorkload.hpp"

 #include "workloads/ClConvolution2dWorkload.hpp"

 #include "workloads/ClDepthwiseConvolutionWorkload.hpp"

 #include "workloads/ClDivisionWorkload.hpp"

 #include "workloads/ClFullyConnectedWorkload.hpp"

 #include "workloads/ClMultiplicationWorkload.hpp"

 #include "workloads/ClReduceWorkload.hpp"

 #include "workloads/ClSubtractionWorkload.hpp"


 #include <Optimizer.hpp>


 #include <arm_compute/core/Types.h>

 #include <arm_compute/runtime/CL/CLBufferAllocator.h>


 namespace armnn

 {


 // Returns the identifier of the CL backend. The BackendId is built from ClBackendId() the first time
 // this function runs, and that same object is handed back on every call.
 const BackendId& ClBackend::GetIdStatic()
 {
     static const BackendId s_ClBackendId{ClBackendId()};
     return s_ClBackendId;
 }


 // Creates a new ClMemoryManager. The manager wraps m_CustomAllocator when m_UsingCustomAllocator is set;
 // otherwise it wraps a newly constructed arm_compute::CLBufferAllocator.
 IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
 {
     if (!m_UsingCustomAllocator)
     {
         // No custom allocator in use: fall back to the arm_compute buffer allocator.
         return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
     }
     return std::make_unique<ClMemoryManager>(m_CustomAllocator);
 }


 // Creates a ClWorkloadFactory that uses the supplied memory manager, downcast to ClMemoryManager.
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
 {
     auto clMemoryManager = PolymorphicPointerDowncast<ClMemoryManager>(memoryManager);
     return std::make_unique<ClWorkloadFactory>(std::move(clMemoryManager));
 }


 // Creates a ClWorkloadFactory from the supplied memory manager (downcast to ClMemoryManager) and a
 // backend-specific model context built from 'modelOptions'.
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
 {
     auto clMemoryManager = PolymorphicPointerDowncast<ClMemoryManager>(memoryManager);
     auto modelContext    = CreateBackendSpecificModelContext(modelOptions);
     return std::make_unique<ClWorkloadFactory>(std::move(clMemoryManager), std::move(modelContext));
 }


 // Creates a ClWorkloadFactory and registers the objects it relies on with 'registry'.
 //
 // A ClMemoryManager is created (wrapping m_CustomAllocator when m_UsingCustomAllocator is set, otherwise a
 // new arm_compute::CLBufferAllocator). It is registered together with a ClTensorHandleFactory and a
 // ClImportTensorHandleFactory; the latter is constructed with MemorySource::Malloc for both flag arguments.
 // The two factories are registered as a copy/import pair in both directions.
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     TensorHandleFactoryRegistry& registry) const
 {
     std::shared_ptr<ClMemoryManager> clMemoryManager = m_UsingCustomAllocator
         ? std::make_shared<ClMemoryManager>(m_CustomAllocator)
         : std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

     const MemorySourceFlags mallocFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);

     std::unique_ptr<ITensorHandleFactory> copyFactory{std::make_unique<ClTensorHandleFactory>(clMemoryManager)};
     std::unique_ptr<ITensorHandleFactory> importFactory{
         std::make_unique<ClImportTensorHandleFactory>(mallocFlags, mallocFlags)};

     // The ids must be read before ownership of the factories is handed to the registry below.
     registry.RegisterCopyAndImportFactoryPair(copyFactory->GetId(), importFactory->GetId());
     registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), copyFactory->GetId());

     registry.RegisterMemoryManager(clMemoryManager);
     registry.RegisterFactory(std::move(copyFactory));
     registry.RegisterFactory(std::move(importFactory));

     return std::make_unique<ClWorkloadFactory>(
         PolymorphicPointerDowncast<ClMemoryManager>(clMemoryManager));
 }


 // Creates a ClWorkloadFactory configured from 'modelOptions' and registers the objects it relies on with
 // 'registry'.
 //
 // A ClMemoryManager is created (wrapping m_CustomAllocator when m_UsingCustomAllocator is set, otherwise a
 // new arm_compute::CLBufferAllocator). It is registered together with a ClTensorHandleFactory and a
 // ClImportTensorHandleFactory; the latter is constructed with MemorySource::Malloc for both flag arguments.
 // The two factories are registered as a copy/import pair in both directions.
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
 {
     std::shared_ptr<ClMemoryManager> clMemoryManager = m_UsingCustomAllocator
         ? std::make_shared<ClMemoryManager>(m_CustomAllocator)
         : std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

     const MemorySourceFlags mallocFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);

     std::unique_ptr<ITensorHandleFactory> copyFactory{std::make_unique<ClTensorHandleFactory>(clMemoryManager)};
     std::unique_ptr<ITensorHandleFactory> importFactory{
         std::make_unique<ClImportTensorHandleFactory>(mallocFlags, mallocFlags)};

     // The ids must be read before ownership of the factories is handed to the registry below.
     registry.RegisterCopyAndImportFactoryPair(copyFactory->GetId(), importFactory->GetId());
     registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), copyFactory->GetId());

     registry.RegisterMemoryManager(clMemoryManager);
     registry.RegisterFactory(std::move(copyFactory));
     registry.RegisterFactory(std::move(importFactory));

     return std::make_unique<ClWorkloadFactory>(
         PolymorphicPointerDowncast<ClMemoryManager>(clMemoryManager), CreateBackendSpecificModelContext(modelOptions));
 }


 // Creates a ClWorkloadFactory configured from 'modelOptions' and registers the objects it relies on with
 // 'registry', using caller-supplied memory source flags for the import tensor handle factory.
 //
 // inputFlags / outputFlags are passed to the ClImportTensorHandleFactory constructor; a value equal to
 // MemorySource::Undefined is replaced by MemorySource::Malloc first. The memory manager wraps
 // m_CustomAllocator when m_UsingCustomAllocator is set, otherwise a new arm_compute::CLBufferAllocator.
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     TensorHandleFactoryRegistry& registry,
     const ModelOptions& modelOptions,
     MemorySourceFlags inputFlags,
     MemorySourceFlags outputFlags) const
 {
     const MemorySourceFlags undefinedFlags = static_cast<MemorySourceFlags>(MemorySource::Undefined);
     const MemorySourceFlags mallocFlags    = static_cast<MemorySourceFlags>(MemorySource::Malloc);

     // To allow force import, flags that were left Undefined are treated as Malloc.
     if (inputFlags == undefinedFlags)
     {
         inputFlags = mallocFlags;
     }
     if (outputFlags == undefinedFlags)
     {
         outputFlags = mallocFlags;
     }

     std::shared_ptr<ClMemoryManager> clMemoryManager = m_UsingCustomAllocator
         ? std::make_shared<ClMemoryManager>(m_CustomAllocator)
         : std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

     std::unique_ptr<ITensorHandleFactory> copyFactory{std::make_unique<ClTensorHandleFactory>(clMemoryManager)};
     std::unique_ptr<ITensorHandleFactory> importFactory{
         std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags)};

     // The ids must be read before ownership of the factories is handed to the registry below.
     registry.RegisterCopyAndImportFactoryPair(copyFactory->GetId(), importFactory->GetId());
     registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), copyFactory->GetId());

     registry.RegisterMemoryManager(clMemoryManager);
     registry.RegisterFactory(std::move(copyFactory));
     registry.RegisterFactory(std::move(importFactory));

     return std::make_unique<ClWorkloadFactory>(
         PolymorphicPointerDowncast<ClMemoryManager>(clMemoryManager), CreateBackendSpecificModelContext(modelOptions));
 }


 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const

 {

     return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),

                                                          ClImportTensorHandleFactory::GetIdStatic()};

 }


 // Registers this backend's memory manager and tensor handle factories with 'registry'.
 //
 // A ClMemoryManager is created (wrapping m_CustomAllocator when m_UsingCustomAllocator is set, otherwise a
 // new arm_compute::CLBufferAllocator). It is registered together with a ClTensorHandleFactory and a
 // ClImportTensorHandleFactory; the latter is constructed with MemorySource::Malloc for both flag arguments.
 // The two factories are registered as a copy/import pair in both directions.
 void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
 {
     std::shared_ptr<ClMemoryManager> clMemoryManager = m_UsingCustomAllocator
         ? std::make_shared<ClMemoryManager>(m_CustomAllocator)
         : std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

     const MemorySourceFlags mallocFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);

     std::unique_ptr<ITensorHandleFactory> copyFactory{std::make_unique<ClTensorHandleFactory>(clMemoryManager)};
     std::unique_ptr<ITensorHandleFactory> importFactory{
         std::make_unique<ClImportTensorHandleFactory>(mallocFlags, mallocFlags)};

     // The ids must be read before ownership of the factories is handed to the registry below.
     registry.RegisterCopyAndImportFactoryPair(copyFactory->GetId(), importFactory->GetId());
     registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), copyFactory->GetId());

     registry.RegisterMemoryManager(clMemoryManager);
     registry.RegisterFactory(std::move(copyFactory));
     registry.RegisterFactory(std::move(importFactory));
 }


 // Registers this backend's memory manager and tensor handle factories with 'registry', using caller-supplied
 // memory source flags for the import tensor handle factory.
 //
 // inputFlags / outputFlags are passed to the ClImportTensorHandleFactory constructor; a value equal to
 // MemorySource::Undefined is replaced by MemorySource::Malloc first. The memory manager wraps
 // m_CustomAllocator when m_UsingCustomAllocator is set, otherwise a new arm_compute::CLBufferAllocator.
 void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                               MemorySourceFlags inputFlags,
                                               MemorySourceFlags outputFlags)
 {
     const MemorySourceFlags undefinedFlags = static_cast<MemorySourceFlags>(MemorySource::Undefined);
     const MemorySourceFlags mallocFlags    = static_cast<MemorySourceFlags>(MemorySource::Malloc);

     // To allow force import, flags that were left Undefined are treated as Malloc.
     if (inputFlags == undefinedFlags)
     {
         inputFlags = mallocFlags;
     }
     if (outputFlags == undefinedFlags)
     {
         outputFlags = mallocFlags;
     }

     std::shared_ptr<ClMemoryManager> clMemoryManager = m_UsingCustomAllocator
         ? std::make_shared<ClMemoryManager>(m_CustomAllocator)
         : std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

     std::unique_ptr<ITensorHandleFactory> copyFactory{std::make_unique<ClTensorHandleFactory>(clMemoryManager)};
     std::unique_ptr<ITensorHandleFactory> importFactory{
         std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags)};

     // The ids must be read before ownership of the factories is handed to the registry below.
     registry.RegisterCopyAndImportFactoryPair(copyFactory->GetId(), importFactory->GetId());
     registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), copyFactory->GetId());

     registry.RegisterMemoryManager(clMemoryManager);
     registry.RegisterFactory(std::move(copyFactory));
     registry.RegisterFactory(std::move(importFactory));
 }


 // Creates a ClBackendContext from the runtime creation options and returns it as an IBackendContextPtr.
 IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
 {
     IBackendContextPtr backendContext{new ClBackendContext{options}};
     return backendContext;
 }


 // Returns a value-initialised (empty) profiling-context pointer. Both arguments are ignored.
 IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
     const IRuntime::CreationOptions&, IBackendProfilingPtr&)
 {
     IBackendProfilingContextPtr noProfilingContext{};
     return noProfilingContext;
 }


 // Wraps a new ClBackendModelContext, constructed from 'modelOptions', in an IBackendSpecificModelContextPtr.
 IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
     const ModelOptions& modelOptions) const
 {
     IBackendSpecificModelContextPtr modelContext{new ClBackendModelContext{modelOptions}};
     return modelContext;
 }


 // Returns the layer-support object used when no model options are supplied. It is constructed on first use
 // with an empty backend-specific model context, and the same pointer is returned by every later call.
 IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
 {
     static ILayerSupportSharedPtr s_DefaultLayerSupport{
         new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})};
     return s_DefaultLayerSupport;
 }


 // Returns a layer-support object configured from 'modelOptions'.
 //
 // A new ClLayerSupport is built on every call. The previous implementation kept the result in a
 // function-local static, which is initialised only once: every later call received the object built from
 // the first caller's options, and its own 'modelOptions' were silently ignored.
 IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
 {
     return ILayerSupportSharedPtr{new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))};
 }


 // Returns a newly created ClBackendDefaultAllocator, owned by the caller.
 std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
 {
     std::unique_ptr<ICustomAllocator> defaultAllocator{std::make_unique<ClBackendDefaultAllocator>()};
     return defaultAllocator;
 }


 // Describes the capabilities of this backend as a BackendCapabilities object labelled "GpuAcc".
 // "HasFp16" is the only entry determined at run time: it is queried from arm_compute::CLKernelLibrary.
 BackendCapabilities ClBackend::GetCapabilities() const
 {
     const auto hasFp16 = arm_compute::CLKernelLibrary::get().fp16_supported();

     // add new capabilities here..
     return BackendCapabilities("GpuAcc",
                                {
                                    {"NonConstWeights", true},
                                    {"ProtectedContentAllocation", true},
                                    {"ConstantTensorsAsInputs", true},
                                    {"PreImportIOTensors", false},
                                    {"ExternallyManagedMemory", true},
                                    {"MultiAxisPacking", false},
                                    {"SingleAxisPacking", true},
                                    {"AllOrNothing", false},
                                    {"HasFp16", hasFp16}
                                });
 }


 OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,

                                                   const ModelOptions& modelOptions) const

 {

     OptimizationViews optimizationViews(modelOptions);


     auto it = subgraph.end();

     bool isFastMathEnabled = false;

     std::map<LayerGuid, Layer*> untouched;


     while (it != subgraph.begin())

     {

         --it;

         Layer& base = *(PolymorphicDowncast<Layer*>(*it));

         untouched.insert({base.GetGuid(), &base});

     }


     it = subgraph.end();

 #if defined(ARMCOMPUTECL_ENABLED)

     IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);


     if (modelContextPtr)

     {

         auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());

         if (clModelOptions)

         {

             isFastMathEnabled = clModelOptions->IsFastMathEnabled();

         }

     }

 #endif

     while (it != subgraph.begin())

     {

         --it;

         Layer& base = *(PolymorphicDowncast<Layer*>(*it));


         // Fuse activation into previous layer if supported by backend

         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d

             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected

             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication

             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division

             || base.GetType() == LayerType::ElementwiseBinary)

             && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))

         {

             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)

             {

                 if (output->GetNumConnections() == 1)

                 {

                     for (auto&& childInput : output->GetConnections())

                     {

                         if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&

                             (checkDataTypeInputandOutput(childInput->GetOwningLayer())))

                         {

                             Layer& child = childInput->GetOwningLayer();


                             auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                             // Before we proceed make sure that this activation layer is in the subgraph. It could be

                             // the first layer in the next subgraph.

                             if (untouched.find(activationLayer->GetGuid()) == untouched.end())

                             {

                                 // We can't fuse a layer that's outside the subgraph.

                                 break;

                             }


                             const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +

                                                      base.GetName();


                             // Get params from activation layer

                             ActivationDescriptor activationDesc = activationLayer->GetParameters();


                             if (base.GetType() == LayerType::Convolution2d)

                             {

                                 Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);


                                 Optional<TensorInfo> biases;


                                 if (baseLayer->GetParameters().m_BiasEnabled)

                                 {

                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();

                                 }


                                 arm_compute::Status status = ClConvolution2dWorkloadValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetParameters(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         biases,

                                         isFastMathEnabled,

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,

                                                                                baseLayer,

                                                                                activationLayer,

                                                                                activationDesc,

                                                                                name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::DepthwiseConvolution2d)

                             {

                                 DepthwiseConvolution2dLayer* baseLayer =

                                         PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);


                                 Optional<TensorInfo> biases;


                                 if (baseLayer->GetParameters().m_BiasEnabled)

                                 {

                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();

                                 }


                                 arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetParameters(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         biases,

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,

                                                                                                  baseLayer,

                                                                                                  activationLayer,

                                                                                                  activationDesc,

                                                                                                  name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::FullyConnected)

                             {

                                 FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);

                                 FullyConnectedDescriptor descriptor = baseLayer->GetParameters();


                                 // As bias is optional only try to get TensorInfo from input if bias is enabled.

                                 Optional<TensorInfo> biases;

                                 if (descriptor.m_BiasEnabled)

                                 {

                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();

                                 }


                                 arm_compute::Status status = ClFullyConnectedWorkloadValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         biases,

                                         baseLayer->GetParameters(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,

                                                                                  baseLayer,

                                                                                  activationLayer,

                                                                                  activationDesc,

                                                                                  name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::BatchNormalization)

                             {

                                 BatchNormalizationLayer* baseLayer =

                                         PolymorphicDowncast<BatchNormalizationLayer*>(&base);


                                 arm_compute::Status status = ClBatchNormalizationValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->m_Mean->GetTensorInfo(),

                                         baseLayer->m_Variance->GetTensorInfo(),

                                         baseLayer->m_Beta->GetTensorInfo(),

                                         baseLayer->m_Gamma->GetTensorInfo(),

                                         baseLayer->GetParameters(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     BatchNormalizationLayer* replacementLayer =

                                         FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,

                                                                                              baseLayer,

                                                                                              activationLayer,

                                                                                              activationDesc,

                                                                                              name);


                                     replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);

                                     replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);

                                     replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);

                                     replacementLayer->m_Variance = std::move(baseLayer->m_Variance);


                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::Addition)

                             {

                                 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);


                                 arm_compute::Status status = ClAdditionValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseAdditionLayer<AdditionLayer>(optimizationViews,

                                                                      baseLayer,

                                                                      activationLayer,

                                                                      activationDesc,

                                                                      name);


                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::Division)

                             {

                                 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);


                                 arm_compute::Status status = ClDivisionWorkloadValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseDivisionLayer<DivisionLayer>(optimizationViews,

                                                                      baseLayer,

                                                                      activationLayer,

                                                                      activationDesc,

                                                                      name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::Multiplication)

                             {

                                 MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);


                                 arm_compute::Status status = ClMultiplicationWorkloadValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,

                                                                                  baseLayer,

                                                                                  activationLayer,

                                                                                  activationDesc,

                                                                                  name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::Subtraction)

                             {

                                 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);


                                 arm_compute::Status status = ClSubtractionValidate(

                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                         &activationDesc);


                                 if (status)

                                 {

                                     FuseSubtractionLayer<SubtractionLayer>(optimizationViews,

                                                                            baseLayer,

                                                                            activationLayer,

                                                                            activationDesc,

                                                                            name);

                                     untouched.erase(baseLayer->GetGuid());

                                     untouched.erase(activationLayer->GetGuid());

                                 }

                             }

                             else if (base.GetType() == LayerType::ElementwiseBinary)

                             {

                                 ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);


                                 if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)

                                 {

                                     arm_compute::Status status = ClAdditionValidate(

                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             &activationDesc);


                                     if (status)

                                     {

                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,

                                                                                            baseLayer,

                                                                                            activationLayer,

                                                                                            activationDesc,

                                                                                            BinaryOperation::Add,

                                                                                            name);

                                         untouched.erase(baseLayer->GetGuid());

                                         untouched.erase(activationLayer->GetGuid());

                                     }

                                 }

                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)

                                 {

                                     arm_compute::Status status = ClDivisionWorkloadValidate(

                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             &activationDesc);


                                     if (status)

                                     {

                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,

                                                                                            baseLayer,

                                                                                            activationLayer,

                                                                                            activationDesc,

                                                                                            BinaryOperation::Div,

                                                                                            name);

                                         untouched.erase(baseLayer->GetGuid());

                                         untouched.erase(activationLayer->GetGuid());

                                     }

                                 }

                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)

                                 {

                                     arm_compute::Status status = ClMultiplicationWorkloadValidate(

                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             &activationDesc);


                                     if (status)

                                     {

                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,

                                                                                            baseLayer,

                                                                                            activationLayer,

                                                                                            activationDesc,

                                                                                            BinaryOperation::Mul,

                                                                                            name);

                                         untouched.erase(baseLayer->GetGuid());

                                         untouched.erase(activationLayer->GetGuid());

                                     }

                                 }

                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)

                                 {

                                     arm_compute::Status status = ClSubtractionValidate(

                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),

                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),

                                             &activationDesc);


                                     if (status)

                                     {

                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,

                                                                                            baseLayer,

                                                                                            activationLayer,

                                                                                            activationDesc,

                                                                                            BinaryOperation::Sub,

                                                                                            name);

                                         untouched.erase(baseLayer->GetGuid());

                                         untouched.erase(activationLayer->GetGuid());

                                     }

                                 }

                                 // No fusion available for other BinaryOperations

                             }

                         }

                     }

                 }

             }

         }


         // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.

         if (base.GetType() == LayerType::Reduce)

         {

             ReduceLayer* baseLayer            = PolymorphicDowncast<ReduceLayer*>(&base);

             ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();


             if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)

             {

                 // Add new layers to the graph and connect them.

                 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,

                                                                                         baseLayer,

                                                                                         reduceDescriptor);


                 // Replace existing baselayer with new subgraph.

                 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);

                 untouched.erase(baseLayer->GetGuid());

             }

         }


         // Remove Reshape where possible

         if (base.GetType() == LayerType::Reshape)

         {

             ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);


             // Cannot remove a Reshape if it's connected to any layer that has an NCHW layout

             if (ConnectedToLayerWithNCHW(baseLayer))

             {

                 continue;

             }

             RemoveReshapeLayer(baseLayer, untouched, optimizationViews);

         }


         // Special case to fuse padding into average pooling 2d for quantized datatype.

         // Required to be done as a backend specific optimization as Neon does not support this special case.

         if (base.GetType() == LayerType::Pooling2d)

         {

             Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);

             Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();


             if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)

             {

                 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(

                     &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());

                 if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&

                     optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),

                                                                    poolingDescriptor,

                                                                    padLayer->GetOutputSlot().GetTensorInfo(),

                                                                    true))

                 {

                     FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,

                                                              poolingDescriptor, padLayer);

                     untouched.erase(baseLayer->GetGuid());

                     untouched.erase(padLayer->GetGuid());

                 }

             }

         }

     }


     if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())

     {

         optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));

     }

     else

     {

         ReportUntouchedLayers(optimizationViews, untouched);

     }


     return optimizationViews;

 }


 } // namespace armnn

ArmComputeSubgraphUtils.hpp

ArmComputeUtils.hpp

BackendRegistry.hpp

ClAdditionWorkload.hpp

ClBackend.hpp

ClBackendContext.hpp

ClBackendDefaultAllocator.hpp

ClBackendId.hpp

ClBackendModelContext.hpp

ClBatchNormalizationFloatWorkload.hpp

ClConvolution2dWorkload.hpp

ClDepthwiseConvolutionWorkload.hpp

ClDivisionWorkload.hpp

ClFullyConnectedWorkload.hpp

ClImportTensorHandleFactory.hpp

ClLayerSupport.hpp

ClMultiplicationWorkload.hpp

ClReduceWorkload.hpp

ClSubtractionWorkload.hpp

ClTensorHandleFactory.hpp

ClWorkloadFactory.hpp

Descriptors.hpp

IBackendContext.hpp

IMemoryManager.hpp

Optimizer.hpp

PolymorphicDowncast.hpp

armnn::AdditionLayer
This layer represents an addition operation.
Definition: AdditionLayer.hpp:14

armnn::BackendId
Definition: BackendId.hpp:76

armnn::BatchNormalizationLayer
This layer represents a batch normalization operation.
Definition: BatchNormalizationLayer.hpp:16

armnn::BatchNormalizationLayer::m_Mean
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
Definition: BatchNormalizationLayer.hpp:19

armnn::BatchNormalizationLayer::m_Gamma
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
Definition: BatchNormalizationLayer.hpp:25

armnn::BatchNormalizationLayer::m_Beta
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
Definition: BatchNormalizationLayer.hpp:23

armnn::BatchNormalizationLayer::m_Variance
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
Definition: BatchNormalizationLayer.hpp:21

armnn::ClBackendContext
Definition: ClBackendContext.hpp:18

armnn::ClBackend::GetIdStatic
static const BackendId & GetIdStatic()
Definition: ClBackend.cpp:44

armnn::ClBackend::CreateBackendSpecificModelContext
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:246

armnn::ClBackend::OptimizeSubgraphView
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:292

armnn::ClBackend::RegisterTensorHandleFactories
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: ClBackend.cpp:175

armnn::ClBackend::GetHandleFactoryPreferences
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: ClBackend.cpp:169

armnn::ClBackend::CreateBackendProfilingContext
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: ClBackend.cpp:240

armnn::ClBackend::CreateBackendContext
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: ClBackend.cpp:235

armnn::ClBackend::CreateWorkloadFactory
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: ClBackend.cpp:59

armnn::ClBackend::GetLayerSupport
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: ClBackend.cpp:252

armnn::ClBackend::CreateMemoryManager
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: ClBackend.cpp:50

armnn::ClBackend::GetCapabilities
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
Definition: ClBackend.cpp:275

armnn::ClBackend::m_UsingCustomAllocator
bool m_UsingCustomAllocator
Definition: ClBackend.hpp:284

armnn::ClBackend::m_CustomAllocator
std::shared_ptr< ClBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: ClBackend.hpp:283

armnn::ClBackend::GetDefaultAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
Definition: ClBackend.cpp:270

armnn::ClBackendModelContext
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
Definition: ClBackendModelContext.hpp:29

armnn::ClBackendModelContext::IsFastMathEnabled
bool IsFastMathEnabled() const
Definition: ClBackendModelContext.cpp:66

armnn::ClImportTensorHandleFactory::GetIdStatic
static const FactoryId & GetIdStatic()
Definition: ClImportTensorHandleFactory.cpp:93

armnn::ClLayerSupport
Definition: ClLayerSupport.hpp:15

armnn::ClTensorHandleFactory::GetIdStatic
static const FactoryId & GetIdStatic()
Definition: ClTensorHandleFactory.cpp:94

armnn::Convolution2dLayer
This layer represents a convolution 2d operation.
Definition: Convolution2dLayer.hpp:16

armnn::DepthwiseConvolution2dLayer
This layer represents a depthwise convolution 2d operation.
Definition: DepthwiseConvolution2dLayer.hpp:16

armnn::DivisionLayer
This layer represents a division operation.
Definition: DivisionLayer.hpp:15

armnn::ElementwiseBinaryLayer
This layer represents an elementwiseBinary operation.
Definition: ElementwiseBinaryLayer.hpp:15

armnn::FullyConnectedLayer
This layer represents a fully connected operation.
Definition: FullyConnectedLayer.hpp:16

armnn::IBackendInternal::ILayerSupportSharedPtr
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
Definition: IBackendInternal.hpp:92

armnn::IBackendInternal::IMemoryManagerUniquePtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
Definition: IBackendInternal.hpp:96

armnn::IBackendInternal::IBackendProfilingPtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
Definition: IBackendInternal.hpp:91

armnn::IBackendInternal::IBackendSpecificModelContextPtr
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
Definition: IBackendInternal.hpp:94

armnn::IBackendInternal::IMemoryManagerSharedPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
Definition: IBackendInternal.hpp:97

armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:87

armnn::IBackendInternal::IBackendContextPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
Definition: IBackendInternal.hpp:88

armnn::IBackendInternal::IBackendProfilingContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace.
Definition: IBackendInternal.hpp:90

armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56

armnn::Layer
Definition: Layer.hpp:231

armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339

armnn::Layer::EndOutputSlots
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:267

armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332

armnn::Layer::BeginOutputSlots
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:266

armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343

armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337

armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286

armnn::Layer::GetAdditionalInformation
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:368

armnn::LayerWithParameters::GetParameters
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
Definition: LayerWithParameters.hpp:19

armnn::MultiplicationLayer
This layer represents a multiplication operation.
Definition: MultiplicationLayer.hpp:15

armnn::OptimizationViews
Definition: OptimizationViews.hpp:18

armnn::OptimizationViews::AddUntouchedSubgraph
void AddUntouchedSubgraph(SubgraphView &&subgraph)
Definition: OptimizationViews.hpp:48

armnn::OptimizationViews::GetSubstitutions
const Substitutions & GetSubstitutions() const
Definition: OptimizationViews.hpp:58

armnn::OptimizationViews::GetDeletedSubgraphs
const Subgraphs & GetDeletedSubgraphs() const
Definition: OptimizationViews.hpp:61

armnn::Optional
Definition: Optional.hpp:271

armnn::OutputSlot::GetNumConnections
unsigned int GetNumConnections() const override
Definition: Layer.hpp:158

armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132

armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100

armnn::PadLayer
This layer represents a pad operation.
Definition: PadLayer.hpp:15

armnn::Pooling2dLayer
This layer represents a pooling 2d operation.
Definition: Pooling2dLayer.hpp:14

armnn::ReduceLayer
This layer represents a reduction operation.
Definition: ReduceLayer.hpp:15

armnn::ReshapeLayer
This layer represents a reshape operation.
Definition: ReshapeLayer.hpp:16

armnn::SubgraphView
The SubgraphView class represents a subgraph of a Graph.
Definition: SubgraphView.hpp:32

armnn::SubgraphView::begin
IConnectableLayerIterator begin()
Definition: SubgraphView.cpp:286

armnn::SubgraphView::end
IConnectableLayerIterator end()
Definition: SubgraphView.cpp:291

armnn::SubtractionLayer
This layer represents a subtraction operation.
Definition: SubtractionLayer.hpp:15

armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:24

armnn::TensorHandleFactoryRegistry::RegisterFactory
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
Definition: TensorHandleFactoryRegistry.cpp:12

armnn::TensorHandleFactoryRegistry::RegisterMemoryManager
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
Definition: TensorHandleFactoryRegistry.cpp:34

armnn::TensorHandleFactoryRegistry::RegisterCopyAndImportFactoryPair
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Id for Memory Copy and TensorHandleFactory Id for Memory Impor...
Definition: TensorHandleFactoryRegistry.cpp:66

armnn::optimizations::pad_fold::TryFoldPadIntoLayer2d
bool TryFoldPadIntoLayer2d(const PadDescriptor &padDescriptor, Descriptor &layerDescriptor, const TensorInfo &tensorInfo)
Definition: FoldPadIntoLayer2d.hpp:88

armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:7

armnn::ClDivisionWorkloadValidate
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClDivisionWorkload.cpp:18

armnn::MemorySource::Malloc
@ Malloc

armnn::MemorySource::Undefined
@ Undefined

armnn::ClFullyConnectedWorkloadValidate
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: ClFullyConnectedWorkload.cpp:19

armnn::ClAdditionValidate
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClAdditionWorkload.cpp:45

armnn::ClBatchNormalizationValidate
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
Definition: ClBatchNormalizationFloatWorkload.cpp:19

armnn::LayerType::Multiplication
@ Multiplication

armnn::LayerType::Division
@ Division

armnn::LayerType::ElementwiseBinary
@ ElementwiseBinary

armnn::LayerType::Subtraction
@ Subtraction

armnn::LayerType::Addition
@ Addition

armnn::LayerType::Reshape
@ Reshape

armnn::LayerType::Activation
@ Activation

armnn::LayerType::FullyConnected
@ FullyConnected

armnn::LayerType::Pooling2d
@ Pooling2d

armnn::LayerType::Convolution2d
@ Convolution2d

armnn::LayerType::Pad
@ Pad

armnn::LayerType::BatchNormalization
@ BatchNormalization

armnn::LayerType::Reduce
@ Reduce

armnn::LayerType::DepthwiseConvolution2d
@ DepthwiseConvolution2d

armnn::ReportUntouchedLayers
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
Definition: SubgraphUtils.hpp:220

armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15

armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18

armnn::ClConvolution2dWorkloadValidate
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Definition: ClConvolution2dWorkload.cpp:23

armnn::Status
Status
enumeration
Definition: Types.hpp:43

armnn::ClMultiplicationWorkloadValidate
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClMultiplicationWorkload.cpp:18

armnn::ClDepthwiseConvolutionWorkloadValidate
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
Definition: ClDepthwiseConvolutionWorkload.cpp:26

armnn::BackendCapabilities
BackendOptions BackendCapabilities
Definition: BackendOptions.hpp:19

armnn::BinaryOperation::Div
@ Div

armnn::BinaryOperation::Mul
@ Mul

armnn::BinaryOperation::Sub
@ Sub

armnn::BinaryOperation::Add
@ Add

armnn::ConnectedToLayerWithNCHW
bool ConnectedToLayerWithNCHW(Layer *baseLayer)
Checks if the Layer is connected to any Layer that has an NCHW layout.
Definition: SubgraphUtils.hpp:250

armnn::ClBackendId
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10

armnn::RemoveReshapeLayer
void RemoveReshapeLayer(ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
Definition: SubgraphUtils.hpp:293

armnn::ClSubtractionValidate
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
Definition: ClSubtractionWorkload.cpp:46

armnn::ActivationDescriptor
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:37

armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:23

armnn::Convolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:582

armnn::DepthwiseConvolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:708

armnn::ElementwiseBinaryDescriptor::m_Operation
BinaryOperation m_Operation
Specifies the elementwiseBinary operation to execute.
Definition: Descriptors.hpp:125

armnn::FullyConnectedDescriptor
A FullyConnectedDescriptor for the FullyConnectedLayer.
Definition: Descriptors.hpp:508

armnn::FullyConnectedDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:526

armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:71

armnn::Pooling2dDescriptor
A Pooling2dDescriptor for the Pooling2dLayer.
Definition: Descriptors.hpp:372

armnn::ReduceDescriptor
A ReduceDescriptor for the REDUCE operators.
Definition: Descriptors.hpp:1539

armnn::ReduceDescriptor::m_vAxis
std::vector< uint32_t > m_vAxis
The indices of the dimensions to reduce.
Definition: Descriptors.hpp:1556