ArmNN
 25.11
Loading...
Searching...
No Matches
NeonBackendOptimizationUtils.hpp
Go to the documentation of this file.
1//
2// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#pragma once
7
9
10namespace armnn
11{
12
13// Changes shapes of the form [1, 1, ..., W] to [ W ]
15{
16 unsigned int numDimensions = in.GetNumDimensions();
17 for (unsigned int i = 0; i < (numDimensions-1); ++i)
18 {
19 if (in.GetShape()[i] != 1)
20 {
21 return false;
22 }
23 }
24
25 unsigned int w = in.GetShape()[numDimensions-1];
26 out = in;
27 out.SetShape({w});
28
29 return true;
30}
31
32//
33// Build slot and tensor info lists for Add/Mul/Add replacement
34//
//
// Build slot and tensor info lists for Add/Mul/Add replacement
//
template<typename SlotListType>
void BuildAddMulAddSlotLists(bool handleReLu,
                             bool multipleOutputs,
                             std::vector<SlotListType>& inputLayersSlotLists,
                             std::vector<SlotListType>& outputLayersSlotLists)
{
    // Input slot indices recorded per layer of the Add/Mul/Add (+ optional
    // ReLu) sequence.
    inputLayersSlotLists.push_back(SlotListType{0, 1});   // Add
    inputLayersSlotLists.push_back(SlotListType{1});      // Mul
    inputLayersSlotLists.push_back(SlotListType{1});      // Add
    if (handleReLu)
    {
        inputLayersSlotLists.push_back(SlotListType{});   // Relu
    }

    // Output slot indices recorded per layer. The first Add only publishes
    // slot 0 when multiple outputs are requested; the final layer of the
    // sequence (second Add, or Relu when fused) always publishes slot 0.
    outputLayersSlotLists.push_back(multipleOutputs ? SlotListType{0}     // Add
                                                    : SlotListType{});
    outputLayersSlotLists.push_back(SlotListType{});                      // Mul
    outputLayersSlotLists.push_back(handleReLu ? SlotListType{}           // Add
                                               : SlotListType{0});
    if (handleReLu)
    {
        outputLayersSlotLists.push_back(SlotListType{0});                 // Relu
    }
}
70
71inline void GetFusedName(Layer *layerList[4], std::string& fusedName)
72{
73 // Build the fused name string
74 fusedName = "fused";
75 for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76 {
77 if (! layerList[layerIdx])
78 {
79 break;
80 }
81 fusedName += "-";
82 fusedName += layerList[layerIdx]->GetNameStr();
83 }
84}
85
86template<typename Type>
87bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
88 unsigned int& numInputs,
89 unsigned int& numOutputs,
90 std::vector<TensorInfo>& inputInfos,
91 std::vector<TensorInfo>& outputInfos,
92 const ActivationDescriptor*& activationDescriptor,
93 bool& fuseReLu)
94{
98
102
103 auto is1D = [](const TensorInfo expanded)
104 {
105 TensorInfo collapsed;
106 if (CollapseLeadingUnitDimensions(expanded, collapsed))
107 {
108 return (collapsed.GetNumDimensions() == 1);
109 }
110 else
111 {
112 return (expanded.GetNumDimensions() == 1);
113 }
114 };
115
116 // One of the 2 inputs for MUL and the Second ADD must be 1D
117 // ref: clframework/src/cpu/kernels/CpuAddMulAddKernel.cpp
118 auto& mulLayer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[1]));
119 auto& add2Layer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[2]));
120
121 Layer& mulInput0 = mulLayer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
122 Layer& mulInput1 = mulLayer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
123 Layer& add2Input0 = add2Layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
124 Layer& add2Input1 = add2Layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
125 if (!is1D(mulInput0.GetOutputSlot(0).GetTensorInfo()) && !is1D(mulInput1.GetOutputSlot(0).GetTensorInfo()))
126 {
127 return false;
128 }
129 if (!is1D(add2Input0.GetOutputSlot(0).GetTensorInfo()) && !is1D(add2Input1.GetOutputSlot(0).GetTensorInfo()))
130 {
131 return false;
132 }
133
134 fuseReLu = (layerList[3] != nullptr);
135 if (fuseReLu)
136 {
137 activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
139 (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
140 }
141
142 numInputs = 0;
143 numOutputs = 0;
144
145 // Ensure that there are 6 input slots in the add/mul/add layers
146 // we are going to replace
147 unsigned int layerIdx = 0;
148 unsigned int inputSlotCount = 0;
149 for (layerIdx = 0; layerIdx < 3; ++layerIdx)
150 {
151 for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
152 {
153 InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
154 OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
155 if (outputSlot)
156 {
157 if (layerIdx == 0)
158 {
159 // Always count the input connections of the first add
160 inputInfos.push_back(inputSlot->GetTensorInfo());
161 numInputs++;
162 }
163 else
164 {
165 // For subsequent layers, we skip connections to the previous layers in the counting
166 if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
167 {
168 TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
169 if (numInputs == 2 || numInputs == 3)
170 {
171 // Workaround the broadcast optimization to collapse shapes such as
172 // [1, 1, 1, 2] to [2] as required by backend
173 if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
174 {
175 OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
176 if (previousLayerSlot)
177 {
178 if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
179 {
180 // First update the TensorInfo in the constant owning layer
181 previousLayerSlot->SetTensorInfo(inputSlotInfo);
182 // Then update the TensorInfo in the workload for the owning layer
184 &previousLayerSlot->GetOwningLayer());
185 layer->m_LayerOutput
186 = std::make_unique<ScopedTensorHandle>(
187 ConstTensor(inputSlotInfo,
188 layer->m_LayerOutput.get()->GetConstTensor<void>()));
189 }
190 }
191 }
192 }
193 inputInfos.push_back(inputSlotInfo);
194 numInputs++;
195 }
196 }
197 inputSlotCount++;
198 }
199 }
200 }
201
202 // Check the input counts
203 bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
204 if (! validInputCount)
205 {
206 return false;
207 }
208
209 const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
210 for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
211 {
212 for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
213 {
214 OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
215
216 for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
217 {
218 InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
219 if (layerIdx < (maxIdx-1))
220 {
221 if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
222 {
223 outputInfos.push_back(outputSlot->GetTensorInfo());
224 numOutputs++;
225 }
226 }
227 else if (layerList[layerIdx] != nullptr)
228 {
229 outputInfos.push_back(outputSlot->GetTensorInfo());
230 numOutputs++;
231 }
232 }
233 }
234 }
235
236 // Check the output count
237 bool validOutputCount = (outputInfos.size() > 0);
238 if (! validOutputCount)
239 {
240 return false;
241 }
242
243 return true;
244}
245
246}
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition Tensor.hpp:330
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Layer & GetOwningLayer() const
Definition Layer.hpp:53
const OutputSlot * GetConnectedOutputSlot() const
Definition Layer.hpp:56
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition Layer.cpp:614
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
const std::string & GetNameStr() const
Definition Layer.hpp:240
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition Layer.hpp:339
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition Layer.hpp:286
const InputSlot * GetConnection(unsigned int index) const override
Definition Layer.cpp:83
unsigned int GetNumConnections() const override
Definition Layer.hpp:158
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition Layer.cpp:95
Layer & GetOwningLayer() const
Definition Layer.hpp:132
const TensorInfo & GetTensorInfo() const override
Definition Layer.cpp:100
const TensorShape & GetShape() const
Definition Tensor.hpp:193
unsigned int GetNumDimensions() const
Definition Tensor.hpp:197
void SetShape(const TensorShape &newShape)
Definition Tensor.hpp:195
Copyright (c) 2021 ARM Limited and Contributors.
bool IsSequenceLayerType(Layer &layer, LayerType type)
void GetFusedName(Layer *layerList[4], std::string &fusedName)
void BuildAddMulAddSlotLists(bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
Definition Types.hpp:92
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
bool BuildAddMulAddTensorInfoLists(Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
An ActivationDescriptor for the ActivationLayer.
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...