armnn/latest/_fuse_batch_norm_8hpp_source.html

 //

 // Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.

 // SPDX-License-Identifier: MIT

 //


 #pragma once


 #include "Optimization.hpp"

 #include <armnnUtils/DataLayoutIndexed.hpp>

 #include <ResolveType.hpp>


 namespace armnn

 {

 namespace optimizations

 {


 /// Optimization that folds a BatchNormalization layer into the convolution layer feeding it.
 ///
 /// @tparam ConvLayer  Concrete layer class of the base layer. Run() handles bases whose type is
 ///                    LayerType::Convolution2d or LayerType::DepthwiseConvolution2d.
 /// @tparam ArmnnType  Data type both layers must report for the fusion to be applied.
 /// @tparam T          C++ element type used to read and write the constant tensors
 ///                    (defaults to armnn::ResolveType<ArmnnType>).
 template<typename ConvLayer, armnn::DataType ArmnnType,
          typename T = armnn::ResolveType<ArmnnType>>
 class FuseBatchNorm
 {
 public:
     /// Run for every exclusive connection between any base Convolution layer and a child BatchNorm layer for
     /// non-quantized layers.
     ///
     /// A new convolution layer is inserted in front of the base layer. Its weights and bias are computed from the
     /// weights and bias of the base layer combined with the parameters of the child BatchNorm layer:
     ///
     ///     fusedWeights = (gamma * weights)       / sqrt(variance + epsilon)
     ///     fusedBias    = (gamma * (bias - mean)) / sqrt(variance + epsilon) + beta
     ///
     /// The constant weight (and bias) layers are re-pointed at the new layer and their payload is replaced by the
     /// fused values. If the base layer had no bias, a new constant "Bias" layer is created. The child's outgoing
     /// connections are moved to the new layer. Nothing is deleted here: the child (and the base, if it ends up
     /// unconnected) are expected to be removed afterwards by the surrounding optimization machinery.
     ///
     /// Nothing happens unless both layers report the data type ArmnnType.
     ///
     /// @param graph       Graph that owns the layers and receives the new layers.
     /// @param connection  Input slot of the BatchNorm child that is connected to the convolution base.
     void Run(Graph& graph, InputSlot& connection) const
     {
         Layer& base  = connection.GetConnectedOutputSlot()->GetOwningLayer();
         Layer& child = connection.GetOwningLayer();

         const bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d);

         ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d || depthwise);
         ARMNN_ASSERT(child.GetType() == LayerType::BatchNormalization);

         // Only fuse when both layers use the data type this instantiation was built for.
         if (base.GetDataType() != ArmnnType || child.GetDataType() != ArmnnType)
         {
             return;
         }

         OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
         auto convLayer        = PolymorphicDowncast<ConvLayer*>(&base);
         auto batchNormLayer   = PolymorphicDowncast<BatchNormalizationLayer*>(&child);

         // Read convolution and batch norm parameters
         BatchNormalizationDescriptor batchNormDescriptor = batchNormLayer->GetParameters();
         const auto epsilon = batchNormDescriptor.m_Eps;

         ConstTensor betaTensor(batchNormLayer->m_Beta->GetTensorInfo(), batchNormLayer->m_Beta->Map(true));
         ConstTensor gammaTensor(batchNormLayer->m_Gamma->GetTensorInfo(), batchNormLayer->m_Gamma->Map(true));
         ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(true));
         ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));

         // Local copy of the descriptor: m_BiasEnabled may be switched on below for the new layer.
         auto convDescriptor = convLayer->GetParameters();

         ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[1].GetConnection() != nullptr,
                          "FuseBatchNorm: Weight data should not be null.");

         // The weights are expected to come from a ConstantLayer connected to input slot 1.
         ConstantLayer* weightLayer = PolymorphicDowncast<ConstantLayer*>(
                                         &base.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer());

         ConstTensor weightsTensor(weightLayer->m_LayerOutput->GetTensorInfo(),
                                   weightLayer->m_LayerOutput->Map(true));

         armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
         auto weightsShape = weightsTensor.GetInfo().GetShape();
         const unsigned int inputChannels  = parentOut->GetTensorInfo().GetShape()[dataLayout.GetChannelsIndex()];
         const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0];
         const unsigned int weightsHeight  = depthwise ? weightsShape[1] :
                                             weightsShape[dataLayout.GetHeightIndex()];
         const unsigned int weightsWidth   = depthwise ? weightsShape[2] :
                                             weightsShape[dataLayout.GetWidthIndex()];

         const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
         const auto* betaBuffer    = static_cast<const T*>(betaTensor.GetMemoryArea());
         const auto* gammaBuffer   = static_cast<const T*>(gammaTensor.GetMemoryArea());
         const auto* meanBuffer    = static_cast<const T*>(meanTensor.GetMemoryArea());
         const auto* varBuffer     = static_cast<const T*>(varTensor.GetMemoryArea());

         // Per-channel parameters are small, so keep them as vectors; the (potentially large) weights are read
         // straight from the mapped buffer instead of being copied first.
         std::vector<T> betaVector    (betaBuffer, betaBuffer + betaTensor.GetNumElements());
         std::vector<T> gammaVector   (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
         std::vector<T> meanVector    (meanBuffer, meanBuffer + meanTensor.GetNumElements());
         std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());

         // fusedWeights = (gamma * weights) / sqrt(variance + epsilon)
         std::vector<T> fusedWeightsVector(weightsTensor.GetNumElements());

         // The depthwise weight index below depends only on (h, w, cOut), so one pass over the input channels
         // covers every element; regular convolutions have to visit each input channel.
         const unsigned int inputChannelPasses = (depthwise && inputChannels > 1) ? 1u : inputChannels;
         const bool isNhwc = (convDescriptor.m_DataLayout == DataLayout::NHWC);

         for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
         {
             // Scale applied to every weight that contributes to output channel cOut.
             const T mult = gammaVector[cOut] / static_cast<T>(sqrtf(varianceVector[cOut] + epsilon));

             for (unsigned int cInput = 0; cInput < inputChannelPasses; ++cInput)
             {
                 for (unsigned int h = 0; h < weightsHeight; ++h)
                 {
                     for (unsigned int w = 0; w < weightsWidth; ++w)
                     {
                         unsigned int weightsIdx = 0;

                         if (depthwise)
                         {
                             // Indexed as [1, H, W, outputChannels].
                             weightsIdx = w * outputChannels + cOut +
                                          h * weightsWidth * outputChannels;
                         }
                         else if (isNhwc)
                         {
                             // Indexed as [O, H, W, I].
                             weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
                                          h * weightsWidth * inputChannels +
                                          w * inputChannels +
                                          cInput;
                         }
                         else
                         {
                             // Indexed as [O, I, H, W].
                             weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
                                          cInput * weightsWidth * weightsHeight +
                                          h * weightsWidth +
                                          w;
                         }
                         fusedWeightsVector[weightsIdx] = mult * weightsBuffer[weightsIdx];
                     }
                 }
             }
         }
         ConstTensor fusedWeightsTensor(weightsTensor.GetInfo(), fusedWeightsVector);

         // fusedBias = (gamma * (bias - mean)) / sqrt(variance + epsilon) + beta
         const bool biasWasEnabledBeforeOpt = convDescriptor.m_BiasEnabled;
         ConstantLayer* biasLayer = nullptr;
         std::vector<T> biasVector;
         if (biasWasEnabledBeforeOpt)
         {
             ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[2].GetConnection() != nullptr,
                              "FuseBatchNorm: Bias data should not be null if bias is enabled.");

             // The bias is expected to come from a ConstantLayer connected to input slot 2.
             biasLayer = PolymorphicDowncast<ConstantLayer*>(
                             &base.GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer());

             ConstTensor biasTensor(biasLayer->m_LayerOutput->GetTensorInfo(),
                                    biasLayer->m_LayerOutput->Map(true));

             const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
             biasVector.assign(biasBuffer, biasBuffer + biasTensor.GetNumElements());
         }
         else
         {
             // The fused layer always carries a bias; start from an all-zero one when the base had none.
             convDescriptor.m_BiasEnabled = true;
             biasVector.assign(outputChannels, T(0));
         }

         std::vector<T> fusedBiasVector(outputChannels);
         for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
         {
             fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
                                      sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
         }
         ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, ArmnnType, 0.0f, 0, true), fusedBiasVector);

         // Insert the new convolution layer that has batch norm parameters fused into
         const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + base.GetName();
         auto& newConv2dLayer = *graph.InsertNewLayer<ConvLayer>(base.GetInputSlot(0),
                                                                 convDescriptor,
                                                                 name.c_str());

         // Connect weights and bias from old to new Conv2d layer
         // This optimization will always have 3 input slots on the Conv2d base layer
         if (newConv2dLayer.GetNumInputSlots() > 1)
         {
             // Remove old connection and connect to new layer2d
             weightLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(1));
             weightLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(1));
             weightLayer->m_LayerOutput = std::make_unique<ScopedTensorHandle>(fusedWeightsTensor);

             if (biasWasEnabledBeforeOpt)
             {
                 // Move the existing bias constant layer over to the new layer.
                 biasLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(2));
                 biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
             }
             else
             {
                 // Otherwise create a new bias constant layer and attach it to the new layer.
                 biasLayer = graph.AddLayer<ConstantLayer>("Bias");
                 biasLayer->GetOutputSlot(0).SetTensorInfo(fusedBiasTensor.GetInfo());
                 biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
             }
             biasLayer->m_LayerOutput = std::make_unique<ScopedTensorHandle>(ConstTensor(fusedBiasTensor));
         }

         // InsertNewLayer placed the new layer between the original parent and the base layer. Hand the new
         // layer's outgoing connections back to the original parent output slot...
         newConv2dLayer.GetOutputSlot().MoveAllConnections(*parentOut);
         // ...so that the new convolution layer can act as the parent from here on.
         parentOut = &newConv2dLayer.GetOutputSlot();

         // Moves connections in child output to parent layer.
         // Child layer will be removed as it's left unconnected.
         // Base layer will be removed if left unconnected.
         child.GetOutputSlot().MoveAllConnections(*parentOut);
     }

 protected:
     FuseBatchNorm()  = default;
     ~FuseBatchNorm() = default;
 };


 /// Fuses a Float32 BatchNormalization layer into the Convolution2d layer it is exclusively connected to.
 using FuseBatchNormIntoConvolution2DFloat32 = OptimizeForExclusiveConnection<
     Convolution2dLayer,
     BatchNormalizationLayer,
     FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float32>>;

 /// Fuses a Float16 BatchNormalization layer into the Convolution2d layer it is exclusively connected to.
 using FuseBatchNormIntoConvolution2DFloat16 = OptimizeForExclusiveConnection<
     Convolution2dLayer,
     BatchNormalizationLayer,
     FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float16>>;

 /// Fuses a Float32 BatchNormalization layer into the DepthwiseConvolution2d layer it is exclusively connected to.
 using FuseBatchNormIntoDepthwiseConvolution2DFloat32 = OptimizeForExclusiveConnection<
     DepthwiseConvolution2dLayer,
     BatchNormalizationLayer,
     FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float32>>;

 /// Fuses a Float16 BatchNormalization layer into the DepthwiseConvolution2d layer it is exclusively connected to.
 using FuseBatchNormIntoDepthwiseConvolution2DFloat16 = OptimizeForExclusiveConnection<
     DepthwiseConvolution2dLayer,
     BatchNormalizationLayer,
     FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float16>>;


 } // namespace optimizations

 } // namespace armnn

ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

DataLayoutIndexed.hpp

Optimization.hpp

ResolveType.hpp

armnn::BaseTensor::GetNumElements
unsigned int GetNumElements() const
Definition: Tensor.hpp:305

armnn::BaseTensor::GetInfo
const TensorInfo & GetInfo() const
Definition: Tensor.hpp:297

armnn::BaseTensor::GetMemoryArea
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:307

armnn::BatchNormalizationLayer
This layer represents a batch normalization operation.
Definition: BatchNormalizationLayer.hpp:16

armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:330

armnn::ConstantLayer
A layer that the constant data can be bound to.
Definition: ConstantLayer.hpp:16

armnn::ConstantLayer::m_LayerOutput
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Definition: ConstantLayer.hpp:46

armnn::Convolution2dLayer
This layer represents a convolution 2d operation.
Definition: Convolution2dLayer.hpp:16

armnn::DepthwiseConvolution2dLayer
This layer represents a depthwise convolution 2d operation.
Definition: DepthwiseConvolution2dLayer.hpp:16

armnn::Graph
Definition: Graph.hpp:31

armnn::Graph::InsertNewLayer
LayerT * InsertNewLayer(InputSlot &insertBefore, Args &&... args)
Inserts a new layer between the output slot currently connected to insertBefore and insertBefore itself.
Definition: Graph.hpp:481

armnn::Graph::AddLayer
LayerT * AddLayer(Args &&... args)
Adds a new layer, of type LayerType, to the graph constructed with the arguments passed.
Definition: Graph.hpp:466

armnn::InputSlot
Definition: Layer.hpp:43

armnn::InputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:53

armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56

armnn::Layer
Definition: Layer.hpp:231

armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339

armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332

armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337

armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286

armnn::Layer::GetDataType
DataType GetDataType() const
Definition: Layer.cpp:345

armnn::OptimizeForExclusiveConnection
Definition: Optimization.hpp:175

armnn::OutputSlot
Definition: Layer.hpp:101

armnn::OutputSlot::MoveAllConnections
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition: Layer.cpp:156

armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132

armnn::OutputSlot::SetTensorInfo
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:95

armnn::OutputSlot::Disconnect
void Disconnect(InputSlot &slot)
Definition: Layer.cpp:131

armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100

armnn::OutputSlot::Connect
int Connect(InputSlot &destination)
Definition: Layer.cpp:123

armnn::TensorInfo
Definition: Tensor.hpp:153

armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193

armnn::optimizations::FuseBatchNorm
Definition: FuseBatchNorm.hpp:20

armnn::optimizations::FuseBatchNorm::~FuseBatchNorm
~FuseBatchNorm()=default

armnn::optimizations::FuseBatchNorm::Run
void Run(Graph &graph, InputSlot &connection) const
Run for every exclusive connection between any base Convolution layer and a child BatchNorm layer for non-quantized layers.
Definition: FuseBatchNorm.hpp:27

armnn::optimizations::FuseBatchNorm::FuseBatchNorm
FuseBatchNorm()=default

armnnUtils::DataLayoutIndexed
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
Definition: DataLayoutIndexed.hpp:18

armnnUtils::DataLayoutIndexed::GetWidthIndex
unsigned int GetWidthIndex() const
Definition: DataLayoutIndexed.hpp:25

armnnUtils::DataLayoutIndexed::GetHeightIndex
unsigned int GetHeightIndex() const
Definition: DataLayoutIndexed.hpp:24

armnnUtils::DataLayoutIndexed::GetChannelsIndex
unsigned int GetChannelsIndex() const
Definition: DataLayoutIndexed.hpp:23

armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:7

armnn::ResolveType
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79

armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14

armnn::LayerType::Convolution2d
@ Convolution2d

armnn::LayerType::BatchNormalization
@ BatchNormalization

armnn::LayerType::DepthwiseConvolution2d
@ DepthwiseConvolution2d

armnn::DataLayout::NHWC
@ NHWC

armnn::DataType
DataType
Definition: Types.hpp:49

armnn::BatchNormalizationDescriptor
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
Definition: Descriptors.hpp:829

armnn::BatchNormalizationDescriptor::m_Eps
float m_Eps
Value to add to the variance. Used to avoid dividing by zero.
Definition: Descriptors.hpp:841