ArmNN 25.11
FuseBatchNorm< ConvLayer, ArmnnType, T > Class Template Reference

#include <FuseBatchNorm.hpp>

Public Member Functions

void Run (Graph &graph, InputSlot &connection) const
 Run for every exclusive connection between a base Convolution layer and a child BatchNorm layer, for non-quantized layers.

Protected Member Functions

 FuseBatchNorm ()=default
 ~FuseBatchNorm ()=default

Detailed Description

template<typename ConvLayer, armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
class armnn::optimizations::FuseBatchNorm< ConvLayer, ArmnnType, T >

Definition at line 19 of file FuseBatchNorm.hpp.
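
The ConvLayer template parameter selects the convolution layer type to fuse into (for example Convolution2dLayer or DepthwiseConvolution2dLayer), ArmnnType selects the tensor data type the optimization applies to, and T resolves to the matching C++ type via armnn::ResolveType. As a minimal illustrative sketch (the alias name is an assumption chosen for this example, not taken from the header), an instantiation for Float32 2D convolutions could look like:

// Hypothetical alias: fuse BatchNormalization parameters into a Convolution2d layer for Float32 tensors.
using FuseBatchNormConv2dFp32 =
    armnn::optimizations::FuseBatchNorm<armnn::Convolution2dLayer, armnn::DataType::Float32>;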

Constructor & Destructor Documentation

◆ FuseBatchNorm()

template<typename ConvLayer, armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
FuseBatchNorm ( )
protected default

◆ ~FuseBatchNorm()

template<typename ConvLayer, armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
~FuseBatchNorm ( )
protected default

Member Function Documentation

◆ Run()

template<typename ConvLayer, armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
void Run ( Graph & graph,
InputSlot & connection ) const
inline

Run for every exclusive connection between a base Convolution layer and a child BatchNorm layer, for non-quantized layers.

The child layer will be removed, and the base layer will be removed if it is left unconnected. A new Convolution layer will be added; its weights and bias are calculated from the weights and bias of the base Convolution layer combined with the parameters of the child BatchNorm layer.
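
Per output channel c, the fused parameters mirror the computation in the listing below:

    fusedWeights[c] = (gamma[c] / sqrt(variance[c] + epsilon)) * weights[c]
    fusedBias[c]    = gamma[c] * (bias[c] - mean[c]) / sqrt(variance[c] + epsilon) + beta[c]

where bias[c] is taken as zero when the original convolution has no bias enabled.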

Definition at line 27 of file FuseBatchNorm.hpp.

28 {
29 Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
30 Layer& child = connection.GetOwningLayer();
31
32 bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d);
33
34 ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d || depthwise);
35 ARMNN_ASSERT(child.GetType() == LayerType::BatchNormalization);
36
37 if (base.GetDataType() == ArmnnType && child.GetDataType() == ArmnnType)
38 {
39 OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
40 auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
41 auto batchNormLayer = PolymorphicDowncast<BatchNormalizationLayer*>(&child);
42
43 // Read convolution and batch norm parameters
44 BatchNormalizationDescriptor batchNormDescriptor = batchNormLayer->GetParameters();
45 auto epsilon = batchNormDescriptor.m_Eps;
46 IgnoreUnused(epsilon);
47
48 ConstTensor betaTensor(batchNormLayer->m_Beta->GetTensorInfo(), batchNormLayer->m_Beta->Map(true));
49 ConstTensor gammaTensor(batchNormLayer->m_Gamma->GetTensorInfo(), batchNormLayer->m_Gamma->Map(true));
50 ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(true));
51 ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));
52
53 auto convDescriptor = convLayer->GetParameters();
54 ConstTensor weightsTensor;
55 ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[1].GetConnection() != nullptr,
56 "FuseBatchNorm: Weight data should not be null.");
57
58 ConstantLayer* weightLayer = PolymorphicDowncast<ConstantLayer*>(
59 &base.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer());
60
61 weightsTensor = ConstTensor(weightLayer->m_LayerOutput->GetTensorInfo(),
62 weightLayer->m_LayerOutput->Map(true));
63
64 armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
65 auto weightsShape = weightsTensor.GetInfo().GetShape();
66 const unsigned int inputChannels = parentOut->GetTensorInfo().GetShape()[dataLayout.GetChannelsIndex()];
67 const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1;
68 const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0];
69 const unsigned int weightsHeight = depthwise ? weightsShape[1] :
70 weightsShape[dataLayout.GetHeightIndex()];
71 const unsigned int weightsWidth = depthwise ? weightsShape[2] :
72 weightsShape[dataLayout.GetWidthIndex()];
73
74 const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
75 const auto* betaBuffer = static_cast<const T*>(betaTensor.GetMemoryArea());
76 const auto* gammaBuffer = static_cast<const T*>(gammaTensor.GetMemoryArea());
77 const auto* meanBuffer = static_cast<const T*>(meanTensor.GetMemoryArea());
78 const auto* varBuffer = static_cast<const T*>(varTensor.GetMemoryArea());
79
80 std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
81 std::vector<T> betaVector (betaBuffer, betaBuffer + betaTensor.GetNumElements());
82 std::vector<T> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
83 std::vector<T> meanVector (meanBuffer, meanBuffer + meanTensor.GetNumElements());
84 std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
85
86 // fusedWeights = (gamma * weights) / sqrt(variance + epsilon)
87 std::vector<T> fusedWeightsVector(weightsVector.size());
88
89 for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
90 {
91 for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
92 {
93 T mult = gammaVector[cOut] / static_cast<T>(sqrtf(varianceVector[cOut] + epsilon));
94
95 for (unsigned int h = 0; h < weightsHeight; ++h)
96 {
97 for (unsigned int w = 0; w < weightsWidth; ++w)
98 {
99 unsigned int weightsIdx = 0;
100
101 if (depthwise)
102 {
103 cInput = cOut / depthMultiplier;
104 weightsIdx = w * outputChannels + cOut +
105 h * weightsWidth * outputChannels;
106 }
107 else if (convDescriptor.m_DataLayout == DataLayout::NHWC)
108 {
109 weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
110 h * weightsWidth * inputChannels +
111 w * inputChannels +
112 cInput;
113 }
114 else
115 {
116 weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
117 cInput * weightsWidth * weightsHeight +
118 h * weightsWidth +
119 w;
120 }
121 fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
122 }
123 }
124 }
125 }
126 ConstTensor fusedWeightsTensor(weightsTensor.GetInfo(), fusedWeightsVector);
127
128 // fusedBias = (gamma * (bias - mean)) / sqrt(variance + epsilon) + beta
129 std::vector<T> fusedBiasVector(outputChannels);
130 bool biasWasEnabledBeforeOpt = convDescriptor.m_BiasEnabled;
131 if (biasWasEnabledBeforeOpt)
132 {
133 ConstTensor biasTensor;
134 ARMNN_ASSERT_MSG(convLayer->GetInputSlots()[2].GetConnection() != nullptr,
135 "FuseBatchNorm: Bias data should not be null if bias is enabled.");
136
137 ConstantLayer* biasLayer = PolymorphicDowncast<ConstantLayer*>(
138 &base.GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer());
139
140 biasTensor = ConstTensor(biasLayer->m_LayerOutput->GetTensorInfo(),
141 biasLayer->m_LayerOutput->Map(true));
142
143 const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
144 std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
145
146 for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
147 {
148 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
149 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
150 }
151 }
152 else
153 {
154 convDescriptor.m_BiasEnabled = true;
155 std::vector<T> biasVector(outputChannels, T(0));
156
157 for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
158 {
159 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
160 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
161 }
162 }
163 ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, ArmnnType, 0.0f, 0, true), fusedBiasVector);
164
165 // Insert the new convolution layer that has batch norm parameters fused into
166 const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + base.GetName();
167 auto& newConv2dLayer = *graph.InsertNewLayer<ConvLayer>(base.GetInputSlot(0),
168 convDescriptor,
169 name.c_str());
170
171 // Connect weights and bias from old to new Conv2d layer
172 // This optimization will always have 3 input slots on the Conv2d base layer
173 if (newConv2dLayer.GetNumInputSlots() > 1)
174 {
175 // Remove old connection and connect to new layer2d
176 weightLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(1));
177 weightLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(1));
178 weightLayer->m_LayerOutput = std::make_unique<ScopedTensorHandle>(fusedWeightsTensor);
179
180 // Move bias const layers as normal if it was enabled before the optimisation
181 ConstantLayer* biasLayer;
182 if (biasWasEnabledBeforeOpt)
183 {
184 biasLayer = PolymorphicDowncast<ConstantLayer*>(
185 &base.GetInputSlot(2).GetConnectedOutputSlot()->GetOwningLayer());
186 // Remove old connection and connect to new layer2d
187 biasLayer->GetOutputSlot(0).Disconnect(base.GetInputSlot(2));
188 biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
189
190 }
191 // Otherwise create a new bias layer and add to the new convolution2d
192 else
193 {
194 // Add in bias constant layer
195 biasLayer = graph.AddLayer<ConstantLayer>("Bias");
196 biasLayer->GetOutputSlot(0).SetTensorInfo(fusedBiasTensor.GetInfo());
197 biasLayer->GetOutputSlot(0).Connect(newConv2dLayer.GetInputSlot(2));
198 }
199 biasLayer->m_LayerOutput = std::make_unique<ScopedTensorHandle>(ConstTensor(fusedBiasTensor));
200 }
201
202
203 // Reconnects with original parent.
204 newConv2dLayer.GetOutputSlot().MoveAllConnections(*parentOut);
205 // Parent is now the new convolution2d layer.
206 parentOut = &newConv2dLayer.GetOutputSlot();
207
208 // Moves connections in child output to parent layer.
209 // Child layer will be removed as it's left unconnected.
210 // Base layer will be removed if left unconnected.
211 child.GetOutputSlot().MoveAllConnections(*parentOut);
212 }
213 }
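
In practice this class is not invoked directly; it is wrapped in an exclusive-connection optimization and run as a graph pass. A minimal sketch, assuming the OptimizeForExclusiveConnection wrapper and the Optimizer::Pass / MakeOptimizations helpers from elsewhere in ArmNN (the alias name below is illustrative):

// Run the fusion on every exclusive Convolution2d -> BatchNormalization connection in a Graph.
using FuseBatchNormIntoConv2dFp32 =
    armnn::OptimizeForExclusiveConnection<
        armnn::Convolution2dLayer,
        armnn::BatchNormalizationLayer,
        armnn::optimizations::FuseBatchNorm<armnn::Convolution2dLayer, armnn::DataType::Float32>>;

armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FuseBatchNormIntoConv2dFp32()));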

References Graph::AddLayer(), ARMNN_ASSERT, ARMNN_ASSERT_MSG, armnn::BatchNormalization, OutputSlot::Connect(), armnn::Convolution2d, armnn::DepthwiseConvolution2d, OutputSlot::Disconnect(), DataLayoutIndexed::GetChannelsIndex(), InputSlot::GetConnectedOutputSlot(), Layer::GetDataType(), DataLayoutIndexed::GetHeightIndex(), BaseTensor< MemoryType >::GetInfo(), Layer::GetInputSlot(), BaseTensor< MemoryType >::GetMemoryArea(), Layer::GetName(), BaseTensor< MemoryType >::GetNumElements(), Layer::GetOutputSlot(), InputSlot::GetOwningLayer(), OutputSlot::GetOwningLayer(), TensorInfo::GetShape(), OutputSlot::GetTensorInfo(), Layer::GetType(), DataLayoutIndexed::GetWidthIndex(), armnn::IgnoreUnused(), Graph::InsertNewLayer(), BatchNormalizationDescriptor::m_Eps, ConstantLayer::m_LayerOutput, OutputSlot::MoveAllConnections(), armnn::NHWC, armnn::PolymorphicDowncast(), and OutputSlot::SetTensorInfo().


The documentation for this class was generated from the following file:
FuseBatchNorm.hpp