14 namespace optimizations
40 auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
41 auto batchNormLayer = PolymorphicDowncast<BatchNormalizationLayer*>(&child);
45 auto epsilon = batchNormDescriptor.
m_Eps;
48 ConstTensor betaTensor(batchNormLayer->m_Beta->GetTensorInfo(), batchNormLayer->m_Beta->Map(
true));
49 ConstTensor gammaTensor(batchNormLayer->m_Gamma->GetTensorInfo(), batchNormLayer->m_Gamma->Map(
true));
50 ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(
true));
51 ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(
true));
53 auto convDescriptor = convLayer->GetParameters();
56 "FuseBatchNorm: Weight data should not be null.");
58 ConstantLayer* weightLayer = PolymorphicDowncast<ConstantLayer*>(
67 const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1;
68 const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0];
69 const unsigned int weightsHeight = depthwise ? weightsShape[1] :
71 const unsigned int weightsWidth = depthwise ? weightsShape[2] :
74 const auto* weightsBuffer =
static_cast<const T*
>(weightsTensor.
GetMemoryArea());
75 const auto* betaBuffer =
static_cast<const T*
>(betaTensor.
GetMemoryArea());
76 const auto* gammaBuffer =
static_cast<const T*
>(gammaTensor.
GetMemoryArea());
77 const auto* meanBuffer =
static_cast<const T*
>(meanTensor.
GetMemoryArea());
78 const auto* varBuffer =
static_cast<const T*
>(varTensor.
GetMemoryArea());
80 std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.
GetNumElements());
81 std::vector<T> betaVector (betaBuffer, betaBuffer + betaTensor.
GetNumElements());
82 std::vector<T> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.
GetNumElements());
83 std::vector<T> meanVector (meanBuffer, meanBuffer + meanTensor.
GetNumElements());
84 std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.
GetNumElements());
87 std::vector<T> fusedWeightsVector(weightsVector.size());
89 for (
unsigned int cInput = 0; cInput < inputChannels; ++cInput)
91 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
93 T mult = gammaVector[cOut] /
static_cast<T
>(sqrtf(varianceVector[cOut] + epsilon));
95 for (
unsigned int h = 0; h < weightsHeight; ++h)
97 for (
unsigned int w = 0; w < weightsWidth; ++w)
99 unsigned int weightsIdx = 0;
103 cInput = cOut / depthMultiplier;
104 weightsIdx = w * outputChannels + cOut +
105 h * weightsWidth * outputChannels;
109 weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
110 h * weightsWidth * inputChannels +
116 weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
117 cInput * weightsWidth * weightsHeight +
121 fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
129 std::vector<T> fusedBiasVector(outputChannels);
130 bool biasWasEnabledBeforeOpt = convDescriptor.m_BiasEnabled;
131 if (biasWasEnabledBeforeOpt)
135 "FuseBatchNorm: Bias data should not be null if bias is enabled.");
137 ConstantLayer* biasLayer = PolymorphicDowncast<ConstantLayer*>(
143 const auto* biasBuffer =
static_cast<const T*
>(biasTensor.
GetMemoryArea());
144 std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.
GetNumElements());
146 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
148 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
149 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
154 convDescriptor.m_BiasEnabled =
true;
155 std::vector<T> biasVector(outputChannels, T(0));
157 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
159 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
160 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
163 ConstTensor fusedBiasTensor(
TensorInfo({outputChannels}, ArmnnType, 0.0f, 0,
true), fusedBiasVector);
166 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") + base.
GetName();
173 if (newConv2dLayer.GetNumInputSlots() > 1)
178 weightLayer->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(fusedWeightsTensor);
182 if (biasWasEnabledBeforeOpt)
184 biasLayer = PolymorphicDowncast<ConstantLayer*>(
204 newConv2dLayer.GetOutputSlot().MoveAllConnections(*parentOut);
206 parentOut = &newConv2dLayer.GetOutputSlot();
#define ARMNN_ASSERT(COND)
#define ARMNN_ASSERT_MSG(COND, MSG)
unsigned int GetNumElements() const
const TensorInfo & GetInfo() const
MemoryType GetMemoryArea() const
This layer represents a batch normalization operation.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
This layer represents a convolution 2d operation.
This layer represents a depthwise convolution 2d operation.
LayerT * InsertNewLayer(InputSlot &insertBefore, Args &&... args)
Inserts a new layer between the output slot currently connected to insertBefore and insertBefore itself.
LayerT * AddLayer(Args &&... args)
Adds a new layer, of type LayerType, to the graph constructed with the arguments passed.
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
const char * GetName() const override
Returns the name of the layer.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
DataType GetDataType() const
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Layer & GetOwningLayer() const
void SetTensorInfo(const TensorInfo &tensorInfo) override
void Disconnect(InputSlot &slot)
const TensorInfo & GetTensorInfo() const override
int Connect(InputSlot &destination)
const TensorShape & GetShape() const
void Run(Graph &graph, InputSlot &connection) const
Run for every exclusive connection between any base Convolution layer and a child BatchNorm layer for...
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
unsigned int GetChannelsIndex() const
Copyright (c) 2021 ARM Limited and Contributors.
typename ResolveTypeImpl< DT >::Type ResolveType
void IgnoreUnused(Ts &&...)
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
float m_Eps
Value to add to the variance. Used to avoid dividing by zero.