14 namespace optimizations
40 auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
41 auto batchNormLayer = PolymorphicDowncast<BatchNormalizationLayer*>(&child);
45 auto epsilon = batchNormDescriptor.
m_Eps;
48 ConstTensor betaTensor(batchNormLayer->m_Beta->GetTensorInfo(), batchNormLayer->m_Beta->Map(
true));
49 ConstTensor gammaTensor(batchNormLayer->m_Gamma->GetTensorInfo(), batchNormLayer->m_Gamma->Map(
true));
50 ConstTensor meanTensor(batchNormLayer->m_Mean->GetTensorInfo(), batchNormLayer->m_Mean->Map(
true));
51 ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(
true));
53 auto convDescriptor = convLayer->GetParameters();
56 "FuseBatchNorm: Weight data should not be null.");
58 ConstantLayer* weightLayer = PolymorphicDowncast<ConstantLayer*>(
67 const unsigned int depthMultiplier = depthwise ? weightsShape[3] / inputChannels : 1;
68 const unsigned int outputChannels = depthwise ? weightsShape[3] : weightsShape[0];
69 const unsigned int weightsHeight = depthwise ? weightsShape[1] :
71 const unsigned int weightsWidth = depthwise ? weightsShape[2] :
74 const auto* weightsBuffer =
static_cast<const T*
>(weightsTensor.
GetMemoryArea());
75 const auto* betaBuffer =
static_cast<const T*
>(betaTensor.
GetMemoryArea());
76 const auto* gammaBuffer =
static_cast<const T*
>(gammaTensor.
GetMemoryArea());
77 const auto* meanBuffer =
static_cast<const T*
>(meanTensor.
GetMemoryArea());
78 const auto* varBuffer =
static_cast<const T*
>(varTensor.
GetMemoryArea());
80 std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.
GetNumElements());
81 std::vector<T> betaVector (betaBuffer, betaBuffer + betaTensor.
GetNumElements());
82 std::vector<T> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.
GetNumElements());
83 std::vector<T> meanVector (meanBuffer, meanBuffer + meanTensor.
GetNumElements());
84 std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.
GetNumElements());
87 std::vector<T> fusedWeightsVector(weightsVector.size());
89 for (
unsigned int cInput = 0; cInput < inputChannels; ++cInput)
91 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
93 T mult = gammaVector[cOut] /
static_cast<T
>(sqrtf(varianceVector[cOut] + epsilon));
95 for (
unsigned int h = 0; h < weightsHeight; ++h)
97 for (
unsigned int w = 0; w < weightsWidth; ++w)
99 unsigned int weightsIdx = 0;
103 cInput = cOut / depthMultiplier;
104 weightsIdx = w * outputChannels + cOut +
105 h * weightsWidth * outputChannels;
109 weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
110 h * weightsWidth * inputChannels +
116 weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
117 cInput * weightsWidth * weightsHeight +
121 fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
129 std::vector<T> fusedBiasVector(outputChannels);
130 bool biasWasEnabledBeforeOpt = convDescriptor.m_BiasEnabled;
131 if (biasWasEnabledBeforeOpt)
135 "FuseBatchNorm: Bias data should not be null if bias is enabled.");
137 ConstantLayer* biasLayer = PolymorphicDowncast<ConstantLayer*>(
143 const auto* biasBuffer =
static_cast<const T*
>(biasTensor.
GetMemoryArea());
144 std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.
GetNumElements());
146 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
148 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
149 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
154 convDescriptor.m_BiasEnabled =
true;
155 std::vector<T> biasVector(outputChannels, T(0));
157 for (
unsigned int cOut = 0; cOut < outputChannels; ++cOut)
159 fusedBiasVector[cOut] = ((gammaVector[cOut] * (biasVector[cOut] - meanVector[cOut])) /
160 sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
163 ConstTensor fusedBiasTensor(
TensorInfo({outputChannels}, ArmnnType, 0.0f, 0,
true), fusedBiasVector);
166 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") + base.
GetName();
173 if (newConv2dLayer.GetNumInputSlots() > 1)
178 weightLayer->
m_LayerOutput = std::make_unique<ScopedTensorHandle>(fusedWeightsTensor);
182 if (biasWasEnabledBeforeOpt)
184 biasLayer = PolymorphicDowncast<ConstantLayer*>(
204 newConv2dLayer.GetOutputSlot().MoveAllConnections(*parentOut);
206 parentOut = &newConv2dLayer.GetOutputSlot();