149 auto it = subgraph.
end();
150 std::map<LayerGuid, Layer*> untouched;
152 while (it != subgraph.
begin())
155 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
156 untouched.insert({base.
GetGuid(), &base});
160 while (it != subgraph.
begin())
163 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
175 if (output->GetNumConnections() == 1)
177 for (
auto&& childInput : output->GetConnections())
180 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
182 Layer& child = childInput->GetOwningLayer();
184 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
187 if (untouched.find(activationLayer->GetGuid()) == untouched.end())
192 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
211 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
220 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
225 untouched.erase(baseLayer->
GetGuid());
226 untouched.erase(activationLayer->GetGuid());
232 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
243 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
251 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
256 untouched.erase(baseLayer->
GetGuid());
257 untouched.erase(activationLayer->GetGuid());
274 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
282 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
287 untouched.erase(baseLayer->
GetGuid());
288 untouched.erase(activationLayer->GetGuid());
294 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
298 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
299 baseLayer->
m_Mean->GetTensorInfo(),
301 baseLayer->
m_Beta->GetTensorInfo(),
302 baseLayer->
m_Gamma->GetTensorInfo(),
309 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
315 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
317 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
319 untouched.erase(baseLayer->
GetGuid());
320 untouched.erase(activationLayer->GetGuid());
325 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
330 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
335 FuseAdditionLayer<AdditionLayer>(optimizationViews,
340 untouched.erase(baseLayer->
GetGuid());
341 untouched.erase(activationLayer->GetGuid());
346 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
351 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
356 FuseDivisionLayer<DivisionLayer>(optimizationViews,
361 untouched.erase(baseLayer->
GetGuid());
362 untouched.erase(activationLayer->GetGuid());
372 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
377 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
382 untouched.erase(baseLayer->
GetGuid());
383 untouched.erase(activationLayer->GetGuid());
388 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
393 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
398 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
403 untouched.erase(baseLayer->
GetGuid());
404 untouched.erase(activationLayer->GetGuid());
416 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
421 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
427 untouched.erase(baseLayer->
GetGuid());
428 untouched.erase(activationLayer->GetGuid());
436 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
441 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
447 untouched.erase(baseLayer->
GetGuid());
448 untouched.erase(activationLayer->GetGuid());
456 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
461 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
467 untouched.erase(baseLayer->
GetGuid());
468 untouched.erase(activationLayer->GetGuid());
476 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
481 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
487 untouched.erase(baseLayer->
GetGuid());
488 untouched.erase(activationLayer->GetGuid());
502 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
505 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
508 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
513 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
514 untouched.erase(baseLayer->
GetGuid());
521 ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
532 Layer* layerList[4] = {
nullptr,
nullptr,
nullptr,
nullptr};
535 if (IsLayerSequence<BinaryOperation>(base,
541 bool fuseReLu =
false;
542 unsigned int numInputs = 0;
543 unsigned int numOutputs = 0;
544 std::vector<TensorInfo> inputInfos;
545 std::vector<TensorInfo> outputInfos;
548 if (BuildAddMulAddTensorInfoLists<Layer>(layerList,
553 activationDescriptor,
559 {outputInfos.begin(), outputInfos.end()},
561 activationDescriptor);
564 std::string fusedName;
572 FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
574 std::make_shared<ActivationDescriptor>(*activationDescriptor));
578 std::vector<IConnectableLayer*> originalLayers;
579 for (
unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
581 if (layerList[layerIdx])
583 originalLayers.push_back(layerList[layerIdx]);
587 std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
588 BuildAddMulAddSlotLists<SlotList>(fuseReLu,
589 outputInfos.size() > 1,
590 inputLayersSlotLists,
591 outputLayersSlotLists);
593 ReplaceMultipleLayers<FusedLayer>(optimizationViews,
595 PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
596 inputLayersSlotLists,
597 outputLayersSlotLists);
600 for (
unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
602 if (layerList[layerIdx])
604 untouched.erase(layerList[layerIdx]->GetGuid());
621 return optimizationViews;