40 #include <arm_compute/core/Types.h>
41 #include <arm_compute/runtime/Allocator.h>
// NOTE(review): this span is a non-contiguous sampling of several factory
// methods (the embedded numbers are original file line numbers; most lines
// between them are elided). Only comments are added here — the fragments are
// left byte-identical because their enclosing signatures are not visible.
//
// Fragment: creates the backend memory manager backed by an
// arm_compute::Allocator (trailing constructor arguments elided).
54 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
// Fragment: creates a workload factory bound to an existing memory manager;
// the shared base-class pointer is downcast to the concrete NeonMemoryManager.
61 return std::make_unique<NeonWorkloadFactory>(
62 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
// Fragment: another workload-factory overload (its arguments are elided).
68 return std::make_unique<NeonWorkloadFactory>(
// Fragment: builds a fresh memory manager plus a tensor-handle factory that
// shares it (registration call between these lines is elided).
75 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
80 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
// Fragment: returns a workload factory over the freshly created manager.
87 return std::make_unique<NeonWorkloadFactory>(
88 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
// Fragment: same manager + tensor-handle-factory pattern repeated for a
// second overload (presumably one taking extra options — TODO confirm
// against the full file).
94 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
99 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
105 return std::make_unique<NeonWorkloadFactory>(
// NOTE(review): fragmented interior of a subgraph-optimization pass (the
// embedded numbers are original file line numbers; most intervening lines
// are elided). The pass walks the subgraph and fuses activation layers into
// their preceding compute layers, tracking not-yet-replaced layers in
// `untouched`. Code is left byte-identical; comments are inserted only at
// lexically safe points.
//
// Start from the end of the subgraph and record every layer as untouched.
149 auto it = subgraph.
end();
150 std::map<LayerGuid, Layer*> untouched;
// First reverse walk: populate `untouched` with every layer's GUID.
152 while (it != subgraph.
begin())
155 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
156 untouched.insert({base.
GetGuid(), &base});
// Second reverse walk: attempt fusions on each layer.
160 while (it != subgraph.
begin())
163 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
// Only consider fusion when the output feeds exactly one consumer —
// fusing would otherwise change what the other consumers observe.
175 if (output->GetNumConnections() == 1)
177 for (
auto&& childInput : output->GetConnections())
// Elided condition fragment: the child must pass a data-type
// compatibility check (and, per the elided lines, presumably be an
// activation layer — TODO confirm against the full file).
180 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
182 Layer& child = childInput->GetOwningLayer();
184 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
// Name the fused layer "fused-<activation>-into-<base>" (tail elided).
186 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
// --- Convolution2d + activation fusion ---
205 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
214 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
219 untouched.erase(baseLayer->
GetGuid());
220 untouched.erase(activationLayer->GetGuid());
// --- DepthwiseConvolution2d + activation fusion ---
226 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
237 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
245 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
250 untouched.erase(baseLayer->
GetGuid());
251 untouched.erase(activationLayer->GetGuid());
// --- FullyConnected + activation fusion ---
268 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
276 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
281 untouched.erase(baseLayer->
GetGuid());
282 untouched.erase(activationLayer->GetGuid());
// --- BatchNormalization + activation fusion ---
// Validation reads the mean/beta/gamma tensor infos from the base layer.
288 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
292 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
293 baseLayer->
m_Mean->GetTensorInfo(),
295 baseLayer->
m_Beta->GetTensorInfo(),
296 baseLayer->
m_Gamma->GetTensorInfo(),
303 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
// Ownership of the constant tensors is moved onto the replacement layer
// (per visible lines: beta and mean; gamma/variance moves are elided).
309 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
311 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
313 untouched.erase(baseLayer->
GetGuid());
314 untouched.erase(activationLayer->GetGuid());
// --- Addition + activation fusion ---
319 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
324 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
329 FuseAdditionLayer<AdditionLayer>(optimizationViews,
334 untouched.erase(baseLayer->
GetGuid());
335 untouched.erase(activationLayer->GetGuid());
// --- Division + activation fusion ---
340 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
345 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
350 FuseDivisionLayer<DivisionLayer>(optimizationViews,
355 untouched.erase(baseLayer->
GetGuid());
356 untouched.erase(activationLayer->GetGuid());
// --- Multiplication + activation fusion ---
366 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
371 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
376 untouched.erase(baseLayer->
GetGuid());
377 untouched.erase(activationLayer->GetGuid());
// --- Subtraction + activation fusion ---
382 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
387 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
392 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
397 untouched.erase(baseLayer->
GetGuid());
398 untouched.erase(activationLayer->GetGuid());
// --- ElementwiseBinary + activation fusion ---
// Four parallel branches follow; the elided lines presumably switch on the
// binary operation kind (add/div/mul/sub) — TODO confirm in the full file.
410 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
415 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
421 untouched.erase(baseLayer->
GetGuid());
422 untouched.erase(activationLayer->GetGuid());
430 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
435 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
441 untouched.erase(baseLayer->
GetGuid());
442 untouched.erase(activationLayer->GetGuid());
450 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
455 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
461 untouched.erase(baseLayer->
GetGuid());
462 untouched.erase(activationLayer->GetGuid());
470 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
475 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
481 untouched.erase(baseLayer->
GetGuid());
482 untouched.erase(activationLayer->GetGuid());
// --- Reduce decomposition ---
// A multi-axis Reduce is split into a chain of single-axis Reduce layers.
496 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
499 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
502 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
507 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
508 untouched.erase(baseLayer->
GetGuid());
// --- Reshape handling (body elided) ---
515 ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
// --- Add/Mul/Add sequence fusion into a single FusedLayer ---
// layerList holds up to 4 candidate layers of the sequence; unused slots
// remain nullptr.
531 Layer* layerList[4] = {
nullptr,
nullptr,
nullptr,
nullptr};
534 if (IsLayerSequence<BinaryOperation>(base,
540 bool fuseReLu =
false;
541 unsigned int numInputs = 0;
542 unsigned int numOutputs = 0;
543 std::vector<TensorInfo> inputInfos;
544 std::vector<TensorInfo> outputInfos;
// Collect input/output tensor infos for the candidate sequence.
547 if (BuildAddMulAddTensorInfoLists<Layer>(layerList,
552 activationDescriptor,
558 {outputInfos.begin(), outputInfos.end()},
560 activationDescriptor);
563 std::string fusedName;
571 FusedLayer* addMulAddFusedLayer = PolymorphicDowncast<FusedLayer*>(addMulAddLayer);
573 std::make_shared<ActivationDescriptor>(*activationDescriptor));
// Gather the non-null original layers that the fused layer replaces.
577 std::vector<IConnectableLayer*> originalLayers;
578 for (
unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
580 if (layerList[layerIdx])
582 originalLayers.push_back(layerList[layerIdx]);
// Build the slot wiring for the replacement, then swap the layers in.
586 std::vector<SlotList> inputLayersSlotLists, outputLayersSlotLists;
587 BuildAddMulAddSlotLists<SlotList>(fuseReLu,
588 outputInfos.size() > 1,
589 inputLayersSlotLists,
590 outputLayersSlotLists);
592 ReplaceMultipleLayers<FusedLayer>(optimizationViews,
594 PolymorphicDowncast<FusedLayer*>(addMulAddLayer),
595 inputLayersSlotLists,
596 outputLayersSlotLists);
// All replaced layers are no longer "untouched".
599 for (
unsigned int layerIdx = 0; layerIdx < 4; ++layerIdx)
601 if (layerList[layerIdx])
603 untouched.erase(layerList[layerIdx]->GetGuid());
// Return the accumulated substitutions/untouched bookkeeping to the caller.
620 return optimizationViews;
// NOTE(review): trailing fragments of two more member functions (embedded
// numbers are original file line numbers; surrounding lines are elided).
//
// Fragment: tensor-handle-factory registration — same manager + factory
// pattern as the creation overloads earlier in the file.
630 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
635 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
// Fragment: the backend's default custom allocator is the DefaultAllocator.
644 return std::make_unique<DefaultAllocator>();