39 #include <arm_compute/core/Types.h>
40 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
57 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
63 return std::make_unique<ClWorkloadFactory>(
64 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
70 return std::make_unique<ClWorkloadFactory>(
77 std::shared_ptr<ClMemoryManager> memoryManager;
84 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
87 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
88 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
98 return std::make_unique<ClWorkloadFactory>(
99 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
105 std::shared_ptr<ClMemoryManager> memoryManager;
112 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
115 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
116 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
126 return std::make_unique<ClWorkloadFactory>(
145 std::shared_ptr<ClMemoryManager> memoryManager;
152 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
155 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
156 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
157 inputFlags, outputFlags);
166 return std::make_unique<ClWorkloadFactory>(
178 std::shared_ptr<ClMemoryManager> memoryManager;
185 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
188 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
189 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
214 std::shared_ptr<ClMemoryManager> memoryManager;
221 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
224 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
225 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
226 inputFlags, outputFlags);
273 return std::make_unique<ClBackendDefaultAllocator>();
281 {
"NonConstWeights",
true},
282 {
"AsyncExecution",
false},
283 {
"ProtectedContentAllocation",
true},
284 {
"ConstantTensorsAsInputs",
true},
285 {
"PreImportIOTensors",
false},
286 {
"ExternallyManagedMemory",
true},
287 {
"MultiAxisPacking",
false},
288 {
"SingleAxisPacking",
true},
289 {
"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
298 auto it = subgraph.
end();
299 bool isFastMathEnabled =
false;
300 std::map<LayerGuid, Layer*> untouched;
302 while (it != subgraph.
begin())
305 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
306 untouched.insert({base.
GetGuid(), &base});
310 #if defined(ARMCOMPUTECL_ENABLED)
322 while (it != subgraph.
begin())
325 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
337 if (output->GetNumConnections() == 1)
339 for (
auto&& childInput : output->GetConnections())
342 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
344 Layer& child = childInput->GetOwningLayer();
346 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
348 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
367 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
376 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
381 untouched.erase(baseLayer->
GetGuid());
382 untouched.erase(activationLayer->GetGuid());
388 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
399 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
407 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
412 untouched.erase(baseLayer->
GetGuid());
413 untouched.erase(activationLayer->GetGuid());
430 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
438 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
443 untouched.erase(baseLayer->
GetGuid());
444 untouched.erase(activationLayer->GetGuid());
450 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
454 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
455 baseLayer->
m_Mean->GetTensorInfo(),
457 baseLayer->
m_Beta->GetTensorInfo(),
458 baseLayer->
m_Gamma->GetTensorInfo(),
465 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
471 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
473 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
476 untouched.erase(baseLayer->
GetGuid());
477 untouched.erase(activationLayer->GetGuid());
482 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
487 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
492 FuseAdditionLayer<AdditionLayer>(optimizationViews,
498 untouched.erase(baseLayer->
GetGuid());
499 untouched.erase(activationLayer->GetGuid());
504 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
509 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
514 FuseDivisionLayer<DivisionLayer>(optimizationViews,
519 untouched.erase(baseLayer->
GetGuid());
520 untouched.erase(activationLayer->GetGuid());
530 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
535 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
540 untouched.erase(baseLayer->
GetGuid());
541 untouched.erase(activationLayer->GetGuid());
546 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
551 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
556 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
561 untouched.erase(baseLayer->
GetGuid());
562 untouched.erase(activationLayer->GetGuid());
574 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
579 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
585 untouched.erase(baseLayer->
GetGuid());
586 untouched.erase(activationLayer->GetGuid());
594 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
599 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
605 untouched.erase(baseLayer->
GetGuid());
606 untouched.erase(activationLayer->GetGuid());
614 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
619 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
625 untouched.erase(baseLayer->
GetGuid());
626 untouched.erase(activationLayer->GetGuid());
634 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
639 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
645 untouched.erase(baseLayer->
GetGuid());
646 untouched.erase(activationLayer->GetGuid());
660 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
663 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
666 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
671 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
672 untouched.erase(baseLayer->
GetGuid());
679 ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
693 Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
698 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
706 FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
707 poolingDescriptor, padLayer);
708 untouched.erase(baseLayer->
GetGuid());
709 untouched.erase(padLayer->
GetGuid());
724 return optimizationViews;