38 #include <arm_compute/core/Types.h>
39 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
56 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
62 return std::make_unique<ClWorkloadFactory>(
63 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
69 return std::make_unique<ClWorkloadFactory>(
76 std::shared_ptr<ClMemoryManager> memoryManager;
83 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
86 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
87 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
97 return std::make_unique<ClWorkloadFactory>(
98 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
104 std::shared_ptr<ClMemoryManager> memoryManager;
111 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
114 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
115 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
125 return std::make_unique<ClWorkloadFactory>(
144 std::shared_ptr<ClMemoryManager> memoryManager;
151 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
154 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
155 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
156 inputFlags, outputFlags);
165 return std::make_unique<ClWorkloadFactory>(
177 std::shared_ptr<ClMemoryManager> memoryManager;
184 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
187 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
188 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
213 std::shared_ptr<ClMemoryManager> memoryManager;
220 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
223 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
224 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
225 inputFlags, outputFlags);
272 return std::make_unique<ClBackendDefaultAllocator>();
280 {
"NonConstWeights",
true},
281 {
"AsyncExecution",
false},
282 {
"ProtectedContentAllocation",
true},
283 {
"ConstantTensorsAsInputs",
true},
284 {
"PreImportIOTensors",
false},
285 {
"ExternallyManagedMemory",
true},
286 {
"MultiAxisPacking",
false},
287 {
"SingleAxisPacking",
true},
288 {
"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
297 auto it = subgraph.
end();
298 bool isFastMathEnabled =
false;
299 std::map<LayerGuid, Layer*> untouched;
301 while (it != subgraph.
begin())
304 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
305 untouched.insert({base.
GetGuid(), &base});
309 #if defined(ARMCOMPUTECL_ENABLED)
321 while (it != subgraph.
begin())
324 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
336 if (output->GetNumConnections() == 1)
338 for (
auto&& childInput : output->GetConnections())
341 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
343 Layer& child = childInput->GetOwningLayer();
345 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
347 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
366 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
375 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
380 untouched.erase(baseLayer->
GetGuid());
381 untouched.erase(activationLayer->GetGuid());
387 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
398 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
406 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
411 untouched.erase(baseLayer->
GetGuid());
412 untouched.erase(activationLayer->GetGuid());
429 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
437 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
442 untouched.erase(baseLayer->
GetGuid());
443 untouched.erase(activationLayer->GetGuid());
449 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
453 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
454 baseLayer->
m_Mean->GetTensorInfo(),
456 baseLayer->
m_Beta->GetTensorInfo(),
457 baseLayer->
m_Gamma->GetTensorInfo(),
464 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
470 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
472 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
475 untouched.erase(baseLayer->
GetGuid());
476 untouched.erase(activationLayer->GetGuid());
481 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
486 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
491 FuseAdditionLayer<AdditionLayer>(optimizationViews,
497 untouched.erase(baseLayer->
GetGuid());
498 untouched.erase(activationLayer->GetGuid());
503 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
508 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
513 FuseDivisionLayer<DivisionLayer>(optimizationViews,
518 untouched.erase(baseLayer->
GetGuid());
519 untouched.erase(activationLayer->GetGuid());
529 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
534 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
539 untouched.erase(baseLayer->
GetGuid());
540 untouched.erase(activationLayer->GetGuid());
545 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
550 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
555 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
560 untouched.erase(baseLayer->
GetGuid());
561 untouched.erase(activationLayer->GetGuid());
573 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
578 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
584 untouched.erase(baseLayer->
GetGuid());
585 untouched.erase(activationLayer->GetGuid());
593 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
598 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
604 untouched.erase(baseLayer->
GetGuid());
605 untouched.erase(activationLayer->GetGuid());
613 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
618 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
624 untouched.erase(baseLayer->
GetGuid());
625 untouched.erase(activationLayer->GetGuid());
633 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
638 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
644 untouched.erase(baseLayer->
GetGuid());
645 untouched.erase(activationLayer->GetGuid());
659 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
662 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
665 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
670 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
671 untouched.erase(baseLayer->
GetGuid());
678 ReshapeLayer* baseLayer = PolymorphicDowncast<ReshapeLayer*>(&base);
692 Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
697 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
705 FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
706 poolingDescriptor, padLayer);
707 untouched.erase(baseLayer->
GetGuid());
708 untouched.erase(padLayer->
GetGuid());
723 return optimizationViews;