#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
// ClBackend::CreateMemoryManager()
return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
// ClBackend::CreateWorkloadFactory overloads:
return std::make_unique<ClWorkloadFactory>(PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
return std::make_unique<ClWorkloadFactory>(PolymorphicPointerDowncast<ClMemoryManager>(memoryManager),
                                           CreateBackendSpecificModelContext(modelOptions));
// ClBackend::CreateWorkloadFactory(TensorHandleFactoryRegistry& registry)
std::shared_ptr<ClMemoryManager> memoryManager;
// ... (m_CustomAllocator is used instead when m_UsingCustomAllocator is set)
memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
    static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));
// ... (registration with the registry, sketched below)
return std::make_unique<ClWorkloadFactory>(
    PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
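The elided lines above hand the memory manager and both factories over to the registry. A minimal sketch of those steps, assuming the TensorHandleFactoryRegistry API listed further down this page (the exact ordering in ClBackend.cpp may differ):

// Pair the copy factory with its import counterpart *before* moving the
// factories, while their ids are still reachable:
registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
registry.RegisterMemoryManager(memoryManager);       // shared ownership
registry.RegisterFactory(std::move(factory));        // ownership transferred
registry.RegisterFactory(std::move(importFactory));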
// ClBackend::CreateWorkloadFactory(registry, modelOptions): identical setup,
// but the factory also receives the backend-specific model context.
std::shared_ptr<ClMemoryManager> memoryManager;
// ...
memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
    static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));
// ...
return std::make_unique<ClWorkloadFactory>(
    PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
// ClBackend::CreateWorkloadFactory(registry, modelOptions, inputFlags, outputFlags):
// here the import factory is built from the caller-supplied memory-source flags.
std::shared_ptr<ClMemoryManager> memoryManager;
// ...
memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
    inputFlags, outputFlags);
// ...
return std::make_unique<ClWorkloadFactory>(
    PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
// ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
std::shared_ptr<ClMemoryManager> memoryManager;
// ...
memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
    static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));
// ClBackend::RegisterTensorHandleFactories(registry, inputFlags, outputFlags)
std::shared_ptr<ClMemoryManager> memoryManager;
// ...
memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
    inputFlags, outputFlags);
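The caller-supplied flags are MemorySourceFlags bitmasks. A hedged example of how such flags are typically constructed (MemorySource values are power-of-two, so they can be combined; verify the enumerators against your ArmNN version):

MemorySourceFlags inputFlags  = static_cast<MemorySourceFlags>(MemorySource::Malloc);
MemorySourceFlags outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc) |
                                static_cast<MemorySourceFlags>(MemorySource::DmaBuf);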
// ClBackend::GetDefaultAllocator()
return std::make_unique<ClBackendDefaultAllocator>();
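GetDefaultAllocator() is what the runtime falls back to when no custom allocator is supplied. A hedged sketch of supplying one instead, assuming the m_CustomAllocatorMap field on IRuntime::CreationOptions and a hypothetical customAllocator variable:

#include <armnn/IRuntime.hpp>

armnn::IRuntime::CreationOptions options;
// customAllocator: any std::shared_ptr<armnn::ICustomAllocator> (hypothetical here)
options.m_CustomAllocatorMap = { { "GpuAcc", customAllocator } };
armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
// ClBackend then sets m_UsingCustomAllocator and wraps the allocator in
// m_CustomAllocator (see the members listed below).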
// ClBackend::GetCapabilities() (the capability set is named after the
// backend id, "GpuAcc")
return BackendCapabilities("GpuAcc",
{
    {"NonConstWeights", true},
    {"ProtectedContentAllocation", true},
    {"ConstantTensorsAsInputs", true},
    {"PreImportIOTensors", false},
    {"ExternallyManagedMemory", true},
    {"MultiAxisPacking", false},
    {"SingleAxisPacking", true},
    {"AllOrNothing", false},
    {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
});
// ClBackend::OptimizeSubgraphView(subgraph, modelOptions)
auto it = subgraph.end();
bool isFastMathEnabled = false;
std::map<LayerGuid, Layer*> untouched;
while (it != subgraph.begin())
{
    --it;
    Layer& base = *(PolymorphicDowncast<Layer*>(*it));
    untouched.insert({base.GetGuid(), &base});
}
it = subgraph.end();
#if defined(ARMCOMPUTECL_ENABLED)
// ... (the ClBackendModelContext created from modelOptions is queried)
isFastMathEnabled = clModelOptions->IsFastMathEnabled();
// ...
#endif
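IsFastMathEnabled() reflects a user-supplied model option. A hedged example of how "FastMathEnabled" is typically passed in for the GpuAcc backend when optimizing a network (names per the public ArmNN API; verify against your version):

armnn::BackendOptions clOptions("GpuAcc", { { "FastMathEnabled", true } });
armnn::OptimizerOptions optimizerOptions;
optimizerOptions.m_ModelOptions.push_back(clOptions);
// armnn::Optimize(...) then routes these options to
// CreateBackendSpecificModelContext() above.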
while (it != subgraph.begin())
{
    // ...
if (output->GetNumConnections() == 1)
{
    for (auto&& childInput : output->GetConnections())
    {
        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
        {
            Layer& child = childInput->GetOwningLayer();
            // ...
            // Make sure the activation layer is inside this subgraph before fusing:
            if (untouched.find(activationLayer->GetGuid()) == untouched.end())
const std::string name = std::string("fused-") + child.GetName() +
                         std::string("-into-") + base.GetName();
// Each fusable layer type gets the same treatment; only the validate
// helper and fuse helper differ (fragment):
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
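Every fusion case above and below follows the same validate-then-fuse shape. A hedged sketch for the Addition case, assembled from the ClAdditionValidate and FuseAdditionLayer signatures listed later on this page (argument details may differ from ClBackend.cpp):

arm_compute::Status status = ClAdditionValidate(
    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    &activationDesc);
if (status)  // arm_compute::Status converts to bool
{
    FuseAdditionLayer<AdditionLayer>(optimizationViews, baseLayer,
                                     activationLayer, activationDesc, name);
    untouched.erase(baseLayer->GetGuid());
    untouched.erase(activationLayer->GetGuid());
}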
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
// BatchNormalization case: validation also checks the statistics tensors,
// and the fused replacement layer takes ownership of them.
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    baseLayer->m_Mean->GetTensorInfo(),
    baseLayer->m_Variance->GetTensorInfo(),
    baseLayer->m_Beta->GetTensorInfo(),
    baseLayer->m_Gamma->GetTensorInfo(),
    // ...
replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
    // ...
untouched.erase(baseLayer->GetGuid());
untouched.erase(activationLayer->GetGuid());
// Reduce case: a multi-axis reduction is decomposed into a chain of
// single-axis reductions.
if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
{
    // ... (ChainReduceLayers + ReplaceLayers rewrite the graph)
    untouched.erase(baseLayer->GetGuid());
}
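Conceptually, ChainReduceLayers turns one Reduce over m_vAxis = {1, 2} into two chained single-axis reduces. A hedged illustration of the descriptors involved, not the ArmNN implementation itself:

ReduceDescriptor first;
first.m_vAxis = { 1 };   // reduce axis 1 first
ReduceDescriptor second;
second.m_vAxis = { 2 };  // then reduce axis 2 on the intermediate result
// ChainReduceLayers builds the chained layers inside optimizationViews and
// ReplaceLayers substitutes the chain for the original multi-axis layer.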
// Pooling2d case: fold a preceding Pad layer into the pooling descriptor.
FoldPadLayer2d(optimizationViews, baseLayer,
               poolingDescriptor, padLayer);
untouched.erase(baseLayer->GetGuid());
untouched.erase(padLayer->GetGuid());
return optimizationViews;
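Pieced together, OptimizeSubgraphView follows this skeleton (a simplified sketch of the fragments above, not a drop-in replacement):

OptimizationViews optimizationViews(modelOptions);
std::map<LayerGuid, Layer*> untouched;
// Pass 1: record every layer in the subgraph as untouched.
// Pass 2: walk the subgraph again; for each fusable layer whose single
//         consumer is an Activation layer, validate the fused workload via
//         the Cl*WorkloadValidate helpers and, on success, substitute a
//         fused layer and erase both originals from 'untouched'. Multi-axis
//         Reduce layers and Pad-into-Pooling2d folds are rewritten likewise.
// Pass 3: whatever is left in 'untouched' is reported back unchanged.
ReportUntouchedLayers(optimizationViews, untouched);
return optimizationViews;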
This layer represents an addition operation.
This layer represents a batch normalization operation.
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
static const BackendId & GetIdStatic()
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
bool m_UsingCustomAllocator
std::shared_ptr< ClBackendCustomAllocatorWrapper > m_CustomAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
static const FactoryId & GetIdStatic()
static const FactoryId & GetIdStatic()
This layer represents a convolution 2d operation.
This layer represents a depthwise convolution 2d operation.
This layer represents a division operation.
This layer represents an elementwiseBinary operation.
This layer represents a fully connected operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
std::vector< OutputSlot >::iterator BeginOutputSlots()
LayerGuid GetGuid() const final
Returns the unique id of the layer.
std::shared_ptr< T > GetAdditionalInformation() const
const char * GetName() const override
Returns the name of the layer.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
std::vector< OutputSlot >::iterator EndOutputSlots()
const Parameters & GetParameters() const override
If the layer has a descriptor, return it.
This layer represents a multiplication operation.
void AddUntouchedSubgraph(SubgraphView &&subgraph)
const Subgraphs & GetDeletedSubgraphs() const
const Substitutions & GetSubstitutions() const
unsigned int GetNumConnections() const override
Layer & GetOwningLayer() const
const TensorInfo & GetTensorInfo() const override
This layer represents a pad operation.
This layer represents a pooling 2d operation.
This layer represents a reduction operation.
This layer represents a reshape operation.
The SubgraphView class represents a subgraph of a Graph.
IConnectableLayerIterator begin()
IConnectableLayerIterator end()
This layer represents a subtraction operation.
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManager)
Register a memory manager with shared ownership.
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Id for Memory Copy and TensorHandleFactory Id for Memory Impor...
bool TryFoldPadIntoLayer2d(const PadDescriptor &padDescriptor, Descriptor &layerDescriptor, const TensorInfo &tensorInfo)
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
LayerType * FuseMultiplicationLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
LayerType * FuseConvolution2dLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
LayerType * FuseAdditionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
auto PolymorphicPointerDowncast(const SourceType &value)
Polymorphic downcast for shared pointers and built-in pointers.
LayerType * FuseBatchNormalizationLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
unsigned int MemorySourceFlags
std::vector< BackendOptions > ModelOptions
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
void RemoveReshapeLayer(ReshapeLayer *baseLayer, std::map< LayerGuid, Layer * > &untouched, OptimizationViews &optimizationViews)
std::vector< IConnectableLayer * > ChainReduceLayers(OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
LayerType * FuseFullyConnectedLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
void ReplaceLayers(OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer * > &layers)
constexpr const char * ClBackendId()
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
void FoldPadLayer2d(OptimizationViews &optimizationViews, LayerT *baseLayer, Descriptor &descriptor, PadLayer *padLayer)
LayerType * FuseDivisionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for built-in pointers only.
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
bool ConnectedToLayerWithNCHW(Layer *baseLayer)
Checks if the Layer is connected to any Layer that has an NCHW layout.
BackendOptions BackendCapabilities
LayerType * FuseSubtractionLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
LayerType * FuseDepthwiseConvolution2dLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
LayerType * FuseElementwiseBinaryLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, BinaryOperation operation, std::string name)
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
An ActivationDescriptor for the ActivationLayer.
bool m_BiasEnabled
Enable/disable bias.
bool m_BiasEnabled
Enable/disable bias.
BinaryOperation m_Operation
Specifies the elementwiseBinary operation to execute.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A Pooling2dDescriptor for the Pooling2dLayer.
A ReduceDescriptor for the REDUCE operators.
std::vector< uint32_t > m_vAxis
The indices of the dimensions to reduce.