37 #include <arm_compute/core/Types.h> 38 #include <arm_compute/runtime/CL/CLBufferAllocator.h> 51 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
57 return std::make_unique<ClWorkloadFactory>(
58 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
64 return std::make_unique<ClWorkloadFactory>(
71 auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
74 registry.
RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
78 return std::make_unique<ClWorkloadFactory>(
79 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
85 auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
88 registry.
RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
92 return std::make_unique<ClWorkloadFactory>(
102 auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
105 registry.
RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
106 registry.
RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
108 return std::make_unique<ClWorkloadFactory>(
120 auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
123 registry.
RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
124 registry.
RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
132 auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
135 registry.
RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
136 registry.
RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
194 auto it = subgraph.
end();
195 bool isFastMathEnabled =
false;
196 std::map<LayerGuid, Layer*> untouched;
198 while (it != subgraph.
begin())
202 untouched.insert({base.
GetGuid(), &base});
206 #if defined(ARMCOMPUTECL_ENABLED) 218 while (it != subgraph.
begin())
231 if (output->GetNumConnections() == 1)
233 for (
auto&& childInput : output->GetConnections())
236 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
238 Layer& child = childInput->GetOwningLayer();
240 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
242 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
256 biases = baseLayer->
m_Bias->GetTensorInfo();
261 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
263 baseLayer->
m_Weight->GetTensorInfo(),
270 FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
275 untouched.erase(baseLayer->GetGuid());
276 untouched.erase(activationLayer->GetGuid());
282 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
288 biases = baseLayer->
m_Bias->GetTensorInfo();
293 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
295 baseLayer->
m_Weight->GetTensorInfo(),
301 FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
306 untouched.erase(baseLayer->GetGuid());
307 untouched.erase(activationLayer->GetGuid());
316 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
317 baseLayer->
m_Weight->GetTensorInfo(),
318 baseLayer->
m_Bias->GetTensorInfo(),
324 FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
329 untouched.erase(baseLayer->GetGuid());
330 untouched.erase(activationLayer->GetGuid());
336 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
340 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
341 baseLayer->
m_Mean->GetTensorInfo(),
343 baseLayer->
m_Beta->GetTensorInfo(),
344 baseLayer->
m_Gamma->GetTensorInfo(),
351 FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
357 replacementLayer->
m_Beta = std::move(baseLayer->m_Beta);
358 replacementLayer->
m_Gamma = std::move(baseLayer->m_Gamma);
359 replacementLayer->
m_Mean = std::move(baseLayer->m_Mean);
360 replacementLayer->
m_Variance = std::move(baseLayer->m_Variance);
361 untouched.erase(baseLayer->GetGuid());
362 untouched.erase(activationLayer->GetGuid());
367 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
372 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
377 FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
382 untouched.erase(baseLayer->GetGuid());
383 untouched.erase(activationLayer->GetGuid());
388 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
393 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
398 FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
403 untouched.erase(baseLayer->GetGuid());
404 untouched.erase(activationLayer->GetGuid());
414 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
419 FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
424 untouched.erase(baseLayer->GetGuid());
425 untouched.erase(activationLayer->GetGuid());
430 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
435 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
440 FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
445 untouched.erase(baseLayer->GetGuid());
446 untouched.erase(activationLayer->GetGuid());
465 return optimizationViews;
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManager)
Register a memory manager with shared ownership.
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
static const FactoryId & GetIdStatic()
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
std::vector< OptimizationPtr > Optimizations
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
const Parameters & GetParameters() const
This layer represents a depthwise convolution 2d operation.
constexpr const char * ClBackendId()
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::unique_ptr< armnn::profiling::IBackendProfiling > IBackendProfilingPtr
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
This layer represents a fully connected operation.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
BackendCapability
BackendCapability class.
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
bool IsFastMathEnabled() const
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
static const FactoryId & GetIdStatic()
const std::set< armnn::BackendCapability > gpuAccCapabilities
An ActivationDescriptor for the ActivationLayer.
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
This layer represents a subtraction operation.
std::vector< OutputSlot >::iterator BeginOutputSlots()
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
std::shared_ptr< ConstTensorHandle > m_Bias
A shared pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store Weight values.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &desc, const ActivationDescriptor *activationDescriptor)
const char * GetName() const override
Returns the name of the layer.
This layer represents a convolution 2d operation.
bool HasCapability(BackendCapability capabilityClass) const override
Returns true if backend support the capability false otherwise.
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
This layer represents a multiplication operation.
IBackendInternal::Optimizations GetOptimizations() const override
const TensorInfo & GetTensorInfo() const override
static const BackendId & GetIdStatic()
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< armnn::profiling::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace...
std::shared_ptr< T > GetAdditionalInformation() const
LayerGuid GetGuid() const final
Returns the unique id of the layer.
std::unique_ptr< IBackendContext > IBackendContextPtr