24.02
|
Go to the documentation of this file.
19 #include <arm_compute/core/CL/CLKernelLibrary.h>
20 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
39 delete static_cast<const T*
>(blob);
47 result.push_back(&(*it));
57 result.push_back(&(*it));
66 return std::make_unique<SubgraphView>(std::move(inputs), std::move(outputs), std::move(layers));
81 return std::make_unique<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
87 return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
93 std::shared_ptr<GpuFsaMemoryManager> memoryManager;
100 memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
103 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
108 return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
128 std::shared_ptr<GpuFsaMemoryManager> memoryManager;
135 memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
138 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
143 return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager));
153 std::shared_ptr<GpuFsaMemoryManager> memoryManager;
160 memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
163 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
183 std::shared_ptr<GpuFsaMemoryManager> memoryManager;
190 memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
193 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager);
217 return std::make_unique<GpuFsaBackendDefaultAllocator>();
225 using namespace arm_compute::experimental::dynamic_fusion;
227 auto it = subgraph.
end();
228 std::map<LayerGuid, Layer*> untouched;
229 while (it != subgraph.
begin())
232 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
233 untouched.insert({base.
GetGuid(), &base});
238 arm_compute::CLCompileContext* compileCtx = &(arm_compute::CLKernelLibrary::get().get_compile_context());
241 std::shared_ptr<GpuWorkloadContext> workloadContext = std::make_shared<GpuWorkloadContext>(compileCtx);
242 while (it != subgraph.
begin())
245 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
249 preCompiledBlobPtr->
sketch = std::make_unique<GpuWorkloadSketch>(workloadContext.get());
256 auto desc = PolymorphicDowncast<const ActivationDescriptor*>(&base.
GetParameters());
273 auto desc = PolymorphicDowncast<const Convolution2dDescriptor*>(&base.
GetParameters());
274 if (desc->m_BiasEnabled)
297 auto desc = PolymorphicDowncast<const BatchMatMulDescriptor*>(&base.
GetParameters());
306 auto desc = PolymorphicDowncast<const DepthwiseConvolution2dDescriptor*>(&base.
GetParameters());
307 if (desc->m_BiasEnabled)
328 auto desc = PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&base.
GetParameters());
337 auto desc = PolymorphicDowncast<const Pooling2dDescriptor*>(&base.
GetParameters());
344 auto desc = PolymorphicDowncast<const ReshapeDescriptor*>(&base.
GetParameters());
352 auto desc = PolymorphicDowncast<const ResizeDescriptor*>(&base.
GetParameters());
361 auto desc = PolymorphicDowncast<const SoftmaxDescriptor*>(&base.
GetParameters());
374 std::make_unique<PreCompiledObjectPtr>(preCompiledBlobPtr, DeleteAsType<GpuFsaPreCompiledBlob>);
378 std::move(*compiledBlob),
380 "GpuFsa_Pre_Compiled_Layer");
395 untouched.erase(base.
GetGuid());
408 return optimizationViews;
void AddUntouchedSubgraph(SubgraphView &&subgraph)
void GpuFsaSoftmaxCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
void GpuFsaElementwiseBinaryCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const ElementwiseBinaryDescriptor &descriptor)
void GpuFsaPooling2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Pooling2dDescriptor &descriptor)
const TensorInfo & GetTensorInfo() const override
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
static const BackendId & GetIdStatic()
void GpuFsaBatchMatMulCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
bool m_UsingCustomAllocator
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
std::list< Layer * > Layers
static const FactoryId & GetIdStatic()
void GpuFsaActivationCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ActivationDescriptor &descriptor)
unsigned int MemorySourceFlags
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
constexpr const char * GpuFsaBackendId()
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
std::unique_ptr< IBackendContext > IBackendContextPtr
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
std::vector< InputSlot * > InputSlots
void GpuFsaReshapeCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ReshapeDescriptor &descriptor)
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
IConnectableLayerIterator begin()
IConnectableLayer * AddPrecompiledLayer(const PreCompiledDescriptor &preCompiledDescriptor, CompiledBlobPtr compiledBlobPtr, const Optional< BackendId > &backend, const char *name=nullptr)
Adds a Precompiled layer to the network.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
std::shared_ptr< SubgraphView > SubgraphViewPtr
void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
std::vector< InputSlot >::iterator EndInputSlots()
LayerGuid GetGuid() const final
Returns the unique id of the layer.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
SubgraphView::SubgraphViewPtr CreateSubgraphViewFrom(SubgraphView::InputSlots &&inputs, SubgraphView::OutputSlots &&outputs, SubgraphView::Layers &&layers)
void AddSubstitution(SubstitutionPair &&substitution)
std::vector< InputSlot >::iterator BeginInputSlots()
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace.
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
void GpuFsaResizeCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const ResizeDescriptor &descriptor)
virtual const BaseDescriptor & GetParameters() const override
If the layer has a descriptor return it.
void DeleteAsType(const void *const blob)
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
unsigned int GetNumOutputSlots() const
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
void GpuFsaCastCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const TensorInfo &output)
std::vector< OutputSlot >::iterator BeginOutputSlots()
const BackendId & GetId() const override
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
const Substitutions & GetSubstitutions() const
IConnectableLayerIterator end()
std::vector< OutputSlot * > OutputSlots
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
Copyright (c) 2021 ARM Limited and Contributors.
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
std::vector< BackendOptions > ModelOptions
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
A PreCompiledDescriptor for the PreCompiledLayer.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer * > untouched)
void GpuFsaConvolution2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
SubgraphView::OutputSlots CreateOutputsFrom(Layer *layer)
std::vector< OutputSlot >::iterator EndOutputSlots()
SubgraphView::InputSlots CreateInputsFrom(Layer *layer)