24.02
|
Go to the documentation of this file.
10 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
11 #include <arm_compute/runtime/CL/CLMemoryRegion.h>
12 #include <arm_compute/core/CL/CLKernelLibrary.h>
13 #include <CL/cl_ext.h>
14 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
15 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
34 std::unique_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch>
sketch =
nullptr;
35 std::shared_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadContext>
workloadContext =
nullptr;
44 {
"NonConstWeights",
false},
45 {
"AsyncExecution",
false},
46 {
"ProtectedContentAllocation",
false},
47 {
"ConstantTensorsAsInputs",
true},
48 {
"PreImportIOTensors",
false},
49 {
"ExternallyManagedMemory",
false},
50 {
"MultiAxisPacking",
false},
51 {
"SingleAxisPacking",
false}
106 ARMNN_LOG(
info) <<
"Using Custom Allocator for GpuFsaBackend";
109 m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
121 void*
allocate(
size_t size,
size_t alignment)
override
123 auto alloc = m_CustomAllocator->allocate(size, alignment);
124 return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
128 auto hostMemPtr = m_AllocatedBufferMappings[ptr];
129 clReleaseMemObject(
static_cast<cl_mem
>(ptr));
130 m_CustomAllocator->free(hostMemPtr);
132 std::unique_ptr<arm_compute::IMemoryRegion>
make_region(
size_t size,
size_t alignment)
override
134 auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
135 cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
137 return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
139 m_CustomAllocator->GetMemorySourceType());
142 cl_mem MapAllocatedMemory(
void* memory,
size_t size,
MemorySource source)
145 auto cachelineAlignment =
146 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
147 auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
151 const cl_import_properties_arm importProperties[] =
154 CL_IMPORT_TYPE_HOST_ARM,
157 cl_int
error = CL_SUCCESS;
158 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
164 if (error == CL_SUCCESS)
166 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
170 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
174 const cl_import_properties_arm importProperties[] =
177 CL_IMPORT_TYPE_DMA_BUF_ARM,
178 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
182 cl_int
error = CL_SUCCESS;
183 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
189 if (error == CL_SUCCESS)
191 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
195 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
196 + std::to_string(error));
200 const cl_import_properties_arm importProperties[] =
203 CL_IMPORT_TYPE_DMA_BUF_ARM,
204 CL_IMPORT_TYPE_PROTECTED_ARM,
208 cl_int
error = CL_SUCCESS;
209 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
215 if (error == CL_SUCCESS)
217 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
221 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
222 + std::to_string(error));
225 "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
227 std::shared_ptr<ICustomAllocator> m_CustomAllocator;
228 std::map<void*, void*> m_AllocatedBufferMappings;
236 : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
239 m_HostMemPtr = hostMemPtr;
240 m_MemorySource = source;
249 void*
map(cl::CommandQueue &q,
bool blocking)
override
252 if (m_HostMemPtr ==
nullptr)
254 throw armnn::Exception(
"ClBackend: Attempting to map memory with an invalid host ptr");
256 if (_mapping !=
nullptr)
258 throw armnn::Exception(
"ClBackend: Attempting to map memory which has not yet been unmapped");
260 switch (m_MemorySource)
263 _mapping = m_HostMemPtr;
269 _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(
reinterpret_cast<int*
>(m_HostMemPtr)), 0);
273 throw armnn::Exception(
"ClBackend: Attempting to map imported memory without a valid source");
278 void unmap(cl::CommandQueue &q)
override
281 switch (m_MemorySource)
288 munmap(_mapping, _size);
292 throw armnn::Exception(
"ClBackend: Attempting to unmap imported memory without a valid source");
297 void* m_HostMemPtr =
nullptr;
GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr< ICustomAllocator > alloc)
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
static const BackendId & GetIdStatic()
bool m_UsingCustomAllocator
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
unsigned int MemorySourceFlags
void free(void *ptr) override
GpuFsaBackend(std::shared_ptr< ICustomAllocator > allocator)
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
std::unique_ptr< IBackendContext > IBackendContextPtr
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
#define ARMNN_LOG(severity)
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
void * map(cl::CommandQueue &q, bool blocking) override
void * allocate(size_t size, size_t alignment) override
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
std::unique_ptr< arm_compute::IMemoryRegion > make_region(size_t size, size_t alignment) override
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an O...
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
void unmap(cl::CommandQueue &q) override
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Base class for all ArmNN exceptions so that users can filter to just those.
virtual bool UseCustomMemoryAllocator(std::shared_ptr< ICustomAllocator > allocator, armnn::Optional< std::string & >) override
Signals the backend to use a custom memory allocator provided by the user.
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between the backend and backend profiling; we'll keep it in the backend namespace.
Struct for the users to pass backend specific options.
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
const BackendId & GetId() const override
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
void IgnoreUnused(Ts &&...)
MemorySource
Define the Memory Source to reduce copies.
Copyright (c) 2021 ARM Limited and Contributors.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
std::vector< BackendOptions > ModelOptions
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void *hostMemPtr, armnn::MemorySource source)
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos