24.08
|
Go to the documentation of this file.
10 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
11 #include <arm_compute/runtime/CL/CLMemoryRegion.h>
12 #include <arm_compute/core/CL/CLKernelLibrary.h>
13 #include <CL/cl_ext.h>
14 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
15 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
34 std::unique_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch>
sketch =
nullptr;
35 std::shared_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadContext>
workloadContext =
nullptr;
44 {
"NonConstWeights",
false},
45 {
"AsyncExecution",
false},
46 {
"ProtectedContentAllocation",
false},
47 {
"ConstantTensorsAsInputs",
true},
48 {
"PreImportIOTensors",
false},
49 {
"ExternallyManagedMemory",
false},
50 {
"MultiAxisPacking",
false},
51 {
"SingleAxisPacking",
false}
108 ARMNN_LOG(
info) <<
"Using Custom Allocator for GpuFsaBackend";
111 m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
123 void*
allocate(
size_t size,
size_t alignment)
override
125 auto alloc = m_CustomAllocator->allocate(size, alignment);
126 return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
130 auto hostMemPtr = m_AllocatedBufferMappings[ptr];
131 clReleaseMemObject(
static_cast<cl_mem
>(ptr));
132 m_CustomAllocator->free(hostMemPtr);
134 std::unique_ptr<arm_compute::IMemoryRegion>
make_region(
size_t size,
size_t alignment)
override
136 auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
137 cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
139 return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
141 m_CustomAllocator->GetMemorySourceType());
144 cl_mem MapAllocatedMemory(
void* memory,
size_t size,
MemorySource source)
147 auto cachelineAlignment =
148 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
149 auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
153 const cl_import_properties_arm importProperties[] =
156 CL_IMPORT_TYPE_HOST_ARM,
159 cl_int
error = CL_SUCCESS;
160 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
166 if (error == CL_SUCCESS)
168 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
172 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
176 const cl_import_properties_arm importProperties[] =
179 CL_IMPORT_TYPE_DMA_BUF_ARM,
180 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
184 cl_int
error = CL_SUCCESS;
185 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
191 if (error == CL_SUCCESS)
193 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
197 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
198 + std::to_string(error));
202 const cl_import_properties_arm importProperties[] =
205 CL_IMPORT_TYPE_DMA_BUF_ARM,
206 CL_IMPORT_TYPE_PROTECTED_ARM,
210 cl_int
error = CL_SUCCESS;
211 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
217 if (error == CL_SUCCESS)
219 m_AllocatedBufferMappings.insert(std::make_pair(
static_cast<void *
>(buffer), memory));
223 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
224 + std::to_string(error));
227 "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
229 std::shared_ptr<ICustomAllocator> m_CustomAllocator;
230 std::map<void*, void*> m_AllocatedBufferMappings;
238 : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
241 m_HostMemPtr = hostMemPtr;
242 m_MemorySource = source;
251 void*
map(cl::CommandQueue &q,
bool blocking)
override
254 if (m_HostMemPtr ==
nullptr)
256 throw armnn::Exception(
"ClBackend: Attempting to map memory with an invalid host ptr");
258 if (_mapping !=
nullptr)
260 throw armnn::Exception(
"ClBackend: Attempting to map memory which has not yet been unmapped");
262 switch (m_MemorySource)
265 _mapping = m_HostMemPtr;
271 _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(
reinterpret_cast<int*
>(m_HostMemPtr)), 0);
275 throw armnn::Exception(
"ClBackend: Attempting to map imported memory without a valid source");
280 void unmap(cl::CommandQueue &q)
override
283 switch (m_MemorySource)
290 munmap(_mapping, _size);
294 throw armnn::Exception(
"ClBackend: Attempting to unmap imported memory without a valid source");
299 void* m_HostMemPtr =
nullptr;
GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr< ICustomAllocator > alloc)
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFactory::CreateTensor()/IWorkloadFactory::CreateSubTensorHandle() must be implemented.
static const BackendId & GetIdStatic()
bool m_UsingCustomAllocator
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Custom Allocator interface.
unsigned int MemorySourceFlags
void free(void *ptr) override
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
std::unique_ptr< IBackendContext > IBackendContextPtr
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
#define ARMNN_LOG(severity)
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
void * map(cl::CommandQueue &q, bool blocking) override
void * allocate(size_t size, size_t alignment) override
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
std::unique_ptr< arm_compute::IMemoryRegion > make_region(size_t size, size_t alignment) override
EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional in a function declaration.
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
void unmap(cl::CommandQueue &q) override
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Base class for all ArmNN exceptions so that users can filter to just those.
virtual bool UseCustomMemoryAllocator(std::shared_ptr< ICustomAllocator > allocator, armnn::Optional< std::string & >) override
Signals the backend to use a custom memory allocator provided by the user.
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported.
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace.
Struct for the users to pass backend specific options.
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
#define ARMNN_NO_DEPRECATE_WARN_END
const BackendId & GetId() const override
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
void IgnoreUnused(Ts &&...)
#define ARMNN_DEPRECATED_MSG_REMOVAL_DATE(message, removed_in_release)
MemorySource
Define the Memory Source to reduce copies.
Copyright (c) 2021 ARM Limited and Contributors.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
std::vector< BackendOptions > ModelOptions
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void *hostMemPtr, armnn::MemorySource source)
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos