33 const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
37 static IAllocator *static_global_cl_allocator =
nullptr;
47 std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx,
size_t size, cl_uint alignment)
50 std::unique_ptr<ICLMemoryRegion> region = std::make_unique<CLFineSVMMemoryRegion>(ctx,
51 CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
56 if(region !=
nullptr && region->ptr() ==
nullptr)
58 region = std::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
61 if(region !=
nullptr && region->ptr() ==
nullptr)
63 region = std::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
90 const std::vector<float> &qscale =
qinfo.
scale();
91 const size_t num_elements = qscale.size();
92 const size_t element_size =
sizeof(std::remove_reference<decltype(qscale)>::type::value_type);
94 scale.resize(num_elements);
100 const std::vector<int32_t> &qoffset =
qinfo.
offset();
101 const size_t offset_element_size =
sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
103 offset.resize(num_elements);
110 : _ctx(ctx), _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset()
116 return { &_scale, &_offset };
132 if(_associated_memory_group ==
nullptr)
135 if(static_global_cl_allocator !=
nullptr)
139 else if(_ctx ==
nullptr)
158 const size_t pad_size = 0;
159 populate_quantization_info(_scale, _offset,
info().quantization_info(), pad_size);
170 clear_quantization_arrays(_scale, _offset);
184 _memory.
set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
198 ARM_COMPUTE_ERROR_ON(_associated_memory_group !=
nullptr && _associated_memory_group != associated_memory_group);
201 _associated_memory_group = associated_memory_group;
209 uint8_t *CLTensorAllocator::lock()
221 void CLTensorAllocator::unlock()
241 _mapping = reinterpret_cast<uint8_t *>(_memory.
cl_region()->
map(q, blocking));
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
const std::vector< int32_t > & offset() const
Offset vector accessor.
static void set_global_allocator(IAllocator *allocator)
Sets global allocator that will be used by all CLTensor objects.
static CLScheduler & get()
Access the scheduler singleton.
CLQuantization quantization() const
Wrapped quantization info data accessor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
virtual void * buffer()=0
Returns the pointer to the allocated data.
CLCoreRuntimeContext * core_runtime_context()
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
OpenCL quantization data.
Copyright (c) 2017-2021 Arm Limited.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
const cl::Buffer & cl_data() const
Returns the underlying CL buffer.
void set_associated_memory_group(IMemoryGroup *associated_memory_group)
Associates the tensor with a memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
CLArray< cl_float > CLFloatArray
OpenCL Array of floats.
uint8_t * data()
Interface to be implemented by the child class to return the pointer to the mapped data.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
uint8_t * map(cl::CommandQueue &q, bool blocking)
Enqueue a map operation of the allocated buffer on the given queue.
cl::Context & context()
Accessor for the associated CL context.
virtual void finalize_memory(IMemoryManageable *obj, IMemory &obj_memory, size_t size, size_t alignment)=0
Finalizes memory for a given object.
Core runtime context for OpenCL.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
void set_owned_region(std::unique_ptr< IMemoryRegion > region) final
Sets a memory region.
size_t total_size() const override
Returns the total size of the tensor in bytes.
CLTensorAllocator(IMemoryManageable *owner=nullptr, CLRuntimeContext *ctx=nullptr)
Default constructor.
IMemoryRegion * region() final
Region accessor.
size_t alignment() const
Return underlying's tensor buffer alignment.
const std::vector< float > & scale() const
Scale vector accessor.
ICLMemoryRegion * cl_region()
OpenCL Region accessor.
void unmap(cl::CommandQueue &q, uint8_t *mapping)
Enqueue an unmap operation of the allocated buffer on the given queue.
ITensorInfo & set_is_resizable(bool is_resizable) override
Set the flag whether the tensor size can be changed.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
input allocator() -> allocate()
virtual void * map(cl::CommandQueue &q, bool blocking)=0
Enqueue a map operation of the allocated buffer on the given queue.
TensorInfo & info()
Return a reference to the tensor's metadata.
void set_region(IMemoryRegion *region) final
Sets a memory region.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
CLArray< cl_int > CLInt32Array
OpenCL Array of int32s.
Interface of an object that can be memory managed.
const QuantizationInfo qinfo
const cl::Buffer & cl_data() const
Interface to be implemented by the child class to return the pointer to the CL data.
void free() override
Free allocated OpenCL memory.
virtual void unmap(cl::CommandQueue &q)=0
Enqueue an unmap operation of the allocated buffer on the given queue.
CLScheduler * gpu_scheduler()