// Static default-constructed (null) cl::Buffer shared by all CLTensorAllocator instances.
// NOTE(review): presumably returned by accessors when no memory is allocated — TODO confirm at call sites.
33 const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer();
45 std::unique_ptr<ICLMemoryRegion> allocate_region(CLCoreRuntimeContext *ctx,
size_t size, cl_uint alignment)
48 std::unique_ptr<ICLMemoryRegion> region = std::make_unique<CLFineSVMMemoryRegion>(ctx,
49 CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
54 if(region !=
nullptr && region->ptr() ==
nullptr)
56 region = std::make_unique<CLCoarseSVMMemoryRegion>(ctx, CL_MEM_READ_WRITE, size, alignment);
59 if(region !=
nullptr && region->ptr() ==
nullptr)
61 region = std::make_unique<CLBufferMemoryRegion>(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size);
85 clear_quantization_arrays(scale, offset);
88 const std::vector<float> &qscale = qinfo.scale();
89 const size_t num_elements = qscale.size();
90 const size_t element_size =
sizeof(std::remove_reference<decltype(qscale)>::type::value_type);
92 scale.resize(num_elements);
93 CLScheduler::get().
queue().enqueueWriteBuffer(scale.cl_buffer(), CL_TRUE, 0, num_elements * element_size, qinfo.scale().data());
95 if(!qinfo.offset().empty())
98 const std::vector<int32_t> &qoffset = qinfo.offset();
99 const size_t offset_element_size =
sizeof(std::remove_reference<decltype(qoffset)>::type::value_type);
101 offset.resize(num_elements);
102 CLScheduler::get().
queue().enqueueWriteBuffer(offset.cl_buffer(), CL_TRUE, 0, num_elements * offset_element_size, qinfo.offset().data());
108 : _ctx(ctx), _owner(owner), _associated_memory_group(nullptr), _memory(), _mapping(nullptr), _scale(), _offset()
114 return { &_scale, &_offset };
130 if(_associated_memory_group ==
nullptr)
151 const size_t pad_size = 0;
152 populate_quantization_info(_scale, _offset,
info().quantization_info(), pad_size);
163 clear_quantization_arrays(_scale, _offset);
177 _memory.
set_owned_region(std::make_unique<CLBufferMemoryRegion>(buffer, &legacy_ctx));
191 ARM_COMPUTE_ERROR_ON(_associated_memory_group !=
nullptr && _associated_memory_group != associated_memory_group);
194 _associated_memory_group = associated_memory_group;
197 uint8_t *CLTensorAllocator::lock()
209 void CLTensorAllocator::unlock()
229 _mapping =
reinterpret_cast<uint8_t *
>(_memory.
cl_region()->
map(q, blocking));
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
static CLScheduler & get()
Access the scheduler singleton.
CLQuantization quantization() const
Wrapped quantization info data accessor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
virtual void * buffer()=0
Returns the pointer to the allocated data.
CLCoreRuntimeContext * core_runtime_context()
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
OpenCL quantization data.
Copyright (c) 2017-2021 Arm Limited.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
const cl::Buffer & cl_data() const
Returns the underlying CL buffer.
void set_associated_memory_group(IMemoryGroup *associated_memory_group)
Associates the tensor with a memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
CLArray< cl_float > CLFloatArray
OpenCL Array of floats.
uint8_t * data()
Interface to be implemented by the child class to return the pointer to the mapped data...
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
uint8_t * map(cl::CommandQueue &q, bool blocking)
Enqueue a map operation of the allocated buffer on the given queue.
cl::Context & context()
Accessor for the associated CL context.
virtual void finalize_memory(IMemoryManageable *obj, IMemory &obj_memory, size_t size, size_t alignment)=0
Finalizes memory for a given object.
Core runtime context for OpenCL.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per-channel type.
void set_owned_region(std::unique_ptr< IMemoryRegion > region) final
Sets a memory region.
size_t total_size() const override
Returns the total size of the tensor in bytes.
CLTensorAllocator(IMemoryManageable *owner=nullptr, CLRuntimeContext *ctx=nullptr)
Default constructor.
IMemoryRegion * region() final
Region accessor.
size_t alignment() const
Return underlying's tensor buffer alignment.
ICLMemoryRegion * cl_region()
OpenCL Region accessor.
void unmap(cl::CommandQueue &q, uint8_t *mapping)
Enqueue an unmap operation of the allocated buffer on the given queue.
ITensorInfo & set_is_resizable(bool is_resizable) override
Set the flag whether the tensor size can be changed.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
virtual void * map(cl::CommandQueue &q, bool blocking)=0
Enqueue a map operation of the allocated buffer on the given queue.
TensorInfo & info()
Return a reference to the tensor's metadata.
void set_region(IMemoryRegion *region) final
Sets a memory region.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
CLArray< cl_int > CLInt32Array
OpenCL Array of int32s.
Interface of an object that can be memory managed.
const QuantizationInfo qinfo
const cl::Buffer & cl_data() const
Interface to be implemented by the child class to return the pointer to the CL data.
void free() override
Free allocated OpenCL memory.
virtual void unmap(cl::CommandQueue &q)=0
Enqueue an unmap operation of the allocated buffer on the given queue.
CLScheduler * gpu_scheduler()