15 #include <arm_compute/runtime/CL/CLTensor.h>
16 #include <arm_compute/runtime/CL/CLSubTensor.h>
17 #include <arm_compute/runtime/IMemoryGroup.h>
18 #include <arm_compute/runtime/MemoryGroup.h>
19 #include <arm_compute/core/TensorShape.h>
20 #include <arm_compute/core/Coordinates.h>
24 #include <CL/cl_ext.h>
25 #include <arm_compute/core/CL/CLKernelLibrary.h>
34 : m_ImportFlags(importFlags)
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
42 : m_ImportFlags(importFlags), m_Imported(false)
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
47 arm_compute::CLTensor&
GetTensor()
override {
return m_Tensor; }
48 arm_compute::CLTensor
const&
GetTensor()
const override {
return m_Tensor; }
52 virtual const void*
Map(
bool blocking =
true)
const override
55 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
58 virtual void Unmap()
const override {}
64 return m_Tensor.info()->data_type();
67 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
84 m_ImportFlags = importFlags;
98 const cl_import_properties_arm importProperties[] =
101 CL_IMPORT_TYPE_HOST_ARM,
104 return ClImport(importProperties, memory);
108 const cl_import_properties_arm importProperties[] =
111 CL_IMPORT_TYPE_DMA_BUF_ARM,
112 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
117 return ClImport(importProperties, memory);
122 const cl_import_properties_arm importProperties[] =
125 CL_IMPORT_TYPE_DMA_BUF_ARM,
126 CL_IMPORT_TYPE_PROTECTED_ARM,
131 return ClImport(importProperties, memory,
true);
138 if (!m_Imported && !m_Tensor.buffer())
142 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)));
143 m_Imported = bool(status);
152 else if (!m_Imported && m_Tensor.buffer())
155 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
163 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)));
164 m_Imported = bool(status);
205 bool ClImport(
const cl_import_properties_arm* importProperties,
void* memory,
bool isProtected =
false)
207 size_t totalBytes = m_Tensor.info()->total_size();
211 auto cachelineAlignment =
212 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
213 auto roundedSize = totalBytes;
214 if (totalBytes % cachelineAlignment != 0)
216 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
219 cl_int error = CL_SUCCESS;
223 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
224 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
228 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
229 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
232 if (error != CL_SUCCESS)
234 throw MemoryImportException(
"ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
237 cl::Buffer wrappedBuffer(buffer);
242 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
245 throw MemoryImportException(status.error_description());
250 void CopyOutTo(
void* memory)
const override
255 case arm_compute::DataType::F32:
256 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
257 static_cast<float*
>(memory));
259 case arm_compute::DataType::U8:
260 case arm_compute::DataType::QASYMM8:
261 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
262 static_cast<uint8_t*
>(memory));
264 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
265 case arm_compute::DataType::QASYMM8_SIGNED:
266 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
267 static_cast<int8_t*
>(memory));
269 case arm_compute::DataType::F16:
270 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
273 case arm_compute::DataType::S16:
274 case arm_compute::DataType::QSYMM16:
275 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
276 static_cast<int16_t*
>(memory));
278 case arm_compute::DataType::S32:
279 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
280 static_cast<int32_t*
>(memory));
291 void CopyInFrom(
const void* memory)
override
296 case arm_compute::DataType::F32:
297 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
300 case arm_compute::DataType::U8:
301 case arm_compute::DataType::QASYMM8:
302 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
305 case arm_compute::DataType::F16:
306 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
309 case arm_compute::DataType::S16:
310 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
311 case arm_compute::DataType::QASYMM8_SIGNED:
312 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
315 case arm_compute::DataType::QSYMM16:
316 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
319 case arm_compute::DataType::S32:
320 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
331 arm_compute::CLTensor m_Tensor;
340 const arm_compute::TensorShape& shape,
342 : m_Tensor(&parent->
GetTensor(), shape, coords)
344 parentHandle = parent;
347 arm_compute::CLSubTensor&
GetTensor()
override {
return m_Tensor; }
348 arm_compute::CLSubTensor
const&
GetTensor()
const override {
return m_Tensor; }
353 virtual const void*
Map(
bool blocking =
true)
const override
356 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
358 virtual void Unmap()
const override {}
364 return m_Tensor.info()->data_type();
367 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
374 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
379 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
384 void CopyOutTo(
void* memory)
const override
389 case arm_compute::DataType::F32:
390 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
391 static_cast<float*
>(memory));
393 case arm_compute::DataType::U8:
394 case arm_compute::DataType::QASYMM8:
395 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
396 static_cast<uint8_t*
>(memory));
398 case arm_compute::DataType::F16:
399 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
402 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
403 case arm_compute::DataType::QASYMM8_SIGNED:
404 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
405 static_cast<int8_t*
>(memory));
407 case arm_compute::DataType::S16:
408 case arm_compute::DataType::QSYMM16:
409 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
410 static_cast<int16_t*
>(memory));
412 case arm_compute::DataType::S32:
413 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
414 static_cast<int32_t*
>(memory));
425 void CopyInFrom(
const void* memory)
override
430 case arm_compute::DataType::F32:
431 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
434 case arm_compute::DataType::U8:
435 case arm_compute::DataType::QASYMM8:
436 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
439 case arm_compute::DataType::F16:
440 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
443 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
444 case arm_compute::DataType::QASYMM8_SIGNED:
445 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
448 case arm_compute::DataType::S16:
449 case arm_compute::DataType::QSYMM16:
450 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
453 case arm_compute::DataType::S32:
454 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
465 mutable arm_compute::CLSubTensor m_Tensor;
466 ITensorHandle* parentHandle =
nullptr;
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void Unmap() const override
Unmap the tensor data.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
arm_compute::CLSubTensor const & GetTensor() const override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
arm_compute::CLSubTensor & GetTensor() override
virtual arm_compute::DataType GetDataType() const override
ClImportSubTensorHandle(IClTensorHandle *parent, const arm_compute::TensorShape &shape, const arm_compute::Coordinates &coords)
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual void Manage() override
Indicate to the memory manager that this resource is active.
ClImportTensorHandle(const TensorInfo &tensorInfo, MemorySourceFlags importFlags)
ClImportTensorHandle(const TensorInfo &tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags)
virtual bool Import(void *memory, MemorySource source) override
Import externally allocated memory.
virtual void Unmap() const override
Unmap the tensor data.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
MemorySourceFlags GetImportFlags() const override
Get flags describing supported import sources.
arm_compute::CLTensor & GetTensor() override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
arm_compute::CLTensor const & GetTensor() const override
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
void SetImportFlags(MemorySourceFlags importFlags)
virtual arm_compute::DataType GetDataType() const override
virtual bool CanBeImported(void *, MemorySource source) override
Implementations must determine if this memory block can be imported.
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
Copyright (c) 2021 ARM Limited and Contributors.
MemorySource
Define the Memory Source to reduce copies.
void IgnoreUnused(Ts &&...)
unsigned int MemorySourceFlags
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates