15 #include <arm_compute/runtime/CL/CLTensor.h>
16 #include <arm_compute/runtime/CL/CLSubTensor.h>
17 #include <arm_compute/runtime/IMemoryGroup.h>
18 #include <arm_compute/runtime/MemoryGroup.h>
19 #include <arm_compute/core/TensorShape.h>
20 #include <arm_compute/core/Coordinates.h>
24 #include <CL/cl_ext.h>
25 #include <arm_compute/core/CL/CLKernelLibrary.h>
// NOTE(review): garbled extraction — the leading numbers ("34", "36", ...) are
// original-file line numbers fused into the text, and the constructor
// signatures/braces above these initialiser lists are missing from this view.
// Fragment of a constructor: records the memory-import flags and builds the
// ACL tensor descriptor from the armnn TensorInfo.
34 : m_ImportFlags(importFlags)
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
// Fragment of the overload that additionally takes a DataLayout; the imported
// flag starts false until Import() succeeds.
42 : m_ImportFlags(importFlags), m_Imported(false)
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
47 arm_compute::CLTensor&
GetTensor()
override {
return m_Tensor; }
48 arm_compute::CLTensor
const&
GetTensor()
const override {
return m_Tensor; }
52 virtual const void*
Map(
bool blocking =
true)
const override
55 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
58 virtual void Unmap()
const override {}
// Fragment: body of GetDataType() — forwards the ACL data type.
64 return m_Tensor.info()->data_type();
// SetMemoryGroup override — its body is missing from this extraction.
67 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
// Fragment: body of GetStrides() — converts ACL byte strides to armnn form.
74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// Fragment: body of GetShape().
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// Fragment: body of SetImportFlags(MemorySourceFlags importFlags).
84 m_ImportFlags = importFlags;
// NOTE(review): the enclosing function signatures, array braces and property
// list terminators are missing from this extraction.
// Host-pointer import path: property table selects CL_IMPORT_TYPE_HOST_ARM,
// then delegates to ClImport().
98 const cl_import_properties_arm importProperties[] =
101 CL_IMPORT_TYPE_HOST_ARM,
104 return ClImport(importProperties, memory);
// Dma-buf import path, requesting data consistency with the host.
108 const cl_import_properties_arm importProperties[] =
111 CL_IMPORT_TYPE_DMA_BUF_ARM,
112 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
117 return ClImport(importProperties, memory);
// Protected dma-buf import path: isProtected = true, which appears to select
// the CL_MEM_HOST_NO_ACCESS branch inside ClImport() (see lines 223-224 of
// the original numbering below).
122 const cl_import_properties_arm importProperties[] =
125 CL_IMPORT_TYPE_DMA_BUF_ARM,
126 CL_IMPORT_TYPE_PROTECTED_ARM,
131 return ClImport(importProperties, memory,
true);
// Fragment of Import(void* memory, ...): attaches externally allocated memory
// to this tensor. NOTE(review): the declaration of `status`, the surrounding
// braces and the first throw's opening line are missing from this extraction.
// First import on a tensor that owns no buffer yet:
138 if (!m_Imported && !m_Tensor.buffer())
// Wrap the raw cl_mem handle and hand it to the ACL allocator.
142 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)));
143 m_Imported = bool(status)
// Importing over a tensor that already allocated its own buffer is an error.
152 else if (!m_Imported && m_Tensor.buffer())
155 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
// Re-import after a previous successful import: replace the underlying buffer.
163 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)));
164 m_Imported = bool(status);
// Fragment of the private helper that performs the actual clImportMemoryARM
// call. NOTE(review): braces, the `buffer` declaration, the isProtected
// branch keywords and the final import_memory/status lines are missing from
// this extraction.
205 bool ClImport(
const cl_import_properties_arm* importProperties,
void* memory,
bool isProtected =
false)
207 size_t totalBytes = m_Tensor.info()->total_size();
// Round the import size up to a multiple of the device cacheline size —
// presumably a requirement of the cl_arm_import_memory extension; confirm
// against the extension spec.
211 auto cachelineAlignment =
212 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
213 auto roundedSize = totalBytes;
214 if (totalBytes % cachelineAlignment != 0)
216 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
219 cl_int error = CL_SUCCESS;
// Protected content must not be host-accessible; otherwise allow read/write.
223 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
224 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
228 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
229 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
232 if (error != CL_SUCCESS)
234 throw MemoryImportException(
"ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
// Wrap the imported cl_mem so ACL can take it over via import_memory().
237 cl::Buffer wrappedBuffer(buffer);
242 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
245 throw MemoryImportException(status.error_description());
// Fragment of CopyOutTo: copies the tensor's contents into `memory`,
// dispatching on the ACL data type. NOTE(review): the `switch` statement,
// `break;` lines and the `default:` case are missing from this extraction.
250 void CopyOutTo(
void* memory)
const override
255 case arm_compute::DataType::F32:
256 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
257 static_cast<float*
>(memory));
259 case arm_compute::DataType::U8:
260 case arm_compute::DataType::QASYMM8:
261 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
262 static_cast<uint8_t*
>(memory));
264 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
265 case arm_compute::DataType::QASYMM8_SIGNED:
266 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
267 static_cast<int8_t*
>(memory));
// NOTE(review): the F16 case's cast target (original lines 271-272,
// presumably static_cast<armnn::Half*>(memory)) is missing from this view.
269 case arm_compute::DataType::F16:
270 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
273 case arm_compute::DataType::S16:
274 case arm_compute::DataType::QSYMM16:
275 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
276 static_cast<int16_t*
>(memory));
278 case arm_compute::DataType::S32:
279 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
280 static_cast<int32_t*
>(memory));
// Fragment of CopyInFrom: copies data from `memory` into the tensor,
// dispatching on the ACL data type. NOTE(review): the `switch`, `break;`
// lines, the destination-tensor arguments and the `default:` case are
// missing from this extraction.
291 void CopyInFrom(
const void* memory)
override
296 case arm_compute::DataType::F32:
297 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
300 case arm_compute::DataType::U8:
301 case arm_compute::DataType::QASYMM8:
302 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
305 case arm_compute::DataType::F16:
306 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
// NOTE(review): S16 appears to fall into the int8_t copy path here, while
// the QSYMM16 case below uses int16_t — this looks like a bug (S16 is a
// 16-bit type); verify against the original file before changing, since
// intermediate lines are missing from this extraction.
309 case arm_compute::DataType::S16:
310 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
311 case arm_compute::DataType::QASYMM8_SIGNED:
312 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
315 case arm_compute::DataType::QSYMM16:
316 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
319 case arm_compute::DataType::S32:
320 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
// The ACL tensor this handle owns and imports memory into.
331 arm_compute::CLTensor m_Tensor;
// Fragment of the sub-tensor handle constructor: builds a CLSubTensor that
// views a region (shape at coords) of the parent handle's tensor, and keeps
// a non-owning pointer back to the parent. NOTE(review): the signature's
// first line, the coords parameter line and braces are missing from this
// extraction.
340 const arm_compute::TensorShape& shape,
342 : m_Tensor(&parent->
GetTensor(), shape, coords)
344 parentHandle = parent;
347 arm_compute::CLSubTensor&
GetTensor()
override {
return m_Tensor; }
348 arm_compute::CLSubTensor
const&
GetTensor()
const override {
return m_Tensor; }
353 virtual const void*
Map(
bool blocking =
true)
const override
356 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
358 virtual void Unmap()
const override {}
// Fragment: body of GetDataType() — forwards the ACL data type of the view.
364 return m_Tensor.info()->data_type();
// SetMemoryGroup override — its body is missing from this extraction.
367 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
// Fragment: body of GetStrides() — converts ACL byte strides to armnn form.
374 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// Fragment: body of GetShape().
379 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// Fragment of the sub-tensor CopyOutTo: copies the view's contents into
// `memory`, dispatching on the ACL data type. NOTE(review): the `switch`,
// `break;` lines and the `default:` case are missing from this extraction.
384 void CopyOutTo(
void* memory)
const override
389 case arm_compute::DataType::F32:
390 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
391 static_cast<float*
>(memory));
393 case arm_compute::DataType::U8:
394 case arm_compute::DataType::QASYMM8:
395 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
396 static_cast<uint8_t*
>(memory));
// NOTE(review): the F16 case's cast target (original lines 400-401,
// presumably static_cast<armnn::Half*>(memory)) is missing from this view.
398 case arm_compute::DataType::F16:
399 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
402 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
403 case arm_compute::DataType::QASYMM8_SIGNED:
404 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
405 static_cast<int8_t*
>(memory));
407 case arm_compute::DataType::S16:
408 case arm_compute::DataType::QSYMM16:
409 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
410 static_cast<int16_t*
>(memory));
412 case arm_compute::DataType::S32:
413 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
414 static_cast<int32_t*
>(memory));
// Fragment of the sub-tensor CopyInFrom: copies data from `memory` into the
// view, dispatching on the ACL data type. NOTE(review): the `switch`,
// `break;` lines, the destination-tensor arguments and the `default:` case
// are missing from this extraction.
425 void CopyInFrom(
const void* memory)
override
430 case arm_compute::DataType::F32:
431 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
434 case arm_compute::DataType::U8:
435 case arm_compute::DataType::QASYMM8:
436 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
439 case arm_compute::DataType::F16:
440 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
443 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
444 case arm_compute::DataType::QASYMM8_SIGNED:
445 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
448 case arm_compute::DataType::S16:
449 case arm_compute::DataType::QSYMM16:
450 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
453 case arm_compute::DataType::S32:
454 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
// mutable: the CLSubTensor must be mutated even through the const Map() path.
465 mutable arm_compute::CLSubTensor m_Tensor;
// Non-owning pointer to the parent tensor handle this view was built from.
466 ITensorHandle* parentHandle =
nullptr;