15 #include <arm_compute/runtime/CL/CLTensor.h>
16 #include <arm_compute/runtime/CL/CLSubTensor.h>
17 #include <arm_compute/runtime/IMemoryGroup.h>
18 #include <arm_compute/runtime/MemoryGroup.h>
19 #include <arm_compute/core/TensorShape.h>
20 #include <arm_compute/core/Coordinates.h>
24 #include <CL/cl_ext.h>
25 #include <arm_compute/core/CL/CLKernelLibrary.h>
// NOTE(review): this extraction is garbled -- constructor signatures and braces
// are missing from this view; only initializer-list and body fragments remain.
// Fragment of ctor(tensorInfo, importFlags): records which memory sources may
// later be imported into this handle.
34 : m_ImportFlags(importFlags)
// Configure the ACL tensor metadata from the armnn TensorInfo (metadata only;
// no device allocation is performed here).
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
// Fragment of ctor(tensorInfo, dataLayout, importFlags): additionally resets
// the imported-state flag.
42 : m_ImportFlags(importFlags), m_Imported(false)
// Same as above but also applies the requested data layout.
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
47 arm_compute::CLTensor&
GetTensor()
override {
return m_Tensor; }
48 arm_compute::CLTensor
const&
GetTensor()
const override {
return m_Tensor; }
// Returns a read-only host pointer to the first tensor element.
// NOTE(review): the function's braces (and possibly an IgnoreUnused(blocking)
// statement) are missing from this extraction -- confirm against the full file.
52 virtual const void*
Map(
bool blocking =
true)
const override
// Pointer arithmetic skips any header/padding before the first element.
55 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
58 virtual void Unmap()
const override {}
// Fragment of GetDataType(): forwards the ACL tensor's element data type.
64 return m_Tensor.info()->data_type();
// SetMemoryGroup(): body not visible in this extraction. Imported handles
// typically ignore memory groups -- TODO confirm against the full file.
67 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
// Fragment of GetStrides(): byte strides taken from the ACL tensor info.
74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// Fragment of GetShape(): tensor shape taken from the ACL tensor info.
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// Fragment of SetImportFlags(): records the permitted import memory sources.
84 m_ImportFlags = importFlags;
// Host (malloc'd) memory import path: builds a cl_arm_import_memory property
// list. NOTE(review): array braces/terminator entries are missing from this
// extraction -- the property list is normally zero-terminated; confirm.
98 const cl_import_properties_arm importProperties[] =
101 CL_IMPORT_TYPE_HOST_ARM,
104 return ClImport(importProperties, memory);
// Dma-buf import path, requesting data consistency with the host.
108 const cl_import_properties_arm importProperties[] =
111 CL_IMPORT_TYPE_DMA_BUF_ARM,
112 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
117 return ClImport(importProperties, memory);
// Protected dma-buf import path: forwards isProtected = true so ClImport
// creates the buffer with CL_MEM_HOST_NO_ACCESS (see ClImport below).
122 const cl_import_properties_arm importProperties[] =
125 CL_IMPORT_TYPE_DMA_BUF_ARM,
126 CL_IMPORT_TYPE_PROTECTED_ARM,
131 return ClImport(importProperties, memory,
true);
// Fragment of Import(memory, source): branch structure partially visible.
// Path 1: first import onto a not-yet-allocated tensor -- wrap the caller's
// raw cl_mem in a cl::Buffer and hand it to the ACL allocator (no data copy).
138 if (!m_Imported && !m_Tensor.buffer())
142 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)))
143 m_Imported = bool(status);
// Path 2: importing over an already-allocated tensor is rejected.
// NOTE(review): the throw statement's opening line is missing from this view;
// only its message string remains.
152 else if (!m_Imported && m_Tensor.buffer())
155 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
// Path 3 (presumably a re-import after a prior successful import -- confirm):
// repeat the import call and refresh the imported flag.
163 m_Tensor.allocator()->import_memory(cl::Buffer(
static_cast<cl_mem
>(memory)))
164 m_Imported = bool(status);
// Shared import helper: wraps the caller's memory via clImportMemoryARM and
// attaches the resulting cl_mem to the ACL tensor allocator.
// NOTE(review): braces and several lines (cl_mem buffer declaration, the
// import_memory call assigning 'status', the success-path return) are missing
// from this extraction.
205 bool ClImport(
const cl_import_properties_arm* importProperties,
void* memory,
bool isProtected =
false)
207 size_t totalBytes = m_Tensor.info()->total_size();
// Query the device's global-memory cacheline size...
211 auto cachelineAlignment =
212 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
// ...and round the import size UP to the next cacheline multiple (presumably
// required by the cl_arm_import_memory extension -- confirm against the spec).
213 auto roundedSize = totalBytes;
214 if (totalBytes % cachelineAlignment != 0)
216 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
219 cl_int error = CL_SUCCESS;
// Protected content must not be host-accessible; otherwise allow read/write.
223 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
224 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
228 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
229 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
// Fail fast with the OpenCL error code if the import was rejected.
232 if (error != CL_SUCCESS)
234 throw MemoryImportException(
"ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
237 cl::Buffer wrappedBuffer(buffer);
// 'status' comes from an import_memory call on a line not visible here.
242 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
245 throw MemoryImportException(status.error_description());
// Copies the tensor's contents out to host memory, dispatching on the ACL
// element type so the copy is done with the correctly-typed pointer.
// NOTE(review): the switch header, 'break;' lines, the F16 cast argument and
// the default case are missing from this extraction -- confirm fall-through
// behavior against the full file.
252 void CopyOutTo(
void* memory)
const override
257 case arm_compute::DataType::F32:
258 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
259 static_cast<float*
>(memory));
261 case arm_compute::DataType::U8:
262 case arm_compute::DataType::QASYMM8:
263 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
264 static_cast<uint8_t*
>(memory));
266 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
267 case arm_compute::DataType::QASYMM8_SIGNED:
268 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
269 static_cast<int8_t*
>(memory));
271 case arm_compute::DataType::F16:
272 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
275 case arm_compute::DataType::S16:
276 case arm_compute::DataType::QSYMM16:
277 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
278 static_cast<int16_t*
>(memory));
280 case arm_compute::DataType::S32:
281 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
282 static_cast<int32_t*
>(memory));
// Copies host memory into the tensor, dispatching on the ACL element type.
// NOTE(review): switch header, destination-tensor arguments, 'break;' lines
// and the default case are missing from this extraction. The S16 label
// sharing the int8_t copy with the QSYMM8 labels looks suspicious -- verify
// against the full file before relying on it.
293 void CopyInFrom(
const void* memory)
override
298 case arm_compute::DataType::F32:
299 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
302 case arm_compute::DataType::U8:
303 case arm_compute::DataType::QASYMM8:
304 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
307 case arm_compute::DataType::F16:
308 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
311 case arm_compute::DataType::S16:
312 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
313 case arm_compute::DataType::QASYMM8_SIGNED:
314 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
317 case arm_compute::DataType::QSYMM16:
318 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
321 case arm_compute::DataType::S32:
322 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
// The ACL tensor this handle owns; memory is imported, not allocated by armnn.
333 arm_compute::CLTensor m_Tensor;
// Fragment of the sub-tensor handle constructor: builds a CLSubTensor view
// over the parent handle's tensor at the given shape/coordinates (no copy),
// and remembers the parent for lifetime/parenting queries.
// NOTE(review): the constructor signature's first lines are missing here.
342 const arm_compute::TensorShape& shape,
344 : m_Tensor(&parent->
GetTensor(), shape, coords)
346 parentHandle = parent;
349 arm_compute::CLSubTensor&
GetTensor()
override {
return m_Tensor; }
350 arm_compute::CLSubTensor
const&
GetTensor()
const override {
return m_Tensor; }
// Returns a read-only host pointer to the sub-tensor's first element.
// NOTE(review): braces (and possibly an IgnoreUnused(blocking) statement)
// are missing from this extraction -- confirm against the full file.
355 virtual const void*
Map(
bool blocking =
true)
const override
// Offset accounts for the sub-tensor's position within the parent buffer.
358 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
360 virtual void Unmap()
const override {}
// Fragment of GetDataType(): element type from the sub-tensor's info.
366 return m_Tensor.info()->data_type();
// SetMemoryGroup(): body not visible in this extraction. Sub-tensors do not
// own memory, so presumably a no-op -- TODO confirm against the full file.
369 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override
// Fragment of GetStrides(): byte strides from the sub-tensor's info.
376 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// Fragment of GetShape(): shape from the sub-tensor's info.
381 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// Copies the sub-tensor's contents out to host memory, dispatching on the
// ACL element type for a correctly-typed copy.
// NOTE(review): the switch header, 'break;' lines, the F16 cast argument and
// the default case are missing from this extraction.
386 void CopyOutTo(
void* memory)
const override
391 case arm_compute::DataType::F32:
392 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
393 static_cast<float*
>(memory));
395 case arm_compute::DataType::U8:
396 case arm_compute::DataType::QASYMM8:
397 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
398 static_cast<uint8_t*
>(memory));
400 case arm_compute::DataType::F16:
401 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
404 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
405 case arm_compute::DataType::QASYMM8_SIGNED:
406 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
407 static_cast<int8_t*
>(memory));
409 case arm_compute::DataType::S16:
410 case arm_compute::DataType::QSYMM16:
411 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
412 static_cast<int16_t*
>(memory));
414 case arm_compute::DataType::S32:
415 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
416 static_cast<int32_t*
>(memory));
// Copies host memory into the sub-tensor, dispatching on the ACL element
// type. NOTE(review): switch header, destination-tensor arguments, 'break;'
// lines and the default case are missing from this extraction.
427 void CopyInFrom(
const void* memory)
override
432 case arm_compute::DataType::F32:
433 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const float*
>(memory),
436 case arm_compute::DataType::U8:
437 case arm_compute::DataType::QASYMM8:
438 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const uint8_t*
>(memory),
441 case arm_compute::DataType::F16:
442 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const armnn::Half*
>(memory),
445 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
446 case arm_compute::DataType::QASYMM8_SIGNED:
447 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int8_t*
>(memory),
450 case arm_compute::DataType::S16:
451 case arm_compute::DataType::QSYMM16:
452 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int16_t*
>(memory),
455 case arm_compute::DataType::S32:
456 armcomputetensorutils::CopyArmComputeITensorData(
static_cast<const int32_t*
>(memory),
// Mutable: ACL map/copy operations are non-const on the sub-tensor even
// when this handle is used through const methods.
467 mutable arm_compute::CLSubTensor m_Tensor;
// Non-owning pointer back to the parent tensor handle this view slices.
468 ITensorHandle* parentHandle =
nullptr;