ArmNN
 24.08
ClImportTensorHandle.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
10 
11 #include <Half.hpp>
12 
14 
15 #include <arm_compute/runtime/CL/CLTensor.h>
16 #include <arm_compute/runtime/CL/CLSubTensor.h>
17 #include <arm_compute/runtime/IMemoryGroup.h>
18 #include <arm_compute/runtime/MemoryGroup.h>
19 #include <arm_compute/core/TensorShape.h>
20 #include <arm_compute/core/Coordinates.h>
21 
23 
24 #include <CL/cl_ext.h>
25 #include <arm_compute/core/CL/CLKernelLibrary.h>
26 
27 namespace armnn
28 {
29 
31 {
32 public:
33  ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
34  : m_ImportFlags(importFlags)
35  {
36  armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
37  }
38 
39  ClImportTensorHandle(const TensorInfo& tensorInfo,
40  DataLayout dataLayout,
41  MemorySourceFlags importFlags)
42  : m_ImportFlags(importFlags), m_Imported(false)
43  {
44  armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
45  }
46 
47  arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
48  arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
49  virtual void Allocate() override {}
50  virtual void Manage() override {}
51 
52  virtual const void* Map(bool blocking = true) const override
53  {
54  IgnoreUnused(blocking);
55  return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
56  }
57 
58  virtual void Unmap() const override {}
59 
60  virtual ITensorHandle* GetParent() const override { return nullptr; }
61 
62  virtual arm_compute::DataType GetDataType() const override
63  {
64  return m_Tensor.info()->data_type();
65  }
66 
67  virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
68  {
69  IgnoreUnused(memoryGroup);
70  }
71 
72  TensorShape GetStrides() const override
73  {
74  return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
75  }
76 
77  TensorShape GetShape() const override
78  {
79  return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
80  }
81 
83  {
84  m_ImportFlags = importFlags;
85  }
86 
88  {
89  return m_ImportFlags;
90  }
91 
92  virtual bool Import(void* memory, MemorySource source) override
93  {
94  if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
95  {
96  if (source == MemorySource::Malloc)
97  {
98  const cl_import_properties_arm importProperties[] =
99  {
100  CL_IMPORT_TYPE_ARM,
101  CL_IMPORT_TYPE_HOST_ARM,
102  0
103  };
104  return ClImport(importProperties, memory);
105  }
106  if (source == MemorySource::DmaBuf)
107  {
108  const cl_import_properties_arm importProperties[] =
109  {
110  CL_IMPORT_TYPE_ARM,
111  CL_IMPORT_TYPE_DMA_BUF_ARM,
112  CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
113  CL_TRUE,
114  0
115  };
116 
117  return ClImport(importProperties, memory);
118 
119  }
120  if (source == MemorySource::DmaBufProtected)
121  {
122  const cl_import_properties_arm importProperties[] =
123  {
124  CL_IMPORT_TYPE_ARM,
125  CL_IMPORT_TYPE_DMA_BUF_ARM,
126  CL_IMPORT_TYPE_PROTECTED_ARM,
127  CL_TRUE,
128  0
129  };
130 
131  return ClImport(importProperties, memory, true);
132 
133  }
134  // Case for importing memory allocated by OpenCl externally directly into the tensor
135  else if (source == MemorySource::Gralloc)
136  {
137  // m_Tensor not yet Allocated
138  if (!m_Imported && !m_Tensor.buffer())
139  {
140  // Importing memory allocated by OpenCl into the tensor directly.
141  arm_compute::Status status =
142  m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
143  m_Imported = bool(status);
144  if (!m_Imported)
145  {
146  throw MemoryImportException(status.error_description());
147  }
148  return m_Imported;
149  }
150 
151  // m_Tensor.buffer() initially allocated with Allocate().
152  else if (!m_Imported && m_Tensor.buffer())
153  {
154  throw MemoryImportException(
155  "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
156  }
157 
158  // m_Tensor.buffer() previously imported.
159  else if (m_Imported)
160  {
161  // Importing memory allocated by OpenCl into the tensor directly.
162  arm_compute::Status status =
163  m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
164  m_Imported = bool(status);
165  if (!m_Imported)
166  {
167  throw MemoryImportException(status.error_description());
168  }
169  return m_Imported;
170  }
171  else
172  {
173  throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
174  }
175  }
176  else
177  {
178  throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
179  }
180  }
181  else
182  {
183  throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
184  }
185  }
186 
187  virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
188  {
189  if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
190  {
191  if (source == MemorySource::Malloc)
192  {
193  // Returning true as ClImport() function will decide if memory can be imported or not
194  return true;
195  }
196  }
197  else
198  {
199  throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
200  }
201  return false;
202  }
203 
204 private:
205  bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
206  {
207  size_t totalBytes = m_Tensor.info()->total_size();
208 
209  // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
210  // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
211  auto cachelineAlignment =
212  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
213  auto roundedSize = totalBytes;
214  if (totalBytes % cachelineAlignment != 0)
215  {
216  roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
217  }
218 
219  cl_int error = CL_SUCCESS;
220  cl_mem buffer;
221  if (isProtected)
222  {
223  buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
224  CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
225  }
226  else
227  {
228  buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
229  CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
230  }
231 
232  if (error != CL_SUCCESS)
233  {
234  throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
235  }
236 
237  cl::Buffer wrappedBuffer(buffer);
238  arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);
239 
240  // Use the overloaded bool operator of Status to check if it is success, if not throw an exception
241  // with the Status error message
242  bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
243  if (!imported)
244  {
245  throw MemoryImportException(status.error_description());
246  }
247  return imported;
248  }
249  // Only used for testing
250  void CopyOutTo(void* memory) const override
251  {
252  const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
253  switch(this->GetDataType())
254  {
255  case arm_compute::DataType::F32:
256  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
257  static_cast<float*>(memory));
258  break;
259  case arm_compute::DataType::U8:
260  case arm_compute::DataType::QASYMM8:
261  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
262  static_cast<uint8_t*>(memory));
263  break;
264  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
265  case arm_compute::DataType::QASYMM8_SIGNED:
266  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
267  static_cast<int8_t*>(memory));
268  break;
269  case arm_compute::DataType::F16:
270  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
271  static_cast<armnn::Half*>(memory));
272  break;
273  case arm_compute::DataType::S16:
274  case arm_compute::DataType::QSYMM16:
275  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
276  static_cast<int16_t*>(memory));
277  break;
278  case arm_compute::DataType::S32:
279  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
280  static_cast<int32_t*>(memory));
281  break;
282  default:
283  {
285  }
286  }
287  const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
288  }
289 
290  // Only used for testing
291  void CopyInFrom(const void* memory) override
292  {
293  this->Map(true);
294  switch(this->GetDataType())
295  {
296  case arm_compute::DataType::F32:
297  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
298  this->GetTensor());
299  break;
300  case arm_compute::DataType::U8:
301  case arm_compute::DataType::QASYMM8:
302  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
303  this->GetTensor());
304  break;
305  case arm_compute::DataType::F16:
306  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
307  this->GetTensor());
308  break;
309  case arm_compute::DataType::S16:
310  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
311  case arm_compute::DataType::QASYMM8_SIGNED:
312  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
313  this->GetTensor());
314  break;
315  case arm_compute::DataType::QSYMM16:
316  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
317  this->GetTensor());
318  break;
319  case arm_compute::DataType::S32:
320  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
321  this->GetTensor());
322  break;
323  default:
324  {
326  }
327  }
328  this->Unmap();
329  }
330 
331  arm_compute::CLTensor m_Tensor;
332  MemorySourceFlags m_ImportFlags;
333  bool m_Imported;
334 };
335 
337 {
338 public:
340  const arm_compute::TensorShape& shape,
341  const arm_compute::Coordinates& coords)
342  : m_Tensor(&parent->GetTensor(), shape, coords)
343  {
344  parentHandle = parent;
345  }
346 
347  arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
348  arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
349 
350  virtual void Allocate() override {}
351  virtual void Manage() override {}
352 
353  virtual const void* Map(bool blocking = true) const override
354  {
355  IgnoreUnused(blocking);
356  return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
357  }
358  virtual void Unmap() const override {}
359 
360  virtual ITensorHandle* GetParent() const override { return parentHandle; }
361 
362  virtual arm_compute::DataType GetDataType() const override
363  {
364  return m_Tensor.info()->data_type();
365  }
366 
367  virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
368  {
369  IgnoreUnused(memoryGroup);
370  }
371 
372  TensorShape GetStrides() const override
373  {
374  return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
375  }
376 
377  TensorShape GetShape() const override
378  {
379  return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
380  }
381 
382 private:
383  // Only used for testing
384  void CopyOutTo(void* memory) const override
385  {
386  const_cast<ClImportSubTensorHandle*>(this)->Map(true);
387  switch(this->GetDataType())
388  {
389  case arm_compute::DataType::F32:
390  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
391  static_cast<float*>(memory));
392  break;
393  case arm_compute::DataType::U8:
394  case arm_compute::DataType::QASYMM8:
395  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
396  static_cast<uint8_t*>(memory));
397  break;
398  case arm_compute::DataType::F16:
399  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
400  static_cast<armnn::Half*>(memory));
401  break;
402  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
403  case arm_compute::DataType::QASYMM8_SIGNED:
404  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
405  static_cast<int8_t*>(memory));
406  break;
407  case arm_compute::DataType::S16:
408  case arm_compute::DataType::QSYMM16:
409  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
410  static_cast<int16_t*>(memory));
411  break;
412  case arm_compute::DataType::S32:
413  armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
414  static_cast<int32_t*>(memory));
415  break;
416  default:
417  {
419  }
420  }
421  const_cast<ClImportSubTensorHandle*>(this)->Unmap();
422  }
423 
424  // Only used for testing
425  void CopyInFrom(const void* memory) override
426  {
427  this->Map(true);
428  switch(this->GetDataType())
429  {
430  case arm_compute::DataType::F32:
431  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
432  this->GetTensor());
433  break;
434  case arm_compute::DataType::U8:
435  case arm_compute::DataType::QASYMM8:
436  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
437  this->GetTensor());
438  break;
439  case arm_compute::DataType::F16:
440  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
441  this->GetTensor());
442  break;
443  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
444  case arm_compute::DataType::QASYMM8_SIGNED:
445  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
446  this->GetTensor());
447  break;
448  case arm_compute::DataType::S16:
449  case arm_compute::DataType::QSYMM16:
450  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
451  this->GetTensor());
452  break;
453  case arm_compute::DataType::S32:
454  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
455  this->GetTensor());
456  break;
457  default:
458  {
460  }
461  }
462  this->Unmap();
463  }
464 
465  mutable arm_compute::CLSubTensor m_Tensor;
466  ITensorHandle* parentHandle = nullptr;
467 };
468 
469 } // namespace armnn
armnn::ClImportTensorHandle::Import
virtual bool Import(void *memory, MemorySource source) override
Import externally allocated memory.
Definition: ClImportTensorHandle.hpp:92
armnn::MemorySource::Malloc
@ Malloc
armnn::ClImportTensorHandle::GetStrides
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
Definition: ClImportTensorHandle.hpp:72
armnn::ClImportSubTensorHandle
Definition: ClImportTensorHandle.hpp:336
armnn::MemorySource::Gralloc
@ Gralloc
armnn::DataLayout
DataLayout
Definition: Types.hpp:62
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::MemorySource::DmaBufProtected
@ DmaBufProtected
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
armnn::ITensorHandle
Definition: ITensorHandle.hpp:16
armnn::ClImportTensorHandle::Manage
virtual void Manage() override
Indicate to the memory manager that this resource is active.
Definition: ClImportTensorHandle.hpp:50
armnn::Half
half_float::half Half
Definition: Half.hpp:22
armnn::ClImportTensorHandle::SetImportFlags
void SetImportFlags(MemorySourceFlags importFlags)
Definition: ClImportTensorHandle.hpp:82
armnn::ClImportSubTensorHandle::Allocate
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
Definition: ClImportTensorHandle.hpp:350
armnn::Coordinates
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
Definition: InternalTypes.hpp:15
armnn::ClImportTensorHandle::GetTensor
arm_compute::CLTensor & GetTensor() override
Definition: ClImportTensorHandle.hpp:47
armnn::ClImportTensorHandle::GetTensor
arm_compute::CLTensor const & GetTensor() const override
Definition: ClImportTensorHandle.hpp:48
armnn::ClImportSubTensorHandle::Map
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
Definition: ClImportTensorHandle.hpp:353
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::ClImportSubTensorHandle::GetTensor
arm_compute::CLSubTensor const & GetTensor() const override
Definition: ClImportTensorHandle.hpp:348
armnn::MemorySource::DmaBuf
@ DmaBuf
PolymorphicDowncast.hpp
armnn::DataType
DataType
Definition: Types.hpp:48
armnn::ClImportTensorHandle::ClImportTensorHandle
ClImportTensorHandle(const TensorInfo &tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags)
Definition: ClImportTensorHandle.hpp:39
armnn::ClImportSubTensorHandle::GetStrides
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
Definition: ClImportTensorHandle.hpp:372
armnn::ClImportTensorHandle
Definition: ClImportTensorHandle.hpp:30
ArmComputeTensorHandle.hpp
armnn::ClImportSubTensorHandle::GetShape
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
Definition: ClImportTensorHandle.hpp:377
armnn::ClImportSubTensorHandle::GetTensor
arm_compute::CLSubTensor & GetTensor() override
Definition: ClImportTensorHandle.hpp:347
armnn::ClImportSubTensorHandle::Unmap
virtual void Unmap() const override
Unmap the tensor data.
Definition: ClImportTensorHandle.hpp:358
Half.hpp
armnn::ClImportTensorHandle::Map
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
Definition: ClImportTensorHandle.hpp:52
armnn::IClTensorHandle
Definition: IClTensorHandle.hpp:13
armnn::ClImportTensorHandle::Unmap
virtual void Unmap() const override
Unmap the tensor data.
Definition: ClImportTensorHandle.hpp:58
armnn::Status
Status
Definition: Types.hpp:42
armnn::ClImportTensorHandle::GetDataType
virtual arm_compute::DataType GetDataType() const override
Definition: ClImportTensorHandle.hpp:62
armnn::MemoryImportException
Definition: Exceptions.hpp:125
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::ClImportTensorHandle::ClImportTensorHandle
ClImportTensorHandle(const TensorInfo &tensorInfo, MemorySourceFlags importFlags)
Definition: ClImportTensorHandle.hpp:33
armnn::ClImportSubTensorHandle::GetParent
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
Definition: ClImportTensorHandle.hpp:360
armnn::ClImportTensorHandle::GetParent
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
Definition: ClImportTensorHandle.hpp:60
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
ArmComputeTensorUtils.hpp
armnn::ClImportTensorHandle::Allocate
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
Definition: ClImportTensorHandle.hpp:49
armnn::UnimplementedException
Definition: Exceptions.hpp:98
IClTensorHandle.hpp
armnn::ClImportSubTensorHandle::SetMemoryGroup
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
Definition: ClImportTensorHandle.hpp:367
armnn::ClImportSubTensorHandle::ClImportSubTensorHandle
ClImportSubTensorHandle(IClTensorHandle *parent, const arm_compute::TensorShape &shape, const arm_compute::Coordinates &coords)
Definition: ClImportTensorHandle.hpp:339
armnn::ClImportTensorHandle::GetImportFlags
MemorySourceFlags GetImportFlags() const override
Get flags describing supported import sources.
Definition: ClImportTensorHandle.hpp:87
armnn::ClImportTensorHandle::SetMemoryGroup
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
Definition: ClImportTensorHandle.hpp:67
armnn::ClImportTensorHandle::GetShape
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
Definition: ClImportTensorHandle.hpp:77
armnn::ClImportSubTensorHandle::GetDataType
virtual arm_compute::DataType GetDataType() const override
Definition: ClImportTensorHandle.hpp:362
armnn::ClImportSubTensorHandle::Manage
virtual void Manage() override
Indicate to the memory manager that this resource is active.
Definition: ClImportTensorHandle.hpp:351
armnn::ClImportTensorHandle::CanBeImported
virtual bool CanBeImported(void *, MemorySource source) override
Implementations must determine if this memory block can be imported.
Definition: ClImportTensorHandle.hpp:187