ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ClImportTensorHandle.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
10 
11 #include <Half.hpp>
12 
14 
15 #include <arm_compute/runtime/CL/CLTensor.h>
16 #include <arm_compute/runtime/CL/CLSubTensor.h>
17 #include <arm_compute/runtime/IMemoryGroup.h>
18 #include <arm_compute/runtime/MemoryGroup.h>
19 #include <arm_compute/core/TensorShape.h>
20 #include <arm_compute/core/Coordinates.h>
21 
23 
24 #include <CL/cl_ext.h>
25 #include <arm_compute/core/CL/CLKernelLibrary.h>
26 
27 namespace armnn
28 {
29 
31 {
32 public:
33  ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
34  : m_ImportFlags(importFlags)
35  {
36  armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
37  }
38 
    /// Construct an import handle for @p tensorInfo with an explicit data layout.
    /// @param tensorInfo  Shape/type information used to build the backing CL tensor info.
    /// @param dataLayout  Layout applied when building the Arm Compute tensor info.
    /// @param importFlags Bitmask of MemorySource kinds this handle will accept in Import().
    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }
46 
    /// Access the underlying Arm Compute CL tensor.
    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    /// No-op: this handle expects its memory to arrive via Import(), not allocation.
    virtual void Allocate() override {}
    /// No-op: this handle does not participate in memory-manager lifetime tracking.
    virtual void Manage() override {}
51 
52  virtual const void* Map(bool blocking = true) const override
53  {
54  IgnoreUnused(blocking);
55  return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
56  }
57 
58  virtual void Unmap() const override {}
59 
    /// This handle is not a sub-tensor, so it has no parent.
    virtual ITensorHandle* GetParent() const override { return nullptr; }

    /// @return The Arm Compute data type of the underlying tensor.
    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }
66 
    /// No-op: imported tensors are not managed through a memory group.
    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }
71 
    /// @return The per-dimension strides (in bytes) of the underlying tensor.
    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    /// @return The shape of the underlying tensor.
    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }
81 
83  {
84  m_ImportFlags = importFlags;
85  }
86 
88  {
89  return m_ImportFlags;
90  }
91 
    /// Import externally allocated memory so the tensor uses it directly.
    /// @param memory External allocation to hand over; its interpretation depends on @p source.
    /// @param source Kind of memory being imported; must be enabled in m_ImportFlags.
    /// @return true if the import succeeded.
    /// @throws MemoryImportException if the source is not enabled/supported or the import fails.
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                // Host (CPU) allocation: import via the cl_arm_import_memory extension.
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBuf)
            {
                // dma_buf import with host data consistency enabled (CL_TRUE).
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);

            }
            if (source == MemorySource::DmaBufProtected)
            {
                // Protected dma_buf import; ClImport() creates the buffer host-no-access.
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);

            }
            // Case for importing memory allocated by OpenCl externally directly into the tensor
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet Allocated
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() previously imported: re-import the new buffer.
                else if (m_Imported)
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }
186 
187  virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
188  {
189  if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
190  {
191  if (source == MemorySource::Malloc)
192  {
193  // Returning true as ClImport() function will decide if memory can be imported or not
194  return true;
195  }
196  }
197  else
198  {
199  throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
200  }
201  return false;
202  }
203 
204 private:
    /// Wrap an external allocation as a cl_mem via clImportMemoryARM and hand it to the
    /// tensor's allocator.
    /// @param importProperties Property list describing the import type (host, dma_buf, ...).
    /// @param memory           External allocation to import.
    /// @param isProtected      If true, create the buffer CL_MEM_HOST_NO_ACCESS (protected
    ///                         memory); otherwise CL_MEM_READ_WRITE.
    /// @return true if the memory was imported successfully.
    /// @throws MemoryImportException on any OpenCL or allocator failure.
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            // Round up to the next multiple of the cache line size.
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the returned Status' error code; if it is not OK, throw an exception
        // carrying the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }
        return imported;
    }
249  // Only used for testing
    // Only used for testing
    /// Copy the tensor's contents out to @p memory, dispatching on the CL data type.
    /// @param memory Destination buffer; must be large enough for the tensor's data.
    void CopyOutTo(void* memory) const override
    {
        // Map()/Unmap() are const-qualified on the interface; cast away const to call them.
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                // NOTE(review): unsupported data types are silently ignored here —
                // confirm whether this should report an error instead.
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }
289 
290  // Only used for testing
291  void CopyInFrom(const void* memory) override
292  {
293  this->Map(true);
294  switch(this->GetDataType())
295  {
296  case arm_compute::DataType::F32:
297  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
298  this->GetTensor());
299  break;
300  case arm_compute::DataType::U8:
301  case arm_compute::DataType::QASYMM8:
302  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
303  this->GetTensor());
304  break;
305  case arm_compute::DataType::F16:
306  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
307  this->GetTensor());
308  break;
309  case arm_compute::DataType::S16:
310  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
311  case arm_compute::DataType::QASYMM8_SIGNED:
312  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
313  this->GetTensor());
314  break;
315  case arm_compute::DataType::QSYMM16:
316  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
317  this->GetTensor());
318  break;
319  case arm_compute::DataType::S32:
320  armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
321  this->GetTensor());
322  break;
323  default:
324  {
326  }
327  }
328  this->Unmap();
329  }
330 
331  arm_compute::CLTensor m_Tensor;
332  MemorySourceFlags m_ImportFlags;
333  bool m_Imported;
334 };
335 
337 {
338 public:
340  const arm_compute::TensorShape& shape,
341  const arm_compute::Coordinates& coords)
342  : m_Tensor(&parent->GetTensor(), shape, coords)
343  {
344  parentHandle = parent;
345  }
346 
    /// Access the underlying Arm Compute CL sub-tensor.
    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    /// No-op: a sub-tensor views its parent's storage and allocates nothing itself.
    virtual void Allocate() override {}
    /// No-op: this handle does not participate in memory-manager lifetime tracking.
    virtual void Manage() override {}
352 
353  virtual const void* Map(bool blocking = true) const override
354  {
355  IgnoreUnused(blocking);
356  return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
357  }
358  virtual void Unmap() const override {}
359 
    /// @return The parent tensor handle this sub-tensor was created from.
    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    /// @return The Arm Compute data type of the underlying sub-tensor.
    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }
366 
    /// No-op: sub-tensors are not managed through a memory group.
    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }
371 
    /// @return The per-dimension strides (in bytes) of the underlying sub-tensor.
    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    /// @return The shape of the underlying sub-tensor.
    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }
381 
382 private:
383  // Only used for testing
    // Only used for testing
    /// Copy the sub-tensor's contents out to @p memory, dispatching on the CL data type.
    /// @param memory Destination buffer; must be large enough for the sub-tensor's data.
    void CopyOutTo(void* memory) const override
    {
        // Map()/Unmap() are const-qualified on the interface; cast away const to call them.
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                // NOTE(review): unsupported data types are silently ignored here —
                // confirm whether this should report an error instead.
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }
423 
424  // Only used for testing
    // Only used for testing
    /// Copy data from @p memory into the sub-tensor, dispatching on the CL data type.
    /// @param memory Source buffer; must hold the sub-tensor's full contents in its data type.
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                // NOTE(review): unsupported data types are silently ignored here —
                // confirm whether this should report an error instead.
            }
        }
        this->Unmap();
    }
464 
    // NOTE(review): marked mutable — presumably so const member functions can use
    // non-const CLSubTensor APIs; confirm against Arm Compute Library's interface.
    mutable arm_compute::CLSubTensor m_Tensor;
    // Non-owning pointer to the parent handle, returned by GetParent().
    ITensorHandle* parentHandle = nullptr;
467 };
468 
469 } // namespace armnn
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void Unmap() const override
Unmap the tensor data.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
arm_compute::CLSubTensor const & GetTensor() const override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
arm_compute::CLSubTensor & GetTensor() override
virtual arm_compute::DataType GetDataType() const override
ClImportSubTensorHandle(IClTensorHandle *parent, const arm_compute::TensorShape &shape, const arm_compute::Coordinates &coords)
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual void Manage() override
Indicate to the memory manager that this resource is active.
ClImportTensorHandle(const TensorInfo &tensorInfo, MemorySourceFlags importFlags)
ClImportTensorHandle(const TensorInfo &tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags)
virtual bool Import(void *memory, MemorySource source) override
Import externally allocated memory.
virtual void Unmap() const override
Unmap the tensor data.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
MemorySourceFlags GetImportFlags() const override
Get flags describing supported import sources.
arm_compute::CLTensor & GetTensor() override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
arm_compute::CLTensor const & GetTensor() const override
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
void SetImportFlags(MemorySourceFlags importFlags)
virtual arm_compute::DataType GetDataType() const override
virtual bool CanBeImported(void *, MemorySource source) override
Implementations must determine if this memory block can be imported.
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
Copyright (c) 2021 ARM Limited and Contributors.
half_float::half Half
Definition: Half.hpp:22
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:246
void IgnoreUnused(Ts &&...)
unsigned int MemorySourceFlags
Status
enumeration
Definition: Types.hpp:43
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
DataLayout
Definition: Types.hpp:63
DataType
Definition: Types.hpp:49