ArmNN 25.11
ClImportTensorHandle.hpp
//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/utility/IgnoreUnused.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>
namespace armnn
{

class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
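    // Intentional no-ops: the backing memory is imported from an external
    // source rather than allocated or managed by Arm NN's memory manager.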
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }
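
    // Import dispatches on the requested MemorySource: Malloc maps host memory
    // (CL_IMPORT_TYPE_HOST_ARM), DmaBuf/DmaBufProtected wrap a dma_buf file
    // descriptor via clImportMemoryARM, and Gralloc imports an externally
    // created OpenCL buffer (cl_mem) into the tensor directly.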
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory, true);
            }
            // Case for importing memory allocated externally by OpenCL directly into the tensor
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet allocated
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Import the memory allocated by OpenCL into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }
                // m_Tensor.buffer() previously imported.
                else if (m_Imported)
                {
                    // Replace the previously imported OpenCL buffer.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

    virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                // Return true here; ClImport() makes the final decision on whether
                // the memory can actually be imported.
                return true;
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }
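        // For example, with totalBytes = 100 and a 64-byte cacheline:
        // roundedSize = 64 + 100 - (100 % 64) = 64 + 100 - 36 = 128.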

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the Status error code for success; if the import failed, throw an
        // exception carrying the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }
        return imported;
    }
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};
335
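// Handle for a sub-tensor view into a parent import handle; it aliases the
// parent's memory and never owns or allocates a buffer of its own.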
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn
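
A minimal usage sketch (not part of the header): it assumes the OpenCL context exposed by arm_compute::CLKernelLibrary has already been initialised (for example by ClContextControl) and that the device supports the cl_arm_import_memory extension. The function name, tensor shape, and buffer argument below are illustrative only; in practice these handles are usually created via ClImportTensorHandleFactory rather than constructed directly.

    // Hypothetical example: import a cacheline-aligned host buffer into a CL tensor.
    #include <armnn/Tensor.hpp>
    #include <cl/ClImportTensorHandle.hpp>

    void ImportHostBuffer(void* alignedHostBuffer) // assumed suitably aligned and large enough
    {
        using namespace armnn;

        TensorInfo info({ 1, 16, 16, 3 }, DataType::Float32);
        ClImportTensorHandle handle(info, static_cast<MemorySourceFlags>(MemorySource::Malloc));

        // CanBeImported() only checks the import flags; ClImport() makes the
        // final decision and throws MemoryImportException on failure.
        if (handle.CanBeImported(alignedHostBuffer, MemorySource::Malloc))
        {
            handle.Import(alignedHostBuffer, MemorySource::Malloc);
        }
    }

For a dma_buf, the same Import() call would be passed MemorySource::DmaBuf (or DmaBufProtected) with the file descriptor as the memory argument, matching the property lists built in Import() above.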