// Compute Library 21.05 - TensorAllocator.cpp
/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/utils/misc/MMappedFile.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"

#include <fstream>
#include <memory>
#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
namespace
{
/** Wrap an existing host allocation in a cl_mem object using the cl_arm_import_memory extension */
cl_mem import_malloc_memory_helper(void *ptr, size_t size)
{
    const cl_import_properties_arm import_properties[] =
    {
        CL_IMPORT_TYPE_ARM,
        CL_IMPORT_TYPE_HOST_ARM,
        0
    };

    cl_int err = CL_SUCCESS;
    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
    ARM_COMPUTE_ASSERT(err == CL_SUCCESS);

    return buf;
}
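
// Note: the imported host allocation is not copied; it must stay alive and
// keep satisfying the device's alignment requirements for as long as the
// returned cl_mem is in use.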

class DummyAllocator final : public IAllocator
{
public:
    DummyAllocator() = default;

    void *allocate(size_t size, size_t alignment) override
    {
        ++_n_calls;
        return _backend_allocator.allocate(size, alignment);
    }
    void free(void *ptr) override
    {
        return _backend_allocator.free(ptr);
    }
    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
    {
        // Needs to be counted as well, as this is the entry point used internally by CLTensorAllocator
        ++_n_calls;
        return _backend_allocator.make_region(size, alignment);
    }
    int get_n_calls() const
    {
        return _n_calls;
    }

private:
    int               _n_calls{};
    CLBufferAllocator _backend_allocator{};
};
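
// DummyAllocator decorates CLBufferAllocator: real allocations are forwarded
// to the backend, while get_n_calls() lets the tests below verify that every
// internal allocation was routed through the user-provided allocator.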

void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
{
    // Create tensors
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));

    // Create and configure function
    CLGEMMConvolutionLayer conv(mm);
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));

    // Allocate tensors
    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // Finalize memory manager
    if(mm != nullptr)
    {
        mm->populate(mm_allocator, 1 /* num_pools */);
        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
    }

    conv.run();
}
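
// run_conv2d drives both configurations under test: with mm == nullptr all
// tensor memory is allocated directly (through the global allocator when one
// is set), while a non-null memory manager additionally routes the function's
// internal memory through the pool populated from mm_allocator.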
} // namespace

TEST_SUITE(CL)
TEST_SUITE(UNIT)
TEST_SUITE(TensorAllocator)

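// The cases below exercise CLTensorAllocator along three axes: pluggable
// global allocators, importing pre-existing memory (cl::Buffer, malloc'd
// memory, memory-mapped files) and per-channel quantization metadata.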
/* Validate that an external global allocator can be used for all internal allocations */
TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
{
    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(nullptr /* mm */, global_tensor_alloc);

    // Check that the external allocator was used for all allocations (more than 4 calls expected)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/* Validate that an external global allocator can be used for the pool manager */
TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(mm, global_tensor_alloc);

    // Check that the external allocator was used for all allocations (more than 4 calls expected)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/** Validates import memory interface when importing cl buffer objects */
TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
{
    // Init tensor info
    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

    // Allocate memory buffer
    const size_t total_size = info.total_size();
    auto         buf        = cl::Buffer(CLScheduler::get().context(), CL_MEM_READ_WRITE, total_size);

    // Negative case : Import empty buffer
    CLTensor t1;
    t1.allocator()->init(info);
    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());

    // Negative case : Import memory to a tensor that is memory managed
    CLTensor    t2;
    MemoryGroup mg;
    t2.allocator()->set_associated_memory_group(&mg);
    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());

    // Negative case : Buffer too small for the tensor's total size
    CLTensor         t3;
    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
    t3.allocator()->init(info_neg);
    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());

    // Positive case : Import cl::Buffer of the correct size
    CLTensor t4;
    t4.allocator()->init(info);
    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
    t4.allocator()->free();
    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
}
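
// import_memory() only succeeds on a tensor that is not memory managed and
// whose TensorInfo fits inside the provided buffer; freeing the tensor drops
// the reference, but ownership of the imported cl::Buffer stays with the caller.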

/** Validates import memory interface when importing malloced memory */
TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Allocate and import tensor
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();
        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        size_t       space               = total_size_in_bytes + alignment;
        auto         raw_data            = std::make_unique<uint8_t[]>(space);

        void *aligned_ptr = raw_data.get();
        std::align(alignment, total_size_in_bytes, aligned_ptr, space);

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
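
// Over-allocating by `alignment` bytes and letting std::align pick the first
// suitably aligned address inside the block keeps the imported pointer valid
// on devices that require cacheline-aligned host memory.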

#if !defined(BARE_METAL)
/** Validates import memory interface when importing memory mapped objects */
TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Get number of elements
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();

        // Create file of the required size
        std::ofstream output_file("test_mmap_import.bin", std::ios::binary | std::ios::out);
        output_file.seekp(total_size_in_bytes - 1);
        output_file.write("", 1);
        output_file.close();

        // Map file
        utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /* size 0: map the whole file */, 0);
        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
        unsigned char *data = mmapped_file.data();

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(data);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
#endif // !defined(BARE_METAL)
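
// Seeking to (size - 1) and writing a single byte is the usual trick to grow a
// fresh file to the required length before mapping it; the test is compiled
// out on bare-metal targets, where a filesystem is not guaranteed.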

/** Validates symmetric per channel quantization */
TEST_CASE(Symm8PerChannelQuantizationInfo, framework::DatasetMode::ALL)
{
    // Create tensor
    CLTensor                 tensor;
    const std::vector<float> scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f };
    const TensorInfo         info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale));
    tensor.allocator()->init(info);

    // Check quantization information
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().scale().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().scale().size() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().offset().empty(), framework::LogLevel::ERRORS);

    CLQuantization quantization = tensor.quantization();
    ARM_COMPUTE_ASSERT(quantization.scale != nullptr);
    ARM_COMPUTE_ASSERT(quantization.offset != nullptr);

    // Check OpenCL quantization arrays before allocating
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == 0, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Check OpenCL quantization arrays after allocating
    tensor.allocator()->allocate();
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Validate that the scale values are the same (map size is in bytes, not elements)
    auto  cl_scale_buffer = quantization.scale->cl_buffer();
    void *mapped_ptr      = CLScheduler::get().queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size() * sizeof(float));
    auto  cl_scale_ptr    = static_cast<float *>(mapped_ptr);
    for(unsigned int i = 0; i < scale.size(); ++i)
    {
        ARM_COMPUTE_EXPECT(cl_scale_ptr[i] == scale[i], framework::LogLevel::ERRORS);
    }
    CLScheduler::get().queue().enqueueUnmapMemObject(cl_scale_buffer, mapped_ptr);
}
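
// The per-channel scale values live in a dedicated OpenCL array that is only
// created once the tensor itself is allocated, hence max_num_values() == 0
// beforehand; QSYMM8_PER_CHANNEL is symmetric, so the offset array stays empty.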

TEST_SUITE_END() // TensorAllocator
TEST_SUITE_END() // UNIT
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute