Compute Library
 21.02
OpenCLMemoryUsage.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2019 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "OpenCLMemoryUsage.h"
25 
26 #include "../Framework.h"
27 #include "../Utils.h"
28 
29 #ifndef ARM_COMPUTE_CL
30 #error "You can't use OpenCLMemoryUsage without OpenCL"
31 #endif /* ARM_COMPUTE_CL */
32 
34 
35 namespace arm_compute
36 {
37 namespace test
38 {
39 namespace framework
40 {
// Identifier for the instrument: always the fixed name "OpenCLMemoryUsage".
41 std::string OpenCLMemoryUsage::id() const
42 {
43  return std::string{ "OpenCLMemoryUsage" };
44 }
45 
// Constructor of OpenCLMemoryUsage (initialiser list and body).
// NOTE(review): the signature line (listing line 46) is missing from this extract.
// The initialiser list stores the function objects currently held by CLSymbols
// in the real_* members and value-initialises the allocation maps and the
// _start / _end / _now statistics.
47  : real_clCreateBuffer(CLSymbols::get().clCreateBuffer_ptr), real_clRetainMemObject(CLSymbols::get().clRetainMemObject_ptr), real_clReleaseMemObject(CLSymbols::get().clReleaseMemObject_ptr),
48  real_clSVMAlloc(CLSymbols::get().clSVMAlloc_ptr), real_clSVMFree(CLSymbols::get().clSVMFree_ptr), _allocations(), _svm_allocations(), _start(), _end(), _now()
49 {
// Choose the divisor (_scale_factor) and unit prefix (_unit) that are later
// applied to every reported byte count.
50  switch(scale_factor)
51  {
52  case ScaleFactor::NONE:
53  _scale_factor = 1;
54  _unit = "";
55  break;
// NOTE(review): listing line 56 is missing here - presumably the case label
// selecting the x1000 scale; confirm against the ScaleFactor enum.
57  _scale_factor = 1000;
58  _unit = "K ";
59  break;
// NOTE(review): listing line 60 is missing here - presumably the case label
// selecting the x1000000 scale; confirm against the ScaleFactor enum.
61  _scale_factor = 1000000;
62  _unit = "M ";
63  break;
64  default:
// Any other scale factor is rejected with an error.
65  ARM_COMPUTE_ERROR("Invalid scale");
66  }
67 }
68 
70 {
71  _now = Stats();
72 
73  ARM_COMPUTE_ERROR_ON(CLSymbols::get().clCreateBuffer_ptr == nullptr);
75  cl_context context,
76  cl_mem_flags flags,
77  size_t size,
78  void *host_ptr,
79  cl_int * errcode_ret)
80  {
81  cl_mem retval = this->real_clCreateBuffer(context, flags, size, host_ptr, errcode_ret);
82  if(host_ptr != nullptr)
83  {
84  // If it's an SVM / external allocation;
85  size = 0;
86  }
87  else
88  {
89  _now.num_allocations++;
90  _now.in_use += size;
91  _now.total_allocated += size;
92  if(_now.in_use > _now.max_in_use)
93  {
94  _now.max_in_use = _now.in_use;
95  }
96  }
97  this->_allocations[retval] = Allocation(size);
98  return retval;
99  };
100  ARM_COMPUTE_ERROR_ON(CLSymbols::get().clRetainMemObject_ptr == nullptr);
101  CLSymbols::get().clRetainMemObject_ptr = [this](cl_mem memobj)
102  {
103  cl_int retval = this->real_clRetainMemObject(memobj);
104  this->_allocations[memobj].refcount++;
105  return retval;
106  };
107  ARM_COMPUTE_ERROR_ON(CLSymbols::get().clReleaseMemObject_ptr == nullptr);
108  CLSymbols::get().clReleaseMemObject_ptr = [this](cl_mem memobj)
109  {
110  cl_int retval = this->real_clRetainMemObject(memobj);
111  Allocation &alloc = this->_allocations[memobj];
112  if(--alloc.refcount == 0)
113  {
114  _now.in_use -= alloc.size;
115  }
116  return retval;
117  };
118 
119  //Only intercept the function if it exists:
120  if(CLSymbols::get().clSVMAlloc_ptr != nullptr)
121  {
122  CLSymbols::get().clSVMAlloc_ptr = [this](cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment)
123  {
124  void *retval = this->real_clSVMAlloc(context, flags, size, alignment);
125  if(retval != nullptr)
126  {
127  _svm_allocations[retval] = size;
128  _now.num_allocations++;
129  _now.in_use += size;
130  _now.total_allocated += size;
131  if(_now.in_use > _now.max_in_use)
132  {
133  _now.max_in_use = _now.in_use;
134  }
135  }
136  return retval;
137  };
138  }
139 
140  //Only intercept the function if it exists:
141  if(CLSymbols::get().clSVMFree_ptr != nullptr)
142  {
143  CLSymbols::get().clSVMFree_ptr = [this](cl_context context, void *svm_pointer)
144  {
145  this->real_clSVMFree(context, svm_pointer);
146  auto iterator = _svm_allocations.find(svm_pointer);
147  if(iterator != _svm_allocations.end())
148  {
149  size_t size = iterator->second;
150  _svm_allocations.erase(iterator);
151  _now.in_use -= size;
152  }
153  };
154  }
155 }
156 
// NOTE(review): the signature line (listing line 157) is missing from this
// extract; presumably OpenCLMemoryUsage::start() - confirm against the header.
// Snapshots the running statistics (_now) into _start; measurements that are
// reported "per run" are computed relative to this snapshot.
158 {
159  _start = _now;
160 }
// NOTE(review): the signature line (listing line 161) is missing from this
// extract; presumably OpenCLMemoryUsage::stop() - confirm against the header.
// Snapshots the running statistics (_now) into _end.
162 {
163  _end = _now;
164 }
165 
// NOTE(review): the signature line (listing line 166) is missing from this
// extract; presumably OpenCLMemoryUsage::test_stop() - confirm against the header.
// Puts the function objects captured by the constructor back into CLSymbols,
// replacing the wrappers that were installed for the duration of the test.
167 {
168  // Restore real function
169  CLSymbols::get().clCreateBuffer_ptr = real_clCreateBuffer;
170  CLSymbols::get().clRetainMemObject_ptr = real_clRetainMemObject;
171  CLSymbols::get().clReleaseMemObject_ptr = real_clReleaseMemObject;
// The SVM pointers are restored unconditionally, whether or not they were wrapped.
172  CLSymbols::get().clSVMAlloc_ptr = real_clSVMAlloc;
173  CLSymbols::get().clSVMFree_ptr = real_clSVMFree;
174 }
175 
// NOTE(review): the signature (listing line 176) and the declaration of the
// local `measurements` map (listing line 178) are missing from this extract;
// presumably this is OpenCLMemoryUsage::measurements() const - confirm.
// Builds the per-run measurements from the difference between the _end and
// _start snapshots; byte counts are divided by _scale_factor and tagged with _unit.
177 {
179  measurements.emplace("Num buffers allocated per run", Measurement(_end.num_allocations - _start.num_allocations, ""));
180  measurements.emplace("Total memory allocated per run", Measurement((_end.total_allocated - _start.total_allocated) / _scale_factor, _unit));
// Memory that was already in use when the _start snapshot was taken.
181  measurements.emplace("Memory in use at start of run", Measurement(_start.in_use / _scale_factor, _unit));
182 
183  return measurements;
184 }
// NOTE(review): the signature (listing line 185) and the declaration of the
// local `measurements` map (listing line 187) are missing from this extract;
// presumably this is OpenCLMemoryUsage::test_measurements() const - confirm.
// Builds the whole-test measurements from the running statistics (_now) and
// from the programs cached by CLKernelLibrary; byte counts are divided by
// _scale_factor and tagged with _unit.
186 {
188  measurements.emplace("Num buffers", Measurement(_now.num_allocations, ""));
189  measurements.emplace("Total memory allocated", Measurement(_now.total_allocated / _scale_factor, _unit));
190  measurements.emplace("Max memory allocated", Measurement(_now.max_in_use / _scale_factor, _unit));
// Whatever is still counted as in use at this point is reported as leaked.
191  measurements.emplace("Memory leaked", Measurement(_now.in_use / _scale_factor, _unit));
192 
193  size_t num_programs = CLKernelLibrary::get().get_built_programs().size();
194  size_t total_size = 0;
// Sum the binary sizes reported by every program in the built-programs cache.
195  for(auto const &it : CLKernelLibrary::get().get_built_programs())
196  {
197  std::vector<size_t> binary_sizes = it.second.getInfo<CL_PROGRAM_BINARY_SIZES>();
// total_size is passed as the initial value, so the sum carries over between programs.
198  total_size = std::accumulate(binary_sizes.begin(), binary_sizes.end(), total_size);
199  }
200 
201  measurements.emplace("Num programs in cache", Measurement(num_programs, ""));
202  measurements.emplace("Total programs memory in cache", Measurement(total_size / _scale_factor, _unit));
203 
204  return measurements;
205 }
206 } // namespace framework
207 } // namespace test
208 } // namespace arm_compute
Class for loading OpenCL symbols.
Definition: OpenCL.h:61
std::function< decltype(clSVMFree)> clSVMFree_ptr
Definition: OpenCL.h:132
void test_start() override
Start of the test.
std::function< decltype(clCreateBuffer)> clCreateBuffer_ptr
Definition: OpenCL.h:101
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::function< decltype(clReleaseMemObject)> clReleaseMemObject_ptr
Definition: OpenCL.h:121
Generic measurement that stores values as either double or long long int.
Definition: Measurement.h:41
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
std::string id() const override
Identifier for the instrument.
std::function< decltype(clSVMAlloc)> clSVMAlloc_ptr
Definition: OpenCL.h:131
Copyright (c) 2017-2021 Arm Limited.
MeasurementsMap measurements() const override
Return the latest measurements.
MeasurementsMap test_measurements() const override
Return the latest test measurements.
OpenCLMemoryUsage(ScaleFactor scale_factor)
Construct an OpenCL memory usage instrument.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
__kernel void accumulate(__global uchar *input_ptr, uint input_stride_x, uint input_step_x, uint input_stride_y, uint input_step_y, uint input_offset_first_element_in_bytes, __global uchar *accu_ptr, uint accu_stride_x, uint accu_step_x, uint accu_stride_y, uint accu_step_y, uint accu_offset_first_element_in_bytes)
This function accumulates an input image into output image.
Definition: accumulate.cl:41
std::map< std::string, Measurement > MeasurementsMap
Map of measurements.
Definition: Instrument.h:109
std::function< decltype(clRetainMemObject)> clRetainMemObject_ptr
Definition: OpenCL.h:120
static CLSymbols & get()
Get the static instance of CLSymbols.
Definition: OpenCL.cpp:45
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.