Compute Library
 21.05
OpenCLTimer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2019 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "OpenCLTimer.h"
25 
26 #include "../Framework.h"
27 #include "../Utils.h"
28 
31 
32 #ifndef ARM_COMPUTE_CL
33 #error "You can't use OpenCLTimer without OpenCL"
34 #endif /* ARM_COMPUTE_CL */
35 
36 namespace arm_compute
37 {
38 namespace test
39 {
40 namespace framework
41 {
42 template <bool output_timestamps>
44 {
45  if(output_timestamps)
46  {
47  return "OpenCLTimestamps";
48  }
49  else
50  {
51  return "OpenCLTimer";
52  }
53 }
54 
55 template <bool output_timestamps>
57  : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
58 {
59  auto q = CLScheduler::get().queue();
60  cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
61  if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
62  {
63  CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
64  }
65 
66  switch(scale_factor)
67  {
68  case ScaleFactor::NONE:
69  _scale_factor = 1.f;
70  _unit = "ns";
71  break;
73  _scale_factor = 1000.f;
74  _unit = "us";
75  break;
77  _scale_factor = 1000000.f;
78  _unit = "ms";
79  break;
81  _scale_factor = 1000000000.f;
82  _unit = "s";
83  break;
84  default:
85  ARM_COMPUTE_ERROR("Invalid scale");
86  }
87 }
88 
89 template <bool output_timestamps>
91 {
92  // Start intercepting enqueues:
93  ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
94  ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
95  _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
96  _real_graph_function = graph::TaskExecutor::get().execute_function;
97  auto interceptor = [this](
98  cl_command_queue command_queue,
99  cl_kernel kernel,
100  cl_uint work_dim,
101  const size_t *gwo,
102  const size_t *gws,
103  const size_t *lws,
104  cl_uint num_events_in_wait_list,
105  const cl_event * event_wait_list,
106  cl_event * event)
107  {
108  if(this->_timer_enabled)
109  {
110  kernel_info info;
111  cl::Kernel cpp_kernel(kernel, true);
112  std::stringstream ss;
113  ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
114  if(gws != nullptr)
115  {
116  ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
117  }
118  if(lws != nullptr)
119  {
120  ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
121  }
122  info.name = ss.str();
123  cl_event tmp;
124  cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
125  info.event = tmp;
126  this->_kernels.push_back(std::move(info));
127 
128  if(event != nullptr)
129  {
130  //return cl_event from the intercepted call
131  clRetainEvent(tmp);
132  *event = tmp;
133  }
134  return retval;
135  }
136  else
137  {
138  return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
139  }
140  };
141 
142  // Start intercepting tasks:
143  auto task_interceptor = [this](graph::ExecutionTask & task)
144  {
145  if(task.node != nullptr && !task.node->name().empty())
146  {
147  this->_prefix = task.node->name() + "/";
148  }
149  else
150  {
151  this->_prefix = "";
152  }
153  this->_real_graph_function(task);
154  this->_prefix = "";
155  };
156 
158  graph::TaskExecutor::get().execute_function = task_interceptor;
159 }
160 
161 template <bool output_timestamps>
163 {
164  _kernels.clear();
165  _timer_enabled = true;
166 }
167 template <bool output_timestamps>
169 {
170  _timer_enabled = false;
171 }
172 
173 template <bool output_timestamps>
175 {
176  // Restore real function
177  CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
178  graph::TaskExecutor::get().execute_function = _real_graph_function;
179  _real_graph_function = nullptr;
180  _real_function = nullptr;
181 }
182 
183 template <bool output_timestamps>
185 {
186  MeasurementsMap measurements;
187  unsigned int kernel_number = 0;
188  for(auto const &kernel : _kernels)
189  {
190  cl_ulong queued;
191  cl_ulong flushed;
192  cl_ulong start;
193  cl_ulong end;
194  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
195  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
196  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
197  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
198  std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
199 
200  if(output_timestamps)
201  {
202  measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
203  measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
204  measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
205  measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
206  }
207  else
208  {
209  measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
210  }
211  }
212 
213  return measurements;
214 }
215 
216 template <bool output_timestamps>
218 {
219  MeasurementsMap measurements;
220 
221  if(output_timestamps)
222  {
223  // The OpenCL clock and the wall clock are not in sync, so we use
224  // this trick to calculate the offset between the two clocks:
225  ::cl::Event event;
226  cl_ulong now_gpu;
227 
228  // Enqueue retrieve current CPU clock and enqueue a dummy marker
229  std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
230  CLScheduler::get().queue().enqueueMarker(&event);
231 
232  CLScheduler::get().queue().finish();
233  //Access the time at which the marker was enqueued:
234  event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
235 
236  measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
237  measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
238  }
239 
240  return measurements;
241 }
242 
243 } // namespace framework
244 } // namespace test
245 } // namespace arm_compute
246 
void stop() override
Stop measuring.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::string to_string(T &&value)
Convert integer and float values to string.
Generic measurement that stores values as either double or long long int.
Definition: Measurement.h:41
std::stringstream ss(mlgo_str)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
cl_int clRetainEvent(cl_event event)
Definition: OpenCL.cpp:873
OpenCLClock(ScaleFactor scale_factor)
Construct an OpenCL timer.
Definition: OpenCLTimer.cpp:56
MeasurementsMap test_measurements() const override
Return the latest test measurements.
void start() override
Start measuring.
Copyright (c) 2017-2021 Arm Limited.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
std::function< decltype(clEnqueueNDRangeKernel)> clEnqueueNDRangeKernel_ptr
Definition: OpenCL.h:98
void end(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:290
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Definition: CLScheduler.cpp:39
static TaskExecutor & get()
Task executor accessor.
Definition: Workload.cpp:75
void set_queue(cl::CommandQueue queue)
Accessor to set the CL command queue to be used by the scheduler.
Definition: CLScheduler.cpp:55
const char * name
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void test_start() override
Start of the test.
Definition: OpenCLTimer.cpp:90
Instrument creating measurements based on the information returned by clGetEventProfilingInfo for eac...
Definition: OpenCLTimer.h:45
std::string id() const override
Identifier for the instrument.
Definition: OpenCLTimer.cpp:43
std::map< std::string, Measurement > MeasurementsMap
Map of measurements.
Definition: Instrument.h:109
MeasurementsMap measurements() const override
Return the latest measurements.
void test_stop() override
End of the test.
static CLSymbols & get()
Get the static instance of CLSymbols.
Definition: OpenCL.cpp:45
std::function< decltype(execute_task)> execute_function
Function that is responsible for executing tasks.
Definition: Workload.h:63