Compute Library
 22.05
OpenCLTimer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2019, 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "OpenCLTimer.h"
25 
26 #include "../Framework.h"
27 #include "../Utils.h"
28 
31 
32 #ifndef ARM_COMPUTE_CL
33 #error "You can't use OpenCLTimer without OpenCL"
34 #endif /* ARM_COMPUTE_CL */
35 
36 namespace arm_compute
37 {
38 namespace test
39 {
40 namespace framework
41 {
42 template <bool output_timestamps>
44 {
45  if(output_timestamps)
46  {
47  return "OpenCLTimestamps";
48  }
49  else
50  {
51  return "OpenCLTimer";
52  }
53 }
54 
55 template <bool output_timestamps>
57  : _kernels(),
58  _real_function(nullptr),
59 #ifdef ARM_COMPUTE_GRAPH_ENABLED
60  _real_graph_function(nullptr),
61 #endif /* ARM_COMPUTE_GRAPH_ENABLED */
62  _prefix(),
63  _timer_enabled(false)
64 {
65  auto q = CLScheduler::get().queue();
66  cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
67  if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
68  {
69  CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
70  }
71 
72  switch(scale_factor)
73  {
74  case ScaleFactor::NONE:
75  _scale_factor = 1.f;
76  _unit = "ns";
77  break;
79  _scale_factor = 1000.f;
80  _unit = "us";
81  break;
83  _scale_factor = 1000000.f;
84  _unit = "ms";
85  break;
87  _scale_factor = 1000000000.f;
88  _unit = "s";
89  break;
90  default:
91  ARM_COMPUTE_ERROR("Invalid scale");
92  }
93 }
94 
95 template <bool output_timestamps>
97 {
98  // Start intercepting enqueues:
99  ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
100  _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
101  auto interceptor = [this](
102  cl_command_queue command_queue,
103  cl_kernel kernel,
104  cl_uint work_dim,
105  const size_t *gwo,
106  const size_t *gws,
107  const size_t *lws,
108  cl_uint num_events_in_wait_list,
109  const cl_event * event_wait_list,
110  cl_event * event)
111  {
112  if(this->_timer_enabled)
113  {
114  kernel_info info;
115  cl::Kernel cpp_kernel(kernel, true);
116  std::stringstream ss;
117  ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
118  if(gws != nullptr)
119  {
120  ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
121  }
122  if(lws != nullptr)
123  {
124  ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
125  }
126  info.name = ss.str();
127  cl_event tmp;
128  cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
129  info.event = tmp;
130  this->_kernels.push_back(std::move(info));
131 
132  if(event != nullptr)
133  {
134  //return cl_event from the intercepted call
135  clRetainEvent(tmp);
136  *event = tmp;
137  }
138  return retval;
139  }
140  else
141  {
142  return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
143  }
144  };
146 
147 #ifdef ARM_COMPUTE_GRAPH_ENABLED
148  ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
149  _real_graph_function = graph::TaskExecutor::get().execute_function;
150  // Start intercepting tasks:
151  auto task_interceptor = [this](graph::ExecutionTask & task)
152  {
153  if(task.node != nullptr && !task.node->name().empty())
154  {
155  this->_prefix = task.node->name() + "/";
156  }
157  else
158  {
159  this->_prefix = "";
160  }
161  this->_real_graph_function(task);
162  this->_prefix = "";
163  };
164  graph::TaskExecutor::get().execute_function = task_interceptor;
165 #endif /* ARM_COMPUTE_GRAPH_ENABLED */
166 }
167 
168 template <bool output_timestamps>
170 {
171  _kernels.clear();
172  _timer_enabled = true;
173 }
174 template <bool output_timestamps>
176 {
177  _timer_enabled = false;
178 }
179 
180 template <bool output_timestamps>
182 {
183  // Restore real function
184  CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
185  _real_function = nullptr;
186 #ifdef ARM_COMPUTE_GRAPH_ENABLED
187  graph::TaskExecutor::get().execute_function = _real_graph_function;
188  _real_graph_function = nullptr;
189 #endif /* ARM_COMPUTE_GRAPH_ENABLED */
190 }
191 
192 template <bool output_timestamps>
194 {
196  unsigned int kernel_number = 0;
197  for(auto const &kernel : _kernels)
198  {
199  cl_ulong queued;
200  cl_ulong flushed;
201  cl_ulong start;
202  cl_ulong end;
203  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
204  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
205  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
206  kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
207  std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
208 
209  if(output_timestamps)
210  {
211  measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
212  measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
213  measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
214  measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
215  }
216  else
217  {
218  measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
219  }
220  }
221 
222  return measurements;
223 }
224 
225 template <bool output_timestamps>
227 {
229 
230  if(output_timestamps)
231  {
232  // The OpenCL clock and the wall clock are not in sync, so we use
233  // this trick to calculate the offset between the two clocks:
234  ::cl::Event event;
235  cl_ulong now_gpu;
236 
237  // Enqueue retrieve current CPU clock and enqueue a dummy marker
238  std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
239  CLScheduler::get().queue().enqueueMarker(&event);
240 
241  CLScheduler::get().queue().finish();
242  //Access the time at which the marker was enqueued:
243  event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
244 
245  measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
246  measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
247  }
248 
249  return measurements;
250 }
251 
252 } // namespace framework
253 } // namespace test
254 } // namespace arm_compute
255 
void stop() override
Stop measuring.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::string to_string(T &&value)
Convert integer and float values to string.
Generic measurement that stores values as either double or long long int.
Definition: Measurement.h:41
std::stringstream ss(mlgo_str)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
cl_int clRetainEvent(cl_event event)
Definition: OpenCL.cpp:873
OpenCLClock(ScaleFactor scale_factor)
Construct an OpenCL timer.
Definition: OpenCLTimer.cpp:56
MeasurementsMap test_measurements() const override
Return the latest test measurements.
void start() override
Start measuring.
Copyright (c) 2017-2022 Arm Limited.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
std::function< decltype(clEnqueueNDRangeKernel)> clEnqueueNDRangeKernel_ptr
Definition: OpenCL.h:98
void end(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:290
const char * name
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Definition: CLScheduler.cpp:43
static TaskExecutor & get()
Task executor accessor.
Definition: Workload.cpp:75
void set_queue(cl::CommandQueue queue)
Accessor to set the CL command queue to be used by the scheduler.
Definition: CLScheduler.cpp:59
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void test_start() override
Start of the test.
Definition: OpenCLTimer.cpp:96
Instrument creating measurements based on the information returned by clGetEventProfilingInfo for eac...
Definition: OpenCLTimer.h:45
std::string id() const override
Identifier for the instrument.
Definition: OpenCLTimer.cpp:43
std::map< std::string, Measurement > MeasurementsMap
Map of measurements.
Definition: Instrument.h:109
MeasurementsMap measurements() const override
Return the latest measurements.
void test_stop() override
End of the test.
static CLSymbols & get()
Get the static instance of CLSymbols.
Definition: OpenCL.cpp:45
std::function< decltype(execute_task)> execute_function
Function that is responsible for executing tasks.
Definition: Workload.h:63