26 #include "../Framework.h"
32 #ifndef ARM_COMPUTE_CL
33 #error "You can't use OpenCLTimer without OpenCL"
// NOTE(review): fragment — only the template header and one return statement of
// this member are visible; its signature and any other branch are outside this view.
42 template <
bool output_timestamps>
// Returns the literal name "OpenCLTimestamps". Presumably this is the name used
// when output_timestamps is true — TODO confirm against the full function.
47 return "OpenCLTimestamps";
// NOTE(review): fragment of what looks like the constructor — the signature and
// most of the body are outside this view, so only the visible lines are described.
55 template <
bool output_timestamps>
// Looks like a member initialiser: the saved function pointer (later called as
// this->_real_function) starts out null.
58 _real_function(nullptr),
// The graph function pointer is only initialised when ARM_COMPUTE_GRAPH_ENABLED is defined.
59 #ifdef ARM_COMPUTE_GRAPH_ENABLED
60 _real_graph_function(nullptr),
// Read the properties of queue `q` and test whether the profiling bit is clear.
// What is done when it is clear is not visible in this view.
66 cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
67 if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
// Three alternative values for _scale_factor, each 1000x the previous one. The
// conditions selecting between them are not visible here; elsewhere in this file
// the factor is used as the divisor applied to raw profiling timestamps.
79 _scale_factor = 1000.f;
83 _scale_factor = 1000000.f;
87 _scale_factor = 1000000000.f;
// NOTE(review): fragment — the enclosing member's signature, the rest of the
// lambda's parameter list and several statements are outside this view.
95 template <
bool output_timestamps>
// Interceptor lambda capturing `this`. The visible parameters (command queue,
// wait-list count, wait list) are among those forwarded to _real_function below.
101 auto interceptor = [
this](
102 cl_command_queue command_queue,
108 cl_uint num_events_in_wait_list,
109 const cl_event * event_wait_list,
// Profiling path: entered only while _timer_enabled is set.
112 if(this->_timer_enabled)
// Wrap the raw kernel handle in a cl::Kernel so its info can be queried.
// NOTE(review): the second argument `true` is presumably the bindings' retain
// flag — confirm against the OpenCL C++ bindings version in use.
115 cl::Kernel cpp_kernel(kernel,
true);
// Build the label: the current prefix followed by the kernel's function name.
116 std::stringstream
ss;
117 ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
// Append the first three global work-size entries. Any guard around this
// statement (e.g. a null check on gws) is not visible in this view.
120 ss <<
" GWS[" << gws[0] <<
"," << gws[1] <<
"," << gws[2] <<
"]";
// Append the first three local work-size entries; the same caveat applies to lws.
124 ss <<
" LWS[" << lws[0] <<
"," << lws[1] <<
"," << lws[2] <<
"]";
// Call the saved real function, passing &tmp as the event argument instead of the
// caller's `event` (compare the plain forwarding call further down).
128 cl_int
retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
// Record the entry for this enqueue; `info` is moved into _kernels. How `info`
// is populated is not visible here.
130 this->_kernels.push_back(std::move(
info));
// Plain forwarding call with the caller's own event pointer — presumably the path
// taken when the timer is disabled (the branch structure is not visible).
142 return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
// Graph support: only compiled when ARM_COMPUTE_GRAPH_ENABLED is defined.
147 #ifdef ARM_COMPUTE_GRAPH_ENABLED
// When the task has a node with a non-empty name, the label prefix becomes
// "<node name>/". What happens to _prefix otherwise is not visible.
153 if(task.node !=
nullptr && !task.node->name().empty())
155 this->_prefix = task.node->name() +
"/";
// Run the task through the saved real graph function.
161 this->_real_graph_function(task);
// NOTE(review): fragment — the member's signature is outside this view.
168 template <
bool output_timestamps>
// Turn on the flag that the enqueue interceptor checks before recording a kernel.
172 _timer_enabled =
true;
// NOTE(review): fragment — the member's signature is outside this view.
174 template <
bool output_timestamps>
// Turn off the flag that the enqueue interceptor checks before recording a kernel.
177 _timer_enabled =
false;
// NOTE(review): fragment — the member's signature and any other statements
// (e.g. restoring the original hooks) are outside this view.
180 template <
bool output_timestamps>
// Reset the saved real function pointer to null.
185 _real_function =
nullptr;
// The graph function pointer is reset only when ARM_COMPUTE_GRAPH_ENABLED is defined.
186 #ifdef ARM_COMPUTE_GRAPH_ENABLED
188 _real_graph_function =
nullptr;
// NOTE(review): fragment — the member's signature, the declarations of
// `measurements`, `name`, `queued`, `flushed`, `start` and `end`, and anything
// emitted outside the output_timestamps branch are not visible in this view.
192 template <
bool output_timestamps>
// Counter starting at zero; how it is used is not visible here.
196 unsigned int kernel_number = 0;
// Walk every recorded kernel entry (by const reference, no copy).
197 for(
auto const &kernel : _kernels)
// Query the four profiling timestamps of the kernel's event.
// NOTE(review): the return values of getProfilingInfo are not checked in the
// visible lines, so a failed query would go unnoticed here.
203 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
204 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
205 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
206 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &
end);
// Raw timestamps are only emitted for the output_timestamps variant of the template.
209 if(output_timestamps)
// Each timestamp is stored under "[<stage>]" + name, divided by _scale_factor
// converted to cl_ulong and tagged with _unit.
// NOTE(review): if the timestamp variables are integer-typed (declarations not
// visible), this is integer division and the fractional part is dropped.
211 measurements.emplace(
"[start]" +
name,
Measurement(start /
static_cast<cl_ulong
>(_scale_factor), _unit));
212 measurements.emplace(
"[queued]" +
name,
Measurement(queued /
static_cast<cl_ulong
>(_scale_factor), _unit));
213 measurements.emplace(
"[flushed]" +
name,
Measurement(flushed /
static_cast<cl_ulong
>(_scale_factor), _unit));
214 measurements.emplace(
"[end]" +
name,
Measurement(
end /
static_cast<cl_ulong
>(_scale_factor), _unit));
// NOTE(review): fragment — the member's signature, the declarations of
// `measurements`, `event` and `now_gpu`, and how `event` is produced are not
// visible in this view.
225 template <
bool output_timestamps>
// The "now" reference points are only produced for the output_timestamps variant.
230 if(output_timestamps)
// Sample the host clock.
238 std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
// Read the QUEUED profiling timestamp of `event` as the device-side "now".
// The return value of the query is not checked in the visible lines.
243 event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
// Host timestamp: raw tick count divided by 1000 and labelled "us".
// NOTE(review): this is only microseconds if the clock's tick period is 1 ns,
// which the standard does not guarantee; a std::chrono::duration_cast to
// microseconds would be period-independent.
245 measurements.emplace(
"Now Wall clock",
Measurement(now_cpu.time_since_epoch().count() / 1000,
"us"));
// Device timestamp, scaled with the same divisor and unit as the per-kernel values.
246 measurements.emplace(
"Now OpenCL",
Measurement(now_gpu /
static_cast<cl_ulong
>(_scale_factor), _unit));