// Lambda kept in `interceptor`. It captures `this`, which gives the body access
// to the m_OriginalEnqueueFunction and m_Kernels members used further down.
// Only part of the parameter list and body is visible in this excerpt.
25 auto interceptor = [
this]( cl_command_queue command_queue,
31 cl_uint num_events_in_wait_list,
32 const cl_event * event_wait_list,
// Wrap the raw `kernel` handle in a cl::Kernel so its info can be queried.
// NOTE(review): the second argument `true` presumably asks the wrapper to
// retain the handle — confirm against the OpenCL C++ bindings.
39 cl::Kernel retainedKernel(kernel,
true);
// The label written to `ss` starts with the kernel's function name.
41 ss << retainedKernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
// Append the first three entries of `gws` as " GWS[a,b,c]".
// NOTE(review): indices 0..2 are read here; any null / dimension guard is not
// visible in this excerpt — confirm one exists.
46 ss <<
" GWS[" << gws[0] <<
"," << gws[1] <<
"," << gws[2] <<
"]";
// Append the first three entries of `lws` as " LWS[a,b,c]" (same caveat as above).
50 ss <<
" LWS[" << lws[0] <<
"," << lws[1] <<
"," << lws[2] <<
"]";
// Forward the call to the saved original enqueue function and keep its result.
56 retVal = m_OriginalEnqueueFunction( command_queue,
62 num_events_in_wait_list,
// Record the assembled label together with `customEvent` for later reporting.
67 m_Kernels.emplace_back(ss.str(), customEvent);
// Take an additional reference on `customEvent`; the surrounding condition and
// the consumer of that reference are not visible in this excerpt.
72 clRetainEvent(customEvent);
// Remember the currently registered clEnqueueNDRangeKernel entry point; the
// interceptor calls it and it is written back to CLSymbols further below.
79 m_OriginalEnqueueFunction = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
// Replace the registered entry point with the interceptor lambda.
80 CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
// Write the saved entry point back, undoing the replacement above.
85 CLSymbols::get().clEnqueueNDRangeKernel_ptr = m_OriginalEnqueueFunction;
// True when m_Kernels holds at least one recorded entry.
90 return m_Kernels.size() > 0;
// Output list; the loop below appends one Measurement per visible emplace_back.
95 std::vector<Measurement> measurements;
// Fetch the property bits of the scheduler's command queue; they are tested
// below for CL_QUEUE_PROFILING_ENABLE.
97 cl_command_queue_properties clQueueProperties = CLScheduler::get().queue().getInfo<CL_QUEUE_PROPERTIES>();
// Visit every entry recorded in m_Kernels.
100 for (
auto& kernel : m_Kernels)
// Label format: "<GetName()>/<idx>: <recorded kernel name>"; `idx` is
// post-incremented each time a label is built.
102 std::string name = std::string(this->
GetName()) +
"/" + std::to_string(idx++) +
": " + kernel.m_Name;
// Profiling counters are only queried when the queue has profiling enabled.
105 if((clQueueProperties & CL_QUEUE_PROFILING_ENABLE) != 0)
// Wait on the kernel's event before reading its profiling counters.
108 kernel.m_Event.wait();
// Start and end timestamps of the command associated with the event.
110 cl_ulong start = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
111 cl_ulong end = kernel.m_Event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
// Elapsed time divided by 1000 to match the TIME_US unit used below.
// NOTE(review): assumes the profiling counters are in nanoseconds, as the
// OpenCL specification states — confirm.
112 timeUs =
static_cast<double>(end - start) / 1000.0;
// Emit the measurement under the label built above, tagged as microseconds.
115 measurements.emplace_back(name, timeUs, Measurement::Unit::TIME_US);