26 #include "../Framework.h" 32 #ifndef ARM_COMPUTE_CL 33 #error "You can't use OpenCLTimer without OpenCL" 42 template <
bool output_timestamps>
47 return "OpenCLTimestamps";
55 template <
bool output_timestamps>
57 : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
60 cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
61 if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
73 _scale_factor = 1000.f;
77 _scale_factor = 1000000.f;
81 _scale_factor = 1000000000.f;
89 template <
bool output_timestamps>
97 auto interceptor = [
this](
98 cl_command_queue command_queue,
104 cl_uint num_events_in_wait_list,
105 const cl_event * event_wait_list,
108 if(this->_timer_enabled)
111 cl::Kernel cpp_kernel(kernel,
true);
112 std::stringstream
ss;
113 ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
116 ss <<
" GWS[" << gws[0] <<
"," << gws[1] <<
"," << gws[2] <<
"]";
120 ss <<
" LWS[" << lws[0] <<
"," << lws[1] <<
"," << lws[2] <<
"]";
122 info.name = ss.str();
124 cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
126 this->_kernels.push_back(std::move(info));
138 return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
145 if(task.node !=
nullptr && !task.node->name().empty())
147 this->_prefix = task.node->name() +
"/";
153 this->_real_graph_function(task);
161 template <
bool output_timestamps>
165 _timer_enabled =
true;
167 template <
bool output_timestamps>
170 _timer_enabled =
false;
173 template <
bool output_timestamps>
179 _real_graph_function =
nullptr;
180 _real_function =
nullptr;
183 template <
bool output_timestamps>
187 unsigned int kernel_number = 0;
188 for(
auto const &kernel : _kernels)
194 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
195 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
196 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
197 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
200 if(output_timestamps)
202 measurements.emplace(
"[start]" + name,
Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
203 measurements.emplace(
"[queued]" + name,
Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
204 measurements.emplace(
"[flushed]" + name,
Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
205 measurements.emplace(
"[end]" + name,
Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
209 measurements.emplace(name,
Measurement((end - start) / _scale_factor, _unit));
216 template <
bool output_timestamps>
221 if(output_timestamps)
229 std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
234 event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
236 measurements.emplace(
"Now Wall clock",
Measurement(now_cpu.time_since_epoch().count() / 1000,
"us"));
237 measurements.emplace(
"Now OpenCL",
Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
void stop() override
Stop measuring.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
std::string to_string(T &&value)
Convert integer and float values to string.
Generic measurement that stores values as either double or long long int.
std::stringstream ss(mlgo_str)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
cl_int clRetainEvent(cl_event event)
OpenCLClock(ScaleFactor scale_factor)
Construct an OpenCL timer.
MeasurementsMap test_measurements() const override
Return the latest test measurements.
void start() override
Start measuring.
Copyright (c) 2017-2021 Arm Limited.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
std::function< decltype(clEnqueueNDRangeKernel)> clEnqueueNDRangeKernel_ptr
void end(TokenStream &in, bool &valid)
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
static TaskExecutor & get()
Task executor accessor.
void set_queue(cl::CommandQueue queue)
Accessor to set the CL command queue to be used by the scheduler.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void test_start() override
Start of the test.
Instrument creating measurements based on the information returned by clGetEventProfilingInfo for each OpenCL kernel executed.
std::string id() const override
Identifier for the instrument.
std::map< std::string, Measurement > MeasurementsMap
Map of measurements.
MeasurementsMap measurements() const override
Return the latest measurements.
void test_stop() override
End of the test.
static CLSymbols & get()
Get the static instance of CLSymbols.
std::function< decltype(execute_task)> execute_function
Function that is responsible for executing tasks.