23.11
|
Go to the documentation of this file.
53 return _gemm_heuristics;
58 _queue = std::move(
queue);
79 _queue.enqueueMarker(&event);
85 if (_cl_tuner !=
nullptr)
93 return _is_initialised;
96 std::once_flag CLScheduler::_initialize_symbols;
102 _is_initialised(false),
104 _gemm_heuristics(nullptr),
106 _job_chaining_enabled(true),
107 _job_chaining_size(1),
108 _job_chaining_count(0)
124 if (!_is_initialised)
126 const std::string cl_kernels_folder(
"./cl_kernels/");
127 cl::CommandQueue
queue = cl::CommandQueue(ctx, device);
129 init(ctx,
queue, device, cl_tuner, gemm_h);
130 _cl_tuner = cl_tuner;
136 if (!_is_initialised)
143 cl::CommandQueue
queue = cl::CommandQueue(ctx, dev);
149 _cl_tuner = cl_tuner;
150 _gemm_heuristics = gemm_h;
155 _is_initialised =
false;
167 cl::CommandQueue queue,
168 const cl::Device &device,
174 _queue = std::move(
queue);
176 _is_initialised =
true;
177 _cl_tuner = cl_tuner;
178 _gemm_heuristics = gemm_h;
179 _backend_type = cl_backend_type;
185 !_is_initialised,
"The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \
186 or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!");
188 const bool inject_memory = !tensors.
empty();
191 if (_cl_tuner !=
nullptr)
197 inject_memory ? kernel.
run_op(tensors, kernel.
window(), _queue) : kernel.
run(kernel.
window(), _queue);
198 if (_job_chaining_enabled)
200 ++_job_chaining_count;
206 void CLScheduler::flush_queue(
bool flush)
208 if (_job_chaining_enabled)
210 if (_job_chaining_count >= _job_chaining_size)
212 _job_chaining_count = 0;
219 if (_job_chaining_size < 16)
221 _job_chaining_size <<= 1;
235 enqueue_common(kernel,
pack, flush);
240 enqueue_common(kernel, tensors, flush);
245 _job_chaining_enabled =
true;
246 _job_chaining_size = job_chaining_size;
CLGEMMHeuristicsHandle * gemm_heuristics() const
Accessor for the associated CLGEMMHeuristicsHandle.
cl::Context & context()
Accessor for the associated CL context.
bool opencl_is_available()
Check if OpenCL is available.
void set_tuner(ICLTuner *tuner)
Accessor to set the CL tuner to be used by the scheduler.
Provides global access to a CL context and command queue.
std::tuple< cl::Context, cl::Device, cl_int > create_opencl_context_and_device(CLBackendType cl_backend_type)
This function creates an OpenCL context and a device.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
void init(std::string kernel_path, cl::Context context, cl::Device device)
Initialises the kernel library.
CLScheduler()
Constructor.
void sync()
Blocks until all commands in the associated command queue have finished.
virtual void tune_kernel_dynamic(ICLKernel &kernel)=0
Tune OpenCL kernel dynamically.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
bool empty() const
Checks if pack is empty.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Initialises the context and command queue to be used by the scheduler.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void set_target(GPUTarget target)
Accessor to set target GPU to be used by the scheduler.
Handle for loading and retrieving GEMM heuristics.
void default_reinit(ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Re-initializes the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary.
void tune_kernel_static(ICLKernel &kernel)
Tunes OpenCL kernel.
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Common interface for all the OpenCL kernels.
static CLScheduler & get()
Access the scheduler singleton.
CLBackendType
List the possible OpenCL backends.
void enable_job_chaining(int job_chaining_size)
Enable job chaining.
void set_context(cl::Context context)
Accessor to set the CL context to be used by the scheduler.
GPUTarget target() const
Get the target GPU.
const Window & window() const
The maximum window the kernel can be executed on.
GPUTarget
Available GPU Targets.
virtual void tune_kernel_static(ICLKernel &kernel)=0
Tune OpenCL kernel statically.
void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr)
Initialises the scheduler with context and device provided by the user.
@ Native
OpenCL native backend.
void default_init(ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Initialises the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary.
void set_context(cl::Context context)
Sets the CL context used to create programs.
Copyright (c) 2017-2023 Arm Limited.
bool is_initialised() const
virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
Basic interface for tuning the OpenCL kernels.
void set_queue(cl::CommandQueue queue)
Accessor to set the CL command queue to be used by the scheduler.
cl::Context & context()
Accessor for the associated CL context.
cl::Event enqueue_sync_event()
Enqueues a marker into the associated command queue and return the event.
virtual void run(const Window &window, cl::CommandQueue &queue)
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.