Compute Library
 22.08
CLScheduler.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLSCHEDULER_H
25 #define ARM_COMPUTE_CLSCHEDULER_H
26 
30 #include "arm_compute/core/Error.h"
31 #include "arm_compute/core/Types.h"
37 
38 #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
39 namespace arm_compute
40 {
41 namespace experimental
42 {
43 namespace dynamic_fusion
44 {
45 struct ClExecutionDescriptor;
46 } // namespace dynamic_fusion
47 } // namespace experimental
48 } // namespace arm_compute
49 #endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
50 
51 namespace arm_compute
52 {
53 class ICLKernel;
54 class ICLTuner;
55 /** Provides global access to a CL context and command queue. */
56 class CLScheduler final
57 {
58 public:
59  /** Constructor */
60  CLScheduler();
61  /** Prevent instances of this class from being copied (As this class contains pointers) */
62  CLScheduler(const CLScheduler &) = delete;
63  /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
64  CLScheduler &operator=(const CLScheduler &) = delete;
65  /** Default destructor */
66  ~CLScheduler() = default;
67  /** Access the scheduler singleton.
68  * This method has been deprecated and will be removed in future releases.
69  * @return The scheduler
70  */
71  static CLScheduler &get();
72  /** Initialises the context and command queue used by the scheduler to default values
73  * and sets a default device and kernel path for the @ref CLKernelLibrary.
74  *
75  * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
76  * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
77  * @param[in] cl_backend_type (Optional) Type of backend to use (default = CLBackendType::Native)
78  */
79  void default_init(ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr, CLBackendType cl_backend_type = CLBackendType::Native);
80  /** Initialises the scheduler with context and device provided by the user
81  *
82  * @param[in] device OpenCL device to be used
83  * @param[in] ctx OpenCL context to be used
84  * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
85  * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
86  */
87  void default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr);
88 
89  /** Re-initializes the context and command queue used by the scheduler to default values
90  * and sets a default device and kernel path for the @ref CLKernelLibrary.
91  *
92  * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr)
93  * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
94  * @param[in] cl_backend_type (Optional) Type of backend to use (default = CLBackendType::Native)
95  */
96  void default_reinit(ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr, CLBackendType cl_backend_type = CLBackendType::Native);
97 
98  /** Schedule the execution of the passed kernel if possible.
99  *
100  * @param[in] kernel Kernel to execute.
101  * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
102  */
103  void enqueue(ICLKernel &kernel, bool flush = true);
104  /** Schedule the execution of the passed kernel if possible.
105  *
106  * @param[in] kernel Kernel to execute.
107  * @param[in] tensors Tensor pack containing the tensors to operate on.
108  * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
109  */
110  void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true);
111 
112 #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
113 
114  /** Schedule the execution of the passed kernel if possible.
115  *
116  * @param[in] kernel Kernel to execute.
117  * @param[in] tensors Tensor pack containing the tensors to operate on.
118  * @param[in] exec_desc Execution descriptor
119  * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
120  */
121  void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc, bool flush = true);
122 
123 #endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
124 
125  /** Initialises the context and command queue to be used by the scheduler.
126  *
127  * @param[in] context A CL context.
128  * @param[in] queue A CL command queue.
129  * @param[in] device A CL device.
130  * @param[in] cl_tuner (Optional) Pointer to OpenCL tuner (default=nullptr)
131  * Note: It is the caller's responsibility to release the allocated memory for CLTuner
132  * @param[in] gemm_h (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr)
133  * @param[in] cl_backend_type (Optional) Type of backend to use (default = CLBackendType::Native)
134  */
135  void init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner = nullptr, CLGEMMHeuristicsHandle *gemm_h = nullptr,
136  CLBackendType cl_backend_type = CLBackendType::Native);
137 
138  /** Accessor for the associated CL context.
139  *
140  * @return A CL context.
141  */
142  cl::Context &context();
143 
144  /** Accessor for the associated CL command queue.
145  *
146  * @return A CL command queue.
147  */
148  cl::CommandQueue &queue();
149 
150  /** Get the target GPU.
151  *
152  * @return The target GPU.
153  */
154  GPUTarget target() const;
155 
156  /** Accessor for the associated CLGEMMHeuristicsHandle
157  *
158  * @return Pointer to CLGEMMHeuristicsHandle
159  */
160  CLGEMMHeuristicsHandle *gemm_heuristics() const;
161 
162  /** Accessor to set the CL context to be used by the scheduler.
163  *
164  * @param[in] context A CL context.
165  */
166  void set_context(cl::Context context);
167 
168  /** Accessor to set the CL command queue to be used by the scheduler.
169  *
170  * @param[in] queue A CL command queue.
171  */
172  void set_queue(cl::CommandQueue queue);
173 
174  /** Accessor to set target GPU to be used by the scheduler.
175  *
176  * @param[in] target The target GPU.
177  */
178  void set_target(GPUTarget target);
179 
180  /** Accessor to set the CL tuner to be used by the scheduler.
181  *
182  * @param[in] tuner A CL tuner
183  */
184  void set_tuner(ICLTuner *tuner);
185 
186  /** Blocks until all commands in the associated command queue have finished. */
187  void sync();
188 
189  /** Enqueues a marker into the associated command queue and returns the event.
190  *
191  * @return An event that can be waited on to block the executing thread.
192  */
193  cl::Event enqueue_sync_event();
194 
195  /** Tunes an OpenCL kernel
196  *
197  * @param[in] kernel Kernel to tune
198  */
199  void tune_kernel_static(ICLKernel &kernel);
200 
201  /** Enable job chaining. The command queue will only be flushed when @p job_chaining_size kernels have been enqueued.
202  *
203  * @param[in] job_chaining_size Kernels to enqueue before flushing
204  */
205  void enable_job_chaining(int job_chaining_size);
206 
207  bool is_initialised() const; // NOTE(review): undocumented in this header; presumably reports whether the scheduler has been initialised (cf. the _is_initialised member) - confirm against the implementation
208 
209 private:
210  void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush);
211  /** If job chain is disabled, then flush the command queue according to @p flush. Otherwise @p flush is ignored and the queue is only flushed when job chain count exceeds allocated job chain size
212  *
213  * @param[in] flush Flush the command queue. Ignored when job chain is enabled.
214  */
215  void flush_queue(bool flush);
216 
217 #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
218  void enqueue_common(ICLKernel &kernel, ITensorPack &tensors, const experimental::dynamic_fusion::ClExecutionDescriptor &exec_desc, bool flush);
219 #endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
220 
221  /** Flag to ensure symbol initialisation happens before Scheduler creation */
222  static std::once_flag _initialize_symbols;
223 
224  cl::Context _context;
225  cl::CommandQueue _queue;
226  GPUTarget _target;
227  bool _is_initialised;
228  ICLTuner *_cl_tuner;
229  CLGEMMHeuristicsHandle *_gemm_heuristics;
230  CLBackendType _backend_type;
231  bool _job_chaining_enabled;
232  int _job_chaining_size;
233  int _job_chaining_count;
234 };
235 } // namespace arm_compute
236 #endif /* ARM_COMPUTE_CLSCHEDULER_H */
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32
OpenCL native backend.
Common interface for all the OpenCL kernels.
Definition: ICLKernel.h:81
Copyright (c) 2017-2022 Arm Limited.
Descriptor containing information required to run a single ClWorkload.
Definition: ClWorkload.h:91
Interface used to tune the local work-group size of OpenCL kernels.
Handle for loading and retrieving GEMM heuristics.
CLBackendType
List the possible OpenCL backends.
Definition: CLTypes.h:55
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Provides global access to a CL context and command queue.
Definition: CLScheduler.h:56
Wrapper to configure the Khronos OpenCL C++ header.
Tensor packing service.
Definition: ITensorPack.h:39
Basic interface for tuning the OpenCL kernels.
Definition: ICLTuner.h:43