Compute Library 22.08 — CLHelpers.cpp (source listing; extracted from the generated documentation of this file).
1 /*
2  * Copyright (c) 2016-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Log.h"
29 #include "arm_compute/core/Types.h"
31 
33 
34 #include <utility>
35 #include <vector>
36 
37 namespace arm_compute
38 {
40 {
41  switch(dt)
42  {
43  case DataType::U8:
44  case DataType::QASYMM8:
45  return "uchar";
46  case DataType::S8:
48  case DataType::QSYMM8:
50  return "char";
51  case DataType::U16:
52  case DataType::QASYMM16:
53  return "ushort";
54  case DataType::S16:
55  case DataType::QSYMM16:
56  return "short";
57  case DataType::U32:
58  return "uint";
59  case DataType::S32:
60  return "int";
61  case DataType::U64:
62  return "ulong";
63  case DataType::S64:
64  return "long";
65  case DataType::F16:
66  return "half";
67  case DataType::F32:
68  return "float";
69  default:
70  ARM_COMPUTE_ERROR("Unsupported input data type.");
71  return "";
72  }
73 }
74 
76 {
77  switch(dt)
78  {
79  case DataType::U8:
80  case DataType::QASYMM8:
81  return "ushort";
82  case DataType::S8:
84  case DataType::QSYMM8:
86  return "short";
87  case DataType::U16:
88  case DataType::QASYMM16:
89  return "uint";
90  case DataType::S16:
91  case DataType::QSYMM16:
92  return "int";
93  case DataType::U32:
94  return "ulong";
95  case DataType::S32:
96  return "long";
97  case DataType::F16:
98  return "float";
99  default:
100  ARM_COMPUTE_ERROR("Cannot get promoted OpenCL type for the input data type.");
101  return "";
102  }
103 }
104 
105 std::string get_cl_unsigned_type_from_element_size(size_t element_size)
106 {
107  switch(element_size)
108  {
109  case 1:
110  return "uchar";
111  case 2:
112  return "ushort";
113  case 4:
114  return "uint";
115  case 8:
116  return "ulong";
117  default:
118  ARM_COMPUTE_ERROR("Data type not supported");
119  return "";
120  }
121 }
122 
123 std::string get_cl_signed_type_from_element_size(size_t element_size)
124 {
125  switch(element_size)
126  {
127  case 1:
128  return "char";
129  case 2:
130  return "short";
131  case 4:
132  return "int";
133  case 8:
134  return "long";
135  default:
136  ARM_COMPUTE_ERROR("Data type not supported");
137  return "";
138  }
139 }
140 
142 {
143  switch(dt)
144  {
145  case DataType::U8:
146  case DataType::QASYMM8:
147  return "uchar";
148  case DataType::S8:
150  case DataType::QSYMM8:
152  return "char";
153  case DataType::U16:
154  case DataType::QASYMM16:
155  return "ushort";
156  case DataType::F16:
157  case DataType::S16:
158  case DataType::QSYMM16:
159  return "short";
160  case DataType::U32:
161  return "uint";
162  case DataType::F32:
163  case DataType::S32:
164  return "int";
165  case DataType::U64:
166  return "ulong";
167  case DataType::S64:
168  return "long";
169  default:
170  ARM_COMPUTE_ERROR("Unsupported input data type.");
171  return "";
172  }
173 }
174 
176 {
177  switch(dt)
178  {
179  case DataType::U8:
180  case DataType::QASYMM8:
181  return "uint";
182  case DataType::S8:
184  case DataType::QSYMM8:
186  return "int";
187  default:
188  ARM_COMPUTE_ERROR("Unsupported data type.");
189  return "";
190  }
191 }
192 
194 {
195  switch(dt)
196  {
197  case DataType::U8:
198  case DataType::S8:
199  case DataType::QSYMM8:
200  case DataType::QASYMM8:
203  return "8";
204  case DataType::U16:
205  case DataType::S16:
206  case DataType::QSYMM16:
207  case DataType::QASYMM16:
208  case DataType::F16:
209  return "16";
210  case DataType::U32:
211  case DataType::S32:
212  case DataType::F32:
213  return "32";
214  case DataType::U64:
215  case DataType::S64:
216  return "64";
217  default:
218  ARM_COMPUTE_ERROR("Unsupported input data type.");
219  return "0";
220  }
221 }
222 
223 GPUTarget get_target_from_device(const cl::Device &device)
224 {
225  // Query device name size
226  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
227 
228  return get_target_from_name(device_name);
229 }
230 
231 bool arm_non_uniform_workgroup_supported(const cl::Device &device)
232 {
233  return device_supports_extension(device, "cl_arm_non_uniform_work_group_size");
234 }
235 
236 bool fp16_supported(const cl::Device &device)
237 {
238  return device_supports_extension(device, "cl_khr_fp16");
239 }
240 
241 bool dot8_supported(const cl::Device &device)
242 {
243  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
244  const GPUTarget gpu_target = get_target_from_name(device_name);
245 
246  // SW_WORKAROUND: Workaround for DDK revision r14p0.to enable cl_arm_integer_dot_product_int8
247  std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 };
248  return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
249 }
250 
251 bool dot8_acc_supported(const cl::Device &device)
252 {
253  return device_supports_extension(device, "cl_arm_integer_dot_product_accumulate_int8");
254 }
255 
256 CLVersion get_cl_version(const cl::Device &device)
257 {
258  std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
259  if(version_str.find("OpenCL 3") != std::string::npos)
260  {
261  return CLVersion::CL30;
262  }
263  else if(version_str.find("OpenCL 2") != std::string::npos)
264  {
265  return CLVersion::CL20;
266  }
267  else if(version_str.find("OpenCL 1.2") != std::string::npos)
268  {
269  return CLVersion::CL12;
270  }
271  else if(version_str.find("OpenCL 1.1") != std::string::npos)
272  {
273  return CLVersion::CL11;
274  }
275  else if(version_str.find("OpenCL 1.0") != std::string::npos)
276  {
277  return CLVersion::CL10;
278  }
279 
280  return CLVersion::UNKNOWN;
281 }
282 
283 bool device_supports_extension(const cl::Device &device, const char *extension_name)
284 {
285  std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
286  auto pos = extensions.find(extension_name);
287  return (pos != std::string::npos);
288 }
289 
290 bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
291 {
293 
294  using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
295 
296  std::vector<WinogradConfiguration> winograd_configs_nchw =
297  {
298  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
299  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
300  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
301  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
302  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
303  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
304  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
305  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
306  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
307  };
308 
309  std::vector<WinogradConfiguration> winograd_configs_nhwc =
310  {
311  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
312  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
313  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
314  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
315  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
316  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
317  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
318  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
319  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
320  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
321  };
322 
323  auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
324  std::pair<int, int>(kernel_size.width, kernel_size.height));
325 
326  // Return true if supported
327  if(data_layout == DataLayout::NCHW)
328  {
329  return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
330  }
331  else
332  {
333  return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
334  }
335 }
336 
337 size_t preferred_vector_width(const cl::Device &device, const DataType dt)
338 {
339  switch(dt)
340  {
341  case DataType::U8:
342  case DataType::S8:
343  case DataType::QASYMM8:
345  case DataType::QSYMM8:
347  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
348  case DataType::U16:
349  case DataType::S16:
350  case DataType::QSYMM16:
351  case DataType::QASYMM16:
352  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
353  case DataType::U32:
354  case DataType::S32:
355  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
356  case DataType::F16:
357  case DataType::F32:
358  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
359  case DataType::U64:
360  case DataType::S64:
361  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
362  default:
363  return 1;
364  }
365 }
366 
/** Checks if "dummy" work-items are preferred, i.e. padding the NDRange up to
 * a power of two (per this function's upstream documentation).
 *
 * @param[in] device OpenCL device to query (currently ignored).
 *
 * @return Always true for now — no device capability is actually inspected yet.
 */
bool preferred_dummy_work_items_support(const cl::Device &device)
{
    ARM_COMPUTE_UNUSED(device);
    // TODO (COMPMID-2044)
    return true;
}
373 
374 bool image2d_from_buffer_supported(const cl::Device &device)
375 {
376  return device_supports_extension(device, "cl_khr_image2d_from_buffer");
377 }
378 
379 size_t get_cl_image_pitch_alignment(const cl::Device &device)
380 {
381  cl_uint pixel_aligment = 0;
382 
383  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr);
384 
385  if(err == CL_SUCCESS)
386  {
387  return pixel_aligment;
388  }
389  else
390  {
391  return 0;
392  }
393 }
394 
395 bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
396 {
397  cl_bool supported = CL_FALSE;
398 
399  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), &supported, nullptr);
400 
401  return (err == CL_SUCCESS && supported == CL_TRUE);
402 }
403 
404 cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
405 {
407 
408  const std::string program_name = klib.program_name(kernel_name);
409  auto kernel_src = klib.program(program_name);
410  const std::string kernel_path = klib.kernel_path();
411 
412  return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, build_opts, kernel_src.is_binary));
413 }
414 
415 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
416 {
417  const unsigned int width_leftover = input_dimension % vector_size;
418  const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
419  const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
420  return cl::NDRange(std::min(8U, num_of_threads));
421 }
422 
423 bool get_wbsm_support_info(const cl::Device &device)
424 {
425  cl_bitfield capabilities = 0;
426  cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
427  if((err == CL_SUCCESS) && (capabilities & CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM))
428  {
429  return true;
430  }
431  return false;
432 }
433 
434 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
435 {
436  cl_int err = clSetKernelExecInfo(kernel.get(),
437  CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM,
438  sizeof(cl_int),
439  &wbsm_hint);
440  ARM_COMPUTE_UNUSED(err);
441  ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
442 }
443 
445 {
446  if(tensor->tensor_shape()[0] % 4)
447  {
448  return false;
449  }
450 
451  // If not floating point
452  if(!is_data_type_float(tensor->data_type()))
453  {
454  return false;
455  }
456 
457  // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
459  {
460  return false;
461  }
462 
463  // Check cl image pitch alignment
465  {
466  return false;
467  }
468 
469  const size_t image_w = tensor->tensor_shape()[0] / 4;
470  const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
471  const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
472  const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
473 
474  if(image_w > max_image_w || image_h > max_image_h)
475  {
476  return false;
477  }
478 
479  return true;
480 }
481 
482 void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values)
483 {
484  for(const int value : values)
485  {
486  if(value > max_manual_loop_unrolling)
487  {
488  built_opts.add_option("-DUNROLL_WITH_PRAGMA");
489  return;
490  }
491  }
492 }
493 
494 bool arm_matrix_multiply_supported(const cl::Device &device)
495 {
496  return device_supports_extension(device, "cl_arm_matrix_multiply");
497 }
498 } // namespace arm_compute
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported.
Definition: CLHelpers.cpp:251
bool arm_matrix_multiply_supported(const cl::Device &device)
Helper function to check whether the cl_arm_matrix_multiply extension is supported.
Definition: CLHelpers.cpp:494
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:374
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported...
Definition: CLHelpers.cpp:241
quantized, symmetric fixed-point 16-bit number
bool export_weights_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:444
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list< int > values)
Definition: CLHelpers.cpp:482
cl_int clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
Definition: OpenCL.cpp:1054
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
Definition: CLHelpers.cpp:236
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
Definition: CLHelpers.cpp:434
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange In case d...
Definition: CLHelpers.cpp:367
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
Definition: CLHelpers.cpp:141
1 channel, 1 U8 per channel
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
Definition: CLHelpers.cpp:175
CLVersion
Available OpenCL Version.
Definition: CLTypes.h:39
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to an signed integer data type.
Definition: CLHelpers.cpp:123
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
Definition: CLHelpers.cpp:337
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
Definition: GPUTarget.cpp:186
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:423
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
ClProgramInfo program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
std::string program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
quantized, asymmetric fixed-point 16-bit number
1 channel, 1 U16 per channel
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Copyright (c) 2017-2022 Arm Limited.
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:91
1 channel, 1 F16 per channel
const std::string & kernel_path() const
Gets the path that the kernels reside in.
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
signed 64-bit number
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: OpenCL.cpp:871
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:404
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:193
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
1 channel, 1 U32 per channel
bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
Helper function to check whether non-uniform work group is supported.
Definition: CLHelpers.cpp:395
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:223
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Definition: CLHelpers.cpp:415
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
1 channel, 1 S16 per channel
quantized, symmetric fixed-point 8-bit number
Num samples, channels, height, width.
CLCompileContext class.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:379
quantized, symmetric per channel fixed-point 8-bit number
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
Definition: CLHelpers.cpp:256
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:75
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
Definition: CLHelpers.cpp:283
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:90
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
Definition: CLHelpers.cpp:105
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported...
Definition: CLHelpers.cpp:231
quantized, asymmetric fixed-point 8-bit number signed
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL.
Definition: CLHelpers.cpp:290
unsigned 64-bit number
std::string kernel_name
DataType
Available data types.
Definition: Types.h:79
DataLayout
[DataLayout enum definition]
Definition: Types.h:113
signed 8-bit number
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:1010
const cl::Device & get_device()
Gets the CL device for which the programs are created.