Compute Library
 23.08
CLHelpers.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Log.h"
30 #include "arm_compute/core/Types.h"
32 
34 
35 #include <utility>
36 #include <vector>
37 
38 namespace arm_compute
39 {
// Body of get_cl_type_from_data_type(const DataType &dt): maps the tensor data
// type `dt` to the name of an OpenCL scalar type, returned as a string.
// NOTE(review): the signature line (listing line 40) is absent from this listing.
41 {
42  switch(dt)
43  {
   // 8-bit types
44  case DataType::U8:
45  case DataType::QASYMM8:
46  return "uchar";
47  case DataType::S8:
   // NOTE(review): listing lines 48 and 50 are absent here; further case labels may
   // belong to this group - confirm against the original file.
49  case DataType::QSYMM8:
51  return "char";
   // 16-bit integer types
52  case DataType::U16:
53  case DataType::QASYMM16:
54  return "ushort";
55  case DataType::S16:
56  case DataType::QSYMM16:
57  return "short";
   // 32-bit and 64-bit integer types
58  case DataType::U32:
59  return "uint";
60  case DataType::S32:
61  return "int";
62  case DataType::U64:
63  return "ulong";
64  case DataType::S64:
65  return "long";
   // Floating-point types
66  case DataType::F16:
67  return "half";
68  case DataType::F32:
69  return "float";
70  default:
   // Any other data type is reported through ARM_COMPUTE_ERROR; the empty
   // string below is the value returned after that call.
71  ARM_COMPUTE_ERROR("Unsupported input data type.");
72  return "";
73  }
74 }
75 
// Body of get_cl_promoted_type_from_data_type(const DataType &dt): returns the name
// of the OpenCL type that is twice as wide as the one matching `dt`
// (8-bit -> 16-bit, 16-bit -> 32-bit, 32-bit -> 64-bit, half -> float).
// NOTE(review): the signature line (listing line 76) is absent from this listing.
77 {
78  switch(dt)
79  {
80  case DataType::U8:
81  case DataType::QASYMM8:
82  return "ushort";
83  case DataType::S8:
   // NOTE(review): listing lines 84 and 86 are absent here; further case labels may
   // belong to this group - confirm against the original file.
85  case DataType::QSYMM8:
87  return "short";
88  case DataType::U16:
89  case DataType::QASYMM16:
90  return "uint";
91  case DataType::S16:
92  case DataType::QSYMM16:
93  return "int";
94  case DataType::U32:
95  return "ulong";
96  case DataType::S32:
97  return "long";
98  case DataType::F16:
99  return "float";
100  default:
   // Types without a case above (e.g. F32, U64, S64) end up here and are
   // reported through ARM_COMPUTE_ERROR.
101  ARM_COMPUTE_ERROR("Cannot get promoted OpenCL type for the input data type.");
102  return "";
103  }
104 }
105 
106 std::string get_cl_unsigned_type_from_element_size(size_t element_size)
107 {
108  switch(element_size)
109  {
110  case 1:
111  return "uchar";
112  case 2:
113  return "ushort";
114  case 4:
115  return "uint";
116  case 8:
117  return "ulong";
118  default:
119  ARM_COMPUTE_ERROR("Data type not supported");
120  return "";
121  }
122 }
123 
124 std::string get_cl_signed_type_from_element_size(size_t element_size)
125 {
126  switch(element_size)
127  {
128  case 1:
129  return "char";
130  case 2:
131  return "short";
132  case 4:
133  return "int";
134  case 8:
135  return "long";
136  default:
137  ARM_COMPUTE_ERROR("Data type not supported");
138  return "";
139  }
140 }
141 
// Body of get_cl_select_type_from_data_type(const DataType &dt): maps `dt` to the
// name of the OpenCL type used as the select type for that data type.
// Floating-point types map to the integer type of the same width
// (F16 -> "short", F32 -> "int").
// NOTE(review): the signature line (listing line 142) is absent from this listing.
143 {
144  switch(dt)
145  {
   // All 8-bit types listed here, unsigned ones included, share "char".
146  case DataType::U8:
147  case DataType::QASYMM8:
148  case DataType::S8:
   // NOTE(review): listing lines 149 and 151 are absent here; further case labels may
   // belong to this group - confirm against the original file.
150  case DataType::QSYMM8:
152  return "char";
153  case DataType::U16:
154  case DataType::QASYMM16:
155  return "ushort";
156  case DataType::F16:
157  case DataType::S16:
158  case DataType::QSYMM16:
159  return "short";
160  case DataType::U32:
161  return "uint";
162  case DataType::F32:
163  case DataType::S32:
164  return "int";
165  case DataType::U64:
166  return "ulong";
167  case DataType::S64:
168  return "long";
169  default:
   // Any other data type is reported through ARM_COMPUTE_ERROR.
170  ARM_COMPUTE_ERROR("Unsupported input data type.");
171  return "";
172  }
173 }
174 
// Body of get_cl_dot8_acc_type_from_data_type(const DataType &dt): maps an 8-bit
// data type to the name of the OpenCL dot8 accumulator type
// ("uint" for the unsigned cases, "int" for the signed cases).
// NOTE(review): the signature line (listing line 175) is absent from this listing.
176 {
177  switch(dt)
178  {
179  case DataType::U8:
180  case DataType::QASYMM8:
181  return "uint";
182  case DataType::S8:
   // NOTE(review): listing lines 183 and 185 are absent here; further case labels may
   // belong to this group - confirm against the original file.
184  case DataType::QSYMM8:
186  return "int";
187  default:
   // Every data type without a case above is reported through ARM_COMPUTE_ERROR.
188  ARM_COMPUTE_ERROR("Unsupported data type.");
189  return "";
190  }
191 }
192 
// Body of get_data_size_from_data_type(const DataType &dt): returns the size of the
// data type in bits, formatted as a decimal string ("8", "16", "32" or "64").
// NOTE(review): the signature line (listing line 193) is absent from this listing.
194 {
195  switch(dt)
196  {
197  case DataType::U8:
198  case DataType::S8:
199  case DataType::QSYMM8:
200  case DataType::QASYMM8:
   // NOTE(review): listing lines 201 and 202 are absent here; further case labels may
   // belong to this group - confirm against the original file.
203  return "8";
204  case DataType::U16:
205  case DataType::S16:
206  case DataType::QSYMM16:
207  case DataType::QASYMM16:
208  case DataType::F16:
209  return "16";
210  case DataType::U32:
211  case DataType::S32:
212  case DataType::F32:
213  return "32";
214  case DataType::U64:
215  case DataType::S64:
216  return "64";
217  default:
   // Unlike the type-name helpers in this file, the value returned after the
   // error report is "0" rather than an empty string.
218  ARM_COMPUTE_ERROR("Unsupported input data type.");
219  return "0";
220  }
221 }
222 
223 GPUTarget get_target_from_device(const cl::Device &device)
224 {
225  // Query device name size
226  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
227 
228  return get_target_from_name(device_name);
229 }
230 
231 bool arm_non_uniform_workgroup_supported(const cl::Device &device)
232 {
233  return device_supports_extension(device, "cl_arm_non_uniform_work_group_size");
234 }
235 
236 bool fp16_supported(const cl::Device &device)
237 {
238  return device_supports_extension(device, "cl_khr_fp16");
239 }
240 
241 bool dot8_supported(const cl::Device &device)
242 {
243  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
244  const GPUTarget gpu_target = get_target_from_name(device_name);
245 
246  // SW_WORKAROUND: Workaround for DDK revision r14p0.to enable cl_arm_integer_dot_product_int8
247  std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 };
248  return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
249 }
250 
251 bool dot8_acc_supported(const cl::Device &device)
252 {
253  return device_supports_extension(device, "cl_arm_integer_dot_product_accumulate_int8");
254 }
255 
256 CLVersion get_cl_version(const cl::Device &device)
257 {
258  std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
259  if(version_str.find("OpenCL 3") != std::string::npos)
260  {
261  return CLVersion::CL30;
262  }
263  else if(version_str.find("OpenCL 2") != std::string::npos)
264  {
265  return CLVersion::CL20;
266  }
267  else if(version_str.find("OpenCL 1.2") != std::string::npos)
268  {
269  return CLVersion::CL12;
270  }
271  else if(version_str.find("OpenCL 1.1") != std::string::npos)
272  {
273  return CLVersion::CL11;
274  }
275  else if(version_str.find("OpenCL 1.0") != std::string::npos)
276  {
277  return CLVersion::CL10;
278  }
279 
280  return CLVersion::UNKNOWN;
281 }
282 
283 bool device_supports_extension(const cl::Device &device, const char *extension_name)
284 {
285  std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
286  auto pos = extensions.find(extension_name);
287  return (pos != std::string::npos);
288 }
289 
// Checks whether the Winograd configuration given by the output tile and the kernel
// size is contained in the list of configurations for the requested data layout.
//
// output_tile : output tile size (width, height)
// kernel_size : kernel size (width, height)
// data_layout : selects which of the two tables below is searched
//
// Returns true if the (output tile, kernel size) pair is found in the selected table.
290 bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
291 {
   // NOTE(review): listing line 292 is absent here; a statement may be missing.
293 
   // Each configuration is ((tile width, tile height), (kernel width, kernel height)),
   // matching how the lookup key `p` is built further down.
294  using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
295 
   // Configurations searched in the first branch below
296  std::vector<WinogradConfiguration> winograd_configs_nchw =
297  {
298  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
299  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
300  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
301  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
302  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
303  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
304  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
305  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
306  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
307  };
308 
   // Configurations searched in the else branch below
309  std::vector<WinogradConfiguration> winograd_configs_nhwc =
310  {
311  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
312  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
313  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
314  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
315  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
316  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
317  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
318  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
319  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
320  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
321  };
322 
   // Lookup key; the size_t members of Size2D are converted to int here
323  auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
324  std::pair<int, int>(kernel_size.width, kernel_size.height));
325 
326  // Return true if supported
   // NOTE(review): listing line 327 (the condition opening this branch) is absent;
   // presumably it tests data_layout against the NCHW layout - confirm.
328  {
329  return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
330  }
331  else
332  {
333  return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
334  }
335 }
336 
// Returns the preferred vector width that `device` reports for the built-in scalar
// type corresponding to the data type `dt`.
//
// device : OpenCL device to query
// dt     : data type whose preferred vector width is requested
//
// Returns the value of the matching CL_DEVICE_PREFERRED_VECTOR_WIDTH_* query, or 1
// for a data type without a case below.
337 size_t preferred_vector_width(const cl::Device &device, const DataType dt)
338 {
339  switch(dt)
340  {
341  case DataType::U8:
342  case DataType::S8:
343  case DataType::QASYMM8:
   // NOTE(review): listing lines 344 and 346 are absent here; further case labels may
   // belong to this group - confirm against the original file.
345  case DataType::QSYMM8:
347  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
348  case DataType::U16:
349  case DataType::S16:
350  case DataType::QSYMM16:
351  case DataType::QASYMM16:
352  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
353  case DataType::U32:
354  case DataType::S32:
355  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
   // F16 shares the FLOAT query with F32
356  case DataType::F16:
357  case DataType::F32:
358  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
359  case DataType::U64:
360  case DataType::S64:
361  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
362  default:
   // No error is raised for other data types; a width of 1 is returned
363  return 1;
364  }
365 }
366 
367 bool preferred_dummy_work_items_support(const cl::Device &device)
368 {
369  ARM_COMPUTE_UNUSED(device);
370  // TODO (COMPMID-2044)
371  return true;
372 }
373 
374 bool image2d_from_buffer_supported(const cl::Device &device)
375 {
376  return device_supports_extension(device, "cl_khr_image2d_from_buffer");
377 }
378 
379 size_t get_cl_image_pitch_alignment(const cl::Device &device)
380 {
381  cl_uint pixel_aligment = 0;
382 
383  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr);
384 
385  if(err == CL_SUCCESS)
386  {
387  return pixel_aligment;
388  }
389  else
390  {
391  return 0;
392  }
393 }
394 
395 bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
396 {
397  cl_bool supported = CL_FALSE;
398 
399  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), &supported, nullptr);
400 
401  return (err == CL_SUCCESS && supported == CL_TRUE);
402 }
403 
// Creates an OpenCL kernel using a compile context.
//
// ctx         : compile context whose create_kernel() performs the actual creation
// kernel_name : name of the kernel to create
// build_opts  : build options forwarded unchanged to the compile context
//
// Returns the kernel produced by ctx.create_kernel(), converted to cl::Kernel.
404 cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
405 {
   // NOTE(review): listing line 406 is absent here; it presumably declares `klib`,
   // the kernel library object used below - confirm against the original file.
407 
   // Look up the program that contains the kernel, then that program's source
   // (or binary) and the path where the kernels reside
408  const std::string program_name = klib.program_name(kernel_name);
409  auto kernel_src = klib.program(program_name);
410  const std::string kernel_path = klib.kernel_path();
411 
412  return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, build_opts, kernel_src.is_binary));
413 }
414 
415 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
416 {
417  const unsigned int width_leftover = input_dimension % vector_size;
418  const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
419  const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
420  return cl::NDRange(std::min(8U, num_of_threads));
421 }
422 
423 bool get_wbsm_support_info(const cl::Device &device)
424 {
425  cl_bitfield capabilities = 0;
426  cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
427  if((err == CL_SUCCESS) && (capabilities & CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM))
428  {
429  return true;
430  }
431  return false;
432 }
433 
434 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
435 {
436  cl_int err = clSetKernelExecInfo(kernel.get(),
437  CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM,
438  sizeof(cl_int),
439  &wbsm_hint);
440  ARM_COMPUTE_UNUSED(err);
441  ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
442 }
443 
// Body of export_to_cl_image(const ITensorInfo *tensor): returns true only when
// every check below passes, false as soon as one of them fails.
// NOTE(review): the signature line (listing line 444) is absent from this listing.
445 {
   // Dimension 0 of the shape must be a multiple of 4
446  if(tensor->tensor_shape()[0] % 4 != 0)
447  {
448  return false;
449  }
450 
451  // If not floating point
452  if(!is_data_type_float(tensor->data_type()))
453  {
454  return false;
455  }
456 
457  // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
   // NOTE(review): listing line 458 (the condition guarding this return) is absent;
   // presumably it calls image2d_from_buffer_supported() - confirm.
459  {
460  return false;
461  }
462 
463  // Check cl image pitch alignment
464  if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
465  {
466  return false;
467  }
468 
   // Image width is dimension 0 divided by 4 (presumably four elements per image
   // pixel - confirm); image height is the product of all remaining dimensions.
469  const size_t image_w = tensor->tensor_shape()[0] / 4;
470  const size_t image_h = tensor->tensor_shape().total_size() / tensor->tensor_shape()[0];
471  const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
472  const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
473 
   // The resulting image must fit within the device's 2D image size limits
474  if(image_w > max_image_w || image_h > max_image_h)
475  {
476  return false;
477  }
478 
479  return true;
480 }
481 
482 void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values)
483 {
484  for(const int value : values)
485  {
486  if(value > max_manual_loop_unrolling)
487  {
488  built_opts.add_option("-DUNROLL_WITH_PRAGMA");
489  return;
490  }
491  }
492 }
493 
494 bool arm_matrix_multiply_supported(const cl::Device &device)
495 {
496  return device_supports_extension(device, "cl_arm_matrix_multiply");
497 }
498 } // namespace arm_compute
arm_compute::DataLayout::NCHW
@ NCHW
Num samples, channels, height, width.
arm_compute::DataType::QSYMM8_PER_CHANNEL
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
arm_compute::DataType::QASYMM16
@ QASYMM16
quantized, asymmetric fixed-point 16-bit number
DataTypeUtils.h
arm_compute::dot8_supported
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
Definition: CLHelpers.cpp:241
arm_compute::DataType::U64
@ U64
unsigned 64-bit number
arm_compute::preferred_vector_width
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
Definition: CLHelpers.cpp:337
arm_compute::CLVersion::CL10
@ CL10
arm_compute::export_to_cl_image
bool export_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:444
arm_compute::opencl::ClKernelLibrary
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
Definition: ClKernelLibrary.h:41
CLTypes.h
arm_compute::fp16_supported
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
Definition: CLHelpers.cpp:236
arm_compute::DataLayout
DataLayout
[DataLayout enum definition]
Definition: CoreTypes.h:109
arm_compute::preferred_dummy_work_items_support
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange In case d...
Definition: CLHelpers.cpp:367
arm_compute::get_cl_dot8_acc_type_from_data_type
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
Definition: CLHelpers.cpp:175
arm_compute::get_cl_signed_type_from_element_size
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to a signed integer data type.
Definition: CLHelpers.cpp:124
arm_compute::DataType::QASYMM8
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
arm_compute::DataType::U16
@ U16
unsigned 16-bit number
arm_compute::CLVersion::CL30
@ CL30
arm_compute::CLVersion::CL11
@ CL11
arm_compute::opencl::ClKernelLibrary::program
ClProgramInfo program(const std::string &program_name) const
Gets the source of the selected program.
Definition: ClKernelLibrary.cpp:1041
arm_compute::DataType::QSYMM8
@ QSYMM8
quantized, symmetric fixed-point 8-bit number
Types.h
arm_compute::arm_matrix_multiply_supported
bool arm_matrix_multiply_supported(const cl::Device &device)
Helper function to check whether the cl_arm_matrix_multiply extension is supported.
Definition: CLHelpers.cpp:494
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:353
arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
arm_compute::set_wbsm
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
Definition: CLHelpers.cpp:434
arm_compute::image2d_from_buffer_supported
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:374
arm_compute::Size2D::height
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:91
arm_compute::get_cl_select_type_from_data_type
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
Definition: CLHelpers.cpp:142
arm_compute::get_wbsm_support_info
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:423
arm_compute::CLVersion
CLVersion
Available OpenCL Version.
Definition: CLTypes.h:39
arm_compute::get_target_from_name
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
Definition: GPUTarget.cpp:186
Error.h
arm_compute::DataType::S8
@ S8
signed 8-bit number
arm_compute::CLKernelLibrary::get
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Definition: CLKernelLibrary.cpp:39
CLKernelLibrary.h
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
arm_compute::DataType::QSYMM16
@ QSYMM16
quantized, symmetric fixed-point 16-bit number
arm_compute::test::validation::data_layout
const auto data_layout
Definition: ConvolutionLayer.cpp:406
arm_compute::opencl::ClKernelLibrary::program_name
std::string program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
Definition: ClKernelLibrary.cpp:1015
arm_compute::utils::cast::U
U
Definition: SaturateCast.h:64
ClKernelLibrary.h
Log.h
ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:467
arm_compute::get_data_size_from_data_type
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:193
arm_compute::DataType::U32
@ U32
unsigned 32-bit number
arm_compute::Size2D::width
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:90
arm_compute::CLBuildOptions::add_option
void add_option(std::string option)
Adds option to the existing build option list.
Definition: CLCompileContext.cpp:41
arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:204
ClCompileContext.h
arm_compute::DataType::U8
@ U8
unsigned 8-bit number
arm_compute::DataType::S16
@ S16
signed 16-bit number
arm_compute::create_kernel
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:404
arm_compute::DataType::QASYMM8_SIGNED
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
arm_compute::get_cl_promoted_type_from_data_type
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:76
arm_compute::get_cl_non_uniform_work_group_supported
bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
Helper function to check whether non-uniform work group is supported.
Definition: CLHelpers.cpp:395
arm_compute::opencl::ClKernelLibrary::get
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
Definition: ClKernelLibrary.cpp:1009
arm_compute::CLCompileContext::create_kernel
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
Definition: CLCompileContext.cpp:159
ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
dt
DataType dt
Definition: NEBatchNormalizationLayerKernel.cpp:51
arm_compute::get_target_from_device
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:223
tensor
CLTensor * tensor
Pointer to the auxiliary tensor.
Definition: ClWorkloadRuntime.cpp:66
arm_compute::create_lws_hint_parallel_implementations
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Definition: CLHelpers.cpp:415
arm_compute::opencl::ClKernelLibrary::kernel_path
const std::string & kernel_path() const
Gets the path that the kernels reside in.
Definition: ClKernelLibrary.cpp:1036
arm_compute::get_cl_type_from_data_type
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:40
arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
arm_compute::GPUTarget::G76
@ G76
arm_compute::get_cl_unsigned_type_from_element_size
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
Definition: CLHelpers.cpp:106
arm_compute::CLVersion::CL20
@ CL20
arm_compute::get_cl_version
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
Definition: CLHelpers.cpp:256
arm_compute::DataType::S64
@ S64
signed 64-bit number
arm_compute::device_supports_extension
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
Definition: CLHelpers.cpp:283
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::DataType::F16
@ F16
16-bit floating-point number
arm_compute::UNKNOWN
@ UNKNOWN
Unknown CL kernel type.
Definition: CLTypes.h:82
arm_compute::arm_non_uniform_workgroup_supported
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
Definition: CLHelpers.cpp:231
arm_compute::is_data_type_float
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: DataTypeUtils.h:304
arm_compute::get_cl_image_pitch_alignment
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:379
arm_compute::DataType::S32
@ S32
signed 32-bit number
arm_compute::cl_winograd_convolution_layer_supported
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile,...
Definition: CLHelpers.cpp:290
arm_compute::DataLayout::UNKNOWN
@ UNKNOWN
Unknown data layout.
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:43
clSetKernelExecInfo
cl_int clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
Definition: OpenCL.cpp:1069
arm_compute::DataType::F32
@ F32
32-bit floating-point number
arm_compute::CLVersion::CL12
@ CL12
arm_compute::CLBuildOptions
Build options.
Definition: CLCompileContext.h:38
arm_compute::DataType
DataType
Available data types.
Definition: CoreTypes.h:82
clGetDeviceInfo
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: OpenCL.cpp:886
arm_compute::set_unroll_with_pragma
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list< int > values)
Definition: CLHelpers.cpp:482
CLHelpers.h
arm_compute::dot8_acc_supported
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supporte...
Definition: CLHelpers.cpp:251
kernel_name
std::string kernel_name
Definition: ClIm2ColKernel.cpp:57
arm_compute::CLKernelLibrary::get_device
const cl::Device & get_device()
Gets the CL device for which the programs are created.
Definition: CLKernelLibrary.cpp:69