Compute Library
 21.08
CLHelpers.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Log.h"
29 #include "arm_compute/core/Types.h"
31 
33 
34 #include <utility>
35 #include <vector>
36 
37 namespace arm_compute
38 {
40 {
41  switch(dt)
42  {
43  case DataType::U8:
44  case DataType::QASYMM8:
45  return "uchar";
46  case DataType::S8:
48  case DataType::QSYMM8:
50  return "char";
51  case DataType::U16:
52  case DataType::QASYMM16:
53  return "ushort";
54  case DataType::S16:
55  case DataType::QSYMM16:
56  return "short";
57  case DataType::U32:
58  return "uint";
59  case DataType::S32:
60  return "int";
61  case DataType::U64:
62  return "ulong";
63  case DataType::S64:
64  return "long";
65  case DataType::F16:
66  return "half";
67  case DataType::F32:
68  return "float";
69  default:
70  ARM_COMPUTE_ERROR("Unsupported input data type.");
71  return "";
72  }
73 }
74 
76 {
77  switch(dt)
78  {
79  case DataType::U8:
80  case DataType::QASYMM8:
81  return "ushort";
82  case DataType::S8:
84  case DataType::QSYMM8:
86  return "short";
87  case DataType::U16:
88  case DataType::QASYMM16:
89  return "uint";
90  case DataType::S16:
91  case DataType::QSYMM16:
92  return "int";
93  case DataType::U32:
94  return "ulong";
95  case DataType::S32:
96  return "long";
97  case DataType::F16:
98  return "float";
99  default:
100  ARM_COMPUTE_ERROR("Cannot get promoted OpenCL type for the input data type.");
101  return "";
102  }
103 }
104 
105 std::string get_cl_unsigned_type_from_element_size(size_t element_size)
106 {
107  switch(element_size)
108  {
109  case 1:
110  return "uchar";
111  case 2:
112  return "ushort";
113  case 4:
114  return "uint";
115  case 8:
116  return "ulong";
117  default:
118  ARM_COMPUTE_ERROR("Data type not supported");
119  return "";
120  }
121 }
122 
123 std::string get_cl_signed_type_from_element_size(size_t element_size)
124 {
125  switch(element_size)
126  {
127  case 1:
128  return "char";
129  case 2:
130  return "short";
131  case 4:
132  return "int";
133  case 8:
134  return "long";
135  default:
136  ARM_COMPUTE_ERROR("Data type not supported");
137  return "";
138  }
139 }
140 
142 {
143  switch(dt)
144  {
145  case DataType::U8:
146  case DataType::QASYMM8:
147  return "uchar";
148  case DataType::S8:
150  case DataType::QSYMM8:
152  return "char";
153  case DataType::U16:
154  case DataType::QASYMM16:
155  return "ushort";
156  case DataType::F16:
157  case DataType::S16:
158  case DataType::QSYMM16:
159  return "short";
160  case DataType::U32:
161  return "uint";
162  case DataType::F32:
163  case DataType::S32:
164  return "int";
165  case DataType::U64:
166  return "ulong";
167  case DataType::S64:
168  return "long";
169  default:
170  ARM_COMPUTE_ERROR("Unsupported input data type.");
171  return "";
172  }
173 }
174 
176 {
177  switch(dt)
178  {
179  case DataType::U8:
180  case DataType::QASYMM8:
181  return "uint";
182  case DataType::S8:
184  case DataType::QSYMM8:
186  return "int";
187  default:
188  ARM_COMPUTE_ERROR("Unsupported data type.");
189  return "";
190  }
191 }
192 
194 {
195  switch(dt)
196  {
197  case DataType::U8:
198  case DataType::S8:
199  case DataType::QSYMM8:
200  case DataType::QASYMM8:
203  return "8";
204  case DataType::U16:
205  case DataType::S16:
206  case DataType::QSYMM16:
207  case DataType::QASYMM16:
208  case DataType::F16:
209  return "16";
210  case DataType::U32:
211  case DataType::S32:
212  case DataType::F32:
213  return "32";
214  case DataType::U64:
215  case DataType::S64:
216  return "64";
217  default:
218  ARM_COMPUTE_ERROR("Unsupported input data type.");
219  return "0";
220  }
221 }
222 
223 GPUTarget get_target_from_device(const cl::Device &device)
224 {
225  // Query device name size
226  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
227 
228  return get_target_from_name(device_name);
229 }
230 
231 bool arm_non_uniform_workgroup_supported(const cl::Device &device)
232 {
233  return device_supports_extension(device, "cl_arm_non_uniform_work_group_size");
234 }
235 
236 bool fp16_supported(const cl::Device &device)
237 {
238  return device_supports_extension(device, "cl_khr_fp16");
239 }
240 
241 bool dot8_supported(const cl::Device &device)
242 {
243  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
244  const GPUTarget gpu_target = get_target_from_name(device_name);
245 
246  // SW_WORKAROUND: Workaround for DDK revision r14p0.to enable cl_arm_integer_dot_product_int8
247  std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 };
248  return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
249 }
250 
251 bool dot8_acc_supported(const cl::Device &device)
252 {
253  return device_supports_extension(device, "cl_arm_integer_dot_product_accumulate_int8");
254 }
255 
256 CLVersion get_cl_version(const cl::Device &device)
257 {
258  std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
259  if(version_str.find("OpenCL 2") != std::string::npos)
260  {
261  return CLVersion::CL20;
262  }
263  else if(version_str.find("OpenCL 1.2") != std::string::npos)
264  {
265  return CLVersion::CL12;
266  }
267  else if(version_str.find("OpenCL 1.1") != std::string::npos)
268  {
269  return CLVersion::CL11;
270  }
271  else if(version_str.find("OpenCL 1.0") != std::string::npos)
272  {
273  return CLVersion::CL10;
274  }
275 
276  return CLVersion::UNKNOWN;
277 }
278 
279 bool device_supports_extension(const cl::Device &device, const char *extension_name)
280 {
281  std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
282  auto pos = extensions.find(extension_name);
283  return (pos != std::string::npos);
284 }
285 
286 bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
287 {
289 
290  using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
291 
292  std::vector<WinogradConfiguration> winograd_configs_nchw =
293  {
294  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
295  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
296  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
297  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
298  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
299  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
300  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
301  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
302  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
303  };
304 
305  std::vector<WinogradConfiguration> winograd_configs_nhwc =
306  {
307  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
308  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
309  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
310  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
311  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
312  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
313  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
314  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
315  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
316  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
317  };
318 
319  auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
320  std::pair<int, int>(kernel_size.width, kernel_size.height));
321 
322  // Return true if supported
323  if(data_layout == DataLayout::NCHW)
324  {
325  return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
326  }
327  else
328  {
329  return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
330  }
331 }
332 
333 size_t preferred_vector_width(const cl::Device &device, const DataType dt)
334 {
335  switch(dt)
336  {
337  case DataType::U8:
338  case DataType::S8:
339  case DataType::QASYMM8:
341  case DataType::QSYMM8:
343  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
344  case DataType::U16:
345  case DataType::S16:
346  case DataType::QSYMM16:
347  case DataType::QASYMM16:
348  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
349  case DataType::U32:
350  case DataType::S32:
351  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
352  case DataType::F16:
353  case DataType::F32:
354  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
355  case DataType::U64:
356  case DataType::S64:
357  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
358  default:
359  return 1;
360  }
361 }
362 
363 bool preferred_dummy_work_items_support(const cl::Device &device)
364 {
365  ARM_COMPUTE_UNUSED(device);
366  // TODO (COMPMID-2044)
367  return true;
368 }
369 
370 bool image2d_from_buffer_supported(const cl::Device &device)
371 {
372  return device_supports_extension(device, "cl_khr_image2d_from_buffer");
373 }
374 
375 size_t get_cl_image_pitch_alignment(const cl::Device &device)
376 {
377  cl_uint pixel_aligment = 0;
378 
379  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr);
380 
381  if(err == CL_SUCCESS)
382  {
383  return pixel_aligment;
384  }
385  else
386  {
387  return 0;
388  }
389 }
390 
391 cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
392 {
394 
395  const std::string program_name = klib.program_name(kernel_name);
396  auto kernel_src = klib.program(program_name);
397  const std::string kernel_path = klib.kernel_path();
398 
399  return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, build_opts, kernel_src.is_binary));
400 }
401 
402 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
403 {
404  const unsigned int width_leftover = input_dimension % vector_size;
405  const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
406  const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
407  return cl::NDRange(std::min(8U, num_of_threads));
408 }
409 
410 bool get_wbsm_support_info(const cl::Device &device)
411 {
412  cl_bitfield capabilities = 0;
413  cl_int err = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
414  if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM))
415  {
416  return true;
417  }
418  return false;
419 }
420 
421 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
422 {
423  cl_int err = clSetKernelExecInfo(kernel.get(),
425  sizeof(cl_int),
426  &wbsm_hint);
427  ARM_COMPUTE_UNUSED(err);
428  ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
429 }
430 
432 {
433  if(tensor->tensor_shape()[0] % 4)
434  {
435  return false;
436  }
437 
438  // If not floating point
439  if(!is_data_type_float(tensor->data_type()))
440  {
441  return false;
442  }
443 
444  // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
446  {
447  return false;
448  }
449 
450  // Check cl image pitch alignment
452  {
453  return false;
454  }
455 
456  const size_t image_w = tensor->tensor_shape()[0] / 4;
457  const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
458  const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
459  const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
460 
461  if(image_w > max_image_w || image_h > max_image_h)
462  {
463  return false;
464  }
465 
466  return true;
467 }
468 
469 void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values)
470 {
471  for(const int value : values)
472  {
473  if(value > max_manual_loop_unrolling)
474  {
475  built_opts.add_option("-DUNROLL_WITH_PRAGMA");
476  return;
477  }
478  }
479 }
480 
481 } // namespace arm_compute
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported.
Definition: CLHelpers.cpp:251
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:370
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
Definition: CLHelpers.cpp:241
quantized, symmetric fixed-point 16-bit number
bool export_weights_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:431
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list< int > values)
Definition: CLHelpers.cpp:469
cl_int clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
Definition: OpenCL.cpp:1020
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
Definition: CLHelpers.cpp:236
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
Definition: CLHelpers.cpp:421
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange In case d...
Definition: CLHelpers.cpp:363
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
Definition: CLHelpers.cpp:141
1 channel, 1 U8 per channel
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
Definition: CLHelpers.cpp:175
CLVersion
Available OpenCL Version.
Definition: CLTypes.h:39
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to a signed integer data type.
Definition: CLHelpers.cpp:123
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
Definition: CLHelpers.cpp:333
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
Definition: GPUTarget.cpp:145
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:410
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
const DataLayout data_layout
Definition: Im2Col.cpp:151
ClProgramInfo program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
std::string program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
quantized, asymmetric fixed-point 16-bit number
1 channel, 1 U16 per channel
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:90
1 channel, 1 F16 per channel
#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM
Definition: CLHelpers.h:35
const std::string & kernel_path() const
Gets the path that the kernels reside in.
DataType dt
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
signed 64-bit number
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: OpenCL.cpp:837
#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM
Definition: CLHelpers.h:37
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:391
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:193
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:223
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Definition: CLHelpers.cpp:402
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
1 channel, 1 S16 per channel
quantized, symmetric fixed-point 8-bit number
Num samples, channels, height, width.
CLCompileContext class.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:375
quantized, symmetric per channel fixed-point 8-bit number
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
Definition: CLHelpers.cpp:256
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:75
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
Definition: CLHelpers.cpp:279
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:89
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
Definition: CLHelpers.cpp:105
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
Definition: CLHelpers.cpp:231
quantized, asymmetric fixed-point 8-bit number signed
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL.
Definition: CLHelpers.cpp:286
unsigned 64-bit number
std::string kernel_name
DataType
Available data types.
Definition: Types.h:77
DataLayout
[DataLayout enum definition]
Definition: Types.h:111
signed 8-bit number
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:961
const cl::Device & get_device()
Gets the CL device for which the programs are created.