Compute Library
 20.08
CLHelpers.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Log.h"
30 #include "arm_compute/core/Types.h"
31 
32 #include <utility>
33 #include <vector>
34 
35 namespace arm_compute
36 {
/** Translates a tensor data type to the name of the corresponding OpenCL scalar type.
 *
 * Quantized types share the OpenCL type of the plain integer type they are grouped
 * with in the switch below (e.g. QASYMM8 with U8, QSYMM16 with S16).
 *
 * @param[in] dt Data type to translate.
 *
 * @return The OpenCL type name as a string. Data types without a case label reach
 *         ARM_COMPUTE_ERROR; the trailing return of an empty string only matters if
 *         that macro returns control.
 */
37 std::string get_cl_type_from_data_type(const DataType &dt)
38 {
39  switch(dt)
40  {
41  case DataType::U8:
42  case DataType::QASYMM8:
43  return "uchar";
44  case DataType::S8:
 // NOTE(review): listing lines 45 and 47 are absent from this extract, so further
 // case labels may belong to this "char" group - confirm against the original file.
46  case DataType::QSYMM8:
48  return "char";
49  case DataType::U16:
50  case DataType::QASYMM16:
51  return "ushort";
52  case DataType::S16:
53  case DataType::QSYMM16:
54  return "short";
55  case DataType::U32:
56  return "uint";
57  case DataType::S32:
58  return "int";
59  case DataType::U64:
60  return "ulong";
61  case DataType::S64:
62  return "long";
63  case DataType::F16:
64  return "half";
65  case DataType::F32:
66  return "float";
67  default:
68  ARM_COMPUTE_ERROR("Unsupported input data type.");
69  return "";
70  }
71 }
72 
// NOTE(review): the signature line (listing line 73) is missing from this extract. The
// cross-reference index at the end of this listing places
// "std::string get_cl_promoted_type_from_data_type(const DataType &dt)" at CLHelpers.cpp:73,
// which presumably is the header of this body - confirm against the original file.
//
// Maps each handled data type to the name of a wider OpenCL type: 8-bit integer types
// map to 16-bit names, 16-bit to 32-bit, 32-bit to 64-bit, and F16 maps to "float".
// Any other data type reaches ARM_COMPUTE_ERROR.
74 {
75  switch(dt)
76  {
77  case DataType::U8:
78  case DataType::QASYMM8:
79  return "ushort";
80  case DataType::S8:
 // NOTE(review): listing lines 81 and 83 are absent from this extract, so further
 // case labels may belong to this "short" group - confirm against the original file.
82  case DataType::QSYMM8:
84  return "short";
85  case DataType::U16:
86  case DataType::QASYMM16:
87  return "uint";
88  case DataType::S16:
89  case DataType::QSYMM16:
90  return "int";
91  case DataType::U32:
92  return "ulong";
93  case DataType::S32:
94  return "long";
95  case DataType::F16:
96  return "float";
97  default:
98  ARM_COMPUTE_ERROR("Cannot get promoted OpenCL type for the input data type.");
99  return "";
100  }
101 }
102 
103 std::string get_cl_unsigned_type_from_element_size(size_t element_size)
104 {
105  switch(element_size)
106  {
107  case 1:
108  return "uchar";
109  case 2:
110  return "ushort";
111  case 4:
112  return "uint";
113  case 8:
114  return "ulong";
115  default:
116  ARM_COMPUTE_ERROR("Data type not supported");
117  return "";
118  }
119 }
120 
121 std::string get_cl_signed_type_from_element_size(size_t element_size)
122 {
123  switch(element_size)
124  {
125  case 1:
126  return "char";
127  case 2:
128  return "short";
129  case 4:
130  return "int";
131  case 8:
132  return "long";
133  default:
134  ARM_COMPUTE_ERROR("Data type not supported");
135  return "";
136  }
137 }
138 
// NOTE(review): the signature line (listing line 139) is missing from this extract. The
// cross-reference index at the end of this listing places
// "std::string get_cl_select_type_from_data_type(const DataType &dt)" at CLHelpers.cpp:139,
// which presumably is the header of this body - confirm against the original file.
//
// Maps each handled data type to the name of an OpenCL integer type. Integer and quantized
// types keep their own integer type name; the floating-point types are grouped with the
// signed integer labels (F16 with S16 -> "short", F32 with S32 -> "int").
// Any other data type reaches ARM_COMPUTE_ERROR.
140 {
141  switch(dt)
142  {
143  case DataType::U8:
144  case DataType::QASYMM8:
145  return "uchar";
146  case DataType::S8:
 // NOTE(review): listing lines 147 and 149 are absent from this extract, so further
 // case labels may belong to this "char" group - confirm against the original file.
148  case DataType::QSYMM8:
150  return "char";
151  case DataType::U16:
152  case DataType::QASYMM16:
153  return "ushort";
154  case DataType::F16:
155  case DataType::S16:
156  case DataType::QSYMM16:
157  return "short";
158  case DataType::U32:
159  return "uint";
160  case DataType::F32:
161  case DataType::S32:
162  return "int";
163  case DataType::U64:
164  return "ulong";
165  case DataType::S64:
166  return "long";
167  default:
168  ARM_COMPUTE_ERROR("Unsupported input data type.");
169  return "";
170  }
171 }
172 
// NOTE(review): the signature line (listing line 173) is missing from this extract. The
// cross-reference index at the end of this listing places
// "std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)" at CLHelpers.cpp:173,
// which presumably is the header of this body - confirm against the original file.
//
// Maps the handled 8-bit data types to the name of a 32-bit OpenCL integer type:
// "uint" for the unsigned group and "int" for the signed group.
// Any other data type reaches ARM_COMPUTE_ERROR.
174 {
175  switch(dt)
176  {
177  case DataType::U8:
178  case DataType::QASYMM8:
179  return "uint";
180  case DataType::S8:
 // NOTE(review): listing lines 181 and 183 are absent from this extract, so further
 // case labels may belong to this "int" group - confirm against the original file.
182  case DataType::QSYMM8:
184  return "int";
185  default:
186  ARM_COMPUTE_ERROR("Unsupported data type.");
187  return "";
188  }
189 }
190 
// NOTE(review): the signature line (listing line 191) is missing from this extract. The
// cross-reference index at the end of this listing places
// "std::string get_data_size_from_data_type(const DataType &dt)" at CLHelpers.cpp:191,
// which presumably is the header of this body - confirm against the original file.
//
// Returns the width of the data type in bits as a decimal string ("8", "16", "32" or "64").
// Any other data type reaches ARM_COMPUTE_ERROR; the fallback value after it is "0",
// not an empty string.
192 {
193  switch(dt)
194  {
195  case DataType::U8:
196  case DataType::S8:
197  case DataType::QSYMM8:
198  case DataType::QASYMM8:
 // NOTE(review): listing lines 199 and 200 are absent from this extract, so further
 // case labels may belong to this "8" group - confirm against the original file.
201  return "8";
202  case DataType::U16:
203  case DataType::S16:
204  case DataType::QSYMM16:
205  case DataType::QASYMM16:
206  case DataType::F16:
207  return "16";
208  case DataType::U32:
209  case DataType::S32:
210  case DataType::F32:
211  return "32";
212  case DataType::U64:
213  case DataType::S64:
214  return "64";
215  default:
216  ARM_COMPUTE_ERROR("Unsupported input data type.");
217  return "0";
218  }
219 }
220 
221 GPUTarget get_target_from_device(const cl::Device &device)
222 {
223  // Query device name size
224  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
225 
226  return get_target_from_name(device_name);
227 }
228 
229 bool arm_non_uniform_workgroup_supported(const cl::Device &device)
230 {
231  return device_supports_extension(device, "cl_arm_non_uniform_work_group_size");
232 }
233 
234 bool fp16_supported(const cl::Device &device)
235 {
236  return device_supports_extension(device, "cl_khr_fp16");
237 }
238 
239 bool dot8_supported(const cl::Device &device)
240 {
241  std::string device_name = device.getInfo<CL_DEVICE_NAME>();
242  const GPUTarget gpu_target = get_target_from_name(device_name);
243 
244  // SW_WORKAROUND: Workaround for DDK revision r14p0.to enable cl_arm_integer_dot_product_int8
245  std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 };
246  return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
247 }
248 
249 bool dot8_acc_supported(const cl::Device &device)
250 {
251  return device_supports_extension(device, "cl_arm_integer_dot_product_accumulate_int8");
252 }
253 
254 CLVersion get_cl_version(const cl::Device &device)
255 {
256  std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
257  if(version_str.find("OpenCL 2") != std::string::npos)
258  {
259  return CLVersion::CL20;
260  }
261  else if(version_str.find("OpenCL 1.2") != std::string::npos)
262  {
263  return CLVersion::CL12;
264  }
265  else if(version_str.find("OpenCL 1.1") != std::string::npos)
266  {
267  return CLVersion::CL11;
268  }
269  else if(version_str.find("OpenCL 1.0") != std::string::npos)
270  {
271  return CLVersion::CL10;
272  }
273 
274  return CLVersion::UNKNOWN;
275 }
276 
277 bool device_supports_extension(const cl::Device &device, const char *extension_name)
278 {
279  std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
280  auto pos = extensions.find(extension_name);
281  return (pos != std::string::npos);
282 }
283 
/** Checks whether an (output tile, kernel size) pair is one of the Winograd configurations
 *  listed for a data layout.
 *
 * Two fixed tables are built, one named for NCHW and one for NHWC. Each entry is
 * ((tile width, tile height), (kernel width, kernel height)), which is the same layout as
 * the lookup key built from @p output_tile and @p kernel_size.
 *
 * @param[in] output_tile Output tile size.
 * @param[in] kernel_size Kernel size.
 * @param[in] data_layout Data layout; presumably selects which table is searched (the
 *                        selecting condition is missing from this extract, see below).
 *
 * @return True if the key is found in the searched table.
 */
284 bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
285 {
 // NOTE(review): listing line 286 is absent from this extract - confirm its content
 // against the original file.
287 
288  using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
289 
 // Configurations searched in the first branch below
290  std::vector<WinogradConfiguration> winograd_configs_nchw =
291  {
292  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
293  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
294  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
295  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
296  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
297  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
298  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
299  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
300  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
301  };
302 
 // Configurations searched in the else branch below
303  std::vector<WinogradConfiguration> winograd_configs_nhwc =
304  {
305  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
306  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
307  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
308  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
309  WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
310  WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
311  WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
312  WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
313  WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
314  WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
315  };
316 
 // Lookup key in the same (tile, kernel) layout as the table entries
317  auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
318  std::pair<int, int>(kernel_size.width, kernel_size.height));
319 
320  // Return true if supported
 // NOTE(review): listing line 321, the condition that opens this if/else, is absent from
 // this extract. Since the first branch searches the NCHW table it presumably tests
 // data_layout against the NCHW layout - confirm against the original file.
322  {
323  return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
324  }
325  else
326  {
327  return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
328  }
329 }
330 
/** Returns the device's preferred vector width for the built-in scalar type that backs a data type.
 *
 * The data type selects which CL_DEVICE_PREFERRED_VECTOR_WIDTH_* property is queried:
 * CHAR for the 8-bit group, SHORT for the 16-bit integer group, INT for U32/S32,
 * FLOAT for F16/F32 and LONG for U64/S64.
 *
 * @param[in] device OpenCL device to query.
 * @param[in] dt     Data type whose preferred vector width is requested.
 *
 * @return The value reported by the device, or 1 for data types without a case label.
 */
331 size_t preferred_vector_width(const cl::Device &device, const DataType dt)
332 {
333  switch(dt)
334  {
335  case DataType::U8:
336  case DataType::S8:
337  case DataType::QASYMM8:
 // NOTE(review): listing lines 338 and 340 are absent from this extract, so further
 // case labels may belong to this CHAR group - confirm against the original file.
339  case DataType::QSYMM8:
341  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
342  case DataType::U16:
343  case DataType::S16:
344  case DataType::QSYMM16:
345  case DataType::QASYMM16:
346  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
347  case DataType::U32:
348  case DataType::S32:
349  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
 // NOTE(review): F16 shares the FLOAT query with F32 rather than using a half-specific
 // property - confirm this is intentional.
350  case DataType::F16:
351  case DataType::F32:
352  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
353  case DataType::U64:
354  case DataType::S64:
355  return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
356  default:
357  return 1;
358  }
359 }
360 
361 bool preferred_dummy_work_items_support(const cl::Device &device)
362 {
363  ARM_COMPUTE_UNUSED(device);
364  // TODO (COMPMID-2044)
365  return true;
366 }
367 
368 bool image2d_from_buffer_supported(const cl::Device &device)
369 {
370  return device_supports_extension(device, "cl_khr_image2d_from_buffer");
371 }
372 
373 size_t get_cl_image_pitch_alignment(const cl::Device &device)
374 {
375  cl_uint pixel_aligment = 0;
376 
377  cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr);
378 
379  if(err == CL_SUCCESS)
380  {
381  return pixel_aligment;
382  }
383  else
384  {
385  return 0;
386  }
387 }
388 
389 cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts)
390 {
391  if(ctx && ctx->kernel_library())
392  {
393  // New api going through the core context
394  return static_cast<cl::Kernel>(ctx->kernel_library()->create_kernel(kernel_name, build_opts.options()));
395  }
396  else
397  {
398  // Legacy code through the singleton
399  return static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
400  }
401 }
402 
403 cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
404 {
405  const std::string program_name = CLKernelLibrary::get().get_program_name(kernel_name);
406  std::pair<std::string, bool> kernel_src = CLKernelLibrary::get().get_program(program_name);
407  const std::string kernel_path = CLKernelLibrary::get().get_kernel_path();
408  return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.first, kernel_path, build_opts, kernel_src.second));
409 }
410 
411 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
412 {
413  const unsigned int width_leftover = input_dimension % vector_size;
414  const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
415  const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
416  return cl::NDRange(std::min(8U, num_of_threads));
417 }
418 } // namespace arm_compute
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported.
Definition: CLHelpers.cpp:249
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:368
const DataLayout data_layout
Definition: Im2Col.cpp:146
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
Definition: CLHelpers.cpp:239
quantized, symmetric fixed-point 16-bit number
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
Definition: CLHelpers.cpp:234
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange In case d...
Definition: CLHelpers.cpp:361
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
Definition: CLHelpers.cpp:139
1 channel, 1 U8 per channel
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
Definition: CLHelpers.cpp:173
CLVersion
Available OpenCL Version.
Definition: CLTypes.h:39
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to a signed integer data type.
Definition: CLHelpers.cpp:121
1 channel, 1 F32 per channel
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
Definition: CLHelpers.cpp:331
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
Definition: GPUTarget.cpp:141
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
std::pair< std::string, bool > get_program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
quantized, asymmetric fixed-point 16-bit number
1 channel, 1 U16 per channel
Copyright (c) 2017-2020 Arm Limited.
size_t height
Height of the image region or rectangle.
Definition: Size2D.h:90
1 channel, 1 F16 per channel
1 channel, 1 S32 per channel
signed 64-bit number
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: OpenCL.cpp:800
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:191
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
1 channel, 1 U32 per channel
Core runtime context for OpenCL.
quantized, asymmetric fixed-point 8-bit number unsigned
std::string kernel_name
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
Kernel create_kernel(const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const
Creates a kernel from the kernel library.
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:221
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Definition: CLHelpers.cpp:411
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
std::string get_program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
1 channel, 1 S16 per channel
quantized, symmetric fixed-point 8-bit number
Num samples, channels, height, width.
CLCompileContext class.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:373
quantized, symmetric per channel fixed-point 8-bit number
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
Definition: CLHelpers.cpp:254
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:73
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
Definition: CLHelpers.cpp:277
size_t width
Width of the image region or rectangle.
Definition: Size2D.h:89
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
CLKernelLibrary * kernel_library() const
Kernel Library accessor.
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts)
Creates an opencl kernel.
Definition: CLHelpers.cpp:389
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
Definition: CLHelpers.cpp:103
std::string get_kernel_path()
Gets the path that the kernels reside in.
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
Definition: CLHelpers.cpp:229
quantized, asymmetric fixed-point 8-bit number signed
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile,...
Definition: CLHelpers.cpp:284
unsigned 64-bit number
DataType
Available data types.
Definition: Types.h:77
DataLayout
[DataLayout enum definition]
Definition: Types.h:120
signed 8-bit number