Compute Library
 23.11
CLCompileContext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Utils.h"
30 
31 #include "support/StringSupport.h"
32 
33 #include <regex>
34 
35 namespace arm_compute
36 {
38 {
39 }
40 
41 void CLBuildOptions::add_option(std::string option)
42 {
43  _build_opts.emplace(std::move(option));
44 }
45 
46 void CLBuildOptions::add_option_if(bool cond, std::string option)
47 {
48  if (cond)
49  {
50  add_option(std::move(option));
51  }
52 }
53 
54 void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
55 {
56  (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
57 }
58 
59 void CLBuildOptions::add_options(const StringSet &options)
60 {
61  _build_opts.insert(options.begin(), options.end());
62 }
63 
64 void CLBuildOptions::add_options_if(bool cond, const StringSet &options)
65 {
66  if (cond)
67  {
69  }
70 }
71 
72 const CLBuildOptions::StringSet &CLBuildOptions::options() const
73 {
74  return _build_opts;
75 }
76 
78 {
79  return _build_opts == other._build_opts;
80 }
81 
82 Program::Program() : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
83 {
84 }
85 
86 Program::Program(cl::Context context, std::string name, std::string source)
87  : _context(std::move(context)),
88  _device(),
89  _is_binary(false),
90  _name(std::move(name)),
91  _source(std::move(source)),
92  _binary()
93 {
94 }
95 
96 Program::Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary)
97  : _context(std::move(context)),
98  _device(std::move(device)),
99  _is_binary(true),
100  _name(std::move(name)),
101  _source(),
102  _binary(std::move(binary))
103 {
104 }
105 
106 Program::operator cl::Program() const
107 {
108  if (_is_binary)
109  {
110  return cl::Program(_context, {_device}, {_binary});
111  }
112  else
113  {
114  return cl::Program(_context, _source, false);
115  }
116 }
117 
118 bool Program::build(const cl::Program &program, const std::string &build_options)
119 {
120  try
121  {
122  return program.build(build_options.c_str()) == CL_SUCCESS;
123  }
124  catch (const cl::Error &e)
125  {
126  cl_int err = CL_SUCCESS;
127  const auto build_info = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&err);
128 
129  for (auto &pair : build_info)
130  {
131  std::cerr << pair.second << std::endl;
132  }
133 
134  return false;
135  }
136 }
137 
138 cl::Program Program::build(const std::string &build_options) const
139 {
140  cl::Program cl_program = static_cast<cl::Program>(*this);
141  build(cl_program, build_options);
142  return cl_program;
143 }
144 
145 Kernel::Kernel() : _name(), _kernel()
146 {
147 }
148 
149 Kernel::Kernel(std::string name, const cl::Program &program)
150  : _name(std::move(name)), _kernel(cl::Kernel(program, _name.c_str()))
151 {
152 }
154  : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
155 {
156 }
157 
158 CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
159  : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
160 {
161  _context = std::move(context);
162  _device = CLDevice(device);
163  _is_wbsm_supported = get_wbsm_support_info(device);
164 }
165 
167  const std::string &program_name,
168  const std::string &program_source,
169  const std::string &kernel_path,
170  const StringSet &build_options_set,
171  bool is_binary) const
172 {
173  const std::string build_options = generate_build_options(build_options_set, kernel_path);
174  const std::string built_program_name = program_name + "_" + build_options;
175  auto built_program_it = _built_programs_map.find(built_program_name);
176  cl::Program cl_program;
177 
178  if (_built_programs_map.end() != built_program_it)
179  {
180  // If program has been built, retrieve to create kernel from it
181  cl_program = built_program_it->second;
182  }
183  else
184  {
185  Program program = load_program(program_name, program_source, is_binary);
186 
187  // Build program
188  cl_program = program.build(build_options);
189 
190  // Add built program to internal map
191  _built_programs_map.emplace(built_program_name, cl_program);
192  }
193 
194  // Create and return kernel
195  return Kernel(kernel_name, cl_program);
196 }
197 
198 const Program &
199 CLCompileContext::load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const
200 {
201  const auto program_it = _programs_map.find(program_name);
202 
203  if (program_it != _programs_map.end())
204  {
205  return program_it->second;
206  }
207 
208  Program program;
209 
210 #ifdef EMBEDDED_KERNELS
211  ARM_COMPUTE_UNUSED(is_binary);
212  program = Program(_context, program_name, program_source);
213 #else /* EMBEDDED_KERNELS */
214  if (is_binary)
215  {
216  program = Program(_context, _device.cl_device(), program_name,
217  std::vector<unsigned char>(program_source.begin(), program_source.end()));
218  }
219  else
220  {
221  program = Program(_context, program_name, program_source);
222  }
223 #endif /* EMBEDDED_KERNELS */
224 
225  // Insert program to program map
226  const auto new_program = _programs_map.emplace(program_name, std::move(program));
227 
228  return new_program.first->second;
229 }
230 
232 {
233  _context = std::move(context);
234  if (_context.get() != nullptr)
235  {
236  const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
237 
238  if (!cl_devices.empty())
239  {
240  _device = CLDevice(cl_devices[0]);
241  }
242  }
243 }
244 
245 std::string CLCompileContext::generate_build_options(const StringSet &build_options_set,
246  const std::string &kernel_path) const
247 {
248  std::string concat_str;
249  bool ext_supported = false;
250  std::string ext_buildopts;
251 
252 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
253  // Enable debug properties in CL kernels
254  concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
255 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
256 
257  GPUTarget gpu_arch = get_arch_from_target(_device.target());
258  concat_str +=
259  " -DGPU_ARCH=" + support::cpp11::to_string(static_cast<std::underlying_type<GPUTarget>::type>(gpu_arch));
260 
261  if (_device.supported("cl_khr_fp16"))
262  {
263  concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
264  }
265 
266  if (_device.supported("cl_arm_integer_dot_product_int8") || _device.supported("cl_khr_integer_dot_product"))
267  {
268  concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ENABLED=1 ";
269  }
270 
271  if (_device.supported("cl_arm_integer_dot_product_accumulate_int8"))
272  {
273  concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED=1 ";
274  }
275 
276  std::tie(ext_supported, ext_buildopts) = _device.is_non_uniform_workgroup_supported();
277 
278  if (ext_supported)
279  {
280  concat_str += ext_buildopts;
281  }
282  else
283  {
284  ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
285  }
286 
287  if (gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD && get_ddk_version() >= 11)
288  {
289  concat_str += " -DUNROLL_WITH_PRAGMA ";
290  }
291 
292  std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
293 
294  return build_options;
295 }
296 
298 {
299  return _device.supported("cl_khr_fp16");
300 }
301 
302 std::string CLCompileContext::stringify_set(const StringSet &s, const std::string &kernel_path) const
303 {
304  std::string concat_set;
305 #ifndef EMBEDDED_KERNELS
306  concat_set += "-I" + kernel_path + " ";
307 #else /* EMBEDDED_KERNELS */
308  ARM_COMPUTE_UNUSED(kernel_path);
309 #endif /* EMBEDDED_KERNELS */
310 
311  // Concatenate set
312  for (const auto &el : s)
313  {
314  concat_set += " " + el;
315  }
316 
317  return concat_set;
318 }
319 
320 void CLCompileContext::add_built_program(const std::string &built_program_name, const cl::Program &program) const
321 {
322  _built_programs_map.emplace(built_program_name, program);
323 }
324 
326 {
327  _programs_map.clear();
328  _built_programs_map.clear();
329 }
330 
331 const std::map<std::string, cl::Program> &CLCompileContext::get_built_programs() const
332 {
333  return _built_programs_map;
334 }
335 
337 {
338  return _context;
339 }
340 
341 const cl::Device &CLCompileContext::get_device() const
342 {
343  return _device.cl_device();
344 }
345 
346 void CLCompileContext::set_device(cl::Device device)
347 {
348  _is_wbsm_supported = get_wbsm_support_info(device);
349  _device = std::move(device);
350 }
351 
353 {
354  GPUTarget _target = get_target_from_device(_device.cl_device());
355  cl::NDRange default_range;
356 
357  switch (_target)
358  {
359  case GPUTarget::MIDGARD:
360  case GPUTarget::T600:
361  case GPUTarget::T700:
362  case GPUTarget::T800:
363  default_range = cl::NDRange(128u, 1);
364  break;
365  default:
366  default_range = cl::NullRange;
367  }
368 
369  return default_range;
370 }
371 
373 {
374  return _device.supported("cl_khr_int64_base_atomics");
375 }
376 
378 {
379  return _is_wbsm_supported;
380 }
381 
382 size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
383 {
384  size_t result;
385 
386  size_t err = kernel.getWorkGroupInfo(_device.cl_device(), CL_KERNEL_WORK_GROUP_SIZE, &result);
387  ARM_COMPUTE_ERROR_ON_MSG(err != 0,
388  "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
389  ARM_COMPUTE_UNUSED(err);
390 
391  return result;
392 }
393 
395 {
396  return _device.device_version();
397 }
398 
400 {
401  return _device.compute_units();
402 }
403 
405 {
406  const std::string device_version = _device.device_version();
407  const std::regex ddk_regex("r([0-9]*)p[0-9]");
408  std::smatch ddk_match;
409 
410  if (std::regex_search(device_version, ddk_match, ddk_regex))
411  {
412  return std::stoi(ddk_match[1]);
413  }
414 
415  return -1;
416 }
418 {
419  return _device.target();
420 }
421 } // namespace arm_compute
arm_compute::support::cpp11::to_string
std::string to_string(T &&value)
Convert integer and float values to string.
Definition: StringSupport.h:168
arm_compute::GPUTarget::T700
@ T700
StringSupport.h
arm_compute::CLBuildOptions::add_options_if
void add_options_if(bool cond, const StringSet &options)
Appends given build options to the current's objects options if a given condition is true.
Definition: CLCompileContext.cpp:64
arm_compute::CLBuildOptions::add_option_if_else
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds first option if condition is true else the second one.
Definition: CLCompileContext.cpp:54
arm_compute::CLDevice::compute_units
size_t compute_units() const
Returns the number of compute units available.
Definition: CLDevice.h:101
type
decltype(strategy::transforms) typedef type
Definition: gemm_interleaved.hpp:261
arm_compute::GPUTarget::T800
@ T800
arm_compute::CLBuildOptions::options
const StringSet & options() const
Gets the current options list set.
Definition: CLCompileContext.cpp:72
arm_compute::CLCompileContext::fp16_supported
bool fp16_supported() const
Returns true if FP16 is supported by the CL device.
Definition: CLCompileContext.cpp:297
arm_compute::CLDevice::supported
bool supported(const std::string &extension) const override
Check if extensions on a device are supported.
Definition: CLDevice.h:139
arm_compute::CLCompileContext::default_ndrange
cl::NDRange default_ndrange() const
Return the default NDRange for the device.
Definition: CLCompileContext.cpp:352
arm_compute::CLCompileContext::context
cl::Context & context()
Accessor for the associated CL context.
Definition: CLCompileContext.cpp:336
arm_compute::CLDevice::cl_device
const cl::Device & cl_device() const
Returns the underlying cl device object.
Definition: CLDevice.h:110
arm_compute::CLBuildOptions::CLBuildOptions
CLBuildOptions()
Default constructor.
Definition: CLCompileContext.cpp:37
arm_compute::CLCompileContext::max_local_workgroup_size
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup can be supported for the kernel.
Definition: CLCompileContext.cpp:382
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:354
arm_compute::CLCompileContext::is_wbsm_supported
bool is_wbsm_supported() const
Definition: CLCompileContext.cpp:377
arm_compute::CLCompileContext::int64_base_atomics_supported
bool int64_base_atomics_supported() const
Returns true if int64_base_atomics extension is supported by the CL device.
Definition: CLCompileContext.cpp:372
arm_compute::CLCompileContext::get_num_compute_units
cl_uint get_num_compute_units() const
Return the maximum number of compute units in the device.
Definition: CLCompileContext.cpp:399
arm_compute::get_wbsm_support_info
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:429
arm_compute::CLDevice::is_non_uniform_workgroup_supported
std::tuple< bool, std::string > is_non_uniform_workgroup_supported() const
Returns whether non-uniform workgroup is supported and the build options.
Definition: CLDevice.h:152
arm_compute::CLCompileContext::get_ddk_version
int32_t get_ddk_version() const
Return the DDK version.
Definition: CLCompileContext.cpp:404
Error.h
arm_compute::CLCompileContext::get_gpu_target
GPUTarget get_gpu_target() const
Return the Gpu target of the associated device.
Definition: CLCompileContext.cpp:417
arm_compute::CLCompileContext::set_context
void set_context(cl::Context context)
Sets the CL context used to create programs.
Definition: CLCompileContext.cpp:231
arm_compute::CLBuildOptions::add_options
void add_options(const StringSet &options)
Appends given build options to the current's objects options.
Definition: CLCompileContext.cpp:59
arm_compute::CLCompileContext::get_device
const cl::Device & get_device() const
Gets the CL device for which the programs are created.
Definition: CLCompileContext.cpp:341
arm_compute::CLBuildOptions::add_option
void add_option(std::string option)
Adds option to the existing build option list.
Definition: CLCompileContext.cpp:41
arm_compute::CLCompileContext::clear_programs_cache
void clear_programs_cache()
Clear the library's cache of binary programs.
Definition: CLCompileContext.cpp:325
arm_compute::get_arch_from_target
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:223
ARM_COMPUTE_ERROR_ON_MSG
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
arm_compute::CLCompileContext::set_device
void set_device(cl::Device device)
Sets the CL device for which the programs are created.
Definition: CLCompileContext.cpp:346
arm_compute::CLCompileContext::get_device_version
std::string get_device_version() const
Return the device version.
Definition: CLCompileContext.cpp:394
CLCompileContext.h
arm_compute::CLBuildOptions::add_option_if
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true;.
Definition: CLCompileContext.cpp:46
name
const char * name
Definition: NEBatchNormalizationLayerKernel.cpp:66
OpenCL.h
Wrapper to configure the Khronos OpenCL C++ header.
arm_compute::CLCompileContext::create_kernel
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
Definition: CLCompileContext.cpp:166
arm_compute::Kernel
Kernel class.
Definition: CLCompileContext.h:162
arm_compute::Program::Program
Program()
Default constructor.
Definition: CLCompileContext.cpp:82
ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:151
arm_compute::get_target_from_device
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:224
arm_compute::test::validation::context
auto context
Definition: DirectConv2d.cpp:156
arm_compute::GPUTarget::MIDGARD
@ MIDGARD
arm_compute::CLDevice::device_version
std::string device_version() const
Returns the device version as a string.
Definition: CLDevice.h:128
arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Utils.h
arm_compute::Program::build
static bool build(const cl::Program &program, const std::string &build_options="")
Build the given CL program.
Definition: CLCompileContext.cpp:118
arm_compute::CLDevice
OpenCL device type class.
Definition: CLDevice.h:43
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::support::cpp11::stoi
int stoi(const std::string &str, std::size_t *pos=0, NumericBase base=NumericBase::BASE_10)
Convert string values to integer.
Definition: StringSupport.h:55
arm_compute::CLCompileContext::get_built_programs
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.
Definition: CLCompileContext.cpp:331
arm_compute::Kernel::Kernel
Kernel()
Default Constructor.
Definition: CLCompileContext.cpp:145
arm_compute::GPUTarget::T600
@ T600
arm_compute::CLBuildOptions
Build options.
Definition: CLCompileContext.h:38
cl
Definition: ICLTensor.h:32
arm_compute::CLDevice::target
const GPUTarget & target() const
Returns the GPU target of the cl device.
Definition: CLDevice.h:92
arm_compute::GPUTarget::UNKNOWN
@ UNKNOWN
arm_compute::CLCompileContext::add_built_program
void add_built_program(const std::string &built_program_name, const cl::Program &program) const
Add a new built program to the cache.
Definition: CLCompileContext.cpp:320
arm_compute::CLBuildOptions::operator==
bool operator==(const CLBuildOptions &other) const
Definition: CLCompileContext.cpp:77
arm_compute::CLCompileContext::CLCompileContext
CLCompileContext()
Constructor.
Definition: CLCompileContext.cpp:153
CLHelpers.h
build_options
std::set< std::string > build_options
Definition: ClIm2ColKernel.cpp:59
kernel_name
std::string kernel_name
Definition: ClIm2ColKernel.cpp:58
arm_compute::Program
Program class.
Definition: CLCompileContext.h:87