Compute Library
 22.08
CLCompileContext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
26 
28 #include "arm_compute/core/Error.h"
29 #include "arm_compute/core/Utils.h"
30 #include "support/StringSupport.h"
31 
32 #include <regex>
33 
34 namespace arm_compute
35 {
37  : _build_opts()
38 {
39 }
40 
41 void CLBuildOptions::add_option(std::string option)
42 {
43  _build_opts.emplace(std::move(option));
44 }
45 
46 void CLBuildOptions::add_option_if(bool cond, std::string option)
47 {
48  if(cond)
49  {
50  add_option(std::move(option));
51  }
52 }
53 
54 void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
55 {
56  (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
57 }
58 
59 void CLBuildOptions::add_options(const StringSet &options)
60 {
61  _build_opts.insert(options.begin(), options.end());
62 }
63 
64 void CLBuildOptions::add_options_if(bool cond, const StringSet &options)
65 {
66  if(cond)
67  {
68  add_options(options);
69  }
70 }
71 
72 const CLBuildOptions::StringSet &CLBuildOptions::options() const
73 {
74  return _build_opts;
75 }
76 
78 {
79  return _build_opts == other._build_opts;
80 }
81 
83  : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
84 {
85 }
86 
87 Program::Program(cl::Context context, std::string name, std::string source)
88  : _context(std::move(context)), _device(), _is_binary(false), _name(std::move(name)), _source(std::move(source)), _binary()
89 {
90 }
91 
92 Program::Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary)
93  : _context(std::move(context)), _device(std::move(device)), _is_binary(true), _name(std::move(name)), _source(), _binary(std::move(binary))
94 {
95 }
96 
97 Program::operator cl::Program() const
98 {
99  if(_is_binary)
100  {
101  return cl::Program(_context, { _device }, { _binary });
102  }
103  else
104  {
105  return cl::Program(_context, _source, false);
106  }
107 }
108 
109 bool Program::build(const cl::Program &program, const std::string &build_options)
110 {
111  try
112  {
113  return program.build(build_options.c_str()) == CL_SUCCESS;
114  }
115  catch(const cl::Error &e)
116  {
117  cl_int err = CL_SUCCESS;
118  const auto build_info = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&err);
119 
120  for(auto &pair : build_info)
121  {
122  std::cerr << pair.second << std::endl;
123  }
124 
125  return false;
126  }
127 }
128 
129 cl::Program Program::build(const std::string &build_options) const
130 {
131  cl::Program cl_program = static_cast<cl::Program>(*this);
132  build(cl_program, build_options);
133  return cl_program;
134 }
135 
137  : _name(), _kernel()
138 {
139 }
140 
141 Kernel::Kernel(std::string name, const cl::Program &program)
142  : _name(std::move(name)),
143  _kernel(cl::Kernel(program, _name.c_str()))
144 {
145 }
147  : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
148 {
149 }
150 
151 CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
152  : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
153 {
154  _context = std::move(context);
155  _device = CLDevice(device);
156  _is_wbsm_supported = get_wbsm_support_info(device);
157 }
158 
159 Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
160  const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
161 {
162  const std::string build_options = generate_build_options(build_options_set, kernel_path);
163  const std::string built_program_name = program_name + "_" + build_options;
164  auto built_program_it = _built_programs_map.find(built_program_name);
165  cl::Program cl_program;
166 
167  if(_built_programs_map.end() != built_program_it)
168  {
169  // If program has been built, retrieve to create kernel from it
170  cl_program = built_program_it->second;
171  }
172  else
173  {
174  Program program = load_program(program_name, program_source, is_binary);
175 
176  // Build program
177  cl_program = program.build(build_options);
178 
179  // Add built program to internal map
180  _built_programs_map.emplace(built_program_name, cl_program);
181  }
182 
183  // Create and return kernel
184  return Kernel(kernel_name, cl_program);
185 }
186 
187 const Program &CLCompileContext::load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const
188 {
189  const auto program_it = _programs_map.find(program_name);
190 
191  if(program_it != _programs_map.end())
192  {
193  return program_it->second;
194  }
195 
196  Program program;
197 
198 #ifdef EMBEDDED_KERNELS
199  ARM_COMPUTE_UNUSED(is_binary);
200  program = Program(_context, program_name, program_source);
201 #else /* EMBEDDED_KERNELS */
202  if(is_binary)
203  {
204  program = Program(_context, _device.cl_device(), program_name, std::vector<unsigned char>(program_source.begin(), program_source.end()));
205  }
206  else
207  {
208  program = Program(_context, program_name, program_source);
209  }
210 #endif /* EMBEDDED_KERNELS */
211 
212  // Insert program to program map
213  const auto new_program = _programs_map.emplace(program_name, std::move(program));
214 
215  return new_program.first->second;
216 }
217 
219 {
220  _context = std::move(context);
221  if(_context.get() != nullptr)
222  {
223  const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
224 
225  if(!cl_devices.empty())
226  {
227  _device = CLDevice(cl_devices[0]);
228  }
229  }
230 }
231 
232 std::string CLCompileContext::generate_build_options(const StringSet &build_options_set, const std::string &kernel_path) const
233 {
234  std::string concat_str;
235  bool ext_supported = false;
236  std::string ext_buildopts;
237 
238 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
239  // Enable debug properties in CL kernels
240  concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
241 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
242 
243  GPUTarget gpu_arch = get_arch_from_target(_device.target());
244  concat_str += " -DGPU_ARCH=" + support::cpp11::to_string(
245  static_cast<std::underlying_type<GPUTarget>::type>(gpu_arch));
246 
247  if(_device.supported("cl_khr_fp16"))
248  {
249  concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
250  }
251 
252  if(_device.supported("cl_arm_integer_dot_product_int8") || _device.supported("cl_khr_integer_dot_product"))
253  {
254  concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ENABLED=1 ";
255  }
256 
257  if(_device.supported("cl_arm_integer_dot_product_accumulate_int8"))
258  {
259  concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED=1 ";
260  }
261 
262  std::tie(ext_supported, ext_buildopts) = _device.is_non_uniform_workgroup_supported();
263 
264  if(ext_supported)
265  {
266  concat_str += ext_buildopts;
267  }
268  else
269  {
270  ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
271  }
272 
273  if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD)
274  {
275  const std::string device_vers = _device.device_version();
276  const std::regex ddk_regex("r([0-9]*)p[0-9]");
277  std::smatch ddk_match;
278 
279  if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1]) >= 11)
280  {
281  concat_str += " -DUNROLL_WITH_PRAGMA ";
282  }
283  }
284 
285  std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
286 
287  return build_options;
288 }
289 
291 {
292  return _device.supported("cl_khr_fp16");
293 }
294 
295 std::string CLCompileContext::stringify_set(const StringSet &s, const std::string &kernel_path) const
296 {
297  std::string concat_set;
298 #ifndef EMBEDDED_KERNELS
299  concat_set += "-I" + kernel_path + " ";
300 #else /* EMBEDDED_KERNELS */
301  ARM_COMPUTE_UNUSED(kernel_path);
302 #endif /* EMBEDDED_KERNELS */
303 
304  // Concatenate set
305  for(const auto &el : s)
306  {
307  concat_set += " " + el;
308  }
309 
310  return concat_set;
311 }
312 
313 void CLCompileContext::add_built_program(const std::string &built_program_name, const cl::Program &program) const
314 {
315  _built_programs_map.emplace(built_program_name, program);
316 }
317 
319 {
320  _programs_map.clear();
321  _built_programs_map.clear();
322 }
323 
324 const std::map<std::string, cl::Program> &CLCompileContext::get_built_programs() const
325 {
326  return _built_programs_map;
327 }
328 
330 {
331  return _context;
332 }
333 
334 const cl::Device &CLCompileContext::get_device() const
335 {
336  return _device.cl_device();
337 }
338 
339 void CLCompileContext::set_device(cl::Device device)
340 {
341  _device = std::move(device);
342  _is_wbsm_supported = get_wbsm_support_info(device);
343 }
344 
346 {
347  GPUTarget _target = get_target_from_device(_device.cl_device());
348  cl::NDRange default_range;
349 
350  switch(_target)
351  {
352  case GPUTarget::MIDGARD:
353  case GPUTarget::T600:
354  case GPUTarget::T700:
355  case GPUTarget::T800:
356  default_range = cl::NDRange(128u, 1);
357  break;
358  default:
359  default_range = cl::NullRange;
360  }
361 
362  return default_range;
363 }
364 
366 {
367  return _device.supported("cl_khr_int64_base_atomics");
368 }
369 
371 {
372  return _is_wbsm_supported;
373 }
374 
375 size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
376 {
377  size_t result;
378 
379  size_t err = kernel.getWorkGroupInfo(_device.cl_device(), CL_KERNEL_WORK_GROUP_SIZE, &result);
380  ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
381  ARM_COMPUTE_UNUSED(err);
382 
383  return result;
384 }
385 
387 {
388  return _device.device_version();
389 }
390 
392 {
393  return _device.compute_units();
394 }
395 } // namespace arm_compute
bool operator==(const CLBuildOptions &other) const
Definition: ICLTensor.h:33
std::string device_version() const
Returns the device version as a string.
Definition: CLDevice.h:130
void set_device(cl::Device device)
Sets the CL device for which the programs are created.
const cl::Device & get_device() const
Gets the CL device for which the programs are created.
cl::Context & context()
Accessor for the associated CL context.
Kernel()
Default Constructor.
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::string to_string(T &&value)
Convert integer and float values to string.
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:423
const std::vector< unsigned char > & binary() const
Returns program binary data.
CLBuildOptions()
Default constructor.
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:240
decltype(strategy::transforms) typedef type
std::set< std::string > build_options
std::string get_device_version() const
Return the device version.
Copyright (c) 2017-2022 Arm Limited.
void add_option(std::string option)
Adds option to the existing build option list.
std::string name() const
Returns program name.
void add_options(const StringSet &options)
Appends given build options to the current&#39;s objects options.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
cl_uint get_num_compute_units() const
Return the maximum number of compute units in the device.
const GPUTarget & target() const
Returns the GPU target of the cl device.
Definition: CLDevice.h:94
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
OpenCL device type class.
Definition: CLDevice.h:43
static bool build(const cl::Program &program, const std::string &build_options="")
Build the given CL program.
bool supported(const std::string &extension) const override
Check if extensions on a device are supported.
Definition: CLDevice.h:141
cl::NDRange default_ndrange() const
Return the default NDRange for the device.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true;.
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:223
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
int stoi(const std::string &str, std::size_t *pos=0, NumericBase base=NumericBase::BASE_10)
Convert string values to integer.
Definition: StringSupport.h:55
void clear_programs_cache()
Clear the library&#39;s cache of binary programs.
Program()
Default constructor.
bool int64_base_atomics_supported() const
Returns true if int64_base_atomics extension is supported by the CL device.
bool fp16_supported() const
Returns true if FP16 is supported by the CL device.
size_t compute_units() const
Returns the number of compute units available.
Definition: CLDevice.h:103
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup can be supported for the kernel...
Wrapper to configure the Khronos OpenCL C++ header.
std::tuple< bool, std::string > is_non_uniform_workgroup_supported() const
Returns whether non-uniform workgroup is supported and the build options.
Definition: CLDevice.h:154
const cl::Device & cl_device() const
Returns the underlying cl device object.
Definition: CLDevice.h:112
void add_built_program(const std::string &built_program_name, const cl::Program &program) const
Add a new built program to the cache.
void set_context(cl::Context context)
Sets the CL context used to create programs.
std::string name() const
Returns kernel name.
std::string kernel_name
void add_options_if(bool cond, const StringSet &options)
Appends given build options to the current&#39;s objects options if a given condition is true...
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds first option if condition is true else the second one.