226 std::string device_name = device.getInfo<CL_DEVICE_NAME>();
243 std::string device_name = device.getInfo<CL_DEVICE_NAME>();
248 return (
device_supports_extension(device,
"cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
258 std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
259 if(version_str.find(
"OpenCL 2") != std::string::npos)
263 else if(version_str.find(
"OpenCL 1.2") != std::string::npos)
267 else if(version_str.find(
"OpenCL 1.1") != std::string::npos)
271 else if(version_str.find(
"OpenCL 1.0") != std::string::npos)
281 std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
282 auto pos = extensions.find(extension_name);
283 return (pos != std::string::npos);
290 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
292 std::vector<WinogradConfiguration> winograd_configs_nchw =
294 WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
295 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
296 WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
297 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
298 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
299 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
300 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
301 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
302 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
305 std::vector<WinogradConfiguration> winograd_configs_nhwc =
307 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
308 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
309 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
310 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
311 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
312 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
313 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
314 WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
315 WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
316 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
319 auto p = std::make_pair(std::pair<int, int>(output_tile.
width, output_tile.
height),
320 std::pair<int, int>(kernel_size.
width, kernel_size.
height));
325 return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
329 return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
343 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
348 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
351 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
354 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
357 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
377 cl_uint pixel_aligment = 0;
379 cl_int err =
clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
sizeof(cl_uint), &pixel_aligment,
nullptr);
381 if(err == CL_SUCCESS)
383 return pixel_aligment;
395 const std::string program_name = klib.
program_name(kernel_name);
396 auto kernel_src = klib.
program(program_name);
397 const std::string kernel_path = klib.
kernel_path();
399 return static_cast<cl::Kernel
>(ctx.
create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, build_opts, kernel_src.is_binary));
404 const unsigned int width_leftover = input_dimension % vector_size;
405 const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
406 const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
407 return cl::NDRange(std::min(8
U, num_of_threads));
412 cl_bitfield capabilities = 0;
421 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
461 if(image_w > max_image_w || image_h > max_image_h)
471 for(
const int value : values)
473 if(value > max_manual_loop_unrolling)
475 built_opts.
add_option(
"-DUNROLL_WITH_PRAGMA");
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported.
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
quantized, symmetric fixed-point 16-bit number
bool export_weights_to_cl_image(const ITensorInfo *tensor)
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list< int > values)
cl_int clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
ClKernelLibrary contains all the OpenCL kernels that are used throughout the library.
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange. In case d...
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
1 channel, 1 U8 per channel
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
CLVersion
Available OpenCL Version.
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to a signed integer data type.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
bool get_wbsm_support_info(const cl::Device &device)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const DataLayout data_layout
ClProgramInfo program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
std::string program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
quantized, asymmetric fixed-point 16-bit number
1 channel, 1 U16 per channel
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM
const std::string & kernel_path() const
Gets the path that the kernels reside in.
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
static ClKernelLibrary & get()
Access the KernelLibrary singleton.
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
1 channel, 1 S16 per channel
quantized, symmetric fixed-point 8-bit number
Num samples, channels, height, width.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
quantized, symmetric per channel fixed-point 8-bit number
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
size_t width
Width of the image region or rectangle.
GPUTarget
Available GPU Targets.
Class for specifying the size of an image or rectangle.
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
quantized, asymmetric fixed-point 8-bit number signed
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL.
DataType
Available data types.
DataLayout
[DataLayout enum definition]
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
const cl::Device & get_device()
Gets the CL device for which the programs are created.