224 std::string device_name = device.getInfo<CL_DEVICE_NAME>();
241 std::string device_name = device.getInfo<CL_DEVICE_NAME>();
246 return (
device_supports_extension(device,
"cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
256 std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
257 if(version_str.find(
"OpenCL 2") != std::string::npos)
261 else if(version_str.find(
"OpenCL 1.2") != std::string::npos)
265 else if(version_str.find(
"OpenCL 1.1") != std::string::npos)
269 else if(version_str.find(
"OpenCL 1.0") != std::string::npos)
279 std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
280 auto pos = extensions.find(extension_name);
281 return (pos != std::string::npos);
288 using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
290 std::vector<WinogradConfiguration> winograd_configs_nchw =
292 WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
293 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
294 WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
295 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
296 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
297 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
298 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
299 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
300 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
303 std::vector<WinogradConfiguration> winograd_configs_nhwc =
305 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
306 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
307 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
308 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
309 WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
310 WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
311 WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)),
312 WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7)),
313 WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1)),
314 WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)),
317 auto p = std::make_pair(std::pair<int, int>(output_tile.
width, output_tile.
height),
318 std::pair<int, int>(kernel_size.
width, kernel_size.
height));
323 return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
327 return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
341 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
346 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
349 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
352 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
355 return device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
375 cl_uint pixel_aligment = 0;
377 cl_int err =
clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
sizeof(cl_uint), &pixel_aligment,
nullptr);
379 if(err == CL_SUCCESS)
381 return pixel_aligment;
408 return static_cast<cl::Kernel>(ctx.
create_kernel(
kernel_name, program_name, kernel_src.first, kernel_path, build_opts, kernel_src.second));
413 const unsigned int width_leftover = input_dimension % vector_size;
414 const unsigned int border_width = (width_leftover != 0) ? vector_size - width_leftover : 0;
415 const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
416 return cl::NDRange(std::min(8
U, num_of_threads));
421 cl_bitfield capabilities = 0;
430 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported.
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
quantized, symmetric fixed-point 16-bit number
cl_int clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void *param_value)
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange. In case d...
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
1 channel, 1 U8 per channel
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
CLVersion
Available OpenCL Version.
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to a signed integer data type.
1 channel, 1 F32 per channel
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors.
GPUTarget get_target_from_name(const std::string &device_name)
Helper function to get the GPU target from a device name.
bool get_wbsm_support_info(const cl::Device &device)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
std::pair< std::string, bool > get_program(const std::string &program_name) const
Gets the source of the selected program.
const DataLayout data_layout
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
quantized, asymmetric fixed-point 16-bit number
1 channel, 1 U16 per channel
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM
1 channel, 1 S32 per channel
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
1 channel, 1 U32 per channel
Core runtime context for OpenCL.
quantized, asymmetric fixed-point 8-bit number unsigned
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Kernel create_kernel(const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const
Creates a kernel from the kernel library.
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
std::string get_program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.
1 channel, 1 S16 per channel
quantized, symmetric fixed-point 8-bit number
Num samples, channels, height, width.
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
quantized, symmetric per channel fixed-point 8-bit number
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
size_t width
Width of the image region or rectangle.
GPUTarget
Available GPU Targets.
CLKernelLibrary * kernel_library() const
Kernel Library accessor.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
Class for specifying the size of an image or rectangle.
cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts)
Creates an opencl kernel.
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
std::string get_kernel_path()
Gets the path that the kernels reside in.
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
quantized, asymmetric fixed-point 8-bit number signed
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile, kernel size and data layout) is supported on OpenCL.
DataType
Available data types.
DataLayout
[DataLayout enum definition]