Compute Library
 23.08
CLHelpers.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLHELPERS_H
25 #define ARM_COMPUTE_CLHELPERS_H
26 
29 #include "arm_compute/core/Types.h"
30 
31 #include <set>
32 #include <string>
33 
34 namespace arm_compute
35 {
36 class CLCompileContext;
37 class CLBuildOptions;
38 
39 enum class DataType;
40 
41 /** Max vector width of an OpenCL vector */
42 static constexpr unsigned int max_cl_vector_width = 16;
43 
44 /** Max number of iterations for manual loop unrolling */
45 static constexpr int max_manual_loop_unrolling = 128;
46 
47 /** Translates a tensor data type to the appropriate OpenCL type.
48  *
49  * @param[in] dt @ref DataType to be translated to OpenCL type.
50  *
51  * @return The string specifying the OpenCL type to be used.
52  */
53 std::string get_cl_type_from_data_type(const DataType &dt);
54 
55 /** Translates a tensor data type to the appropriate OpenCL promoted type.
56  *
57  * @param[in] dt @ref DataType to be used to get the promoted OpenCL type.
58  *
59  * @return The string specifying the OpenCL type to be used.
60  */
61 std::string get_cl_promoted_type_from_data_type(const DataType &dt);
62 
63 /** Translates the element size to an unsigned integer data type
64  *
65  * @param[in] element_size Size in bytes of an element.
66  *
67  * @return The string specifying the OpenCL type to be used.
68  */
69 std::string get_cl_unsigned_type_from_element_size(size_t element_size);
70 
71 /** Translates the element size to a signed integer data type
72  *
73  * @param[in] element_size Size in bytes of an element.
74  *
75  * @return The string specifying the OpenCL type to be used.
76  */
77 std::string get_cl_signed_type_from_element_size(size_t element_size);
78 
79 /** Translates a tensor data type to the appropriate OpenCL select type.
80  *
81  * @param[in] dt @ref DataType to be translated to OpenCL select type.
82  *
83  * @return The string specifying the OpenCL select type to be used.
84  */
85 std::string get_cl_select_type_from_data_type(const DataType &dt);
86 
87 /** Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
88  *
89  * @param[in] dt @ref DataType to be translated to OpenCL dot8 accumulator type.
90  *
91  * @return The string specifying the OpenCL dot8 accumulator type to be used.
92  */
93 std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
94 
95 /** Get the size of a data type in number of bits.
96  *
97  * @param[in] dt @ref DataType.
98  *
99  * @return The number of bits in the specified data type, as a string.
100  */
101 std::string get_data_size_from_data_type(const DataType &dt);
102 
103 /** Helper function to get the GPU target from CL device
104  *
105  * @param[in] device A CL device
106  *
107  * @return the GPU target
108  */
109 GPUTarget get_target_from_device(const cl::Device &device);
110 
111 /** Helper function to get the highest OpenCL version supported
112  *
113  * @param[in] device A CL device
114  *
115  * @return the highest OpenCL version supported
116  */
117 CLVersion get_cl_version(const cl::Device &device);
118 
119 /** Helper function to get the cl_image pitch alignment in pixels
120  *
121  * @param[in] device A CL device
122  *
123  * @return the cl_image pitch alignment in pixels. If an error occurs, the function will return 0
124  */
125 size_t get_cl_image_pitch_alignment(const cl::Device &device);
126 
127 /** Helper function to check whether non-uniform work group is supported
128  *
129  * @param[in] device A CL device
130  *
131  * @return True if the feature is supported
132  */
133 bool get_cl_non_uniform_work_group_supported(const cl::Device &device);
134 
135 /** Helper function to check whether a given extension is supported
136  *
137  * @param[in] device A CL device
138  * @param[in] extension_name Name of the extension to be checked
139  *
140  * @return True if the extension is supported
141  */
142 bool device_supports_extension(const cl::Device &device, const char *extension_name);
143 
144 /** Helper function to check whether the cl_khr_fp16 extension is supported
145  *
146  * @param[in] device A CL device
147  *
148  * @return True if the extension is supported
149  */
150 bool fp16_supported(const cl::Device &device);
151 /** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
152  *
153  * @param[in] device A CL device
154  *
155  * @return True if the extension is supported
156  */
157 bool arm_non_uniform_workgroup_supported(const cl::Device &device);
158 /** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported
159  *
160  * @param[in] device A CL device
161  *
162  * @return True if the extension is supported
163  */
164 bool dot8_supported(const cl::Device &device);
165 
166 /** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported
167  *
168  * @param[in] device A CL device
169  *
170  * @return True if the extension is supported
171  */
172 bool dot8_acc_supported(const cl::Device &device);
173 
174 /** This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL
175  *
176  * @param[in] output_tile Output tile for the Winograd filtering algorithm
177  * @param[in] kernel_size Kernel size for the Winograd filtering algorithm
178  * @param[in] data_layout Data layout of the input tensor
179  *
180  * @return True if the configuration is supported
181  */
182 bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
183 
184 /** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
185  *
186  * @param[in] device A CL device
187  * @param[in] dt data type
188  *
189  * @return preferred vector width
190  */
191 size_t preferred_vector_width(const cl::Device &device, DataType dt);
192 
193 /** Helper function to check if "dummy work-items" are preferred to have a power of two NDRange
194  * If dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether the work-item is out of range
195  *
196  * @param[in] device A CL device
197  *
198  * @return True if dummy work-items should be preferred to dispatch the NDRange
199  */
200 bool preferred_dummy_work_items_support(const cl::Device &device);
201 
202 /** Helper function to check whether the cl_khr_image2d_from_buffer extension is supported
203  *
204  * @param[in] device A CL device
205  *
206  * @return True if the extension is supported
207  */
208 bool image2d_from_buffer_supported(const cl::Device &device);
209 
210 /** Creates an opencl kernel using a compile context
211  *
212  * @param[in] ctx A compile context to be used to create the opencl kernel.
213  * @param[in] kernel_name The kernel name.
214  * @param[in] build_opts The build options to be used for the opencl kernel compilation.
215  *
216  * @return An opencl kernel
217  */
218 cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
219 
220 /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
221  * If input width is smaller than 128 we can use fewer threads than 8.
222  *
223  * @param[in] input_dimension number of elements along the dimension to apply the parallelization
224  * @param[in] vector_size size of the vector in OpenCL
225  *
226  * @return An LWS hint object
227  */
228 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
229 
230 /** Helper function to check if the workgroup batch size modifier parameter is supported on the cl device
231  *
232  * @param[in] device cl device to check for support
233  *
234  * @return true if the workgroup batch size modifier parameter is supported, false otherwise
235  */
236 bool get_wbsm_support_info(const cl::Device &device);
237 
238 /** Helper function to set the workgroup batch size modifier parameter in the kernel
239  *
240  * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter
241  * @param[in] wbsm_hint workgroup batch size modifier to use
242  */
243 void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
244 
245 /** Helper function to check if we can export the tensor to cl_image
246  *
247  * @param[in] tensor Input tensor info
248  *
249  * @return true if we can export the tensor to cl_image
250  */
251 bool export_to_cl_image(const ITensorInfo *tensor);
252 
253 /** Helper function to force unroll with pragma when any of the input values (iterations) are greater than @ref max_manual_loop_unrolling
254  *
255  * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values are greater than @ref max_manual_loop_unrolling
256  *
257  * @param[in] built_opts OpenCL kernel build options
258  * @param[in] values Input values (iterations)
259  *
260  */
261 void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values);
262 
263 /** Helper function to check whether the cl_arm_matrix_multiply extension is supported
264  *
265  * @param[in] device A CL device
266  *
267  * @return True if the extension is supported
268  */
269 bool arm_matrix_multiply_supported(const cl::Device &device);
270 } // namespace arm_compute
271 #endif /* ARM_COMPUTE_CLHELPERS_H */
arm_compute::dot8_supported
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
Definition: CLHelpers.cpp:241
arm_compute::preferred_vector_width
size_t preferred_vector_width(const cl::Device &device, DataType dt)
Helper function to get the preferred native vector width size for built-in scalar types that can be p...
Definition: CLHelpers.cpp:337
arm_compute::export_to_cl_image
bool export_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:444
CLTypes.h
arm_compute::fp16_supported
bool fp16_supported(const cl::Device &device)
Helper function to check whether the cl_khr_fp16 extension is supported.
Definition: CLHelpers.cpp:236
arm_compute::DataLayout
DataLayout
[DataLayout enum definition]
Definition: CoreTypes.h:109
arm_compute::preferred_dummy_work_items_support
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange In case d...
Definition: CLHelpers.cpp:367
arm_compute::get_cl_dot8_acc_type_from_data_type
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
Definition: CLHelpers.cpp:175
arm_compute::get_cl_signed_type_from_element_size
std::string get_cl_signed_type_from_element_size(size_t element_size)
Translates the element size to an signed integer data type.
Definition: CLHelpers.cpp:124
Types.h
arm_compute::arm_matrix_multiply_supported
bool arm_matrix_multiply_supported(const cl::Device &device)
Helper function to check whether the cl_arm_matrix_multiply extension is supported.
Definition: CLHelpers.cpp:494
arm_compute::set_wbsm
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
Definition: CLHelpers.cpp:434
arm_compute::image2d_from_buffer_supported
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:374
arm_compute::get_cl_select_type_from_data_type
std::string get_cl_select_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL select type.
Definition: CLHelpers.cpp:142
arm_compute::get_wbsm_support_info
bool get_wbsm_support_info(const cl::Device &device)
Definition: CLHelpers.cpp:423
arm_compute::CLVersion
CLVersion
Available OpenCL Version.
Definition: CLTypes.h:39
arm_compute::test::validation::data_layout
const auto data_layout
Definition: ConvolutionLayer.cpp:406
arm_compute::get_data_size_from_data_type
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:193
arm_compute::create_kernel
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:404
arm_compute::get_cl_promoted_type_from_data_type
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:76
arm_compute::get_cl_non_uniform_work_group_supported
bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
Helper function to check whether non-uniform work group is supported.
Definition: CLHelpers.cpp:395
OpenCL.h
Wrapper to configure the Khronos OpenCL C++ header.
dt
DataType dt
Definition: NEBatchNormalizationLayerKernel.cpp:51
arm_compute::get_target_from_device
GPUTarget get_target_from_device(const cl::Device &device)
Helper function to get the GPU target from CL device.
Definition: CLHelpers.cpp:223
tensor
CLTensor * tensor
Pointer to the auxiliary tensor.
Definition: ClWorkloadRuntime.cpp:66
arm_compute::create_lws_hint_parallel_implementations
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
Creates a suitable LWS hint object for parallel implementations.
Definition: CLHelpers.cpp:415
arm_compute::get_cl_type_from_data_type
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:40
arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
arm_compute::get_cl_unsigned_type_from_element_size
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
Definition: CLHelpers.cpp:106
arm_compute::get_cl_version
CLVersion get_cl_version(const cl::Device &device)
Helper function to get the highest OpenCL version supported.
Definition: CLHelpers.cpp:256
arm_compute::device_supports_extension
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
Definition: CLHelpers.cpp:283
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::arm_non_uniform_workgroup_supported
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
Helper function to check whether the arm_non_uniform_work_group_size extension is supported.
Definition: CLHelpers.cpp:231
arm_compute::get_cl_image_pitch_alignment
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:379
arm_compute::cl_winograd_convolution_layer_supported
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
This function checks if the Winograd configuration (defined through the output tile,...
Definition: CLHelpers.cpp:290
arm_compute::DataType
DataType
Available data types.
Definition: CoreTypes.h:82
arm_compute::set_unroll_with_pragma
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list< int > values)
Definition: CLHelpers.cpp:482
arm_compute::dot8_acc_supported
bool dot8_acc_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supporte...
Definition: CLHelpers.cpp:251
kernel_name
std::string kernel_name
Definition: ClIm2ColKernel.cpp:57