Compute Library
 23.11
ClDirectConv2dKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
37 
39 #include "src/core/CL/CLUtils.h"
40 #include "src/core/CL/CLValidate.h"
44 #include "support/Cast.h"
45 #include "support/StringSupport.h"
46 
47 namespace arm_compute
48 {
49 namespace opencl
50 {
51 namespace kernels
52 {
53 namespace
54 {
55 Status validate_arguments(const ITensorInfo *src,
56  const ITensorInfo *weights,
57  const ITensorInfo *biases,
58  const ITensorInfo *dst,
59  const PadStrideInfo &conv_info,
60  const ActivationLayerInfo &act_info,
61  const DirectConvComputeKernelInfo &desc)
62 {
67 
68  const DataLayout data_layout = src->data_layout();
72 
73  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx),
74  "Weights feature map dimension should match the respective src's one");
75  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
76 
77  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_input_to_cl_image == true,
78  "Export to CLImage is not supported for the input tensor");
79  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_output_to_cl_image == true,
80  "Export to CLImage is not supported for the output tensor");
81 
83  {
84  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx),
85  "Weights should have same width and height");
86  ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 1) && std::get<0>(conv_info.stride()) > 3,
87  "Strides larger than 3 not supported for 1x1 convolution.");
88  ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 ||
89  weights->dimension(width_idx) == 9) &&
90  std::get<0>(conv_info.stride()) > 2,
91  "Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution.");
92  ARM_COMPUTE_RETURN_ERROR_ON_MSG(act_info.enabled(), "Fused activation is not supported for NCHW layout");
93 
94  if (is_data_type_quantized(src->data_type()))
95  {
97  weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 &&
98  weights->dimension(width_idx) != 5 && weights->dimension(width_idx) != 9,
99  "Kernel sizes other than 1x1, 3x3, 5x5 or 9x9 are not supported with quantized data types");
100  }
101  else
102  {
104  weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 &&
105  weights->dimension(width_idx) != 5,
106  "Kernel sizes other than 1x1, 3x3 or 5x5 are not supported with float data types");
107  }
108  }
109 
111  {
113  "Fused activation in NHWC is only supported for floating point.");
114  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8,
115  "M0 can only be greater than 0 and less than or equal to 8");
116  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 &&
117  desc.n0 != 16,
118  "N0 can only be: 1, 2, 3, 4, 8, and 16");
119  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 &&
120  desc.k0 != 16,
121  "K0 can only be: 1, 2, 3, 4, 8, and 16");
122  if (desc.export_weights_to_cl_image)
123  {
124  ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
125  "K0 can only be: 4, 8, and 16");
127  "Export to CLImage is not supported for this weight configuration");
128  }
129  }
130 
131  if (biases != nullptr)
132  {
133  if (is_data_type_quantized_asymmetric(src->data_type()))
134  {
136  }
137  else
138  {
140  }
141  ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->dimension(0) != weights->dimension(3),
142  "Biases size and number of dst feature maps should match");
143  ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->num_dimensions() > 1, "Biases should be one dimensional");
144  }
145 
146  // Checks performed when dst is configured
147  if (dst->total_size() != 0)
148  {
152  }
153 
154  const auto data_type = src->data_type();
156  {
157  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
158  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
159  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
160 
161  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
162  int output_multiplier = 0;
163  int output_shift = 0;
165  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
166  }
167  return Status{};
168 }
169 } // namespace
170 
172 {
173  _type = CLKernelType::DIRECT;
174 }
175 
177  ITensorInfo *src,
178  ITensorInfo *weights,
179  ITensorInfo *biases,
180  ITensorInfo *dst,
181  const PadStrideInfo &conv_info,
183  const DirectConvComputeKernelInfo &desc)
184 {
186 
187  // Perform validation
189 
190  const int conv_stride_x = std::get<0>(conv_info.stride());
191  const int conv_stride_y = std::get<1>(conv_info.stride());
192 
193  _data_layout = src->data_layout();
195 
199  const unsigned int kernel_size = weights->dimension(width_idx);
200  const DataType data_type = src->data_type();
201 
202  const GPUTarget gpu_target = get_target();
203  unsigned int _num_elems_processed_per_iteration = 0;
204 
205  // Get dst shape
207 
208  // Output auto initialization if not yet initialized
209  auto_init_if_empty(*dst, output_shape, 1, src->data_type(), src->quantization_info());
210 
211  // Configure kernel window
212  Window win;
214  {
215  output_shape.collapse(2U, 1U);
216  const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
217  const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1]);
218 
219  // Create window and update padding
220  win = calculate_max_window(output_shape, Steps(n0, m0));
221  }
222  else if (_data_layout == DataLayout::NCHW)
223  {
224  _num_elems_processed_per_iteration = 1u;
225  win = calculate_max_window(*dst, Steps(_num_elems_processed_per_iteration));
226  }
227 
228  ICLKernel::configure_internal(win);
229 
230  std::stringstream kernel_name;
232 
234  {
235  kernel_name << "direct_convolution_nhwc";
236 
237  const unsigned int n0 = win.x().step();
238  const unsigned int m0 = win.y().step();
239  const unsigned int k0 = adjust_vec_size(desc.k0, src->dimension(channel_idx));
240  const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
241  const unsigned int pad_left = conv_info.pad_left();
242  const unsigned int pad_top = conv_info.pad_top();
243 
247 
248  // Update the padding for the weights tensor if we can export to cl_image
250  {
252  }
253 
255  {
257  }
258 
260  {
262  }
263 
264  if (biases != nullptr)
265  {
266  build_options.add_option(std::string("-DHAS_BIAS"));
267  build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->data_type())));
268  }
269 
270  // Conditions of -cl-fast-relaxed-math causing accuracy issues can be traced from COMPMID-5324
271  const auto act_function = act_info.activation();
272  const auto dst_data_type = dst->data_type();
273 
274  if ((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
275  (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU ||
276  act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) &&
277  (dst_data_type == DataType::F32 || dst_data_type == DataType::F16))
278  {
279  // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
280  // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
281  build_options.add_option("-cl-unsafe-math-optimizations");
282  }
283  else
284  {
285  build_options.add_option("-cl-fast-relaxed-math");
286  }
287 
288  build_options.add_option_if_else(_export_input_to_cl_image, "-DSRC_TENSOR_TYPE=IMAGE",
289  "-DSRC_TENSOR_TYPE=BUFFER");
290  build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
291  build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(0)));
292  build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(1)));
293  build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(2)));
294  build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(0)));
295  build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(1)));
296  build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(2)));
297  build_options.add_option_if_else(_export_output_to_cl_image, "-DDST_TENSOR_TYPE=IMAGE",
298  "-DDST_TENSOR_TYPE=BUFFER");
299  build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type));
300  build_options.add_option_if_else(_export_weights_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE",
301  "-DWEI_TENSOR_TYPE=BUFFER");
302  build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
303  build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
304  build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
305  build_options.add_option("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x));
306  build_options.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(conv_stride_y));
307  build_options.add_option("-DPAD_LEFT=" + support::cpp11::to_string(pad_left));
308  build_options.add_option("-DPAD_TOP=" + support::cpp11::to_string(pad_top));
309  build_options.add_option("-DN0=" + support::cpp11::to_string(n0));
310  build_options.add_option("-DM0=" + support::cpp11::to_string(m0));
311  build_options.add_option("-DK0=" + support::cpp11::to_string(k0));
312  build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
313  build_options.add_option_if((src->dimension(channel_idx) % k0) != 0, "-DLEFTOVER_LOOP");
314  build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_function)));
315 
317  {
318  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
319  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
320  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
321 
322  PixelValue zero_value = PixelValue(0, src->data_type(), src->quantization_info());
323  int zero_value_s32;
324  zero_value.get(zero_value_s32);
325 
326  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
327  int output_multiplier = 0;
328  int output_shift = 0;
329  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
330  build_options.add_option("-DIS_QUANTIZED");
331  build_options.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
332  build_options.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift));
333  build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(-iqinfo.offset));
334  build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(-wqinfo.offset));
335  build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset));
336  build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32));
337  build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32));
338  }
339  else
340  {
341  build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(data_type));
342  build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0));
343  build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0));
344  build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0));
345  build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0));
346  build_options.add_option_if(act_info.enabled(),
348  build_options.add_option_if(act_info.enabled(),
350  }
351 
352  if (compile_context.get_ddk_version() >= 30)
353  {
354  build_options.add_option("-fregister-allocation=64");
355  }
356  }
357  else
358  {
360 
361  kernel_name << "direct_convolution_nchw";
362  build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS"));
363  build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
364  build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(height_idx)));
365  build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(channel_idx)));
366  build_options.add_option("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left()));
367  build_options.add_option("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
368  build_options.add_option("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x));
369  build_options.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(conv_stride_y));
370  build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
371  build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
372  build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
373  build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
374  build_options.add_option(
375  std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(weights->dimension(channel_idx))));
376  build_options.add_option(std::string("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x)));
377  build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(data_type)));
378  build_options.add_option(
379  std::string("-DVEC_SIZE=" + support::cpp11::to_string(_num_elems_processed_per_iteration)));
380  build_options.add_option(
381  std::string("-DVEC_SIZE_LEFTOVER=" +
382  support::cpp11::to_string(src->dimension(0) % _num_elems_processed_per_iteration)));
383 
385  {
386  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
387  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
388  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
389 
390  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
391  int output_multiplier = 0;
392  int output_shift = 0;
393  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
394  build_options.add_option("-DIS_QUANTIZED");
395  build_options.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
396  build_options.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
397  build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size));
398  build_options.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iqinfo.offset));
399  build_options.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wqinfo.offset));
400  build_options.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oqinfo.offset));
401  }
402  }
403 
404  _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
405 
406  // Set config_id for enabling LWS tuning
407  // config_id should include the variables used to parameterize the kernel
408  _config_id = kernel_name.str();
409  _config_id += "_";
411  _config_id += "_";
412  _config_id += support::cpp11::to_string(kernel_size);
413  _config_id += "_";
414  _config_id += support::cpp11::to_string(border_size().left);
415  _config_id += "_";
416  _config_id += support::cpp11::to_string(border_size().top);
417  _config_id += "_";
418  _config_id += support::cpp11::to_string(border_size().right);
419  _config_id += "_";
420  _config_id += support::cpp11::to_string(border_size().bottom);
421  _config_id += "_";
422  _config_id += support::cpp11::to_string(conv_stride_x);
423  _config_id += "_";
424  _config_id += support::cpp11::to_string(conv_stride_y);
425  // SRC_CHANNELS, SRC_WIDTH, SRC_HEIGHT
426  _config_id += "_";
427  _config_id += support::cpp11::to_string(src->dimension(channel_idx));
428  _config_id += "_";
429  _config_id += support::cpp11::to_string(src->dimension(width_idx));
430  _config_id += "_";
431  _config_id += support::cpp11::to_string(src->dimension(height_idx));
432  _config_id += "_";
433  // DST_CHANNELS, DST_WIDTH, DST_HEIGHT
434  _config_id += support::cpp11::to_string(dst->dimension(channel_idx));
435  _config_id += "_";
436  _config_id += support::cpp11::to_string(dst->dimension(width_idx));
437  _config_id += "_";
438  _config_id += support::cpp11::to_string(dst->dimension(height_idx));
439  _config_id += "_";
441 }
442 
444  const ITensorInfo *weights,
445  const ITensorInfo *biases,
446  const ITensorInfo *dst,
447  const PadStrideInfo &conv_info,
449  const DirectConvComputeKernelInfo &desc)
450 {
452  return Status{};
453 }
454 
455 void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
456 {
459 
460  // Get initial windows
462 
463  const auto src =
464  utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
465  const auto weights =
466  utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
467  const auto biases =
468  utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
469  auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
470 
472  {
473  cl::Image2D weights_cl_image;
474  cl::Image2D output_cl_image;
475  cl::Image2D input_cl_image;
476 
478  {
479  // Export tensor to cl_image
480  weights_cl_image = create_image2d_from_tensor(weights, CLImage2DType::ReadOnly);
481  }
482 
484  {
485  // Export tensor to cl_image
487  }
488 
490  {
491  // Export tensor to cl_image
493  }
494 
495  unsigned int idx = 0;
497  {
498  _kernel.setArg(idx++, input_cl_image);
499  }
502  {
503  _kernel.setArg(idx++, output_cl_image);
504  }
507  {
508  _kernel.setArg(idx++, weights_cl_image);
509  }
510  add_4d_tensor_nhwc_argument(idx, weights);
511  if (biases != nullptr)
512  {
513  add_1D_tensor_argument(idx, biases, slice);
514  }
515  enqueue(queue, *this, slice, lws_hint());
516  }
517  else
518  {
519  unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
520  add_3D_tensor_argument(idx1, weights, slice);
521 
522  if (biases != nullptr)
523  {
524  Window slice_biases;
525  slice_biases.use_tensor_dimensions(biases->info()->tensor_shape());
526  add_1D_tensor_argument(idx1, biases, slice_biases);
527  }
528 
529  _kernel.setArg(idx1++, static_cast<unsigned int>(weights->info()->strides_in_bytes()[3]));
530 
531  do
532  {
533  unsigned int idx = 0;
536  enqueue(queue, *this, slice, lws_hint());
538  }
539 }
540 } // namespace kernels
541 } // namespace opencl
542 } // namespace arm_compute
arm_compute::Steps
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
arm_compute::DataLayout::NCHW
@ NCHW
Num samples, channels, height, width.
arm_compute::support::cpp11::to_string
std::string to_string(T &&value)
Convert integer and float values to string.
Definition: StringSupport.h:168
arm_compute::opencl::kernels::ClDirectConv2dKernel::configure
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
Set the src, weights, biases and dst tensors info.
Definition: ClDirectConv2dKernel.cpp:176
Cast.h
StringSupport.h
arm_compute::UniformQuantizationInfo::offset
int32_t offset
Definition: QuantizationInfo.h:63
arm_compute::ICLKernel::add_1D_tensor_argument
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:186
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
AdjustVecSize.h
arm_compute::export_to_cl_image
bool export_to_cl_image(const ITensorInfo *tensor)
Definition: CLHelpers.cpp:449
ICLTensor.h
arm_compute::PixelValue
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:35
Helpers.h
arm_compute::DataLayout
DataLayout
[DataLayout enum definition]
Definition: CoreTypes.h:110
arm_compute::DataLayoutDimension::CHANNEL
@ CHANNEL
channel
arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:29
arm_compute::opencl::kernels::ClDirectConv2dKernel::_conv_info
PadStrideInfo _conv_info
Definition: ClDirectConv2dKernel.h:99
arm_compute::create_image2d_from_tensor
cl::Image2D create_image2d_from_tensor(const ICLTensor *tensor, CLImage2DType image_type)
Create a cl::Image2D object from a tensor.
Definition: CLUtils.cpp:37
arm_compute::test::validation::output_shape
TensorShape output_shape
Definition: LSTMLayerQuantized.cpp:469
arm_compute::DataLayout::NHWC
@ NHWC
Num samples, height, width, channels.
arm_compute::Window::Dimension::step
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:106
arm_compute::DataType::QASYMM8
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
arm_compute::PixelValue::get
void get(uint8_t &v) const
Interpret the pixel value as a U8.
Definition: PixelValue.h:228
arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::lower_string
std::string lower_string(const std::string &val)
Lower a given string.
Definition: StringUtils.cpp:38
arm_compute::cpu::kernels::validate_arguments
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
Definition: CpuDirectConv2dKernel.cpp:57
arm_compute::CLImage2DType::WriteOnly
@ WriteOnly
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:1079
arm_compute::opencl::kernels::ClDirectConv2dKernel::_export_input_to_cl_image
bool _export_input_to_cl_image
Definition: ClDirectConv2dKernel.h:102
arm_compute::DirectConvComputeKernelInfo::export_output_to_cl_image
bool export_output_to_cl_image
Flag to export the output to cl_image.
Definition: KernelDescriptors.h:130
arm_compute::Window::use_tensor_dimensions
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:290
ClGemmHelpers.h
arm_compute::ITensorPack::get_tensor
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:63
arm_compute::string_from_data_type
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: DataTypeUtils.cpp:31
arm_compute::IKernel::border_size
virtual BorderSize border_size() const
The size of the border for that kernel.
Definition: IKernel.cpp:45
arm_compute::ICLKernel::get_target
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: ICLKernel.h:443
arm_compute::ACL_SRC_0
@ ACL_SRC_0
Definition: Types.h:45
arm_compute::CLCompileContext::get_ddk_version
int32_t get_ddk_version() const
Return the DDK version.
Definition: CLCompileContext.cpp:404
arm_compute::UniformQuantizationInfo
Quantization info when assuming per layer quantization.
Definition: QuantizationInfo.h:42
arm_compute::ACL_SRC_1
@ ACL_SRC_1
Definition: Types.h:46
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:677
arm_compute::DataLayoutDimension::WIDTH
@ WIDTH
width
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:952
arm_compute::ACL_SRC_2
@ ACL_SRC_2
Definition: Types.h:47
CLKernelLibrary.h
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
arm_compute::cpu::data_layout
constexpr auto data_layout
Definition: impl.h:36
StringUtils.h
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:205
arm_compute::ITensorInfo::dimension
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
arm_compute::ActivationLayerInfo
Activation Layer Information class.
Definition: ActivationLayerInfo.h:55
arm_compute::GPUTarget::BIFROST
@ BIFROST
CLUtils.h
arm_compute::test::validation::act_info
act_info
Definition: DirectConvolutionLayer.cpp:547
arm_compute::DIRECT
@ DIRECT
Direct Convolution CL kernel type.
Definition: CLTypes.h:82
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
arm_compute::utils::cast::U
U
Definition: SaturateCast.h:65
arm_compute::DirectConvComputeKernelInfo::n0
int32_t n0
Number of columns to be processed by the kernel.
Definition: KernelDescriptors.h:127
arm_compute::get_data_size_from_data_type
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:194
arm_compute::ITensorPack::get_const_tensor
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:53
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:39
arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:204
arm_compute::CLImage2DType::ReadOnly
@ ReadOnly
arm_compute::DataLayoutDimension::HEIGHT
@ HEIGHT
height
arm_compute::opencl::kernels::ClDirectConv2dKernel::ClDirectConv2dKernel
ClDirectConv2dKernel()
Definition: ClDirectConv2dKernel.cpp:171
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: AutoConfiguration.h:43
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:36
arm_compute::create_kernel
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:409
arm_compute::Status
Status class.
Definition: Error.h:52
arm_compute::DataType::QASYMM8_SIGNED
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
WindowHelpers.h
arm_compute::Window::y
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:167
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:203
arm_compute::float_to_string_with_full_precision
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: StringUtils.cpp:52
arm_compute::ITensorInfo::data_type
virtual DataType data_type() const =0
Data type used for each element of the tensor.
arm_compute::opencl::kernels::ClDirectConv2dKernel::run_op
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Definition: ClDirectConv2dKernel.cpp:455
arm_compute::Window::slide_window_slice_3D
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:350
arm_compute::DirectConvComputeKernelInfo::m0
int32_t m0
Number of rows to be processed by the kernel.
Definition: KernelDescriptors.h:126
arm_compute::QuantizationInfo::uniform
UniformQuantizationInfo uniform() const
Return per layer quantization info.
Definition: QuantizationInfo.h:140
PixelValue.h
arm_compute::PadStrideInfo
Definition: CoreTypes.h:139
AutoConfiguration.h
ActivationFunctionUtils.h
arm_compute::test::validation::data_type
data_type
Definition: Cast.cpp:222
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image
void update_padding_for_cl_image(ITensorInfo *tensor)
Update padding required to export the OpenCL buffer to OpenCL image2d.
Definition: ClGemmHelpers.cpp:100
arm_compute::ITensorInfo::quantization_info
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
arm_compute::Window::first_slice_window_3D
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:306
CLValidate.h
AsymmHelpers.h
arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
arm_compute::string_from_data_layout
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: DataLayoutUtils.cpp:30
arm_compute::get_cl_type_from_data_type
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:41
arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
arm_compute::ICLKernel::add_3D_tensor_argument
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:234
arm_compute::get_data_layout_dimension_index
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:201
ShapeCalculator.h
arm_compute::ICLKernel::add_4d_tensor_nhwc_argument
void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor)
Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments by passing strides,...
Definition: ICLKernel.cpp:145
arm_compute::UniformQuantizationInfo::scale
float scale
Definition: QuantizationInfo.h:62
KernelDescriptors.h
arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39
arm_compute::opencl::kernels::ClDirectConv2dKernel::_data_layout
DataLayout _data_layout
Definition: ClDirectConv2dKernel.h:98
ClDirectConv2dKernel.h
arm_compute::ICLKernel::num_arguments_per_3D_tensor
constexpr static unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:321
ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:245
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::validation::conv_info
conv_info
Definition: DirectConvolutionLayer.cpp:547
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:294
arm_compute::DataType::F16
@ F16
16-bit floating-point number
arm_compute::GPUTarget::GPU_ARCH_MASK
@ GPU_ARCH_MASK
arm_compute::adjust_vec_size
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
Definition: AdjustVecSize.h:38
arm_compute::DirectConvComputeKernelInfo::export_weights_to_cl_image
bool export_weights_to_cl_image
Flag to export the weights to cl_image.
Definition: KernelDescriptors.h:129
arm_compute::is_data_type_float
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: DataTypeUtils.h:304
arm_compute::opencl::kernels::ClDirectConv2dKernel::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
Static function to check if given info will lead to a valid configuration.
Definition: ClDirectConv2dKernel.cpp:443
arm_compute::DataType::S32
@ S32
signed 32-bit number
arm_compute::ICLKernel::lws_hint
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:383
arm_compute::is_data_type_quantized_asymmetric
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: DataTypeUtils.h:346
arm_compute::quantization::calculate_quantized_multiplier
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Definition: AsymmHelpers.cpp:43
arm_compute::string_from_activation_func
const std::string & string_from_activation_func(const ActivationFunction &act)
Translates a given activation function to a string.
Definition: ActivationFunctionUtils.cpp:31
arm_compute::DirectConvComputeKernelInfo::export_input_to_cl_image
bool export_input_to_cl_image
Flag to export the input to cl_image.
Definition: KernelDescriptors.h:131
arm_compute::is_data_type_quantized
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: DataTypeUtils.h:324
arm_compute::cpu::channel_idx
const size_t channel_idx
Definition: impl.h:39
AccessWindowStatic.h
arm_compute::GPUTarget::G71
@ G71
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:44
arm_compute::opencl::kernels::ClDirectConv2dKernel::_export_output_to_cl_image
bool _export_output_to_cl_image
Definition: ClDirectConv2dKernel.h:101
arm_compute::DataType::F32
@ F32
32-bit floating-point number
arm_compute::cpu::width_idx
const size_t width_idx
Definition: impl.h:37
ITensor.h
arm_compute::CLBuildOptions
Build options.
Definition: CLCompileContext.h:38
arm_compute::misc::shape_calculator::compute_deep_convolution_shape
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the output shape of a deep convolution.
Definition: ShapeCalculator.h:777
arm_compute::opencl::kernels::ClDirectConv2dKernel::_export_weights_to_cl_image
bool _export_weights_to_cl_image
Definition: ClDirectConv2dKernel.h:100
arm_compute::DirectConvComputeKernelInfo::k0
int32_t k0
Number of partial accumulations to be processed in a single iteration by the kernel.
Definition: KernelDescriptors.h:128
arm_compute::Window::x
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:158
arm_compute::DataType
DataType
Available data types.
Definition: CoreTypes.h:83
arm_compute::test::validation::reference::slice
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
Definition: SliceOperations.cpp:38
arm_compute::DirectConvComputeKernelInfo
Compute descriptor used by the direct convolution kernel.
Definition: KernelDescriptors.h:124
build_options
std::set< std::string > build_options
Definition: ClIm2ColKernel.cpp:59
arm_compute::cpu::height_idx
const size_t height_idx
Definition: impl.h:38
kernel_name
std::string kernel_name
Definition: ClIm2ColKernel.cpp:58
arm_compute::TensorShape::collapse
void collapse(size_t n, size_t first=0)
Collapse the first n dimensions.
Definition: TensorShape.h:136
arm_compute::enqueue
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:33