Compute Library
 22.02
ClDirectConv2dKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 #include "arm_compute/core/Utils.h"
36 #include "src/core/CL/CLUtils.h"
37 #include "src/core/CL/CLValidate.h"
41 #include "support/Cast.h"
42 #include "support/StringSupport.h"
43 namespace arm_compute
44 {
45 namespace opencl
46 {
47 namespace kernels
48 {
49 namespace
50 {
// Checks whether src / weights / biases / dst together with the convolution
// parameters describe a configuration that the checks below accept.
// Returns the error Status produced by the first failing check, or an empty
// Status{} when every visible check passes.
// NOTE(review): the embedded listing numbers have gaps inside this function
// (54-56, 90, 94, 105, 107, 111), so some statements are not visible here and
// are not described by these comments.
51 Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
52  const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
53 {
57 
// Look up which tensor dimension holds width, height and channels for the
// data layout reported by src.
58  const DataLayout data_layout = src->data_layout();
59  const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
60  const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
61  const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
62 
// Weights must have the same channel count as src and at most 4 dimensions.
63  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
64  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
65 
// The restrictions in this branch are enforced for the NCHW layout only.
66  if(data_layout == DataLayout::NCHW)
67  {
// Square kernels only. The stride limits below depend on the kernel width and
// inspect only the first component of conv_info.stride().
68  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx), "Weights should have same width and height");
69  ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 1) && std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported for 1x1 convolution.");
70  ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 || weights->dimension(width_idx) == 9) && std::get<0>(conv_info.stride()) > 2,
71  "Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution.");
// Within this NCHW branch an enabled activation is rejected unless src is a
// floating point type.
72  ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_data_type_float(src->data_type()) && act_info.enabled(), "Activation supported only for floating point and NHWC.");
73 
// Accepted kernel widths: 1, 3, 5 or 9 for quantized src; 1, 3 or 5 otherwise.
74  if(is_data_type_quantized(src->data_type()))
75  {
76  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 && weights->dimension(width_idx) != 5 && weights->dimension(width_idx) != 9,
77  "Kernel sizes other than 1x1, 3x3, 5x5 or 9x9 are not supported with quantized data types");
78  }
79  else
80  {
81  ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 && weights->dimension(width_idx) != 5,
82  "Kernel sizes other than 1x1, 3x3 or 5x5 are not supported with float data types");
83  }
84  }
85 
// Biases are optional; the checks below run only when a bias info is given.
86  if(biases != nullptr)
87  {
// NOTE(review): both branches of this if/else appear empty because their
// bodies (listing lines 90 and 94) are not visible in this listing.
88  if(is_data_type_quantized_asymmetric(src->data_type()))
89  {
91  }
92  else
93  {
95  }
// The bias must be one dimensional with one element per entry of the weights'
// dimension 3.
96  ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->dimension(0) != weights->dimension(3),
97  "Biases size and number of dst feature maps should match");
98  ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->num_dimensions() > 1,
99  "Biases should be one dimensional");
100  }
101 
102  // Checks performed when dst is configured
// "Configured" is tested as dst->total_size() != 0.
// NOTE(review): the statement that consumes the computed convolution shape
// starts on a listing line (105) that is not visible; only its trailing
// argument is shown.
103  if(dst->total_size() != 0)
104  {
106  misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info));
108  }
109 
110  const auto data_type = src->data_type();
// NOTE(review): the condition guarding this block (listing line 111) is not
// visible; presumably it restricts the block to quantized data types - confirm.
// The block derives the multiplier iqinfo.scale * wqinfo.scale / oqinfo.scale
// and returns an error if calculate_quantized_multiplier reports one for it.
// The computed output_multiplier / output_shift are not used further here.
112  {
113  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
114  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
115  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
116 
117  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
118  int output_multiplier = 0;
119  int output_shift = 0;
120  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
121  }
122  return Status{};
123 }
124 
// Decides whether the given tensor passes the checks this file applies before
// treating a tensor as an OpenCL image2d.
// Returns true only if none of the checks below rejects the tensor; every
// rejection returns false.
// NOTE(review): two of the `if` conditions (listing lines 144 and 150) are not
// visible in this listing; only the comments preceding them describe them.
125 bool export_to_cl_image_support(ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
126 {
// Dimension 0 must be a multiple of 4 and the layout must be NHWC.
127  if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
128  {
129  return false;
130  }
131 
132  // If not floating point
133  if(!is_data_type_float(tensor->data_type()))
134  {
135  return false;
136  }
137 
// The G71 target and any target whose architecture is MIDGARD are excluded.
138  if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
139  {
140  return false;
141  }
142 
143  // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
145  {
146  return false;
147  }
148 
149  // Check cl image pitch alignment
151  {
152  return false;
153  }
154 
// The image would be dim0 / 4 wide and dim1 * dim2 * dim3 high; both values
// must fit within the device limits queried from the kernel library's device.
155  const size_t image_w = tensor->tensor_shape()[0] / 4;
156  const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
157  const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
158  const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
159 
160  if(image_w > max_image_w || image_h > max_image_h)
161  {
162  return false;
163  }
164 
165  return true;
166 }
167 
168 } // namespace
169 
171 {
172  _type = CLKernelType::DIRECT;
173 }
174 
// Configures the kernel from tensor infos: validates the arguments, initialises
// dst if it is still empty, computes the execution window, assembles the build
// options for either the "direct_convolution_nhwc" or the
// "direct_convolution_nchw" kernel, creates the kernel and finally builds the
// config id string used for LWS tuning.
// biases may be nullptr; src, weights and dst are checked for nullptr.
// NOTE(review): the embedded listing numbers have gaps inside this function
// (187, 199, 230, 247). The declarations of `output_shape` and `build_options`
// and the body of the cl_image padding update are therefore not visible here.
175 void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
176  const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
177 {
178  ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
179 
180  // Perform validation
181  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info));
182 
183  const int conv_stride_x = std::get<0>(conv_info.stride());
184  const int conv_stride_y = std::get<1>(conv_info.stride());
185 
// The layout is stored in a member; run_op passes it to
// export_to_cl_image_support.
186  _data_layout = src->data_layout();
188 
// kernel_size is taken from the weights' width dimension only.
189  const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
190  const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
191  const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
192  const unsigned int kernel_size = weights->dimension(width_idx);
193  const DataType data_type = src->data_type();
194 
// Despite the leading underscore this is a local variable; it is only assigned
// (to 1) in the NCHW window branch below.
195  const GPUTarget gpu_target = get_target();
196  unsigned int _num_elems_processed_per_iteration = 0;
197 
198  // Get dst shape
200 
201  // Output auto initialization if not yet initialized
// dst takes its data type and quantization info from src.
202  auto_init_if_empty(*dst, output_shape,
203  1,
204  src->data_type(),
205  src->quantization_info());
206 
207  // Configure kernel window
208  Window win;
209  if(_data_layout == DataLayout::NHWC)
210  {
// X step: dst dimension 0 capped at 4. Y step: 1, unless dst dimension 0
// exceeds 16, in which case 2 for F32 and 4 for any other data type.
211  const unsigned int vec_size = std::min(static_cast<unsigned int>(dst->tensor_shape()[0]), 4u);
212  unsigned int num_rows = 1U;
213  if(dst->tensor_shape()[0] > 16)
214  {
215  num_rows = src->data_type() == DataType::F32 ? 2U : 4U;
216  }
217 
218  // Create window and update padding
219  win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
220  }
221  else if(_data_layout == DataLayout::NCHW)
222  {
223  _num_elems_processed_per_iteration = 1u;
224  win = calculate_max_window(*dst, Steps(_num_elems_processed_per_iteration));
225  }
226 
227  ICLKernel::configure_internal(win);
228 
229  std::stringstream kernel_name;
231 
232  if(_data_layout == DataLayout::NHWC)
233  {
234  kernel_name << "direct_convolution_nhwc";
235 
// n0 / m0 are the X / Y steps of the window computed above. k0 comes from
// adjust_vec_size with a starting size of 16 (quantized) or 8 (otherwise) and
// the src channel count. partial_store_n0 is the remainder of the dst channel
// count divided by n0.
236  const unsigned int n0 = win.x().step();
237  const unsigned int m0 = win.y().step();
238  const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src->dimension(channel_idx));
239  const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
240  const unsigned int pad_left = conv_info.pad_left();
241  const unsigned int pad_top = conv_info.pad_top();
242  const bool export_to_cl_image = export_to_cl_image_support(weights, gpu_target, _data_layout);
243 
244  // Update the padding for the weights tensor if we can export to cl_image
245  if(export_to_cl_image)
246  {
248  }
249 
250  if(biases != nullptr)
251  {
252  build_options.add_option(std::string("-DHAS_BIAS"));
253  build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->data_type())));
254  }
255 
// src and dst are always declared as BUFFER; only the weights may be declared
// as IMAGE, depending on export_to_cl_image.
256  build_options.add_option("-cl-fast-relaxed-math");
257  build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER");
258  build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
259  build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
260  build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst->data_type()));
261  build_options.add_option_if_else(export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
262  build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
263  build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
264  build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
265  build_options.add_option("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x));
266  build_options.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(conv_stride_y));
267  build_options.add_option("-DPAD_LEFT=" + support::cpp11::to_string(pad_left));
268  build_options.add_option("-DPAD_TOP=" + support::cpp11::to_string(pad_top));
269  build_options.add_option("-DN0=" + support::cpp11::to_string(n0));
270  build_options.add_option("-DM0=" + support::cpp11::to_string(m0));
271  build_options.add_option("-DK0=" + support::cpp11::to_string(k0));
272  build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
// LEFTOVER_LOOP is defined only when the src channel count is not a multiple
// of k0.
273  build_options.add_option_if((src->dimension(channel_idx) % k0) != 0, "-DLEFTOVER_LOOP");
274  build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
275 
276  if(is_data_type_quantized(data_type))
277  {
278  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
279  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
280  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
281 
// ZERO_VALUE is a PixelValue built from 0 with src's data type and quantization
// info, read back as an int.
282  PixelValue zero_value = PixelValue(0, src->data_type(), src->quantization_info());
283  int zero_value_s32;
284  zero_value.get(zero_value_s32);
285 
// The Status returned by calculate_quantized_multiplier is not inspected here;
// validate_arguments (called at the top of this function) contains a
// RETURN_ON_ERROR check on the same computation.
286  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
287  int output_multiplier = 0;
288  int output_shift = 0;
289  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
// The src and weights offsets are passed negated; the dst offset is passed
// as is. Accumulation uses the CL type for S32.
290  build_options.add_option("-DIS_QUANTIZED");
291  build_options.add_option("-DDST_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
292  build_options.add_option("-DDST_SHIFT=" + support::cpp11::to_string(output_shift));
293  build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(-iqinfo.offset));
294  build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(-wqinfo.offset));
295  build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset));
296  build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32));
297  build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32));
298  }
299  else
300  {
// Non-quantized path: accumulate in the src data type, all offsets and the
// zero value are 0. A_VAL / B_VAL are only defined when the activation is
// enabled (and only in this non-quantized path).
301  build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(data_type));
302  build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0));
303  build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0));
304  build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0));
305  build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0));
306  build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
307  build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
308  }
309  }
310  else
311  {
// Every layout other than NHWC ends up here and gets the NCHW kernel.
312  kernel_name << "direct_convolution_nchw";
313  build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS"));
314  build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
315  build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(height_idx)));
316  build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(channel_idx)));
317  build_options.add_option("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left()));
318  build_options.add_option("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top()));
319  build_options.add_option("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x));
320  build_options.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(conv_stride_y));
321  build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
322  build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
323  build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
324  build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
325  build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(weights->dimension(channel_idx))));
// NOTE(review): -DSTRIDE_X is added a second time here, producing the same
// option string as the add_option call a few lines above.
326  build_options.add_option(std::string("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x)));
327  build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(data_type)));
// NOTE(review): _num_elems_processed_per_iteration is 1 only if the NCHW window
// branch above ran; for any other layout reaching this branch it would still be
// 0 and the modulo below would divide by zero - confirm no such layout exists.
328  build_options.add_option(std::string("-DVEC_SIZE=" + support::cpp11::to_string(_num_elems_processed_per_iteration)));
329  build_options.add_option(std::string("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % _num_elems_processed_per_iteration)));
330 
331  if(is_data_type_quantized(data_type))
332  {
333  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
334  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
335  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
336 
// Same multiplier computation as in the NHWC branch, but exported under
// different macro names; KERNEL_SIZE is only defined in this quantized path.
337  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
338  int output_multiplier = 0;
339  int output_shift = 0;
340  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
341  build_options.add_option("-DIS_QUANTIZED");
342  build_options.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
343  build_options.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
344  build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size));
345  build_options.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iqinfo.offset));
346  build_options.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wqinfo.offset));
347  build_options.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oqinfo.offset));
348  }
349  }
350 
351  _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
352 
353  // Set config_id for enabling LWS tuning
// Layout of the id, joined with '_': kernel name, data type, kernel size,
// border (left, top, right, bottom), stride x, stride y, dst width, dst height,
// data layout.
354  _config_id = kernel_name.str();
355  _config_id += "_";
356  _config_id += lower_string(string_from_data_type(data_type));
357  _config_id += "_";
358  _config_id += support::cpp11::to_string(kernel_size);
359  _config_id += "_";
360  _config_id += support::cpp11::to_string(border_size().left);
361  _config_id += "_";
362  _config_id += support::cpp11::to_string(border_size().top);
363  _config_id += "_";
364  _config_id += support::cpp11::to_string(border_size().right);
365  _config_id += "_";
366  _config_id += support::cpp11::to_string(border_size().bottom);
367  _config_id += "_";
368  _config_id += support::cpp11::to_string(conv_stride_x);
369  _config_id += "_";
370  _config_id += support::cpp11::to_string(conv_stride_y);
371  _config_id += "_";
372  _config_id += support::cpp11::to_string(dst->dimension(width_idx));
373  _config_id += "_";
374  _config_id += support::cpp11::to_string(dst->dimension(height_idx));
375  _config_id += "_";
376  _config_id += lower_string(string_from_data_layout(_data_layout));
377 }
378 
// Public validation entry point: forwards all arguments unchanged to the
// file-local validate_arguments helper.
// Returns the helper's error Status if it reports one, otherwise an empty
// Status{}.
379 Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
380  const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
381 {
382  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info));
383  return Status{};
384 }
385 
// Sets the kernel arguments from the tensors in the pack and enqueues the
// kernel on the given command queue.
// NOTE(review): the embedded listing numbers have gaps inside this function
// (388-389, 392, 399). The definition of `slice` and the condition selecting
// between the two branches below are therefore not visible here.
386 void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
387 {
390 
391  // Get initial windows
393 
// Tensors are looked up by id: ACL_SRC_0 (src), ACL_SRC_1 (weights),
// ACL_SRC_2 (biases, compared against nullptr below) and ACL_DST (dst).
394  const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
395  const auto weights = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
396  const auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
397  auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
398 
// NOTE(review): the branch condition (listing line 399) is not visible. This
// branch uses the *_nhwc argument helpers, so presumably it is the NHWC path -
// confirm.
400  {
401  cl::Image2D weights_cl_image;
402 
// Y covers dst dimensions 1 and 2 multiplied together, rounded up to a multiple
// of the Y step; Z covers dst dimension 3 with step 1.
403  const size_t dim_y_collapsed = ceil_to_multiple(dst->info()->dimension(1) * dst->info()->dimension(2), slice.y().step());
404  const bool export_to_cl_image = export_to_cl_image_support(weights->info(), get_target(), _data_layout);
405 
406  slice.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, slice.y().step()));
407  slice.set(Window::DimZ, Window::Dimension(0, dst->info()->dimension(3), 1));
408 
409  if(export_to_cl_image)
410  {
// Image geometry mirrors export_to_cl_image_support: width is dim0 / 4, height
// is dim1 * dim2 * dim3; the row pitch is the weights' byte stride of
// dimension 1.
411  const size_t image_w = weights->info()->dimension(0) / 4;
412  const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
413  const TensorShape shape2d(image_w, image_h);
414  const size_t image_row_pitch = weights->info()->strides_in_bytes()[1];
415 
416  // Export cl_buffer to cl_image
417  weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch);
418  }
419 
// Argument order: src, dst, the weights image (only when exporting), the
// weights tensor, then the biases (only when present).
420  unsigned int idx = 0;
421  add_4d_tensor_nhwc_argument(idx, src);
422  add_4d_tensor_nhwc_argument(idx, dst);
423  if(export_to_cl_image)
424  {
425  _kernel.setArg(idx++, weights_cl_image);
426  }
427  add_4d_tensor_nhwc_argument(idx, weights);
428  if(biases != nullptr)
429  {
430  add_1D_tensor_argument(idx, biases, slice);
431  }
432  enqueue(queue, *this, slice, lws_hint());
433  }
434  else
435  {
// The weights arguments start after the slots of two 3D tensors; those first
// slots are filled with src and dst inside the loop below.
436  unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
437  add_3D_tensor_argument(idx1, weights, slice);
438 
439  if(biases != nullptr)
440  {
// The biases get their own window built from the bias tensor shape.
441  Window slice_biases;
442  slice_biases.use_tensor_dimensions(biases->info()->tensor_shape());
443  add_1D_tensor_argument(idx1, biases, slice_biases);
444  }
445 
// Last argument: the weights' byte stride of dimension 3.
446  _kernel.setArg(idx1++, static_cast<unsigned int>(weights->info()->strides_in_bytes()[3]));
447 
// For each 3D slice of the window: re-set the src and dst arguments from
// index 0 and enqueue the kernel.
448  do
449  {
450  unsigned int idx = 0;
451  add_3D_tensor_argument(idx, src, slice);
452  add_3D_tensor_argument(idx, dst, slice);
453  enqueue(queue, *this, slice, lws_hint());
454  }
455  while(window.slide_window_slice_3D(slice));
456  }
457 }
458 } // namespace kernels
459 } // namespace opencl
460 } // namespace arm_compute
void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor)
Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments by passing strides...
Definition: ICLKernel.cpp:144
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:996
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:35
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:370
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
Shape of a tensor.
Definition: TensorShape.h:39
bool enabled() const
Check if initialised.
Definition: Types.h:1559
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32
const StringSet & options() const
Gets the current options list set.
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:104
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:353
float a() const
Get the alpha value.
Definition: Types.h:1549
void get(uint8_t &v) const
Interpret the pixel value as a U8.
Definition: PixelValue.h:244
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
Definition: Utils.cpp:163
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Quantization info when assuming per layer quantization.
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
unsigned int pad_top() const
Get the top padding.
Definition: Types.h:740
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Status class.
Definition: Error.h:52
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:199
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:338
Activation Layer Information class.
Definition: Types.h:1509
std::set< std::string > build_options
void update_padding_for_cl_image(ITensorInfo *tensor)
Update padding required to export the OpenCL buffer to OpenCL image2d.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:284
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:214
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:276
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2021 Arm Limited.
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
Set the src, weights, biases and dst tensors info.
1 channel, 1 F16 per channel
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
const DataType data_type
Definition: Im2Col.cpp:150
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:391
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
std::string get_data_size_from_data_type(const DataType &dt)
Get the size of a data type in number of bits.
Definition: CLHelpers.cpp:193
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:291
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: Utils.h:1090
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Definition: Utils.h:71
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:704
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: ICLKernel.h:413
UniformQuantizationInfo uniform() const
Return per layer quantization info.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual BorderSize border_size() const
The size of the border for that kernel.
Definition: IKernel.cpp:46
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
Padding and stride information class.
Definition: Types.h:656
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:335
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
const size_t conv_stride_x
Num samples, channels, height, width.
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1018
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
Definition: CLHelpers.cpp:375
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Definition: Utils.cpp:123
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
Num samples, height, width, channels.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:154
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
Tensor packing service.
Definition: ITensorPack.h:39
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch)
Create a cl::Image2D object from an OpenCL buffer.
Definition: CLUtils.cpp:35
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
Definition: Utils.h:1186
ActivationFunction activation() const
Get the type of activation function.
Definition: Types.h:1544
float b() const
Get the beta value.
Definition: Types.h:1554
quantized, asymmetric fixed-point 8-bit number signed
const size_t conv_stride_y
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:166
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:291
std::string kernel_name
DataType
Available data types.
Definition: Types.h:79
unsigned int pad_left() const
Get the left padding.
Definition: Types.h:730
DataLayout
[DataLayout enum definition]
Definition: Types.h:113
Describe a multidimensional execution window.
Definition: Window.h:39
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the deep convolution shape output shape of a tensor.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:976
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
const cl::Device & get_device()
Gets the CL device for which the programs are created.
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds first option if condition is true else the second one.