Compute Library
 20.08
CLDirectConvolutionLayerKernel.cpp
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"

#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "support/StringSupport.h"

namespace arm_compute
{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);

    const DataLayout data_layout = input->data_layout();
    const size_t     width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t     height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const size_t     channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx), "Weights should have same width and height");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != 1 && weights->dimension(width_idx) != 3 && weights->dimension(width_idx) != 5 && weights->dimension(width_idx) != 9,
                                    "Kernel sizes other than 1x1, 3x3, 5x5 or 9x9 are not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != input->dimension(channel_idx),
                                    "Weights feature map dimension should match the respective input's one");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 1) && std::get<0>(conv_info.stride()) > 3, "Strides larger than 3 not supported for 1x1 convolution.");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 || weights->dimension(width_idx) == 9)
                                    && std::get<0>(conv_info.stride()) > 2,
                                    "Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution.");
    if(biases != nullptr)
    {
        if(is_data_type_quantized_asymmetric(input->data_type()))
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        }
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->dimension(0) != weights->dimension(3),
                                        "Biases size and number of input feature maps should match");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(biases->num_dimensions() > 1,
                                        "Biases should be one dimensional");
    }
82 
83  // Checks performed when output is configured
84  if(output->total_size() != 0)
85  {
89  }
90 
91  const auto data_type = input->data_type();
93  {
94  const UniformQuantizationInfo iqinfo = input->quantization_info().uniform();
95  const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform();
96  const UniformQuantizationInfo oqinfo = output->quantization_info().uniform();
97 
98  float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale;
99  int output_multiplier = 0;
100  int output_shift = 0;
101  ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
102  }
103  return Status{};
104 }
105 
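// Worked example (illustrative, not part of the original source): calculate_quantized_multiplier()
// rewrites the float rescale factor as multiplier = (output_multiplier / 2^31) * 2^-output_shift,
// with output_multiplier an int32 in [2^30, 2^31). For multiplier = 0.2:
//   0.2 = 0.8 * 2^-2, so output_multiplier = round(0.8 * 2^31) = 1717986918 and output_shift = 2.
// This lets the OpenCL kernel rescale accumulators using only an integer multiply and a shift.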
inline bool can_run_optimized_kernel_for_bifrost(GPUTarget gpu_target, unsigned int conv_stride_x, unsigned int conv_stride_y, unsigned int kernel_size,
                                                 DataType data_type, DataLayout data_layout)
{
    return gpu_target_is_in(gpu_target,
                            GPUTarget::G71, GPUTarget::G72, GPUTarget::G76,
                            GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,
                            GPUTarget::G52, GPUTarget::G52LIT)
           && (kernel_size <= 5)
           && (conv_stride_x == 1) && (conv_stride_y == 1)
           && (data_type == DataType::F32)
           && (data_layout == DataLayout::NCHW);
}

inline bool can_run_optimized_kernel_for_bifrost_nhwc(GPUTarget gpu_target, unsigned int conv_stride_x, unsigned int conv_stride_y, unsigned int kernel_size,
                                                      DataType data_type, DataLayout data_layout)
{
    return gpu_target_is_in(gpu_target,
                            GPUTarget::G71, GPUTarget::G72, GPUTarget::G76,
                            GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,
                            GPUTarget::G52, GPUTarget::G52LIT)
           && (kernel_size == 9)
           && (conv_stride_x == 1) && (conv_stride_y == 1)
           && (data_type == DataType::F32)
           && (data_layout == DataLayout::NHWC);
}

inline void setup_num_elems(unsigned int &num_elems_read_per_iteration_x, unsigned int &num_elems_read_per_iteration_y,
                            unsigned int &num_elems_written_per_iteration_x, unsigned int &num_elems_written_per_iteration_y,
                            unsigned int kernel_size, const PadStrideInfo &conv_info, const GPUTarget target, ITensorInfo *input)
{
    const DataType   data_type     = input->data_type();
    const DataLayout data_layout   = input->data_layout();
    unsigned int     conv_stride_x = std::get<0>(conv_info.stride());
    unsigned int     conv_stride_y = std::get<1>(conv_info.stride());

    const bool run_optimized_bifrost = can_run_optimized_kernel_for_bifrost(target, conv_stride_x, conv_stride_y, kernel_size, data_type, data_layout);

    if(run_optimized_bifrost)
    {
        // Configure kernel window
        switch(kernel_size)
        {
            case 1:
            {
                num_elems_read_per_iteration_x    = 4;
                num_elems_read_per_iteration_y    = 4;
                num_elems_written_per_iteration_x = 4;
                num_elems_written_per_iteration_y = 4;
                break;
            }
            case 3:
            {
                num_elems_read_per_iteration_x    = 6;
                num_elems_read_per_iteration_y    = 5;
                num_elems_written_per_iteration_x = 4;
                num_elems_written_per_iteration_y = 3;
                break;
            }
            case 5:
            {
                num_elems_read_per_iteration_x    = 8;
                num_elems_read_per_iteration_y    = 6;
                num_elems_written_per_iteration_x = 4;
                num_elems_written_per_iteration_y = 2;
                break;
            }
            default:
            {
                ARM_COMPUTE_ERROR("Kernel size not optimized for Bifrost");
            }
        }
    }
    else if(data_layout == DataLayout::NCHW)
    {
        num_elems_read_per_iteration_y    = kernel_size;
        num_elems_written_per_iteration_x = 8;
        num_elems_written_per_iteration_y = 1;
        switch(kernel_size)
        {
            case 1:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_x = 8;
                        break;
                    case 2:
                        num_elems_read_per_iteration_x = 16;
                        break;
                    case 3:
                        switch(input->element_size())
                        {
                            case 1:
                                num_elems_read_per_iteration_x = 28;
                                break;
                            case 2:
                                num_elems_read_per_iteration_x = 24;
                                break;
                            case 4:
                                num_elems_read_per_iteration_x = 22;
                                break;
                            default:
                                ARM_COMPUTE_ERROR("Invalid data size");
                        }
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 3:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_x = 10;
                        break;
                    case 2:
                        num_elems_read_per_iteration_x = 17;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 5:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_x = 12;
                        break;
                    case 2:
                        num_elems_read_per_iteration_x = 20;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 9:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_x = 16;
                        break;
                    case 2:
                        num_elems_read_per_iteration_x = 24;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            default:
                ARM_COMPUTE_ERROR("Invalid direct convolution size");
        }
    }
    else // data_layout == NHWC
    {
        const bool run_optimized_bifrost_nhwc = can_run_optimized_kernel_for_bifrost_nhwc(target, conv_stride_x, conv_stride_y, kernel_size, data_type, data_layout);

        num_elems_written_per_iteration_x = 1;

        if(run_optimized_bifrost_nhwc)
        {
            num_elems_read_per_iteration_x = 4;
        }
        else
        {
            num_elems_read_per_iteration_x = 1;
        }

        switch(kernel_size)
        {
            case 1:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_y    = 8;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    case 2:
                        num_elems_read_per_iteration_y    = 16;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 3:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_y    = 10;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    case 2:
                        num_elems_read_per_iteration_y    = 17;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 5:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_y    = 12;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    case 2:
                        num_elems_read_per_iteration_y    = 20;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            case 9:
                switch(conv_stride_x)
                {
                    case 1:
                        num_elems_read_per_iteration_y    = 16;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    case 2:
                        num_elems_read_per_iteration_y    = 24;
                        num_elems_written_per_iteration_y = 8;
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Invalid convolution stride X");
                }
                break;
            default:
                ARM_COMPUTE_ERROR("Not implemented.");
                break;
        }
    }
}
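// Note on the values above (illustrative reasoning, not from the original source): to produce
// N output elements along x with stride S and a K-wide kernel, a row of at least
// (N - 1) * S + K input elements must be read. In the NCHW branch, which writes 8 elements
// per iteration, a 3x3 kernel needs (8 - 1) * 1 + 3 = 10 reads at stride 1 and
// (8 - 1) * 2 + 3 = 17 at stride 2, exactly the values set above; some cases round this
// minimum up (e.g. 5x5 stride 2 uses 20 rather than 19) so that reads stay vector-aligned.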

std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target)
{
    const DataLayout   data_layout = input->data_layout();
    const unsigned int width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const unsigned int kernel_size = weights->dimension(width_idx);

    // Get convolved dimensions
    const TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*input, *weights, conv_info);

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output, output_shape,
                       1,
                       input->data_type(),
                       input->quantization_info());

    unsigned int num_elems_read_per_iteration_x    = 0;
    unsigned int num_elems_read_per_iteration_y    = 0;
    unsigned int num_elems_written_per_iteration_x = 0;
    unsigned int num_elems_written_per_iteration_y = 0;

    unsigned int conv_pad_left = conv_info.pad_left();
    unsigned int conv_pad_top  = conv_info.pad_top();
    unsigned int conv_stride_x = std::get<0>(conv_info.stride());
    unsigned int conv_stride_y = std::get<1>(conv_info.stride());

    setup_num_elems(num_elems_read_per_iteration_x, num_elems_read_per_iteration_y,
                    num_elems_written_per_iteration_x, num_elems_written_per_iteration_y,
                    kernel_size, conv_info, target, input);

    // Create window and update padding
    bool   window_changed = false;
    Window win            = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));

    if(data_layout == DataLayout::NHWC)
    {
        AccessWindowStatic input_access(input, 0, -conv_pad_left,
                                        ceil_to_multiple(input->dimension(0), num_elems_read_per_iteration_x),
                                        ceil_to_multiple(input->dimension(1) + conv_info.pad_right(), num_elems_read_per_iteration_y));
        AccessWindowStatic    weights_access(weights, 0, 0, weights->dimension(0), weights->dimension(1));
        AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
        window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
        Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
        return std::make_pair(err, win);
    }
    else if(data_layout == DataLayout::NCHW)
    {
        AccessWindowRectangle input_access(input, -conv_pad_left, -conv_pad_top, num_elems_read_per_iteration_x, num_elems_read_per_iteration_y, conv_stride_x, conv_stride_y);
        AccessWindowStatic    weights_access(weights, 0, 0, kernel_size, kernel_size);
        AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
        window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
        Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
        return std::make_pair(err, win);
    }
    else
    {
        ARM_COMPUTE_ERROR("Not supported");
    }
}
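// Note on the window logic above (illustrative, not from the original source):
// update_window_and_padding() checks every declared access pattern against the tensor's
// current padding and execution window and returns true if it had to adjust either; this
// function treats any such adjustment as a configuration error ("Insufficient Padding!").
// For example, the NCHW branch of a 3x3 stride-1 kernel declares a 10x3 input read
// anchored at (-conv_pad_left, -conv_pad_top) for every 8x1 block of output elements.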
} // namespace

CLDirectConvolutionLayerKernel::CLDirectConvolutionLayerKernel()
    : _input(nullptr), _biases(nullptr), _weights(nullptr), _output(nullptr), _data_layout(DataLayout::UNKNOWN), _border_size(0), _conv_stride_x(0), _conv_stride_y(0)
{
}

BorderSize CLDirectConvolutionLayerKernel::border_size() const
{
    return _border_size;
}

void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info);
}

void CLDirectConvolutionLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                               const PadStrideInfo &conv_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    _data_layout = input->info()->data_layout();
    const unsigned int width_idx   = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
    const unsigned int height_idx  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
    const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);

    const unsigned int kernel_size = weights->info()->dimension(width_idx);
    const DataType     data_type   = input->info()->data_type();

    // Get convolved dimensions
    const TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*input->info(), *weights->info(), conv_info);

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output->info(),
                       output_shape,
                       1,
                       input->info()->data_type(),
                       input->info()->quantization_info());

    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
                                                  weights->info(),
                                                  (biases != nullptr) ? biases->info() : nullptr,
                                                  output->info(),
                                                  conv_info));

    _conv_stride_x = std::get<0>(conv_info.stride());
    _conv_stride_y = std::get<1>(conv_info.stride());

    if(_data_layout == DataLayout::NHWC)
    {
        _border_size = BorderSize(conv_info.pad_left(), 0, conv_info.pad_right(), 0);
    }
    else if(_data_layout == DataLayout::NCHW)
    {
        _border_size = BorderSize(conv_info.pad_top(), conv_info.pad_right(), conv_info.pad_bottom(), conv_info.pad_left());
    }
    else
    {
        ARM_COMPUTE_ERROR("Not supported");
    }

    _input   = input;
    _weights = weights;
    _output  = output;
    _biases  = biases;

    const GPUTarget gpu_target = get_target();

    std::stringstream kernel_name;
    kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size;
    if(_data_layout == DataLayout::NHWC)
    {
        kernel_name << "_" << lower_string(string_from_data_layout(_data_layout));
    }

    CLBuildOptions build_options;
    build_options.add_option_if(_biases != nullptr, std::string("-DHAS_BIAS"));

    const bool run_optimized_for_bifrost = can_run_optimized_kernel_for_bifrost(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, _data_layout);

    if(run_optimized_for_bifrost)
    {
        build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx))));

        kernel_name << "_f32_bifrost";
        _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
    }
    else
    {
        build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)));
        build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type)));
        build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx))));
        build_options.add_option(std::string("-DSTRIDE_X=" + support::cpp11::to_string(_conv_stride_x)));
        if(_data_layout == DataLayout::NHWC)
        {
            const bool run_optimized_for_bifrost_nhwc = can_run_optimized_kernel_for_bifrost_nhwc(gpu_target, _conv_stride_x, _conv_stride_y, kernel_size, data_type, _data_layout);
            build_options.add_option(std::string("-DDATA_LAYOUT_NHWC=1"));
            build_options.add_option(std::string("-DDST_HEIGHT=" + support::cpp11::to_string(_output->info()->dimension(height_idx))));
            build_options.add_option(std::string("-DDST_WIDTH=" + support::cpp11::to_string(_output->info()->dimension(width_idx))));
            build_options.add_option(std::string("-DSRC_HEIGHT=" + support::cpp11::to_string(_input->info()->dimension(height_idx))));
            build_options.add_option(std::string("-DSRC_WIDTH=" + support::cpp11::to_string(_input->info()->dimension(width_idx))));
            build_options.add_option(std::string("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left())));
            build_options.add_option(std::string("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top())));
            build_options.add_option(std::string("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())));
            build_options.add_option(std::string("-DSTRIDE_Y=" + support::cpp11::to_string(_conv_stride_y)));
            if(run_optimized_for_bifrost_nhwc)
            {
                const unsigned int num_elems_read_per_iteration_x = 4;
                _border_size.right                                = num_elems_read_per_iteration_x;
                build_options.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_read_per_iteration_x));
            }
        }
        build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(data_type)));

        if(is_data_type_quantized_asymmetric(data_type))
        {
            const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform();
            const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform();
            const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform();

            float multiplier        = iqinfo.scale * wqinfo.scale / oqinfo.scale;
            int   output_multiplier = 0;
            int   output_shift      = 0;
            quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
            build_options.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
            build_options.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
            build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size));

            // Create kernel
            _kernel = create_kernel(compile_context, "direct_convolution_quantized", build_options.options());

            // Set static kernel arguments
            unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1;
            _kernel.setArg(idx++, -iqinfo.offset);
            _kernel.setArg(idx++, -wqinfo.offset);
            _kernel.setArg(idx++, oqinfo.offset);
        }
        else
        {
            // Create kernel
            _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
        }
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, gpu_target);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICLKernel::configure_internal(win_config.second);

    // Set config_id for enabling LWS tuning
    _config_id = "direct_convolution_";
    _config_id += lower_string(string_from_data_type(data_type));
    _config_id += "_";
    _config_id += support::cpp11::to_string(kernel_size);
    _config_id += "_";
    _config_id += support::cpp11::to_string(border_size().left);
    _config_id += "_";
    _config_id += support::cpp11::to_string(border_size().top);
    _config_id += "_";
    _config_id += support::cpp11::to_string(border_size().right);
    _config_id += "_";
    _config_id += support::cpp11::to_string(border_size().bottom);
    _config_id += "_";
    _config_id += support::cpp11::to_string(_conv_stride_x);
    _config_id += "_";
    _config_id += support::cpp11::to_string(_conv_stride_y);
    _config_id += "_";
    _config_id += support::cpp11::to_string(output->info()->dimension(width_idx));
    _config_id += "_";
    _config_id += support::cpp11::to_string(output->info()->dimension(height_idx));
    _config_id += "_";
    _config_id += lower_string(string_from_data_layout(_data_layout));
}
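// Illustrative example of the resulting id (not from the original source): a 3x3 stride-1
// F32 NCHW convolution with 1-pixel borders on every side and a 112x112 output would give
// _config_id == "direct_convolution_f32_3_1_1_1_1_1_1_112_112_nchw".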

Status CLDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                                const GPUTarget target)
{
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info));
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, target).first);

    return Status{};
}

void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);

    // Get initial windows
    Window slice  = window.first_slice_window_3D();
    Window win_in = window;

    win_in.adjust(Window::DimX, -_border_size.left, true);
    win_in.adjust(Window::DimY, -_border_size.top, true);

    const size_t width_idx  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
    const size_t height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);

    win_in.set_dimension_step(width_idx, window[width_idx].step() * _conv_stride_x);
    win_in.set_dimension_step(height_idx, window[height_idx].step() * _conv_stride_y);

    Window       slice_in = win_in.first_slice_window_3D();
    unsigned int idx1     = 2 * num_arguments_per_3D_tensor();
    add_3D_tensor_argument(idx1, _weights, slice);

    if(_biases != nullptr)
    {
        Window slice_biases;
        slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape());
        add_1D_tensor_argument(idx1, _biases, slice_biases);
    }

    _kernel.setArg(idx1++, static_cast<unsigned int>(_weights->info()->strides_in_bytes()[3]));

    do
    {
        unsigned int idx = 0;
        add_3D_tensor_argument(idx, _input, slice_in);
        add_3D_tensor_argument(idx, _output, slice);
        enqueue(queue, *this, slice, lws_hint());
    }
    while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in));
}
} // namespace arm_compute
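A minimal usage sketch (not part of this file), driving this kernel through the public runtime wrapper CLDirectConvolutionLayer; the shapes, padding and data type below are illustrative assumptions:

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // Create the OpenCL context, queue and kernel library
    CLScheduler::get().default_init();

    CLTensor src, weights, biases, dst;
    // NCHW shapes: 64x64 input with 16 channels, 32 filters of 3x3x16
    src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U), 1, DataType::F32));

    // Stride 1x1 with 1-pixel padding keeps the 3x3 output the same size as the input
    const PadStrideInfo conv_info(1, 1, 1, 1);

    // validate() rejects unsupported configurations before any allocation happens
    ARM_COMPUTE_ERROR_THROW_ON(CLDirectConvolutionLayer::validate(src.info(), weights.info(), biases.info(), dst.info(), conv_info));

    CLDirectConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, conv_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... map tensors and fill src/weights/biases here ...

    conv.run();                // Enqueue the direct convolution kernel
    CLScheduler::get().sync(); // Wait for completion
    return 0;
}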