Compute Library
 21.02
CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 #include "arm_compute/core/Utils.h"
35 #include "src/core/CL/CLValidate.h"
36 #include "src/core/CL/ICLKernel.h"
39 #include "support/StringSupport.h"
40 
41 namespace arm_compute
42 {
44 
45 namespace
46 {
// Validates the static arguments of the 3x3 NCHW depthwise convolution kernel.
// Returns an error Status on the first failing check, Status{} on success.
// NOTE(review): this is a doc-extracted listing; several macro-based checks
// (original lines 51-52, 54, 70, 74, 83-84, 90, 96, 103, 109 — F16 support,
// data-type and bias-type validation, output-shape comparison) are not
// visible here — confirm against the original source file.
47 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
48  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D dilation,
49  const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
50 {
// For quantized inputs only a restricted set of fused activations is allowed.
53  ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8 || input->data_type() == DataType::QASYMM8_SIGNED)
55  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
56  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU)
57  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC),
58  "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported");
// This kernel is specialized for 3x3 filters and strides in [1, 3].
59  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
60  ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
61 
// Dilation factors must be at least 1 in both spatial dimensions.
62  ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
63 
64  const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
65 
66  if(biases != nullptr)
67  {
// NOTE(review): both branches below held bias data-type checks (e.g. S32 for
// quantized, matching type otherwise) elided by the extractor.
68  if(is_qasymm)
69  {
71  }
72  else
73  {
75  }
// Bias length must match the output channel count: weights dim 2, or dim 3
// when dim 2 has been collapsed to 1.
76  ARM_COMPUTE_RETURN_ERROR_ON((biases->dimension(0) != weights->dimension(2)) && (weights->dimension(2) != 1 || biases->dimension(0) != weights->dimension(3)));
77  ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
78  }
79 
80  if(is_qasymm)
81  {
// Requantization parameters are mandatory 1-D tensors for quantized runs.
82  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output_multipliers, output_shifts);
85  ARM_COMPUTE_RETURN_ERROR_ON(output_multipliers->num_dimensions() > 1);
86  ARM_COMPUTE_RETURN_ERROR_ON(output_shifts->num_dimensions() > 1);
87 
88  if(is_data_type_quantized_per_channel(weights->data_type()))
89  {
// Per-channel quantization: one multiplier/shift per output channel.
91  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != output_multipliers->dimension(0));
92  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != output_shifts->dimension(0));
93  }
94  else
95  {
// Per-tensor quantization: exactly one multiplier/shift pair.
97  ARM_COMPUTE_RETURN_ERROR_ON(1 != output_multipliers->dimension(0));
98  ARM_COMPUTE_RETURN_ERROR_ON(1 != output_shifts->dimension(0));
99  }
100  }
101  else
102  {
// NOTE(review): the non-quantized branch held an input/weights data-type
// match check elided by the extractor.
104  }
105 
// If the output is already initialized, its shape must match the computed
// depthwise-convolution shape (the comparison macro line is elided here).
106  if(output->total_size() != 0)
107  {
108  const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
110  }
111 
112  return Status{};
113 }
114 
// Selects the OpenCL kernel variant (written into kernel_name), initializes the
// output tensor if empty, and computes the execution window plus the required
// input/weights/output padding. Returns {Status, Window}; the Status is a
// RUNTIME_ERROR when the existing padding is insufficient.
115 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info,
116  unsigned int depth_multiplier, GPUTarget gpu_target, std::string &kernel_name, const Size2D dilation)
117 {
118  // Output auto inizialitation if not yet initialized
119  const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
120  auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
121 
122  const unsigned int conv_stride_x = conv_info.stride().first;
123  const unsigned int conv_stride_y = conv_info.stride().second;
124  const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
125  const bool is_bifrost = get_arch_from_target(gpu_target) == GPUTarget::BIFROST;
126 
127  // Configure kernel window
128  unsigned int num_elems_read_per_iteration_x = 0;
129  unsigned int num_elems_read_per_iteration_y = 0;
130  unsigned int num_elems_written_per_iteration_x = 0;
131  unsigned int num_elems_written_per_iteration_y = 0;
132 
// F16 path: generic kernel first, then possibly a Bifrost specialization.
133  if(input->data_type() == DataType::F16)
134  {
135  kernel_name = "depthwise_convolution_3x3_f16";
// 8 bytes written per iteration divided by the element size (=4 for F16).
136  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
137  num_elems_written_per_iteration_y = 1;
138  num_elems_read_per_iteration_y = 3;
// Horizontal read footprint depends on the stride.
139  switch(conv_stride_x)
140  {
141  case 1:
142  num_elems_read_per_iteration_x = 8;
143  break;
144  case 2:
145  num_elems_read_per_iteration_x = 9;
146  break;
147  case 3:
148  num_elems_read_per_iteration_x = 16;
149  break;
150  default:
151  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
152  break;
153  }
// Bifrost-tuned F16 variants for the common stride 1x1 and 2x2 cases.
154  if(is_bifrost)
155  {
156  if(conv_stride_x == 1 && conv_stride_y == 1)
157  {
158  kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16";
159  num_elems_read_per_iteration_x = 8;
160  num_elems_written_per_iteration_x = 4;
161  num_elems_read_per_iteration_y = 6;
162  num_elems_written_per_iteration_y = 4;
163  }
164  else if(conv_stride_x == 2 && conv_stride_y == 2)
165  {
166  kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16";
167  num_elems_read_per_iteration_x = 10;
168  num_elems_written_per_iteration_x = 4;
169  num_elems_read_per_iteration_y = 5;
170  num_elems_written_per_iteration_y = 2;
171  }
172  }
173  }
// F32-on-Bifrost path with tuned variants; falls back to the generic kernel.
174  else if(input->data_type() == DataType::F32 && is_bifrost)
175  {
176  if(conv_stride_x == 1 && conv_stride_y == 1)
177  {
178  kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32";
179  num_elems_read_per_iteration_x = 4;
180  num_elems_read_per_iteration_y = 6;
181  num_elems_written_per_iteration_x = 2;
182  num_elems_written_per_iteration_y = 4;
183  }
184  else if(conv_stride_x == 2 && conv_stride_y == 2)
185  {
186  kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32";
187  num_elems_read_per_iteration_x = 6;
188  num_elems_read_per_iteration_y = 5;
189  num_elems_written_per_iteration_x = 2;
190  num_elems_written_per_iteration_y = 2;
191  }
192  else
193  {
194  kernel_name = "depthwise_convolution_3x3";
195  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
196  num_elems_written_per_iteration_y = 1;
197  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
198  num_elems_read_per_iteration_y = 3;
199  }
200  }
// Remaining cases: quantized kernels (optionally dot8-accelerated) or the
// generic float kernel on non-Bifrost targets.
201  else
202  {
203  const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()) && !is_data_type_quantized_per_channel(weights->data_type());
204 
205  kernel_name = is_qasymm ? "dwc_3x3_native_quantized8" : "depthwise_convolution_3x3";
206  kernel_name += (is_qasymm && is_dot8_supported ? "_dot8" : "");
207  kernel_name += (is_qasymm ? "_nchw" : "");
208 
209  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
210  num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1 && dilation.y() == 1) ? 2 : 1;
211  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x + (conv_stride_x > 1 ? 1 : 0);
212  num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2;
213  }
214  // The OpenCL routine convolution1x3 does loadn(addr), loadn(addr + dilation_x) and loadn(addr + 2 * dilation_x) on the input.
215  // Each of the three convolution1x3 gets called by passing addr, (addr + dilation_y) and (addr + 2 * dilation_y)
216  // Hence we must add 2 * dilation.x/y() to the number of elements read in those axes per thread
217  num_elems_read_per_iteration_x += 2 * dilation.x();
218  num_elems_read_per_iteration_y += 2 * dilation.y();
219 
220  // Create window and update padding
221  Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
222 
// NOTE(review): the continuation of this constructor call (original line 225,
// the stride arguments) was elided by the extractor.
223  AccessWindowRectangle input_access(input, -conv_info.pad_left(), -conv_info.pad_top(),
224  num_elems_read_per_iteration_x, num_elems_read_per_iteration_y,
226  AccessWindowStatic weights_access(weights, 0, 0, 3, 3);
227  AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
228 
// Grow tensor paddings so every access pattern fits inside the window.
229  bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
230 
231  output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
232 
// A changed window means the caller-provided padding was not enough.
233  Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
234  return std::make_pair(err, win);
235 }
236 } // namespace
237 
// Default constructor: zero-initializes stride/pad state; real values are set
// by configure(). NOTE(review): the constructor's signature line (original
// line 238) was elided by the doc extractor.
239  : _conv_stride_x(0), _conv_pad_top(0), _conv_pad_left(0)
240 {
241 }
242 
// Returns the border size computed during configure() from the input tensor's
// padding. NOTE(review): the method's signature line (original line 243) was
// elided by the doc extractor.
244 {
245  return _border_size;
246 }
247 
// Convenience overload: forwards to the CLCompileContext overload using the
// library's default compile context from the CLKernelLibrary singleton.
248 void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
249  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
250  const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
251 {
252  configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts);
253 }
254 
// Validates the arguments, caches tensor pointers and convolution parameters,
// computes the execution window, assembles the OpenCL build options (including
// quantization and fused-activation defines), builds the kernel, and sets the
// LWS-tuning config_id.
255 void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
256  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
257  const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
258 {
259  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
260  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
261  conv_info, depth_multiplier, act_info, dilation,
262  (output_multipliers != nullptr) ? output_multipliers->info() : nullptr,
263  (output_shifts != nullptr) ? output_shifts->info() : nullptr));
264 
// Cache everything run() will need later.
265  _input = input;
266  _output = output;
267  _weights = weights;
268  _biases = biases;
269  _conv_stride_x = conv_info.stride().first;
270  _conv_stride_y = conv_info.stride().second;
271  _conv_pad_left = conv_info.pad_left();
272  _conv_pad_top = conv_info.pad_top();
273  _output_multipliers = output_multipliers;
274  _output_shifts = output_shifts;
275  _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
276 
277  // Configure kernel window
278  std::string kernel_name;
279  const GPUTarget gpu_target = get_target();
280 
// validate_and_configure_window also selects kernel_name and updates padding.
281  auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name, dilation);
282  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
283  ICLKernel::configure_internal(win_config.second);
284 
// Border size is derived from whatever padding the window configuration chose.
285  _border_size = BorderSize(input->info()->padding());
286 
287  // Set build options
288  CLBuildOptions build_opts;
289  build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
290  build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(_output->info()->tensor_shape().z()));
291  build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier));
292  build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x));
293  build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
294  build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
295  build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS");
296 
// Quantized path: pass offsets, requantization multiplier/shift and clamped
// activation bounds to the OpenCL program as defines.
297  if(_is_quantized)
298  {
299  const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
300  const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
301  const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
302 
303  const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
304  const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()) && !is_quantized_per_channel;
305  build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y));
306  build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
307  build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
308  build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
// 9 = 3x3 filter taps: precomputed offset-product correction term.
309  build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
310  build_opts.add_option_if(is_quantized_per_channel, "-DPER_CHANNEL_QUANTIZATION");
311  build_opts.add_option_if(is_dot8_supported, "-DIS_DOT8");
312 
313  // Compute non-per-channel multiplier and shift anyway to make OpenCL kernel simpler
314  float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
315  int output_multiplier = 0;
316  int output_shift = 0;
317  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
318  build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
319  build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
320 
321  if(act_info.enabled())
322  {
// Quantize the activation's min/max bounds into the output's value space.
323  int a_val{};
324  int b_val{};
325  std::tie(b_val, a_val) = get_quantized_activation_min_max(act_info, input->info()->data_type(), oq_info);
326 
327  const int o1 = oq_info.offset;
328 
329  build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
330  build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
331  build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
332 
333  const float s1 = iq_info.scale;
334  build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
335  build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
336  }
337 
338  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
339  build_opts.add_option("-DWEIGHTS_TYPE=" + get_cl_type_from_data_type(weights->info()->data_type()));
340  build_opts.add_option("-DWEIGHTS_PROMOTED_TYPE=" + get_cl_promoted_type_from_data_type(weights->info()->data_type()));
341  }
// Float path: raw activation bounds and the window's x-step as vector size.
342  else
343  {
344  build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
345  build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
346  build_opts.add_option_if(act_info.enabled(), "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
347  build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(win_config.second.x().step()));
348  }
349 
350  build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16");
351  build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32");
352 
353  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
354 
355  // Set config_id for enabling LWS tuning
356  _config_id = kernel_name;
357  _config_id += "_";
358  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
359  _config_id += "_";
360  _config_id += support::cpp11::to_string(input->info()->dimension(0));
361  _config_id += "_";
362  _config_id += support::cpp11::to_string(input->info()->dimension(1));
363  _config_id += "_";
364  _config_id += support::cpp11::to_string(input->info()->dimension(2));
365  _config_id += "_";
366  _config_id += support::cpp11::to_string(output->info()->dimension(0));
367  _config_id += "_";
368  _config_id += support::cpp11::to_string(output->info()->dimension(1));
369 }
370 
// Static validation entry point: runs the argument checks and a dry-run window
// configuration on cloned tensor infos (so the caller's infos are untouched).
// NOTE(review): the method signature line (original line 371,
// `Status CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(...)` per the
// cross-reference below) was elided by the doc extractor.
372  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target,
373  const Size2D &dilation, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
374 {
375  std::string kernel_name;
376  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts));
377  ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(),
378  conv_info, depth_multiplier, gpu_target, kernel_name, dilation)
379  .first);
380 
381  return Status{};
382 }
383 
// Enqueues the kernel: builds a stride/pad-adjusted input window, a broadcast
// weights window, binds the optional quantization and bias tensors once, then
// iterates the 3D slices enqueueing one NDRange per slice.
384 void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::CommandQueue &queue)
385 {
// NOTE(review): original lines 386-389 were elided by the extractor — per the
// cross-references they held the unconfigured-kernel/invalid-subwindow checks
// and the declaration of `collapsed` (window collapsed over DimZ), which the
// code below relies on.
388 
390 
391  // Create input window and adjust
392  Window collapsed_in = collapsed;
// Shift the start by the pads and scale the steps by the strides so each
// output element maps to its top-left input sample.
393  collapsed_in.adjust(Window::DimX, -_conv_pad_left, true);
394  collapsed_in.adjust(Window::DimY, -_conv_pad_top, true);
395  collapsed_in.set_dimension_step(Window::DimX, collapsed_in.x().step() * _conv_stride_x);
396  collapsed_in.set_dimension_step(Window::DimY, collapsed_in.y().step() * _conv_stride_y);
397 
398  Window slice_in = collapsed_in.first_slice_window_3D();
399  Window slice_out = collapsed.first_slice_window_3D();
400  Window slice_weights = window.first_slice_window_3D();
// Zero steps so the same 3x3 weights are re-read for every work-item.
401  slice_weights.set_dimension_step(Window::DimX, 0);
402  slice_weights.set_dimension_step(Window::DimY, 0);
403 
// Arguments 0..3*num_args-1 are the three 3D tensors set inside the loop;
// extra (loop-invariant) arguments start at this index.
404  unsigned int idx = 3 * num_arguments_per_3D_tensor();
405 
406  // Set output multipliers in case of quantized data type
407  if(_is_quantized)
408  {
409  Window slice;
410  slice.use_tensor_dimensions(_output_multipliers->info()->tensor_shape());
411  add_1D_tensor_argument(idx, _output_multipliers, slice);
412  add_1D_tensor_argument(idx, _output_shifts, slice);
413  }
414 
415  // Set biases
416  if(_biases != nullptr)
417  {
418  Window slice_biases;
419  slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape());
420  add_1D_tensor_argument(idx, _biases, slice_biases);
421  }
422 
// Re-bind the per-slice tensors and enqueue; advance input and output slices
// in lockstep until the output window is exhausted.
423  do
424  {
425  idx = 0;
426  add_3D_tensor_argument(idx, _input, slice_in);
427  add_3D_tensor_argument(idx, _output, slice_out);
428  add_3D_tensor_argument(idx, _weights, slice_weights);
429 
430  enqueue(queue, *this, slice_out, lws_hint());
431  }
432  while(collapsed.slide_window_slice_3D(slice_out) && collapsed_in.slide_window_slice_3D(slice_in));
433 }
434 } // namespace arm_compute
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:35
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported...
Definition: CLHelpers.cpp:239
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation=Size2D(1U, 1U))
Calculate the depthwise convolution output shape of a tensor.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
Definition: Types.h:273
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), GPUTarget gpu_target=GPUTarget::MIDGARD, const Size2D &dilation=Size2D(1U, 1U), const ITensorInfo *output_multipliers=nullptr, const ITensorInfo *output_shifts=nullptr)
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLa...
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:104
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:276
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
Definition: Utils.cpp:163
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Quantization info when assuming per layer quantization.
unsigned int pad_top() const
Get the top padding.
Definition: Types.h:806
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Status class.
Definition: Error.h:52
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:189
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:350
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Activation Layer Information class.
Definition: Types.h:1550
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:288
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:172
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:276
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:214
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: Utils.h:1245
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: Utils.h:1262
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
Definition: Utils.cpp:483
quantized, asymmetric fixed-point 8-bit number unsigned
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:770
std::string kernel_name
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: ICLKernel.h:336
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Definition: Utils.h:106
Padding and stride information class.
Definition: Types.h:722
virtual PaddingSize padding() const =0
Padding of tensor.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:335
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1190
quantized, symmetric per channel fixed-point 8-bit number
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:73
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U), const ICLTensor *output_multipliers=nullptr, const ICLTensor *output_shifts=nullptr) override
Initialize the function's source, destination, conv and border_size.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
Definition: Window.inl:167
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:154
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
quantized, asymmetric fixed-point 8-bit number signed
void adjust(size_t dimension, int adjust_value, bool is_at_start)
Adjust the start or end of a given dimension by the given value.
Definition: Window.inl:140
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:124
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:291
unsigned int pad_left() const
Get the left padding.
Definition: Types.h:796
Describe a multidimensional execution window.
Definition: Window.h:39
BorderSize border_size() const override
The size of the border for that kernel.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145