Compute Library
 20.08
CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 #include "arm_compute/core/Error.h"
35 #include "arm_compute/core/Types.h"
36 #include "arm_compute/core/Utils.h"
39 #include "support/StringSupport.h"
40 
41 namespace arm_compute
42 {
44 
45 namespace
46 {
// Validate all inputs of the 3x3 NCHW depthwise convolution kernel.
// Returns Status{} on success or an error Status describing the first violated
// constraint. Checked here: activation/data-type compatibility for quantized
// types, 3x3 weight shape, stride range [1,3], dilation >= 1, bias shape, the
// per-channel output multiplier/shift tensors, and output-shape consistency.
// NOTE(review): this is a doxygen listing; gaps in the embedded line numbers
// (e.g. 51-52, 70, 74, 83-84, 88, 90, 96, 103, 109) mean some original
// validation lines were dropped by the extraction and are not visible here.
47 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
48  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D dilation,
49  const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
50 {
// Quantized inputs only support a restricted set of fused activations.
53  ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8 || input->data_type() == DataType::QASYMM8_SIGNED)
55  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
56  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU)
57  && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC),
58  "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported");
// This kernel is specialized for 3x3 filters and strides 1..3 only.
59  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
60  ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
61 
62  ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
63 
64  const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
65 
66  if(biases != nullptr)
67  {
// NOTE(review): the data-type checks inside both branches (lines 70 and 74)
// were dropped by the extraction; only the braces remain.
68  if(is_qasymm)
69  {
71  }
72  else
73  {
75  }
// Bias length must match the number of output channels; the second clause
// covers the depth_multiplier layout where weights dim 2 is 1.
76  ARM_COMPUTE_RETURN_ERROR_ON((biases->dimension(0) != weights->dimension(2)) && (weights->dimension(2) != 1 || biases->dimension(0) != weights->dimension(3)));
77  ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
78  }
79 
80  if(is_qasymm)
81  {
// Quantized path requires 1-D output multiplier/shift tensors.
82  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output_multipliers, output_shifts);
85  ARM_COMPUTE_RETURN_ERROR_ON(output_multipliers->num_dimensions() > 1);
86  ARM_COMPUTE_RETURN_ERROR_ON(output_shifts->num_dimensions() > 1);
87 
// NOTE(review): the condition on line 88 (presumably the per-channel
// quantization check) is missing from this listing; the branch bodies below
// distinguish per-channel (one multiplier/shift per channel) from per-layer
// (exactly one multiplier/shift).
89  {
91  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != output_multipliers->dimension(0));
92  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) != output_shifts->dimension(0));
93  }
94  else
95  {
97  ARM_COMPUTE_RETURN_ERROR_ON(1 != output_multipliers->dimension(0));
98  ARM_COMPUTE_RETURN_ERROR_ON(1 != output_shifts->dimension(0));
99  }
100  }
101  else
102  {
// NOTE(review): the non-quantized check on line 103 was dropped by the
// extraction.
104  }
105 
// If the output is already initialized, its shape must match the computed
// depthwise convolution output shape (the comparison macro on line 109 is
// missing from this listing).
106  if(output->total_size() != 0)
107  {
108  const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
110  }
111 
112  return Status{};
113 }
114 
// Select the OpenCL kernel variant and compute the execution window.
// Chooses a kernel name based on data type (F16/F32/quantized), stride and GPU
// architecture (Bifrost specializations), derives per-iteration read/write
// element counts, then builds the maximum window and updates tensor padding.
// Returns {Status, Window}; the Status is an error if the required padding
// could not be applied. kernel_name is an out-parameter consumed by configure().
115 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info,
116  unsigned int depth_multiplier, GPUTarget gpu_target, std::string &kernel_name, const Size2D dilation)
117 {
118  // Output auto initialization if not yet initialized
119  const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
120  auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
121 
122  const unsigned int conv_stride_x = conv_info.stride().first;
123  const unsigned int conv_stride_y = conv_info.stride().second;
124  const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
125  const bool is_bifrost = get_arch_from_target(gpu_target) == GPUTarget::BIFROST;
126 
127  // Configure kernel window
128  unsigned int num_elems_read_per_iteration_x = 0;
129  unsigned int num_elems_read_per_iteration_y = 0;
130  unsigned int num_elems_written_per_iteration_x = 0;
131  unsigned int num_elems_written_per_iteration_y = 0;
132 
// --- F16 path: generic f16 kernel, optionally replaced by a Bifrost-tuned
// variant for stride 1x1 or 2x2. ---
133  if(input->data_type() == DataType::F16)
134  {
135  kernel_name = "depthwise_convolution_3x3_f16";
// 8 bytes written per iteration, expressed in elements of the data type.
136  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
137  num_elems_written_per_iteration_y = 1;
138  num_elems_read_per_iteration_y = 3;
139  switch(conv_stride_x)
140  {
141  case 1:
142  num_elems_read_per_iteration_x = 8;
143  break;
144  case 2:
145  num_elems_read_per_iteration_x = 9;
146  break;
147  case 3:
148  num_elems_read_per_iteration_x = 16;
149  break;
150  default:
// Fallback: 3-tap window advanced by stride for each written element.
151  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
152  break;
153  }
154  if(is_bifrost)
155  {
156  if(conv_stride_x == 1 && conv_stride_y == 1)
157  {
158  kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16";
159  num_elems_read_per_iteration_x = 8;
160  num_elems_written_per_iteration_x = 4;
161  num_elems_read_per_iteration_y = 6;
162  num_elems_written_per_iteration_y = 4;
163  }
164  else if(conv_stride_x == 2 && conv_stride_y == 2)
165  {
166  kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16";
167  num_elems_read_per_iteration_x = 10;
168  num_elems_written_per_iteration_x = 4;
169  num_elems_read_per_iteration_y = 5;
170  num_elems_written_per_iteration_y = 2;
171  }
172  }
173  }
// --- F32-on-Bifrost path: tuned kernels for stride 1x1 / 2x2, generic
// otherwise. ---
174  else if(input->data_type() == DataType::F32 && is_bifrost)
175  {
176  if(conv_stride_x == 1 && conv_stride_y == 1)
177  {
178  kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32";
179  num_elems_read_per_iteration_x = 4;
180  num_elems_read_per_iteration_y = 6;
181  num_elems_written_per_iteration_x = 2;
182  num_elems_written_per_iteration_y = 4;
183  }
184  else if(conv_stride_x == 2 && conv_stride_y == 2)
185  {
186  kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32";
187  num_elems_read_per_iteration_x = 6;
188  num_elems_read_per_iteration_y = 5;
189  num_elems_written_per_iteration_x = 2;
190  num_elems_written_per_iteration_y = 2;
191  }
192  else
193  {
194  kernel_name = "depthwise_convolution_3x3";
195  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
196  num_elems_written_per_iteration_y = 1;
197  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
198  num_elems_read_per_iteration_y = 3;
199  }
200  }
// --- Generic / quantized path: pick the quantized native kernel (optionally
// the dot8 variant when the device supports the int8 dot-product extension
// and weights are not per-channel quantized). ---
201  else
202  {
203  const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()) && !is_data_type_quantized_per_channel(weights->data_type());
204 
205  kernel_name = is_qasymm ? "dwc_3x3_native_quantized8" : "depthwise_convolution_3x3";
206  kernel_name += (is_qasymm && is_dot8_supported ? "_dot8" : "");
207  kernel_name += (is_qasymm ? "_nchw" : "");
208 
209  num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
210  num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1 && dilation.y() == 1) ? 2 : 1;
211  num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x + (conv_stride_x > 1 ? 1 : 0);
212  num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2;
213  }
// Widen the read window to account for dilated taps.
214  num_elems_read_per_iteration_x += (num_elems_read_per_iteration_x - 1) * (dilation.x() - 1);
215  num_elems_read_per_iteration_y += (num_elems_read_per_iteration_y - 1) * (dilation.y() - 1);
216 
217  // Create window and update padding
218  Window win = calculate_max_window(*output, Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
219 
// Input access starts at the negative pad offsets so the border region is
// included in the padding requirement.
220  AccessWindowRectangle input_access(input, -conv_info.pad_left(), -conv_info.pad_top(),
221  num_elems_read_per_iteration_x, num_elems_read_per_iteration_y,
222  conv_stride_x, conv_stride_y);
223  AccessWindowStatic weights_access(weights, 0, 0, 3, 3);
224  AccessWindowRectangle output_access(output, 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
225 
226  bool window_changed = update_window_and_padding(win, input_access, weights_access, output_access);
227 
228  output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
229 
// A changed window means the requested padding could not be satisfied.
230  Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
231  return std::make_pair(err, win);
232 }
233 } // namespace
234 
// Default constructor: zero-initializes the stride/padding state.
// NOTE(review): the constructor's signature line (embedded line 235) is
// missing from this listing; only the initializer list and body remain.
236  : _conv_stride_x(0), _conv_pad_top(0), _conv_pad_left(0)
237 {
238 }
239 
// Accessor returning the border size computed during configure().
// NOTE(review): the function signature line (embedded line 240, presumably
// BorderSize CLDepthwiseConvolutionLayer3x3NCHWKernel::border_size() const)
// is missing from this listing.
241 {
242  return _border_size;
243 }
244 
// Legacy configure() overload: forwards to the compile-context overload using
// the library's default compile context.
// NOTE(review): the first line of the signature (embedded line 245) is
// missing from this listing; the visible lines are its trailing parameters.
246  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
247  const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
248 {
249  configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts);
250 }
251 
// Main configure(): validates arguments, captures tensor/stride/padding state,
// selects the kernel variant and window via validate_and_configure_window(),
// assembles the OpenCL -D build options (including quantization parameters for
// the QASYMM8 path), compiles the kernel and builds a config_id for LWS tuning.
// NOTE(review): the first signature line (embedded line 252) and one
// validation macro (line 256) are missing from this listing.
253  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
254  const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
255 {
// Fail fast on invalid arguments before mutating any member state.
257  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
258  conv_info, depth_multiplier, act_info, dilation,
259  (output_multipliers != nullptr) ? output_multipliers->info() : nullptr,
260  (output_shifts != nullptr) ? output_shifts->info() : nullptr));
261 
// Cache tensors and convolution geometry for run().
262  _input = input;
263  _output = output;
264  _weights = weights;
265  _biases = biases;
266  _conv_stride_x = conv_info.stride().first;
267  _conv_stride_y = conv_info.stride().second;
268  _conv_pad_left = conv_info.pad_left();
269  _conv_pad_top = conv_info.pad_top();
270  _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left);
271  _output_multipliers = output_multipliers;
272  _output_shifts = output_shifts;
273  _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
274 
275  // Configure kernel window
276  std::string kernel_name;
277  const GPUTarget gpu_target = get_target();
278 
279  auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name, dilation);
280  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
281  ICLKernel::configure_internal(win_config.second);
282 
283  // Set build options
284  CLBuildOptions build_opts;
285  build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
286  build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(_output->info()->tensor_shape().z()));
287  build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier));
288  build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x));
289  build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x()));
290  build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y()));
291  build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS");
292 
// Quantized path: pass zero points, fused requantization multiplier/shift and
// activation bounds to the OpenCL kernel as compile-time constants.
293  if(_is_quantized)
294  {
295  const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
296  const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform();
297  const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
298 
299  const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());
300  const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()) && !is_quantized_per_channel;
301  build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y));
302  build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset));
303  build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset));
304  build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset));
// 9 = number of taps in a 3x3 filter; pre-computed offset-product correction.
305  build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset));
306  build_opts.add_option_if(is_quantized_per_channel, "-DPER_CHANNEL_QUANTIZATION");
307  build_opts.add_option_if(is_dot8_supported, "-DIS_DOT8");
308 
309  // Compute non-per-channel multiplier and shift anyway to make OpenCL kernel simpler
310  float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
311  int output_multiplier = 0;
312  int output_shift = 0;
313  quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
314  build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
315  build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
316 
// Quantize the activation clamp bounds into the output quantization space.
317  if(act_info.enabled())
318  {
319  const int a_val = quantize_qasymm8(act_info.a(), oq_info);
320  const int b_val = quantize_qasymm8(act_info.b(), oq_info);
321  const int o1 = oq_info.offset;
322 
323  build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val));
324  build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val));
325  build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1));
326 
327  const float s1 = iq_info.scale;
328  build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1));
329  build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1));
330  }
331 
332  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
333  build_opts.add_option("-DWEIGHTS_TYPE=" + get_cl_type_from_data_type(weights->info()->data_type()));
334  build_opts.add_option("-DWEIGHTS_PROMOTED_TYPE=" + get_cl_promoted_type_from_data_type(weights->info()->data_type()));
335  }
// Float path: activation bounds are passed in full float precision.
336  else
337  {
338  build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
339  build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
340  build_opts.add_option_if(act_info.enabled(), "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
341  build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(win_config.second.x().step()));
342  }
343 
344  build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16");
345  build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32");
346 
347  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
348 
349  // Set config_id for enabling LWS tuning
350  _config_id = kernel_name;
351  _config_id += "_";
352  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
353  _config_id += "_";
354  _config_id += support::cpp11::to_string(input->info()->dimension(0));
355  _config_id += "_";
356  _config_id += support::cpp11::to_string(input->info()->dimension(1));
357  _config_id += "_";
358  _config_id += support::cpp11::to_string(input->info()->dimension(2));
359  _config_id += "_";
360  _config_id += support::cpp11::to_string(output->info()->dimension(0));
361  _config_id += "_";
362  _config_id += support::cpp11::to_string(output->info()->dimension(1));
363 }
364 
// Static validate(): dry-runs argument validation plus window configuration on
// cloned tensor infos (so the callers' infos are never mutated) and returns
// the first error encountered, or Status{} if the configuration is valid.
// NOTE(review): the first signature line (embedded line 365) is missing from
// this listing; the visible lines are its trailing parameters.
366  const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target,
367  const Size2D &dilation, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
368 {
369  std::string kernel_name;
370  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts));
371  ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(),
372  conv_info, depth_multiplier, gpu_target, kernel_name, dilation)
373  .first);
374 
375  return Status{};
376 }
377 
// Enqueue the kernel: builds an input window shifted/strided relative to the
// output window, binds the static 1-D arguments (quantization tensors, bias)
// once, then loops over 3-D slices binding input/output/weights and enqueuing.
// NOTE(review): lines 380-383 are missing from this listing; they presumably
// contained the usual kernel/window validation macros and the declaration of
// `collapsed` (used below; the tooltip for Window::collapse_if_possible in the
// page cross-references suggests it is the window collapsed over Z) — confirm
// against the original source.
378 void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::CommandQueue &queue)
379 {
382 
384 
385  // Create input window and adjust
// Shift by the negative pads so iteration starts in the border region, and
// scale the X/Y steps by the convolution strides.
386  Window collapsed_in = collapsed;
387  collapsed_in.adjust(Window::DimX, -_conv_pad_left, true);
388  collapsed_in.adjust(Window::DimY, -_conv_pad_top, true);
389  collapsed_in.set_dimension_step(Window::DimX, collapsed_in.x().step() * _conv_stride_x);
390  collapsed_in.set_dimension_step(Window::DimY, collapsed_in.y().step() * _conv_stride_y);
391 
392  Window slice_in = collapsed_in.first_slice_window_3D();
393  Window slice_out = collapsed.first_slice_window_3D();
// Weights are reused across X/Y, so their window does not advance in-plane.
394  Window slice_weights = window.first_slice_window_3D();
395  slice_weights.set_dimension_step(Window::DimX, 0);
396  slice_weights.set_dimension_step(Window::DimY, 0);
397 
// Static (per-enqueue-constant) arguments start after the three 3-D tensors.
398  unsigned int idx = 3 * num_arguments_per_3D_tensor();
399 
400  // Set output multipliers in case of quantized data type
401  if(_is_quantized)
402  {
403  Window slice;
404  slice.use_tensor_dimensions(_output_multipliers->info()->tensor_shape());
405  add_1D_tensor_argument(idx, _output_multipliers, slice);
406  add_1D_tensor_argument(idx, _output_shifts, slice);
407  }
408 
409  // Set biases
410  if(_biases != nullptr)
411  {
412  Window slice_biases;
413  slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape());
414  add_1D_tensor_argument(idx, _biases, slice_biases);
415  }
416 
// Per-slice loop: rebind the three 3-D tensors each iteration and enqueue.
417  do
418  {
419  idx = 0;
420  add_3D_tensor_argument(idx, _input, slice_in);
421  add_3D_tensor_argument(idx, _output, slice_out);
422  add_3D_tensor_argument(idx, _weights, slice_weights);
423 
424  enqueue(queue, *this, slice_out, lws_hint());
425  }
426  while(collapsed.slide_window_slice_3D(slice_out) && collapsed_in.slide_window_slice_3D(slice_in));
427 }
428 } // namespace arm_compute
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:34
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
Definition: CLHelpers.cpp:239
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation=Size2D(1U, 1U))
Calculate the depthwise convolution output shape of a tensor.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
Definition: Types.h:272
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), GPUTarget gpu_target=GPUTarget::MIDGARD, const Size2D &dilation=Size2D(1U, 1U), const ITensorInfo *output_multipliers=nullptr, const ITensorInfo *output_shifts=nullptr)
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLa...
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:39
const StringSet & options() const
Gets the current options list set.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:102
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:263
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
1 channel, 1 F32 per channel
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
Definition: Utils.cpp:163
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Quantization info when assuming per layer quantization.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Status class.
Definition: Error.h:52
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:189
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:326
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Activation Layer Information class.
Definition: Types.h:1517
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps=Steps(), bool skip_border=false, BorderSize border_size=BorderSize())
Calculate the maximum window for a given tensor shape and border setting.
Definition: Helpers.cpp:28
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:159
void use_tensor_dimensions(const TensorShape &shape, size_t first_dimension=Window::DimX)
Use the tensor's dimensions to fill the window dimensions.
Definition: Window.inl:276
Copyright (c) 2017-2020 Arm Limited.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Definition: Helpers.inl:207
1 channel, 1 F16 per channel
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: Tensor.cpp:33
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: Helpers.h:437
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:201
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: Utils.h:1198
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: Utils.h:1215
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:288
quantized, asymmetric fixed-point 8-bit number unsigned
std::string kernel_name
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: ICLKernel.h:302
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Definition: Utils.h:102
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true;.
Padding and stride information class.
Definition: Types.h:689
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:333
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1143
quantized, symmetric per channel fixed-point 8-bit number
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL promoted type.
Definition: CLHelpers.cpp:73
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U), const ICLTensor *output_multipliers=nullptr, const ICLTensor *output_shifts=nullptr) override
Initialize the function's source, destination, conv and border_size.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
Definition: Window.inl:167
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:152
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
quantized, asymmetric fixed-point 8-bit number signed
void adjust(size_t dimension, int adjust_value, bool is_at_start)
Adjust the start or end of a given dimension by the given value.
Definition: Window.inl:140
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:111
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:289
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
BorderSize border_size() const override
The size of the border for that kernel.
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:143