Compute Library
 20.08
NEDepthwiseConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
30 
31 using namespace arm_compute::misc;
33 
34 namespace arm_compute
35 {
36 namespace
37 {
// Validates the configuration of the optimized (3x3 / assembly) depthwise
// convolution path.
//
// Checks dilation sanity, that the dilated kernel fits inside the padded
// input, bias dimensionality and, for quantized pipelines, that the S32
// accumulator plus the requantizing output stage are valid. Finally
// validates the optional fused activation.
//
// NOTE(review): several lines appear truncated in this extraction — the
// empty brace scope near the top, the missing condition before the kernel
// validation scope, and the empty 'else' body. Confirm against the
// original sources before relying on this text.
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    {
    }
    // Dilation must be at least 1x1
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    // The dilated kernel extent must not exceed the padded input extent
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        // Biases must be a 1D vector with one entry per output channel
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    // Per-tensor asymmetric quantization requires accumulating in S32 and
    // requantizing through the output stage kernel
    const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type());

    // NOTE(review): the condition guarding this scope is missing from this extraction
    {
        TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            DirectConvolutionLayerOutputStageKernelInfo direct_conv_info;
            direct_conv_info.output_data_type = input->data_type();
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output, direct_conv_info));
        }
    }
    else
    {
        // NOTE(review): else-branch body missing from this extraction
    }

    //Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }
    return Status{};
}
87 } // namespace
88 
// Default-constructs the optimized-path pipeline. All function/kernel members
// start unconfigured; the memory-managed members share the given memory
// manager. State flags default to "NCHW, unprepared, not quantized".
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
95 
// Configures the generic (non-assembly) NEON depthwise pipeline.
//
// The generic kernel works in NCHW: NHWC inputs are permuted to NCHW first
// and the result is permuted back at the end. For quantized inputs an S32
// accumulator tensor is inserted between the convolution kernel and the
// requantizing output stage. 'act_info' is unused here — any activation is
// run by the caller as a separate layer.
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_generic(ITensor *input,
                                                                                                  const ITensor *weights,
                                                                                                  const ITensor *biases,
                                                                                                  ITensor *output,
                                                                                                  const PadStrideInfo &conv_info,
                                                                                                  unsigned int depth_multiplier,
                                                                                                  const ActivationLayerInfo &act_info,
                                                                                                  const Size2D &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        // The accumulator lives in NCHW (the kernel's working layout)
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        // Quantized borders are filled with the input's zero-point offset
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        // Fall back to the input scale when the output info is not yet sized
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        // Fold input/weight/output scales into a fixed-point multiplier+shift
        float   multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int32_t output_multiplier;
        int32_t output_shift;
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

        DirectConvolutionLayerOutputStageKernelInfo direct_conv_info;
        direct_conv_info.result_fixedpoint_multiplier = output_multiplier;
        direct_conv_info.result_shift                 = output_shift;
        direct_conv_info.result_offset_after_shift    = oq_info.offset;
        direct_conv_info.output_data_type             = input->info()->data_type();
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, direct_conv_info);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convoluted output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}
191 
// Configures the assembly-optimized depthwise pipeline.
//
// The assembly dispatcher works in NHWC: NCHW tensors are permuted to NHWC,
// convolved, and permuted back. ReLU-family activations are fused into the
// assembly kernel; any other activation stays a separate layer
// (_is_activationlayer_enabled), in which case no activation is fused here.
//
// NOTE(review): the line defining 'is_relu6' is missing from this
// extraction (this file's is_relu6() info helper is referenced below).
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_optimized(const ITensor *input,
                                                                                                    const ITensor *weights,
                                                                                                    const ITensor *biases,
                                                                                                    ITensor *output,
                                                                                                    const PadStrideInfo &conv_info,
                                                                                                    unsigned int depth_multiplier,
                                                                                                    const ActivationLayerInfo &act_info,
                                                                                                    const Size2D &dilation)
{
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    _is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
    // Fuse the activation into the assembly kernel only when it will not be
    // run as a standalone layer afterwards
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        // Tensors are already NHWC: run the assembly function in place
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}
242 
// Entry point for the optimized-internal pipeline: validates the arguments,
// caches quantization/bias/layout state, then dispatches either the assembly
// (optimized) or the generic configuration path, plus an optional trailing
// activation layer.
//
// NOTE(review): the statement assigning '_is_optimized' is truncated in this
// extraction — only the trailing arguments of the support query (presumably
// NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported) remain.
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor *input,
                                                                                          const ITensor *weights,
                                                                                          const ITensor *biases,
                                                                                          ITensor *output, const PadStrideInfo &conv_info,
                                                                                          unsigned int depth_multiplier,
                                                                                          const ActivationLayerInfo &act_info,
                                                                                          const Size2D &dilation)
{
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                                      output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    weights->info(),
    conv_info,
    depth_multiplier,
    dilation);
    _is_nchw = input->info()->data_layout() == DataLayout::NCHW;
    // A layout permutation is needed whenever the chosen path's working layout
    // (NHWC for optimized, NCHW for generic) differs from the input layout
    _permute                    = _is_optimized == _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}
285 
// NOTE(review): the opening signature line of this static validate() method
// (presumably 'Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo *input,')
// is missing from this extraction. The body simply forwards every argument
// to the file-local validate_arguments_optimized() helper.
                                    const ITensorInfo *weights,
                                    const ITensorInfo *biases,
                                    const ITensorInfo *output,
                                    const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier,
                                    const ActivationLayerInfo &act_info,
                                    const Size2D &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}
297 
298 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_generic()
299 {
300  // Fill border
301  NEScheduler::get().schedule(&_border_handler, Window::DimX);
302 
303  // Execute depthwise convolution
304  NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);
305 
306  // Add biases
307  if(_has_bias || _is_quantized)
308  {
309  NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
310  }
311 
312  // Permute output
313  if(!_is_nchw)
314  {
315  _permute_output.run();
316  }
317 }
318 
319 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_optimized()
320 {
321  // Run assembly function
322  _dwc_optimized_func.run();
323 
324  // Permute output
325  if(_is_nchw)
326  {
327  _permute_output.run();
328  }
329 }
330 
// NOTE(review): the signature line (presumably
// 'void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()')
// is missing from this extraction; the full function body follows.
// Runs the configured pipeline: one-off prepare(), scoped memory acquisition,
// optional input permute, the selected depthwise path, optional activation.
{
    prepare();

    // Acquire managed intermediate tensors for the duration of this run
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}
351 
352 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
353 {
354  if(!_is_prepared)
355  {
356  // Permute weights
357  if(_permute)
358  {
359  _permuted_weights.allocator()->allocate();
360  _permute_weights.run();
361  _original_weights->mark_as_unused();
362  }
363 
364  // Prepare optimized function
365  if(_is_optimized)
366  {
367  _dwc_optimized_func.prepare();
368  if(!_permuted_weights.is_used())
369  {
370  _permuted_weights.allocator()->free();
371  }
372  }
373 
374  _is_prepared = true;
375  }
376 }
377 
// Default-constructs the generic-path pipeline with unconfigured kernels and
// functions; flags default to "not prepared, NHWC, no activation, no weights".
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _fill_border(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(),
      _is_prepared(false), _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}
383 
// Configures the generic depthwise pipeline around the NHWC-native kernel.
// NCHW tensors are permuted to NHWC for the kernel and the output permuted
// back; NHWC tensors are used directly (in which case no prepare step is
// needed, hence _is_prepared = !_is_nchw).
//
// NOTE(review): one line before the validation call appears truncated in
// this extraction (likely a null-pointer argument check).
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    // NHWC inputs need no weight permutation, so they are "prepared" already
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        // Permute input/weights into the kernel's NHWC working layout
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

        // Shape is auto-inferred by the kernel configuration below
        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    _original_weights = weights_to_use;

    _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
    // Constant border filled with zero at the input's data type/quantization
    _fill_border.configure(input_to_use, _depthwise_conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()));

    if(_is_nchw)
    {
        // Permute the NHWC result back to the caller's NCHW layout
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    //Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}
432 
// Validates the generic depthwise pipeline. For NCHW inputs the checks are
// performed against NHWC-permuted tensor infos (the native kernel's working
// layout) plus the output back-permute; finally the optional activation is
// validated.
//
// NOTE(review): several lines appear truncated in this extraction — one
// before the NCHW branch (likely a null-pointer check), the input/weights
// permute validations, and the NHWC-path kernel validation in the 'else'.
Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    if(input->data_layout() == DataLayout::NCHW)
    {
        // Build NHWC-permuted shapes for input, weights and the computed output
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        // Validate the back-permute of the computed output to NCHW
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
    }
    else
    {
        // NOTE(review): NHWC-path kernel validation missing from this extraction
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}
470 
// NOTE(review): the signature line (presumably
// 'void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()')
// is missing from this extraction; the full function body follows.
// Runs the generic pipeline: NCHW callers get a prepare + input permute
// first and an output permute afterwards, then the optional activation runs.
{
    if(_is_nchw)
    {
        prepare();
        _permute_input.run();
    }

    // Fill the constant border, then run the NHWC-native depthwise kernel
    NEScheduler::get().schedule(&_fill_border, Window::DimX);
    NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);

    if(_is_nchw)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}
492 
493 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
494 {
495  if(!_is_prepared)
496  {
497  ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
498 
499  _permute_weights.run();
500  _original_weights->mark_as_unused();
501  _is_prepared = true;
502  }
503 }
504 
// Constructs the public facade: defaults to the GENERIC dispatch choice and
// hands the memory manager to the optimized internal pipeline (the generic
// pipeline manages no intermediate memory through a manager).
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}
509 
// Public configure: selects the best implementation for the given tensors via
// get_depthwiseconvolution_function() and configures that pipeline.
//
// NOTE(review): the 'case DepthwiseConvolutionFunction::...' labels are
// missing from this extraction; by context the first branch configures the
// optimized pipeline and the second the generic one.
void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
    switch(_depth_conv_func)
    {
            _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}
526 
// NOTE(review): the opening signature line of this static validate() (the
// 'Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, ...'
// opener) and the 'case' labels of the switch are missing from this
// extraction. By context the first branch validates the optimized pipeline
// and the second the generic one.
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    switch(depth_conv_func)
    {
            return NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
            return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}
543 
// Chooses between the optimized and generic implementations: if the
// optimized-path validation succeeds the optimized function is selected,
// otherwise the generic one.
//
// NOTE(review): both return statements are missing from this extraction; by
// context the first branch returns DepthwiseConvolutionFunction::OPTIMIZED
// and the second DepthwiseConvolutionFunction::GENERIC.
DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                            const PadStrideInfo &conv_info,
                                                                                            unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
    if(bool(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
    {
        // NOTE(review): return statement missing from this extraction
    }
    else
    {
        // NOTE(review): return statement missing from this extraction
    }
}
557 
// NOTE(review): the signature line (presumably
// 'void NEDepthwiseConvolutionLayer::run()') and the switch 'case' labels
// are missing from this extraction. Dispatches run() to whichever pipeline
// configure() selected.
{
    switch(_depth_conv_func)
    {
            _func_optimized.run();
            break;
            _func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
572 
// NOTE(review): the signature line (presumably
// 'void NEDepthwiseConvolutionLayer::prepare()') and the switch 'case'
// labels are missing from this extraction. Forwards prepare() to whichever
// pipeline configure() selected.
{
    switch(_depth_conv_func)
    {
            _func_optimized.prepare();
            break;
            _func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
587 } // namespace arm_compute
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLayer.
DepthwiseConvolutionFunction
Available DepthwiseConvolutionFunction.
Definition: Types.h:147
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation=Size2D(1U, 1U))
Calculate the depthwise convolution output shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1, 1))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionAssemblyDispatch.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
Definition: Types.h:49
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor's metadata.
Definition: ITensorInfo.h:40
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of NEPermute.
Definition: NEPermute.cpp:38
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Activation Layer Information class.
Definition: Types.h:1517
Interface for NEON tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2020 Arm Limited.
1 channel, 1 F16 per channel
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: Tensor.cpp:33
void prepare() override
Prepare the function for executing.
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Definition: Helpers.h:605
1 channel, 1 S32 per channel
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: Utils.h:1198
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLa...
quantized, asymmetric fixed-point 8-bit number unsigned
NEDepthwiseConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
Definition: Types.h:689
static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1, 1))
Check if the optimized kernel can be used for the given kernel sizes and strides.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Num samples, channels, height, width.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1143
bool is_relu6(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu6 activation function.
Definition: InfoHelpers.h:54
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
static Status validate(const ITensorInfo *input, const ITensorInfo *bias=nullptr, const ITensorInfo *output=nullptr, const DirectConvolutionLayerOutputStageKernelInfo &info=DirectConvolutionLayerOutputStageKernelInfo())
Static function to check if given info will lead to a valid configuration of NEDirectConvolutionLayer...
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
Num samples, height, width, channels.
bool is_relu(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu activation function.
Definition: InfoHelpers.h:43
quantized, asymmetric fixed-point 8-bit number signed
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:332
DataLayout
[DataLayout enum definition]
Definition: Types.h:120
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Initialize the function's source, destination, weights and convolution information.
void run() override
Run the kernels contained in the function.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:95
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLa...