NEDepthwiseConvolutionLayer.cpp (Compute Library 23.05)
/*
 * Copyright (c) 2017-2021, 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/common/utils/Log.h"
#include "src/cpu/operators/CpuDepthwiseConv2d.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

namespace arm_compute
{
struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
{
    ITensor       *src{ nullptr };     // SRC_0
    ITensor       *dst{ nullptr };     // DST_0
    const ITensor *weights{ nullptr }; // SRC_1
    const ITensor *biases{ nullptr };  // SRC_2
    Tensor         permuted_input{};   // INT_0
    Tensor         permuted_weights{}; // INT_1
    Tensor         permuted_output{};  // INT_2
    Tensor         workspace{};        // INT_3
    Tensor         packed_weights{};   // INT_4
    std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
    bool is_prepared{ false };
    bool permute{ false };
};
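
// The SRC_n / INT_n / DST_n tags above mirror the TensorType slots (ACL_SRC_0, ACL_INT_0, ...)
// that run() binds into the operator's ITensorPack below.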

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor                   *input,
                                                                                          const ITensor             *weights,
                                                                                          const ITensor             *biases,
                                                                                          ITensor *output, const PadStrideInfo &conv_info,
                                                                                          unsigned int               depth_multiplier,
                                                                                          const ActivationLayerInfo &act_info,
                                                                                          const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    // The optimized path expects NHWC data, so an NCHW input needs layout permutes.
    bool is_nchw   = input->info()->data_layout() == DataLayout::NCHW;
    _impl->src     = input;
    _impl->weights = weights;
    _impl->biases  = biases;
    _impl->dst     = output;
    _impl->permute = is_nchw;

    _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
    ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(),
                         _impl->dst->info(), info);

    // Configure pipeline
    ActivationLayerInfo act_info_to_use            = ActivationLayerInfo();
    const bool          is_relu                    = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6                   = arm_compute::utils::info_helpers::is_relu6(act_info);
    bool                is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

    if(!is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }
    info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation };
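    // Note: only ReLU / bounded ReLU can be fused by the assembly kernel, so 'info' carries
    // act_info only in that case; any other enabled activation is left disabled here and
    // handled as a separate activation stage.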

    auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();

    if(is_nchw)
    {
        auto permute_input   = std::make_unique<cpu::CpuPermute>();
        auto permute_weights = std::make_unique<cpu::CpuPermute>();
        auto permute_output  = std::make_unique<cpu::CpuPermute>();

        _memory_group.manage(&_impl->permuted_input);
        _memory_group.manage(&_impl->permuted_weights);
        _memory_group.manage(&_impl->permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
        _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
        _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(), info);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
        permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));

        _impl->permuted_input.allocator()->allocate();
        _impl->permuted_output.allocator()->allocate();
    }
    else
    {
        dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
    }

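    // The assembly dispatch reports its memory needs as two blobs: mem_req[0] backs the
    // transient workspace and mem_req[1] the packed (reordered) weights. Both are modelled
    // as raw byte (S8) tensors, oversized by the alignment so the base pointer can be aligned.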
    // Allocate memory based on the internal memory requirements
    experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();
    _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1, DataType::S8), mem_req[0].alignment);
    _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size + mem_req[1].alignment }, 1, DataType::S8), mem_req[1].alignment);
    _memory_group.manage(&_impl->workspace);
    _memory_group.manage(&_impl->packed_weights);
    _impl->workspace.allocator()->allocate();
    _impl->packed_weights.allocator()->allocate();
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
                                                                                           const ITensorInfo         *weights,
                                                                                           const ITensorInfo         *biases,
                                                                                           const ITensorInfo         *output,
                                                                                           const PadStrideInfo       &conv_info,
                                                                                           unsigned int               depth_multiplier,
                                                                                           const ActivationLayerInfo &act_info,
                                                                                           const Size2D              &dilation)
{
    ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
    prepare();
    MemoryGroupResourceScope scope_mg(_memory_group);

    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
    pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
    pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
    pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
    pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
    pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
    pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);
    pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);
    pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);

    _impl->op->run(pack);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
    if(!_impl->is_prepared)
    {
        // Permute weights
        if(_impl->permute)
        {
            _impl->permuted_weights.allocator()->allocate();
        }

        if(!_impl->permuted_weights.is_used())
        {
            _impl->permuted_weights.allocator()->free();
        }

        _impl->is_prepared = true;
    }
}

struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
{
    Tensor permuted_input{};
    Tensor permuted_weights{};
    Tensor permuted_output{};
    bool   is_prepared{ false };
    bool   is_nchw{ false };
    bool   is_activationlayer_enabled{ false };
    const ITensor *weights{ nullptr };
    const ITensor *biases{ nullptr };
    const ITensor *src{ nullptr };
    ITensor       *dst{ nullptr };
    std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
};

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _impl(std::make_unique<Impl>())
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
    _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info);

    _impl->src         = input;
    _impl->dst         = output;
    _impl->weights     = weights;
    _impl->biases      = biases;
    _impl->is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    _impl->is_prepared = !_impl->is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_impl->is_nchw)
    {
        auto permute_input   = std::make_unique<cpu::CpuPermute>();
        auto permute_weights = std::make_unique<cpu::CpuPermute>();

        permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
        _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_impl->permuted_input;

        permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
        _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_impl->permuted_weights;

        _impl->permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_impl->permuted_output;
    }

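    // The native kernel is configured against the (possibly permuted) NHWC views; in the NCHW
    // case permuted_output was initialised above with an empty, resizable shape so the kernel's
    // configure() can deduce it.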
    auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
    depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);

    if(_impl->is_nchw)
    {
        auto permute_output = std::make_unique<cpu::CpuPermute>();
        permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
        _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _impl->permuted_input.allocator()->allocate();
        _impl->permuted_weights.allocator()->allocate();
        _impl->permuted_output.allocator()->allocate();
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
    pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
    pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
    pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
    pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
    pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
    pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);

    _impl->op->run(pack);
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
{
}

NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;

#ifndef DOXYGEN_SKIP_THIS
struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
{
    DepthwiseConvolutionFunction                 depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED }; // overwritten in configure()
    NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
    NEDepthwiseConvolutionLayerGeneric           func_generic{};
    std::shared_ptr<cpu::CpuDepthwiseConv2d>     op{ nullptr };
};
#endif // DOXYGEN_SKIP_THIS

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

    ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

    const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    _impl->op              = std::make_shared<cpu::CpuDepthwiseConv2d>();
    _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
                                                                          info);
    switch(_impl->depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
    return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}

void NEDepthwiseConvolutionLayer::run()
{
    switch(_impl->depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _impl->func_optimized.run();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _impl->func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    switch(_impl->depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _impl->func_optimized.prepare();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _impl->func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace arm_compute
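
For reference, here is a minimal usage sketch of the public API implemented above. It is an illustration, not part of the original file: the shapes, the NHWC layout choice, and the fused-ReLU activation are assumptions made for the example.

#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: NHWC, 8 channels, 32x32 spatial, 3x3 depthwise kernel.
    // For NHWC, TensorShape is ordered [C, W, H, N].
    TensorInfo src_info(TensorShape(8U, 32U, 32U), 1, DataType::F32);
    TensorInfo wei_info(TensorShape(8U, 3U, 3U), 1, DataType::F32);
    TensorInfo bia_info(TensorShape(8U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(8U, 32U, 32U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    wei_info.set_data_layout(DataLayout::NHWC);
    dst_info.set_data_layout(DataLayout::NHWC);

    Tensor src, weights, biases, dst;
    src.allocator()->init(src_info);
    weights.allocator()->init(wei_info);
    biases.allocator()->init(bia_info);
    dst.allocator()->init(dst_info);

    // 3x3 kernel, stride 1, padding 1 (same-size output), depth multiplier 1, fused ReLU.
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1), 1,
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    dwc.run(); // dispatches to the optimized or generic path chosen at configure() time
    return 0;
}

The configure()/allocate()/run() ordering matters: configure() only inspects tensor metadata, so it can run before the buffers are backed by memory, which is what lets the memory manager plan the transient workspace seen in the implementation above.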