// Private state of NEDepthwiseConvolutionLayerOptimizedInternal, held through `_impl`.
// NOTE(review): this listing is sparse — the braces and any members between the visible
// lines are not shown. Code elsewhere in the listing accesses `_impl->permute`,
// `_impl->permuted_input/_weights/_output`, `_impl->workspace` and `_impl->packed_weights`;
// presumably those are declared on the missing lines of this struct — confirm.
41 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
// Source tensor pointer; starts out null.
43 ITensor *
src{
nullptr};
// Destination tensor pointer; starts out null.
44 ITensor *
dst{
nullptr};
// Read-only weights tensor pointer; starts out null.
45 const ITensor *weights{
nullptr};
// Read-only biases tensor pointer; starts out null. Callers in this listing treat a null
// biases pointer as "no bias" when forwarding tensor infos.
46 const ITensor *biases{
nullptr};
// Handle to the cpu::CpuDepthwiseConv2d operator; starts out null.
52 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr};
// Set to true at the end of prepare() so the preparation work runs only once.
53 bool is_prepared{
false};
// Constructor: forwards the given memory manager to `_memory_group` and creates a fresh
// Impl instance for `_impl`. The constructor body is not part of this listing.
// @param memory_manager Shared pointer to an IMemoryManager handed to the memory group.
57 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(
58 std::shared_ptr<IMemoryManager> memory_manager)
59 : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
// Parameter-list fragment and body excerpt of a configuration routine.
// NOTE(review): the line naming the function is not part of this listing; from the
// surrounding definitions this is presumably
// NEDepthwiseConvolutionLayerOptimizedInternal::configure — confirm against the full file.
// The listing is sparse: braces, branch conditions and several statements between the
// visible lines are missing (e.g. where `is_nhwc`, `info`, `conv_info`, `output` and
// `mem_req` come from), so each comment below describes only the statement it precedes.
65 const ITensor *weights,
66 const ITensor *biases,
69 unsigned int depth_multiplier,
71 const Size2D &dilation)
// Keep the caller's weights and biases pointers in the private state.
77 _impl->weights = weights;
78 _impl->biases = biases;
// Record the `is_nhwc` flag as the `permute` setting.
80 _impl->permute = is_nhwc;
// Create the CpuDepthwiseConv2d operator. The std::make_unique result is assigned to a
// member that the Impl struct in this listing declares as std::shared_ptr.
82 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
// Configure the operator from the tensor infos; a null biases pointer is forwarded as a
// null info pointer instead of being dereferenced.
84 _impl->op->configure(_impl->src->info(), _impl->weights->info(),
85 _impl->biases ==
nullptr ?
nullptr : _impl->biases->info(), _impl->dst->info(),
info);
// Start from a default-constructed activation description.
88 ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
// Condition on `is_activationlayer_enabled`; the guarded statements are not visible here.
93 if (!is_activationlayer_enabled)
// Bundle the convolution parameters together with the chosen activation info.
97 info = ConvolutionInfo{
conv_info, depth_multiplier, act_info_to_use, dilation};
// Assembly-dispatch operator that is configured further below.
99 auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
// One permute operator each for input, weights and output.
103 auto permute_input = std::make_unique<cpu::CpuPermute>();
104 auto permute_weights = std::make_unique<cpu::CpuPermute>();
105 auto permute_output = std::make_unique<cpu::CpuPermute>();
// Register the three intermediate permuted tensors with the memory group.
107 _memory_group.manage(&_impl->permuted_input);
108 _memory_group.manage(&_impl->permuted_weights);
109 _memory_group.manage(&_impl->permuted_output);
// Configure the weights permutation into `permuted_weights`.
116 permute_weights->configure(weights->info(), _impl->permuted_weights.info(),
PermutationVector(2U, 0U, 1U));
// Copy the output's quantization info onto the permuted output's tensor info.
120 _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
// Configure the dispatch operator on the permuted tensor infos (null biases -> null info).
// The remaining arguments of this call are not visible in the listing.
123 dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(),
124 biases ==
nullptr ?
nullptr : biases->info(), _impl->permuted_output.info(),
// Configure the permutation from `permuted_output` into the caller's output.
129 permute_output->configure(_impl->permuted_output.info(), output->info(),
PermutationVector(1U, 2U, 0U));
// Allocate the permuted input and output. `permuted_weights` is not allocated on the
// visible lines here; prepare() in this listing allocates it.
131 _impl->permuted_input.allocator()->allocate();
132 _impl->permuted_output.allocator()->allocate();
// Configure the dispatch operator directly on the src/weights/dst infos.
// NOTE(review): presumably the branch taken when no permutation is needed — the
// if/else structure is not visible; confirm.
136 dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(),
137 biases ==
nullptr ?
nullptr : biases->info(), _impl->dst->info(),
info);
// Initialise the workspace as a one-dimensional, single-channel S8 tensor of
// `size + alignment` elements, passing the alignment to the allocator as well.
142 _impl->workspace.allocator()->init(TensorInfo(TensorShape{mem_req[0].size + mem_req[0].alignment}, 1,
DataType::S8),
143 mem_req[0].alignment);
// Same scheme for the packed-weights buffer, using the second memory requirement entry.
144 _impl->packed_weights.allocator()->init(
145 TensorInfo(TensorShape{mem_req[1].size + mem_req[1].alignment}, 1,
DataType::S8), mem_req[1].alignment);
// Register both buffers with the memory group, then allocate them.
146 _memory_group.manage(&_impl->workspace);
147 _memory_group.manage(&_impl->packed_weights);
148 _impl->workspace.allocator()->allocate();
149 _impl->packed_weights.allocator()->allocate();
// Parameter-list fragment of a routine that takes tensor descriptors (ITensorInfo)
// rather than tensors.
// NOTE(review): the function-name line and the body are not part of this listing;
// presumably this is NEDepthwiseConvolutionLayerOptimizedInternal::validate — confirm.
154 const ITensorInfo *weights,
155 const ITensorInfo *biases,
156 const ITensorInfo *output,
158 unsigned int depth_multiplier,
159 const ActivationLayerInfo &
act_info,
160 const Size2D &dilation)
// Excerpt of an execution routine (function-name line not visible; presumably
// NEDepthwiseConvolutionLayerOptimizedInternal::run — confirm).
// Scope object constructed from `_memory_group`; it lives until the end of the enclosing scope.
169 MemoryGroupResourceScope scope_mg(_memory_group);
// Run the operator on `pack`, which is built on lines not visible in this listing.
182 _impl->op->run(
pack);
// Preparation step guarded by `is_prepared`.
// NOTE(review): braces and some statements are missing from this listing, so the exact
// nesting of the visible lines cannot be confirmed from here.
185 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
// Skip the work when preparation has already happened.
187 if (!_impl->is_prepared)
// Allocate backing memory for the permuted weights.
192 _impl->permuted_weights.allocator()->allocate();
// Release the permuted weights again when the tensor reports that it is not used.
195 if (!_impl->permuted_weights.is_used())
197 _impl->permuted_weights.allocator()->free();
// Mark preparation as done.
200 _impl->is_prepared =
true;
// Private state of NEDepthwiseConvolutionLayerGeneric, held through `_impl`.
// NOTE(review): sparse listing — braces and further members are not shown. Code elsewhere
// in the listing uses `_impl->permuted_input`, `_impl->permuted_output` and
// `_impl->is_nchw`; presumably they are declared on the missing lines — confirm.
204 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
// Tensor that receives the permuted weights.
207 Tensor permuted_weights{};
// Preparation flag; defaults to false.
209 bool is_prepared{
false};
// Activation flag; defaults to false.
211 bool is_activationlayer_enabled{
false};
// Read-only weights tensor pointer; starts out null.
212 const ITensor *weights{
nullptr};
// Read-only biases tensor pointer; starts out null.
213 const ITensor *biases{
nullptr};
// Read-only source tensor pointer; starts out null.
214 const ITensor *
src{
nullptr};
// Destination tensor pointer; starts out null.
215 ITensor *
dst{
nullptr};
// Handle to the cpu::CpuDepthwiseConv2d operator; starts out null.
216 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr};
// Default constructor: creates a fresh Impl instance for `_impl`.
// The constructor body is not part of this listing.
219 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
220 : _impl(std::make_unique<Impl>())
// Parameter-list fragment and body excerpt of a configuration routine.
// NOTE(review): the function-name line is not part of this listing; presumably this is
// NEDepthwiseConvolutionLayerGeneric::configure — confirm against the full file.
// Braces, branch conditions and some statements (e.g. the definitions of `input`,
// `output` and `info`) are missing, so each comment describes only the statement it precedes.
225 const ITensor *weights,
226 const ITensor *biases,
229 unsigned int depth_multiplier,
230 const ActivationLayerInfo &
act_info,
231 const Size2D &dilation)
// Create the CpuDepthwiseConv2d operator. The std::make_unique result is assigned to a
// member that the Impl struct in this listing declares as std::shared_ptr.
236 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
// Configure it from the tensor infos; a null biases pointer is forwarded as a null info
// pointer. The trailing arguments of this call are not visible in the listing.
237 _impl->op->configure(
input->info(), weights->info(), biases ==
nullptr ?
nullptr : biases->info(), output->info(),
// Keep the caller's weights and biases pointers in the private state.
242 _impl->weights = weights;
243 _impl->biases = biases;
// `is_prepared` starts as the negation of `is_nchw`, so it is already true when
// `is_nchw` is false.
245 _impl->is_prepared = !_impl->is_nchw;
// The "to_use" pointers default to the caller's tensors; visible lines below redirect
// them to the permuted tensors.
247 ITensor *input_to_use =
input;
248 const ITensor *weights_to_use = weights;
249 ITensor *output_to_use = output;
// Permute operators for input and weights.
252 auto permute_input = std::make_unique<cpu::CpuPermute>();
253 auto permute_weights = std::make_unique<cpu::CpuPermute>();
// From here on the input is read from the permuted copy.
257 input_to_use = &_impl->permuted_input;
// Configure the weights permutation into `permuted_weights`.
259 permute_weights->configure(weights->info(), _impl->permuted_weights.info(),
PermutationVector(2U, 0U, 1U));
261 weights_to_use = &_impl->permuted_weights;
// Initialise the permuted output's info from a clone of the output info that is marked
// resizable, has its padding reset and is given an empty TensorShape.
263 _impl->permuted_output.allocator()->init(
264 output->info()->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(TensorShape()));
265 output_to_use = &_impl->permuted_output;
// Configure the native depthwise kernel on whichever tensors were selected above.
268 auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
269 depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(),
270 biases ==
nullptr ?
nullptr : biases->info(), output_to_use->info(),
info);
// Configure the permutation from `permuted_output` into the caller's output.
274 auto permute_output = std::make_unique<cpu::CpuPermute>();
275 permute_output->configure(_impl->permuted_output.info(), output->info(),
PermutationVector(1U, 2U, 0U));
// Allocate the three permuted tensors.
278 _impl->permuted_input.allocator()->allocate();
279 _impl->permuted_weights.allocator()->allocate();
280 _impl->permuted_output.allocator()->allocate();
// Parameter-list fragment of a routine that takes tensor descriptors (ITensorInfo)
// rather than tensors.
// NOTE(review): the function-name line and the body are not part of this listing;
// presumably this is NEDepthwiseConvolutionLayerGeneric::validate — confirm.
285 const ITensorInfo *weights,
286 const ITensorInfo *biases,
287 const ITensorInfo *output,
289 unsigned int depth_multiplier,
290 const ActivationLayerInfo &
act_info,
291 const Size2D &dilation)
// Excerpt of an execution routine (function-name line not visible; presumably
// NEDepthwiseConvolutionLayerGeneric::run — confirm).
// Runs the operator on `pack`, which is built on lines not visible in this listing.
308 _impl->op->run(
pack);
// Constructor initializer list: moves `memory_manager` into `_memory_group` and creates a
// fresh Impl instance for `_impl`.
// NOTE(review): the constructor's signature line is not part of this listing; presumably
// the NEDepthwiseConvolutionLayer constructor — confirm.
312 : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
// The struct below is compiled only when DOXYGEN_SKIP_THIS is not defined.
316 #ifndef DOXYGEN_SKIP_THIS
// Private state of the top-level NEDepthwiseConvolutionLayer.
// NOTE(review): sparse listing — braces and further members are not shown. Code elsewhere
// in the listing reads and writes `_impl->depth_conv_func`; presumably it is declared on
// a missing line of this struct — confirm.
317 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
// Optimized implementation, constructed with a null memory manager.
320 NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{
nullptr};
// Generic implementation, default-constructed.
321 NEDepthwiseConvolutionLayerGeneric func_generic{};
// Handle to the cpu::CpuDepthwiseConv2d operator; starts out null.
322 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr};
324 #endif // DOXYGEN_SKIP_THIS
// Parameter fragment and body excerpt of a configuration routine.
// NOTE(review): the function-name line is not part of this listing; presumably this is
// NEDepthwiseConvolutionLayer::configure — confirm against the full file.
331 unsigned int depth_multiplier,
// Tail of a call whose beginning is not visible in this listing.
340 depth_multiplier,
act_info, dilation));
// Create the operator, here with std::make_shared.
343 _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();
// Ask the operator which depthwise-convolution function applies to these tensor infos
// (a null biases pointer is forwarded as a null info pointer) and remember the answer.
344 _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(
345 input->info(), weights->
info(), (biases !=
nullptr) ? biases->
info() :
nullptr, output->
info(),
info);
// Branch on the selected function; the case labels and bodies are not visible here.
346 switch (_impl->depth_conv_func)
// Single parameter line of a routine whose name line and body are not part of this
// listing. NOTE(review): presumably NEDepthwiseConvolutionLayer::validate — confirm.
366 unsigned int depth_multiplier,
// Excerpt of an execution routine (function-name line not visible; presumably
// NEDepthwiseConvolutionLayer::run — confirm).
// Dispatches on the function selected during configuration. The case labels are not
// visible; the two visible statements run the optimized and the generic implementation.
376 switch (_impl->depth_conv_func)
379 _impl->func_optimized.run();
382 _impl->func_generic.run();
// Excerpt of a preparation routine (function-name line not visible; presumably
// NEDepthwiseConvolutionLayer::prepare — confirm).
// Dispatches on the function selected during configuration. The case labels are not
// visible; the two visible statements prepare the optimized and the generic implementation.
391 switch (_impl->depth_conv_func)
394 _impl->func_optimized.prepare();
397 _impl->func_generic.prepare();