40 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
42 ITensor *
src{
nullptr };
43 ITensor *
dst{
nullptr };
44 const ITensor *weights
57 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr };
58 bool is_prepared{
false };
62 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
63 : _memory_group(memory_manager), _impl(
std::make_unique<Impl>())
68 const ITensor *weights,
69 const ITensor *biases,
70 ITensor *output,
const PadStrideInfo &
conv_info,
71 unsigned int depth_multiplier,
73 const Size2D &dilation)
79 _impl->weights = weights;
80 _impl->biases = biases;
82 _impl->permute = is_nhwc;
84 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
86 _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases ==
nullptr ? nullptr : _impl->biases->info(),
87 _impl->dst->info(),
info);
90 ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
93 bool is_activationlayer_enabled = act_info.enabled() && !(is_relu ||
is_relu6);
95 if(!is_activationlayer_enabled)
99 info = ConvolutionInfo{
conv_info, depth_multiplier, act_info_to_use, dilation };
101 auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
105 auto permute_input = std::make_unique<cpu::CpuPermute>();
106 auto permute_weights = std::make_unique<cpu::CpuPermute>();
107 auto permute_output = std::make_unique<cpu::CpuPermute>();
109 _memory_group.manage(&_impl->permuted_input);
110 _memory_group.manage(&_impl->permuted_weights);
111 _memory_group.manage(&_impl->permuted_output);
114 permute_input->configure(input->info(), _impl->permuted_input.info(),
PermutationVector(2
U, 0
U, 1
U));
118 permute_weights->configure(weights->info(), _impl->permuted_weights.info(),
PermutationVector(2
U, 0
U, 1
U));
122 _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
125 dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases ==
nullptr ? nullptr : biases->info(), _impl->permuted_output.info(),
info);
129 permute_output->configure(_impl->permuted_output.info(), output->info(),
PermutationVector(1
U, 2
U, 0
U));
131 _impl->permuted_input.allocator()->allocate();
132 _impl->permuted_output.allocator()->allocate();
136 dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases ==
nullptr ? nullptr : biases->info(), _impl->dst->info(),
info);
141 _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1,
DataType::S8), mem_req[0].alignment);
142 _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size + mem_req[1].alignment }, 1,
DataType::S8), mem_req[1].alignment);
143 _memory_group.manage(&_impl->workspace);
144 _memory_group.manage(&_impl->packed_weights);
145 _impl->workspace.allocator()->allocate();
146 _impl->packed_weights.allocator()->allocate();
150 const ITensorInfo *weights,
151 const ITensorInfo *biases,
152 const ITensorInfo *output,
153 const PadStrideInfo &conv_info,
154 unsigned int depth_multiplier,
155 const ActivationLayerInfo &act_info,
156 const Size2D &dilation)
165 MemoryGroupResourceScope scope_mg(_memory_group);
178 _impl->op->run(pack);
181 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
183 if(!_impl->is_prepared)
188 _impl->permuted_weights.allocator()->allocate();
191 if(!_impl->permuted_weights.is_used())
193 _impl->permuted_weights.allocator()->free();
196 _impl->is_prepared =
true;
200 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
203 Tensor permuted_weights{};
205 bool is_prepared{
false };
206 bool is_nchw{
false };
207 bool is_activationlayer_enabled{
false };
208 const ITensor *weights{
nullptr };
209 const ITensor *biases{
nullptr };
210 const ITensor *
src{
nullptr };
211 ITensor *
dst{
nullptr };
212 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr };
215 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
216 : _impl(
std::make_unique<Impl>())
221 unsigned int depth_multiplier,
const ActivationLayerInfo &act_info,
const Size2D &dilation)
226 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
227 _impl->op->configure(input->info(), weights->info(), biases ==
nullptr ? nullptr : biases->info(), output->info(),
info);
231 _impl->weights = weights;
232 _impl->biases = biases;
234 _impl->is_prepared = !_impl->is_nchw;
236 ITensor *input_to_use =
input;
237 const ITensor *weights_to_use = weights;
238 ITensor *output_to_use = output;
241 auto permute_input = std::make_unique<cpu::CpuPermute>();
242 auto permute_weights = std::make_unique<cpu::CpuPermute>();
244 permute_input->configure(input->info(), _impl->permuted_input.info(),
PermutationVector(2
U, 0
U, 1
U));
246 input_to_use = &_impl->permuted_input;
248 permute_weights->configure(weights->info(), _impl->permuted_weights.info(),
PermutationVector(2
U, 0
U, 1
U));
250 weights_to_use = &_impl->permuted_weights;
252 _impl->permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(TensorShape()));
253 output_to_use = &_impl->permuted_output;
256 auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
257 depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases ==
nullptr ? nullptr : biases->info(), output_to_use->info(),
info);
261 auto permute_output = std::make_unique<cpu::CpuPermute>();
262 permute_output->configure(_impl->permuted_output.info(), output->info(),
PermutationVector(1
U, 2
U, 0
U));
265 _impl->permuted_input.allocator()->allocate();
266 _impl->permuted_weights.allocator()->allocate();
267 _impl->permuted_output.allocator()->allocate();
272 const PadStrideInfo &conv_info,
273 unsigned int depth_multiplier,
const ActivationLayerInfo &act_info,
const Size2D &dilation)
290 _impl->op->run(pack);
294 : _memory_group(
std::move(memory_manager)), _impl(
std::make_unique<Impl>())
298 #ifndef DOXYGEN_SKIP_THIS 299 struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
302 NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{
nullptr };
303 NEDepthwiseConvolutionLayerGeneric func_generic{};
304 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{
nullptr };
306 #endif // DOXYGEN_SKIP_THIS 318 _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();
319 _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->
info(), weights->
info(), (biases !=
nullptr) ? biases->
info() :
nullptr, output->
info(),
321 switch(_impl->depth_conv_func)
324 _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
327 _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
343 switch(_impl->depth_conv_func)
346 _impl->func_optimized.run();
349 _impl->func_generic.run();
358 switch(_impl->depth_conv_func)
361 _impl->func_optimized.prepare();
364 _impl->func_generic.prepare();
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration.
DepthwiseConvolutionFunction
Available DepthwiseConvolutionFunction.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Strides PermutationVector
Permutation vector.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Activation Layer Information class.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2023 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
void prepare() override
Prepare the function for executing.
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Optimized Depthwise Convolution.
~NEDepthwiseConvolutionLayer()
Default destructor.
virtual void prepare()
Prepare the function for executing.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLayer.
NEDepthwiseConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
Num samples, channels, height, width.
bool is_relu6(ActivationLayerInfo activation_info)
Checks if the activation information corresponds to a relu6 activation function.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Class for specifying the size of an image or rectangle.
Num samples, height, width, channels.
bool is_relu(ActivationLayerInfo activation_info)
Checks if the activation information corresponds to a relu activation function.
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
im2col_func configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias)
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Initialize the function's source, destination, weights and convolution information.
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
void run() override
Run the kernels contained in the function.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)