39 Status validate_arguments_optimized(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const ConvolutionInfo &
info)
52 info.pad_stride_info.pad_right());
54 info.pad_stride_info.pad_bottom());
66 if(
info.act_info.enabled())
74 CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::CpuDepthwiseConvolutionOptimizedInternal()
75 : _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false),
76 _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
80 void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configure(ITensorInfo *
input,
81 const ITensorInfo *weights,
82 const ITensorInfo *biases,
84 const ConvolutionInfo &
info)
92 _has_bias = biases !=
nullptr;
98 ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
103 if(!_is_activationlayer_enabled)
105 act_info_to_use =
info.act_info;
108 _dwc_optimized_func = std::make_unique<CpuDepthwiseConvolutionAssemblyDispatch>();
111 _permute_input = std::make_unique<cpu::CpuPermute>();
112 _permute_weights = std::make_unique<cpu::CpuPermute>();
113 _permute_output = std::make_unique<cpu::CpuPermute>();
115 auto input_perm = std::make_unique<TensorInfo>();
116 auto weights_perm = std::make_unique<TensorInfo>();
117 auto output_perm = std::make_unique<TensorInfo>();
124 _permute_weights->configure(weights, weights_perm.get(),
PermutationVector(2U, 0U, 1U));
128 output_perm->set_quantization_info(output->quantization_info());
131 _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(),
info);
135 _permute_output->configure(output_perm.get(), output,
PermutationVector(1U, 2U, 0U));
139 _dwc_optimized_func->configure(
input, weights, biases, output,
info);
143 if(_is_activationlayer_enabled)
145 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
146 _activationlayer_function->configure(output,
nullptr,
info.act_info);
151 const ITensorInfo *weights,
152 const ITensorInfo *biases,
153 const ITensorInfo *output,
154 const ConvolutionInfo &
info)
156 return validate_arguments_optimized(
input, weights, biases, output,
info);
177 _permute_input->run(pack);
194 _dwc_optimized_func->run(pack);
209 _dwc_optimized_func->run(pack);
219 _permute_output->run(pack);
223 if(_is_activationlayer_enabled)
228 _activationlayer_function->run(pack);
232 void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(ITensorPack &tensors)
248 _permute_weights->run(pack);
250 weights->mark_as_unused();
252 ITensorPack pack_opt;
258 _dwc_optimized_func->prepare(pack_opt);
262 ITensorPack pack_opt;
268 _dwc_optimized_func->prepare(pack_opt);
275 CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::CpuDepthwiseConvolutionGeneric()
276 : _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false),
277 _is_activationlayer_enabled(false)
281 void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases, ITensorInfo *output,
const ConvolutionInfo &
info)
288 _is_prepared = !_is_nchw;
290 ITensorInfo *input_to_use =
input;
291 const ITensorInfo *weights_to_use = weights;
292 ITensorInfo *output_to_use = output;
294 auto input_perm = std::make_unique<TensorInfo>();
295 auto weights_perm = std::make_unique<TensorInfo>();
296 auto output_perm = std::make_unique<TensorInfo>(output->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(TensorShape()));
300 _permute_input = std::make_unique<cpu::CpuPermute>();
301 _permute_weights = std::make_unique<cpu::CpuPermute>();
305 input_to_use = input_perm.get();
307 _permute_weights->configure(weights, weights_perm.get(),
PermutationVector(2U, 0U, 1U));
309 weights_to_use = weights_perm.get();
311 output_to_use = output_perm.get();
314 _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
315 _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use,
info);
319 _permute_output = std::make_unique<cpu::CpuPermute>();
320 _permute_output->configure(output_perm.get(), output,
PermutationVector(1U, 2U, 0U));
325 _is_activationlayer_enabled =
info.act_info.enabled();
326 if(_is_activationlayer_enabled)
328 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
329 _activationlayer_function->configure(output,
nullptr,
info.act_info);
334 const ConvolutionInfo &
info)
339 TensorShape permuted_input_shape =
input->tensor_shape();
340 TensorShape permuted_weights_shape = weights->tensor_shape();
346 const TensorInfo permuted_input = TensorInfo(
input->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(
DataLayout::NHWC));
347 const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(
DataLayout::NHWC));
348 const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(
DataLayout::NCHW));
362 if(
info.act_info.enabled())
387 _permute_input->run(pack);
389 ITensorPack pack_depth;
398 ITensorPack pack_depth;
412 _permute_output->run(pack);
415 if(_is_activationlayer_enabled)
420 _activationlayer_function->run(pack);
424 void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPack &tensors)
437 _permute_weights->run(pack);
438 weights->mark_as_unused();
451 switch(_depth_conv_func)
454 _func_optimized.configure(
input, weights, biases, output,
info);
457 _func_generic.configure(
input, weights, biases, output,
info);
467 switch(depth_conv_func)
495 switch(_depth_conv_func)
498 _func_optimized.run(tensors);
501 _func_generic.run(tensors);
510 switch(_depth_conv_func)
513 _func_optimized.prepare(tensors);
516 _func_generic.prepare(tensors);
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration of CpuDepthwiseConvolutionAssemblyDispatch.
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration of CpuDepthwiseConvolution.
DepthwiseConvolutionFunction
Available DepthwiseConvolutionFunction.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
1 channel, 1 F32 per channel
void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
Initialize the function's source, destination, weights and convolution information.
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
Static function to choose the best depthwise convolution function for CpuDepthwiseConvolution.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Optimized Depthwise Convolution.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CpuPermute.
CpuDepthwiseConvolution()
Default constructor.
Num samples, channels, height, width.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
bool is_relu6(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu6 activation function.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration of CpuActivation.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration of CpuDepthwiseConvolutionNativeKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool is_relu(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu activation function.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
quantized, asymmetric fixed-point 8-bit number signed
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.