39 Status validate_arguments_optimized(
const ITensorInfo *
src,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *
dst,
const ConvolutionInfo &
info)
51 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
52 info.pad_stride_info.pad_right());
53 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
54 info.pad_stride_info.pad_bottom());
75 const ITensorInfo *weights,
76 const ITensorInfo *biases,
78 const ConvolutionInfo &info)
86 _has_bias = biases !=
nullptr;
94 _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
97 _permute_input = std::make_unique<cpu::CpuPermute>();
98 _permute_weights = std::make_unique<cpu::CpuPermute>();
99 _permute_output = std::make_unique<cpu::CpuPermute>();
101 auto input_perm = std::make_unique<TensorInfo>();
102 auto weights_perm = std::make_unique<TensorInfo>();
103 auto output_perm = std::make_unique<TensorInfo>();
110 _permute_weights->configure(weights, weights_perm.get(),
PermutationVector(2U, 0U, 1U));
114 output_perm->set_quantization_info(dst->quantization_info());
117 _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(),
info);
125 _dwc_optimized_func->configure(src, weights, biases, dst, info);
129 if(_is_activationlayer_enabled)
131 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
132 _activationlayer_function->configure(dst,
nullptr, info.act_info);
137 const ITensorInfo *weights,
138 const ITensorInfo *biases,
139 const ITensorInfo *dst,
140 const ConvolutionInfo &info)
142 return validate_arguments_optimized(src, weights, biases, dst, info);
163 _permute_input->run(pack);
180 _dwc_optimized_func->run(pack);
195 _dwc_optimized_func->run(pack);
205 _permute_output->run(pack);
209 if(_is_activationlayer_enabled)
214 _activationlayer_function->run(pack);
218 void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
234 _permute_weights->run(pack);
236 weights->mark_as_unused();
238 ITensorPack pack_opt;
244 _dwc_optimized_func->prepare(pack_opt);
248 ITensorPack pack_opt;
254 _dwc_optimized_func->prepare(pack_opt);
268 _is_prepared = !_is_nchw;
270 ITensorInfo *input_to_use =
src;
271 const ITensorInfo *weights_to_use = weights;
272 ITensorInfo *output_to_use =
dst;
274 auto input_perm = std::make_unique<TensorInfo>();
275 auto weights_perm = std::make_unique<TensorInfo>();
276 auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(TensorShape()));
280 _permute_input = std::make_unique<cpu::CpuPermute>();
281 _permute_weights = std::make_unique<cpu::CpuPermute>();
285 input_to_use = input_perm.get();
287 _permute_weights->configure(weights, weights_perm.get(),
PermutationVector(2U, 0U, 1U));
289 weights_to_use = weights_perm.get();
291 output_to_use = output_perm.get();
294 _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
295 _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
299 _permute_output = std::make_unique<cpu::CpuPermute>();
305 _is_activationlayer_enabled = info.act_info.enabled();
306 if(_is_activationlayer_enabled)
308 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
309 _activationlayer_function->configure(dst,
nullptr, info.act_info);
314 const ConvolutionInfo &info)
319 TensorShape permuted_input_shape = src->tensor_shape();
320 TensorShape permuted_weights_shape = weights->tensor_shape();
326 const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(
DataLayout::NHWC));
327 const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(
DataLayout::NHWC));
328 const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(
DataLayout::NCHW));
367 _permute_input->run(pack);
369 ITensorPack pack_depth;
378 ITensorPack pack_depth;
392 _permute_output->run(pack);
395 if(_is_activationlayer_enabled)
400 _activationlayer_function->run(pack);
404 void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
417 _permute_weights->run(pack);
418 weights->mark_as_unused();
426 switch(_depth_conv_func)
429 _func_optimized.configure(src, weights, biases, dst, info);
432 _func_generic.configure(src, weights, biases, dst, info);
442 switch(depth_conv_func)
470 switch(_depth_conv_func)
473 _func_optimized.run(tensors);
476 _func_generic.run(tensors);
485 switch(_depth_conv_func)
488 _func_optimized.prepare(tensors);
491 _func_generic.prepare(tensors);
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration.
DepthwiseConvolutionFunction
Available DepthwiseConvolutionFunction.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to choose the best depthwise convolution function for CpuDepthwiseConv2d.
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
Initialize the function's source, destination, weights and convolution information.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Optimized Depthwise Convolution.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static bool is_activation_supported(const ActivationLayerInfo &activation)
Checks if activation is supported by the assembly kernels.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration.
Num samples, channels, height, width.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration.
quantized, asymmetric fixed-point 8-bit number signed
im2col_func configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias)
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info)
Static function to check if given info will lead to a valid configuration.
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.