// NOTE(review): fragmentary extract — embedded listing numbers ("39", "52", ...)
// are part of the scraped text and interior lines are elided; comments only.
//
// Validates the argument set for the optimized (assembly) depthwise
// convolution path; returns an error Status on the first failed check.
39 Status validate_arguments_optimized(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const PadStrideInfo &
conv_info,
40 unsigned int depth_multiplier,
const ActivationLayerInfo &act_info,
const Size2D &dilation)
// The effective dilated kernel extent, w + (w - 1) * (d - 1), must not
// exceed the padded input extent in each spatial dimension.
52 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
53 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
// Additional activation-related validation follows when an activation is
// requested (body elided in this extract).
65 if(act_info.enabled())
// Constructor: wires the memory manager into the memory group and the
// assembly depthwise function, and default-initializes all state flags
// (NCHW assumed until configure() decides otherwise).
75 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
76 : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
77 _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
// NOTE(review): fragmentary extract — interior lines are elided; comments only.
//
// Configures the optimized depthwise path. When the data layout requires it,
// input/weights/output are permuted to the layout the assembly kernel
// expects; relu-style activations appear to be fused into the kernel while
// other activations run as a separate layer afterwards.
81 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor *input,
82 const ITensor *weights,
83 const ITensor *biases,
84 ITensor *output,
const PadStrideInfo &conv_info,
85 unsigned int depth_multiplier,
86 const ActivationLayerInfo &act_info,
87 const Size2D &dilation)
// Validate the configuration up front (error macro elided above this call).
92 output->info(),
conv_info, depth_multiplier, act_info, dilation));
94 _original_weights = weights;
96 _has_bias = biases !=
nullptr;
100 _is_activationlayer_enabled = act_info.enabled();
// act_info_to_use is what gets handed to the assembly kernel: empty when the
// activation runs as a separate layer, the real act_info when it is fused.
103 ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
// relu / relu6 can be handled by the kernel itself, so the standalone
// activation layer is only enabled for other activation functions.
106 _is_activationlayer_enabled = act_info.enabled() && !(is_relu ||
is_relu6);
107 if(!_is_activationlayer_enabled)
109 act_info_to_use = act_info;
// Permuted tensors are managed by the memory group so their backing memory
// can be reused outside run().
114 _memory_group.manage(&_permuted_input);
115 _memory_group.manage(&_permuted_output);
// Propagate quantization info so the permuted output dequantizes identically.
126 _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
// Permuted branch: the assembly function works on the permuted tensors.
129 _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);
136 _permuted_input.allocator()->allocate();
137 _permuted_output.allocator()->allocate();
// Non-permuted branch: operate directly on the caller's tensors.
141 _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
// Standalone activation (in-place on the output) when it was not fused.
145 if(_is_activationlayer_enabled)
147 _activationlayer_function.configure(output,
nullptr, act_info);
// NOTE(review): the function header line is missing from this extract — this
// is the tail of the static validate() of the optimized-internal function,
// which simply delegates to the free helper validate_arguments_optimized().
152 const ITensorInfo *weights,
153 const ITensorInfo *biases,
154 const ITensorInfo *output,
155 const PadStrideInfo &conv_info,
156 unsigned int depth_multiplier,
157 const ActivationLayerInfo &act_info,
158 const Size2D &dilation)
160 return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
// NOTE(review): fragmentary extract — the guarding conditions around the
// permutes are elided; comments only.
//
// Runs the optimized path: (conditional) input permute, assembly depthwise
// convolution, (conditional) output permute, then the standalone activation
// layer if one was configured. The scope object acquires/releases the memory
// group's resources for the duration of the call.
167 MemoryGroupResourceScope scope_mg(_memory_group);
172 _permute_input.run();
176 _dwc_optimized_func.run();
181 _permute_output.run();
185 if(_is_activationlayer_enabled)
187 _activationlayer_function.run();
// One-time preparation: permute the weights, release the caller's original
// weight tensor, and let the assembly function run its own prepare step.
191 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
198 _permuted_weights.allocator()->allocate();
199 _permute_weights.run();
200 _original_weights->mark_as_unused();
204 _dwc_optimized_func.prepare();
// If the assembly function no longer references the permuted weights
// (e.g. it keeps an internal packed copy), free them immediately.
205 if(!_permuted_weights.is_used())
207 _permuted_weights.allocator()->free();
// Constructor: default-initializes the native kernel pointer, the permute
// helpers, the activation layer, the permuted tensors and all state flags.
214 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
215 : _depthwise_conv_kernel(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
216 _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
// NOTE(review): fragmentary extract — interior lines are elided; comments only.
//
// Configures the generic (native-kernel) depthwise path. For NCHW inputs the
// tensors are permuted before/after the kernel; the activation, when enabled,
// always runs as a separate layer on the output.
220 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input,
const ITensor *weights,
const ITensor *biases, ITensor *output,
const PadStrideInfo &conv_info,
221 unsigned int depth_multiplier,
const ActivationLayerInfo &act_info,
const Size2D &dilation)
// Up-front validation of the whole configuration (macro elided above).
225 output->info(),
conv_info, depth_multiplier, act_info, dilation));
// Only the NCHW path needs the weight-permute work done in prepare().
228 _is_prepared = !_is_nchw;
// The *_to_use pointers select either the caller's tensors or the permuted
// copies, so the kernel below can be configured uniformly.
230 ITensor *input_to_use =
input;
231 const ITensor *weights_to_use = weights;
232 ITensor *output_to_use = output;
237 input_to_use = &_permuted_input;
241 weights_to_use = &_permuted_weights;
// Output info starts from a clone of the caller's output; the empty
// TensorShape defers the actual shape (auto-initialized by the kernel).
243 _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(TensorShape()));
244 output_to_use = &_permuted_output;
// NOTE(review): _original_weights is set to weights_to_use (possibly the
// permuted copy), not the raw weights pointer — verify against prepare().
246 _original_weights = weights_to_use;
248 _depthwise_conv_kernel = std::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
249 _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
// Allocate the permuted tensors (NCHW branch; guard elided in this extract).
256 _permuted_input.allocator()->allocate();
257 _permuted_weights.allocator()->allocate();
258 _permuted_output.allocator()->allocate();
// The generic path never fuses the activation into the kernel.
262 _is_activationlayer_enabled = act_info.enabled();
263 if(_is_activationlayer_enabled)
265 _activationlayer_function.configure(output,
nullptr, act_info);
// NOTE(review): the function header line is missing from this extract — this
// is the interior of the generic path's static validate(). It builds permuted
// TensorInfo copies to validate the NCHW-to-NHWC permute path.
270 const PadStrideInfo &conv_info,
271 unsigned int depth_multiplier,
const ActivationLayerInfo &act_info,
const Size2D &dilation)
276 TensorShape permuted_input_shape = input->tensor_shape();
277 TensorShape permuted_weights_shape = weights->tensor_shape();
// Cloned infos with the permuted shapes; input/weights tagged NHWC.
283 const TensorInfo permuted_input = TensorInfo(input->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(
DataLayout::NHWC));
284 const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(
DataLayout::NHWC));
// NOTE(review): the output info is tagged NCHW while input/weights are NHWC —
// looks intentional (output is permuted back), but worth confirming.
// permuted_output_shape is computed in lines elided from this extract.
285 const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(
DataLayout::NCHW));
// Activation validation follows when an activation is requested (elided).
299 if(act_info.enabled())
// NOTE(review): fragmentary extract — the kernel scheduling between the two
// permutes and the guarding conditions are elided; comments only.
//
// Generic-path run: (conditional) input permute, native kernel (elided),
// (conditional) output permute, then the standalone activation layer.
312 _permute_input.run();
319 _permute_output.run();
322 if(_is_activationlayer_enabled)
324 _activationlayer_function.run();
// One-time preparation for the generic path: permute the weights and release
// the original weight tensor (guard on _is_prepared elided in this extract).
328 void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
334 _permute_weights.run();
335 _original_weights->mark_as_unused();
// NOTE(review): the function header line is missing from this extract — this
// is the interior of NEDepthwiseConvolutionLayer::configure(). It chooses
// between the optimized and generic implementations and configures the one
// selected.
348 _depth_conv_func = get_depthwiseconvolution_function(input->
info(), weights->
info(), (biases !=
nullptr) ? biases->
info() :
nullptr, output->
info(),
conv_info, depth_multiplier, act_info, dilation);
349 switch(_depth_conv_func)
// OPTIMIZED case (case labels elided in this extract).
352 _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
// GENERIC case.
355 _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
// validate() fragment: selects the implementation for the given configuration
// and dispatches to its validate (per-case bodies elided in this extract).
365 DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
366 switch(depth_conv_func)
// run() fragment: dispatch to the implementation chosen at configure time
// (case labels and the generic branch are elided in this extract).
395 switch(_depth_conv_func)
398 _func_optimized.run();
// prepare() fragment: dispatch one-time preparation to the implementation
// chosen at configure time (case labels elided in this extract).
410 switch(_depth_conv_func)
413 _func_optimized.prepare();
416 _func_generic.prepare();
DepthwiseConvolutionFunction
Available DepthwiseConvolutionFunction.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation=Size2D(1U, 1U))
Calculate the depthwise convolution output shape of a tensor.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw a std::runtime_error.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
[NEActivationLayer snippet]
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1, 1))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionAssemblyDispatch.
1 channel, 1 F32 per channel
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of NEPermute.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Optimized Depthwise Convolution.
~NEDepthwiseConvolutionLayer()
Default destructor.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
virtual void prepare()
Prepare the function for executing.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLayer.
quantized, asymmetric fixed-point 8-bit number unsigned
NEDepthwiseConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
Num samples, channels, height, width.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
bool is_relu6(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu6 activation function.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool is_relu(ActivationLayerInfo activation_info)
Checks if activation information correspond to a relu activation function.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
quantized, asymmetric fixed-point 8-bit number signed
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const ActivationLayerInfo &act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Initialize the function's source, destination, weights and convolution information.
void run() override
Run the kernels contained in the function.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of NEDepthwiseConvolutionLayerNativeKernel.