45 : _memory_group(
std::move(memory_manager)),
47 _permute_input_to_nhwc(),
48 _permute_weights_to_nhwc(),
49 _permute_output_to_nchw(),
53 _output_multipliers(),
58 _needs_permute(false),
79 biases !=
nullptr ? biases->
info() :
nullptr,
80 output !=
nullptr ? output->
info() : input->
info(),
89 _original_weights = weights;
97 const ICLTensor *weights_to_use = weights;
101 _memory_group.
manage(&_permuted_input);
102 _memory_group.
manage(&_permuted_output);
115 input_to_use = &_permuted_input;
116 weights_to_use = &_permuted_weights;
117 output_to_use = &_permuted_output;
120 CLTensor *output_multipliers_to_use =
nullptr;
121 CLTensor *output_shifts_to_use =
nullptr;
130 output_multipliers_to_use = &_output_multipliers;
131 output_shifts_to_use = &_output_shifts;
140 _dwc_native_kernel->set_target(gpu_target);
141 _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
142 dwc_native_compute_info, conv_kernel_info, output_multipliers_to_use, output_shifts_to_use);
168 const bool in_place = input == output || output ==
nullptr;
216 const TensorInfo permuted_input = input->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(
DataLayout::NHWC);
217 const TensorInfo permuted_weights = weights->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(
DataLayout::NHWC);
218 const TensorInfo permuted_output = output->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(
DataLayout::NHWC);
225 const DWCComputeKernelInfo dwc_native_compute_info =
t->configure(&permuted_input, &permuted_weights, conv_info, dilation, depth_multiplier);
228 dwc_native_compute_info, conv_kernel_info, &output_multipliers_shifts_info, &output_multipliers_shifts_info));
235 const DWCComputeKernelInfo dwc_native_compute_info =
t->configure(input, weights, conv_info, dilation, depth_multiplier);
237 &output_multipliers_shifts_info));
250 _permute_input_to_nhwc.
run();
255 _permute_output_to_nchw.
run();
265 _output_multipliers.
map();
266 _output_shifts.
map();
268 _original_weights->
info(),
269 _output !=
nullptr ? _output->
info() : _input->
info(),
272 _output_multipliers.
unmap();
273 _output_shifts.
unmap();
281 _permute_weights_to_nhwc.
run();
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static CLScheduler & get()
Access the scheduler singleton.
GPUTarget target() const
Get the target GPU.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
Strides PermutationVector
Permutation vector.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
size_t x() const
Semantic accessor for width as x.
unsigned int pad_top() const
Get the top padding.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2023 Arm Limited.
void run() override
Run the kernels contained in the function.
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Initialize the function's source, destination, weights and convolution information.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
ITensorInfo & set_data_layout(const DataLayout &data_layout) override
Set the data layout of the tensor.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual bool are_values_constant() const =0
Flag indicating whether the values of the tensor are constant, meaning that they cannot change on kernel...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
CLDepthwiseConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Num samples, channels, height, width.
size_t y() const
Semantic accessor for height as y.
Compute descriptor used by the depthwise convolution native kernel.
quantized, symmetric per channel fixed-point 8-bit number
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
~CLDepthwiseConvolutionLayer()
Default destructor.
Memory group resources scope handling class.
Interface for OpenCL tensor.
GPUTarget
Available GPU Targets.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Class for specifying the size of an image or rectangle.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers=nullptr, const ITensorInfo *output_shifts=nullptr)
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLa...
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
unsigned int pad_bottom() const
Get the bottom padding.
static std::unique_ptr< IClDWCNativeKernelConfig > create(GPUTarget gpu)
Static method to call the ClDWCNative kernel configuration class according to the GPU target...
unsigned int pad_left() const
Get the left padding.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLa...
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
Interface for the kernel to run a MxN depthwise convolution.
Basic implementation of the OpenCL tensor interface.