24.02.1
|
Go to the documentation of this file.
45 : _memory_group(std::move(memory_manager)),
47 _permute_input_to_nhwc(),
48 _permute_weights_to_nhwc(),
49 _permute_output_to_nchw(),
53 _output_multipliers(),
58 _needs_permute(false),
71 unsigned int depth_multiplier,
85 unsigned int depth_multiplier,
91 input->info(), weights->
info(), biases !=
nullptr ? biases->
info() :
nullptr,
97 _original_weights = weights;
105 const ICLTensor *weights_to_use = weights;
109 _memory_group.
manage(&_permuted_input);
110 _memory_group.
manage(&_permuted_output);
123 input_to_use = &_permuted_input;
124 weights_to_use = &_permuted_weights;
125 output_to_use = &_permuted_output;
128 CLTensor *output_multipliers_to_use =
nullptr;
129 CLTensor *output_shifts_to_use =
nullptr;
134 const size_t num_filters =
140 output_multipliers_to_use = &_output_multipliers;
141 output_shifts_to_use = &_output_shifts;
147 t->configure(input_to_use->
info(), weights_to_use->
info(),
conv_info, dilation, depth_multiplier);
151 _dwc_native_kernel->set_target(gpu_target);
152 _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
153 dwc_native_compute_info, conv_kernel_info, output_multipliers_to_use,
154 output_shifts_to_use);
178 unsigned int depth_multiplier,
185 const bool in_place =
input == output || output ==
nullptr;
237 ->set_is_resizable(
true)
239 .set_tensor_shape(permuted_input_shape)
242 ->set_is_resizable(
true)
244 .set_tensor_shape(permuted_weights_shape)
247 ->set_is_resizable(
true)
249 .set_tensor_shape(permuted_output_shape)
258 t->configure(&permuted_input, &permuted_weights,
conv_info, dilation, depth_multiplier);
261 &permuted_input, &permuted_weights, biases, &permuted_output, dwc_native_compute_info, conv_kernel_info,
262 &output_multipliers_shifts_info, &output_multipliers_shifts_info));
272 input, weights, biases, output, dwc_native_compute_info, conv_kernel_info, &output_multipliers_shifts_info,
273 &output_multipliers_shifts_info));
286 _permute_input_to_nhwc.
run();
291 _permute_output_to_nchw.
run();
301 _output_multipliers.
map();
302 _output_shifts.
map();
304 _input->
info(), _original_weights->
info(), _output !=
nullptr ? _output->
info() : _input->
info(),
307 _output_multipliers.
unmap();
308 _output_shifts.
unmap();
316 _permute_weights_to_nhwc.
run();
@ NCHW
Num samples, channels, height, width.
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
~CLDepthwiseConvolutionLayer()
Default destructor.
size_t y() const
Semantic accessor for height as y.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
@ NHWC
Num samples, height, width, channels.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
void run() override
Run the kernels contained in the function.
static std::unique_ptr< IClDWCNativeKernelConfig > create(GPUTarget gpu)
Static method to create the ClDWCNative kernel configuration class according to the GPU target.
Interface for OpenCL tensor.
Class for specifying the size of an image or rectangle.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Basic implementation of the OpenCL tensor interface.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Activation Layer Information class.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Strides PermutationVector
Permutation vector.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void mark_as_unused() const
Marks a tensor as unused.
ITensorInfo & set_data_layout(const DataLayout &data_layout) override
Set the data layout of the tensor.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers=nullptr, const ITensorInfo *output_shifts=nullptr)
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLayerNativeKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
size_t x() const
Semantic accessor for width as x.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
static CLScheduler & get()
Access the scheduler singleton.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLayer.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
GPUTarget target() const
Get the target GPU.
GPUTarget
Available GPU Targets.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Store the tensor's metadata.
bool is_used() const
Flags if the tensor is used or not.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Memory group resources scope handling class.
Copyright (c) 2017-2024 Arm Limited.
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
Compute descriptor used by the depthwise convolution native kernel.
@ S32
signed 32-bit number
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, ActivationLayerInfo act_info=ActivationLayerInfo(), const Size2D &dilation=Size2D(1U, 1U))
Initialize the function's source, destination, weights and convolution information.
Store the tensor's metadata.
virtual bool are_values_constant() const =0
Flag indicating whether the values of the tensor are constant, meaning that they can change on kernel...
ITensorInfo & set_tensor_shape(const TensorShape &shape) override
Set the shape of an already initialized tensor.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
#define ARM_COMPUTE_LOG_PARAMS(...)
CLDepthwiseConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
Interface for the kernel to run an MxN depthwise convolution.