44 Status
validate_arguments(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *biases,
const ITensorInfo *output,
const DWCWeightsKernelInfo &dwc_weights_info,
45 const DWCKernelInfo &dwc_info,
const PadStrideInfo &
conv_info,
unsigned int depth_multiplier,
const Size2D &dilation,
46 const ITensorInfo *output_multipliers,
const ITensorInfo *output_shifts)
61 const ConvolutionInfo
info{
conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
107 if(output->total_size() != 0)
115 const UniformQuantizationInfo iq_info =
input->quantization_info().uniform();
116 const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
117 const UniformQuantizationInfo oq_info = (output->total_size() != 0) ? output->quantization_info().uniform() : iq_info;
119 float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
120 int output_multiplier = 0;
121 int output_shift = 0;
134 _depth_multiplier(1),
135 _output_multipliers(nullptr),
136 _output_shifts(nullptr),
145 configure(
CLKernelLibrary::get().get_compile_context(),
input, weights, biases, output, dwc_weights_info, dwc_info,
conv_info, depth_multiplier, dilation, output_multipliers, output_shifts);
155 dwc_weights_info, dwc_info,
conv_info, depth_multiplier, dilation,
156 (output_multipliers !=
nullptr) ? output_multipliers->
info() :
nullptr, (output_shifts !=
nullptr) ? output_shifts->
info() :
nullptr));
168 _depth_multiplier = depth_multiplier;
169 _output_multipliers = output_multipliers;
170 _output_shifts = output_shifts;
194 std::string
kernel_name = (_is_quantized) ?
"dwc_MxN_native_quantized8_nhwc" :
"dwc_MxN_native_fp_nhwc";
209 int output_multiplier = 0;
210 int output_shift = 0;
215 if(dwc_info.activation_info.enabled())
221 const int o1 = oq_info.
offset;
227 const float s1 = iq_info.
scale;
242 ICLKernel::configure_internal(win);
270 ARM_COMPUTE_RETURN_ON_ERROR(
validate_arguments(
input, weights, biases, output, dwc_weights_info, dwc_info,
conv_info, depth_multiplier, dilation, output_multipliers, output_shifts));
284 if(_depth_multiplier != 1)
299 if(_biases !=
nullptr)
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
const Window & window() const
The maximum window the kernel can be executed on.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t,...)
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo * > infos)
Stores padding information before configuring a kernel.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
unsigned int n0
Number of columns processed by each thread.
constexpr int step() const
Return the step of the dimension.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
CLDepthwiseConvolutionLayerNativeKernel()
Default Constructor.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
size_t total_size_upper(size_t dimension) const
Collapses given dimension and above.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
std::string lower_string(const std::string &val)
Lower a given string.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
1 channel, 1 S32 per channel
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Window collapse(const Window &full_window, size_t first, size_t last=Coordinates::num_max_dimensions) const
Collapse the dimensions between first and last.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Descriptor used by the depthwise convolution kernels.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
Padding and stride information class.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
static constexpr unsigned int num_arguments_per_4D_tensor()
Returns the number of arguments enqueued per 4D tensor object.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1U, 1U), const ICLTensor *output_multipliers=nullptr, const ICLTensor *output_shifts=nullptr)
Initialize the function's source, destination and parameters.
Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
quantized, symmetric per channel fixed-point 8-bit number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Interface for OpenCL tensor.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Window first_slice_window_4D() const
First 4D slice of the window.
bool slide_window_slice_4D(Window &slice) const
Slide the passed 4D window slice.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
quantized, asymmetric fixed-point 8-bit number signed
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier=1, const Size2D &dilation=Size2D(1U, 1U), const ITensorInfo *output_multipliers=nullptr, const ITensorInfo *output_shifts=nullptr)
Static function to check if given info will lead to a valid configuration of CLDepthwiseConvolutionLayerNativeKernel.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.