using namespace experimental;
using namespace misc::shape_calculator;
using namespace utils::cast;
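
// ClGemmConv2d lowers a 2D convolution onto a GEMM: an optional im2col transform
// flattens input patches into matrix rows, a float or quantized GEMM multiplies
// them by the reshaped weights, and an optional col2im transform restores the
// spatial output layout. The members initialised below map one-to-one onto those
// stages, plus a standalone activation kernel for the cases where the activation
// cannot be fused into the GEMM.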
ClGemmConv2d::ClGemmConv2d()
    : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr),
      _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(),
      _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true),
      _append_bias(false), _is_prepared(false), _use_post_ops(false), _aux_mem(AuxTensorIdx::Count)
{
}

// --- ClGemmConv2d::configure_mm(): pick and configure the GEMM backend ---
    ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));

    // ... (elided: a GEMMInfo is assembled here; among its arguments it forwards
    //      gemmlowp_output_stage, the quantized output-stage descriptor) ...
        // Negate the input offset on a local copy of the source info, as the
        // convolution computation needs the negated offset
        tmp_src.set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));

        _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
        _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases, dst, gemm_info);
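
        // Illustrative numbers: a QASYMM8 input with (scale = 0.5, offset = 10) is
        // handed to the GEMMLowp core as (0.5, -10). Only the local copy tmp_src is
        // touched; the caller's tensor info keeps its original quantization.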

        auto mm_mem_req = _mm_gemmlowp->workspace();
        for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
        {
            _aux_mem[cont] = mm_mem_req[cont];
        }
        _mm_gemm = std::make_unique<ClGemm>();
        _mm_gemm->configure(compile_context, &tmp_src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
        auto mm_mem_req = _mm_gemm->workspace();
        for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
        {
            _aux_mem[cont] = mm_mem_req[cont];
        }
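
// Both branches export the chosen backend's auxiliary-memory requirements
// (workspace()) into _aux_mem, so a caller can allocate every scratch tensor
// for the whole operator in one place.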

// --- ClGemmConv2d::validate_mm(): static validation mirror of configure_mm() ---
    // ... (elided: the GEMMInfo used for validation is built here, again carrying
    //      gemmlowp_output_stage) ...
        std::unique_ptr<ITensorInfo> src_qa     = src->clone();
        std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
        src_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
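
        // The clones let validation negate the offsets, exactly as configure_mm()
        // does on tmp_src, without mutating the caller's tensor descriptors.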

// --- ClGemmConv2d::configure() ---
    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);
    _fuse_activation = true;
    _use_post_ops    = conv2d_info.post_ops.size() > 0;

    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    // ...
    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;
    // ...
    _append_bias = false;

    _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>();
    if(conv2d_info.num_groups != 1 && biases != nullptr)
    {
        // Grouped convolution: the biases are appended to the reshaped weights, so
        // no separate bias tensor reaches the GEMM
        biases_to_use = nullptr;
        _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped, conv2d_info.num_groups);
    }
    else
    {
        _weights_reshape_kernel->configure(compile_context, weights, nullptr, &_weights_reshaped, conv2d_info.num_groups);
    }

        _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>();
        // ...
        _im2col_kernel->configure(compile_context, src, &_im2col_output, Size2D(kernel_width, kernel_height), conv2d_info.conv_info, _append_bias, conv2d_info.dilation, conv2d_info.num_groups);
        // ...
        gemm_input_to_use = &_im2col_output;

        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);
        // ...
        _gemm_output = TensorInfo(shape_gemm, 1, data_type);
        // ...
        gemm_output_to_use = &_gemm_output;
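
        // Worked example (illustrative numbers): a 3x3 kernel over 64 input
        // channels with 128 filters and a 56x56 convolved output gives, per group,
        //   mat_weights_cols = 128     (one GEMM column per filter)
        //   conv_w * conv_h  = 3136    (one GEMM row per output position)
        // with a shared K dimension of 3*3*64 (+1 when the bias is appended).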

        const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info;
        // ...
        const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
        // ...
        // min_val/max_val default to the representable range of the data type
        auto min_activation = min_val.get<int32_t>();
        auto max_activation = max_val.get<int32_t>();
            _fuse_activation = false;
        // ...

    // In NHWC the GEMM result can be reinterpreted as a 3D tensor of depth conv_h,
    // which is what lets the col2im step be skipped in that layout
    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
    configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info, conv2d_info.post_ops);

        _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
        // ...
        _col2im_kernel->configure(compile_context, gemm_output_to_use, dst, Size2D(conv_w, conv_h), conv2d_info.num_groups);

    // ... (elided condition: the dst spatial dimensions must match conv_w x conv_h) ...
    ARM_COMPUTE_ERROR_ON_MSG(/* elided */, "Output shape does not match the expected one");

    if(!_fuse_activation && !_use_post_ops)
    {
        _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>();
        _activation_kernel->configure(compile_context, dst, nullptr, conv2d_info.act_info);
    }
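
    // Passing nullptr as the activation output makes the kernel run in place on
    // dst; this path only exists for activations the GEMM could not fuse.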

// --- ClGemmConv2d::validate() ---
    if(!is_quantized_per_channel)
    {
        // ... (elided: src and weights must share a data type) ...
    }
    const unsigned int kernel_width  = weights->dimension(idx_width);
    const unsigned int kernel_height = weights->dimension(idx_height);
    const unsigned int num_kernels   = weights->dimension(idx_kernels);

    bool fuse_activation = true;
    bool use_post_ops    = conv2d_info.post_ops.size() > 0;
    // ...
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(/* elided condition */, "ClGemmConv2d does not support post ops with col2im or im2col operation");

    if(biases != nullptr)
    {
        // ... (elided: bias data-type and shape checks) ...
    }

    unsigned int conv_w = 0;
    unsigned int conv_h = 0;
    // ...
    unsigned int mat_weights_cols = num_kernels / conv2d_info.num_groups;
    // ...
    bool append_bias = false;
    // ...
    if(conv2d_info.num_groups != 1 && biases != nullptr)
    {
        // ... (mirrors configure(): bias folded into the reshaped weights) ...
        biases_to_use = nullptr;
    }
    // ...
    weights_to_use = &weights_reshaped_info;
    // ...
    const Size2D kernel_dims(kernel_width, kernel_height);
    // ...
        gemm_input_to_use = &im2col_reshaped_info;

        shape_gemm.set(0, mat_weights_cols);
        shape_gemm.set(1, conv_w * conv_h);
        // ...
        info_gemm = TensorInfo(shape_gemm, 1, data_type);
        // ...
        gemm_output_to_use = &info_gemm;

        const auto output_quant_info = (dst->total_size() == 0) ? iq_info : oq_info;
        const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
        // ...
        int min_activation = 0;
        int max_activation = 0;
        // ...
            fuse_activation = false;

    const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0;
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info,
                                            conv2d_info.post_ops));

    if(!fuse_activation && !use_post_ops)
    {
        // ... (elided: validate the standalone activation kernel on dst) ...
    }

// --- ClGemmConv2d::run() ---
    auto gemm_input_to_use  = src;
    auto gemm_output_to_use = dst;
    // ...
        gemm_input_to_use = im2col_output.get();
    // ...
        gemm_output_to_use = gemm_output.get();
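
    // When im2col / col2im are in play, the GEMM reads from and writes to the
    // auxiliary tensors rather than the user-facing src/dst.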

    // Dispatch to whichever GEMM backend was configured
    if(_is_quantized)
    {
        _mm_gemmlowp->run(pack_mm);
    }
    else
    {
        _mm_gemm->run(pack_mm);
    }

    // Run the standalone activation only when it was not fused into the GEMM
    if(!_fuse_activation && !_use_post_ops)
    {
        // ... (elided: enqueue the activation kernel) ...
    }

// --- ClGemmConv2d::prepare() ---
    _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
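
// ---------------------------------------------------------------------------
// Usage sketch (illustration only, not part of this file). configure() and
// validate() follow the signatures declared for ClGemmConv2d; everything else
// is an assumption about the surrounding experimental-operator API: the
// ITensorPack slot ids (ACL_SRC_0/1/2, ACL_DST), the Conv2dInfo field order,
// and the fact that any auxiliary tensors reported by workspace() would also
// have to be allocated and added to the pack by the caller (elided below).
// ---------------------------------------------------------------------------
void example_gemm_conv2d(const ClCompileContext &compile_context,
                         ICLTensor *src, ICLTensor *weights, ICLTensor *biases, ICLTensor *dst)
{
    // Metadata for a 3x3 convolution, stride 1, pad 1, 3 -> 8 channels (NCHW)
    TensorInfo src_info(TensorShape(32U, 32U, 3U), 1, DataType::F32);
    TensorInfo weights_info(TensorShape(3U, 3U, 3U, 8U), 1, DataType::F32);
    TensorInfo biases_info(TensorShape(8U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(32U, 32U, 8U), 1, DataType::F32);
    const Conv2dInfo conv2d_info(PadStrideInfo(1, 1, 1, 1), Size2D(1U, 1U),
                                 ActivationLayerInfo(), false /* fast math */, 1 /* groups */);

    // Validate on metadata first, then configure the operator once
    ClGemmConv2d conv;
    ARM_COMPUTE_ERROR_THROW_ON(
        ClGemmConv2d::validate(&src_info, &weights_info, &biases_info, &dst_info, conv2d_info));
    conv.configure(compile_context, &src_info, &weights_info, &biases_info, &dst_info, conv2d_info);

    // At run time the tensors travel in a pack; prepare() does the one-off
    // weight reshaping, run() executes im2col + GEMM (+ col2im / activation)
    ITensorPack pack;
    pack.add_const_tensor(TensorType::ACL_SRC_0, src);
    pack.add_const_tensor(TensorType::ACL_SRC_1, weights);
    pack.add_const_tensor(TensorType::ACL_SRC_2, biases);
    pack.add_tensor(TensorType::ACL_DST, dst);
    conv.prepare(pack);
    conv.run(pack);
}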