48 using namespace experimental;
49 using namespace misc::shape_calculator;
50 using namespace utils::cast;
53 ClGemmConvolution::ClGemmConvolution()
54 : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr), _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(),
55 _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _aux_mem(AuxTensorIdx::Count)
65 ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info));
73 gemmlowp_output_stage,
90 _mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();
91 _mm_gemmlowp->configure(compile_context, &tmp_src, weights, biases, dst, gemm_info);
96 auto mm_mem_req = _mm_gemmlowp->workspace();
97 for(
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
99 _aux_mem[cont] = mm_mem_req[cont];
105 _mm_gemm = std::make_unique<ClGemm>();
106 _mm_gemm->configure(compile_context, &tmp_src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
107 auto mm_mem_req = _mm_gemm->workspace();
108 for(
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
110 _aux_mem[cont] = mm_mem_req[cont];
126 gemmlowp_output_stage,
139 std::unique_ptr<ITensorInfo> src_qa = src->
clone();
140 std::unique_ptr<ITensorInfo> weights_qa = weights->
clone();
169 const unsigned int kernel_width = weights->
dimension(idx_width);
170 const unsigned int kernel_height = weights->
dimension(idx_height);
171 const unsigned int num_kernels = weights->
dimension(idx_kernels);
182 _fuse_activation =
true;
188 unsigned int stride_x = 0;
189 unsigned int stride_y = 0;
193 unsigned int conv_w = 0;
194 unsigned int conv_h = 0;
202 unsigned int mat_weights_cols = num_kernels / conv2d_info.
num_groups;
205 _append_bias =
false;
207 _weights_reshape_kernel = std::make_unique<kernels::ClWeightsReshapeKernel>();
208 if(conv2d_info.
num_groups != 1 && biases !=
nullptr)
212 biases_to_use =
nullptr;
214 _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped, conv2d_info.
num_groups);
218 _weights_reshape_kernel->configure(compile_context, weights,
nullptr, &_weights_reshaped, conv2d_info.
num_groups);
225 _im2col_kernel = std::make_unique<opencl::kernels::ClIm2ColKernel>();
229 _im2col_kernel->configure(compile_context, src, &_im2col_output,
Size2D(kernel_width, kernel_height), conv2d_info.
conv_info, _append_bias, conv2d_info.
dilation, conv2d_info.
num_groups);
236 gemm_input_to_use = &_im2col_output;
246 shape_gemm.
set(0, mat_weights_cols);
247 shape_gemm.
set(1, conv_w * conv_h);
249 _gemm_output =
TensorInfo(shape_gemm, 1, data_type);
253 gemm_output_to_use = &_gemm_output;
263 const auto output_quant_info = (dst->
total_size() == 0) ? iq_info : oq_info;
265 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
281 auto min_activation = min_val.get<int32_t>();
282 auto max_activation = max_val.get<int32_t>();
297 _fuse_activation =
false;
309 const unsigned int gemm_3d_depth = (data_layout ==
DataLayout::NHWC) ? conv_h : 0;
311 configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.
act_info);
316 _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
319 _col2im_kernel->configure(compile_context, gemm_output_to_use, dst,
Size2D(conv_w, conv_h), conv2d_info.
num_groups);
324 "Output shape does not match the expected one");
326 if(!_fuse_activation)
328 _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>();
329 _activation_kernel->configure(compile_context, dst,
nullptr, conv2d_info.
act_info);
345 if(!is_quantized_per_channel)
361 const unsigned int kernel_width = weights->
dimension(idx_width);
362 const unsigned int kernel_height = weights->
dimension(idx_height);
363 const unsigned int num_kernels = weights->
dimension(idx_kernels);
375 bool fuse_activation =
true;
381 if(biases !=
nullptr)
401 unsigned int conv_w = 0;
402 unsigned int conv_h = 0;
411 unsigned int mat_weights_cols = num_kernels / conv2d_info.
num_groups;
414 bool append_bias =
false;
416 if(conv2d_info.
num_groups != 1 && biases !=
nullptr)
420 biases_to_use =
nullptr;
429 weights_to_use = &weights_reshaped_info;
433 const Size2D kernel_dims(kernel_width, kernel_height);
441 gemm_input_to_use = &im2col_reshaped_info;
450 shape_gemm.
set(0, mat_weights_cols);
451 shape_gemm.
set(1, conv_w * conv_h);
453 info_gemm =
TensorInfo(shape_gemm, 1, data_type);
455 gemm_output_to_use = &info_gemm;
467 const auto output_quant_info = (dst->
total_size() == 0) ? iq_info : oq_info;
468 const unsigned int num_filters = (is_quantized_per_channel) ? num_kernels : 1;
478 int min_activation = 0;
479 int max_activation = 0;
494 fuse_activation =
false;
505 const unsigned int gemm_3d_depth = (data_layout ==
DataLayout::NHWC) ? conv_h : 0;
507 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.
act_info));
531 auto gemm_input_to_use =
src;
532 auto gemm_output_to_use =
dst;
547 gemm_input_to_use = im2col_output.
get();
551 gemm_output_to_use = gemm_output.
get();
565 _mm_gemmlowp->run(pack_mm);
570 _mm_gemm->run(pack_mm);
585 if(!_fuse_activation)
619 _is_quantized ? _mm_gemmlowp->prepare(tensors) : _mm_gemm->prepare(tensors);
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Class describing the value of a pixel for any image format.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Quantize using a fixed point multiplication.
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Static function to check if given info will lead to a valid configuration.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
static CLScheduler & get()
Access the scheduler singleton.
ActivationLayerInfo act_info
float a() const
Get the alpha value.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool are_reshaped() const
Flag which specifies if the weights tensor has been reshaped.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void prepare(ITensorPack &constants) override
Prepare the function for executing.
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
bool is_quantized_per_channel
GEMMLowp quantized per-channel flag.
Convolution Layer Weights Information class.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
std::vector< int32_t > gemmlowp_shifts
GEMMLowp output stage shifts used for quantizing to QASYMM8.
1 channel, 1 S32 per channel
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Quantization information.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const override
Returns the total size of the tensor in bytes.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
std::vector< int32_t > gemmlowp_multipliers
GEMMLowp output stage multipliers used for quantizing to QASYMM8.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
Static function to check if given info will lead to a valid configuration.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Descriptor used by the Convolution function.
Num samples, channels, height, width.
src_info set_data_layout(data_layout)
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Lower and Upper Bounded Rectifier ( $f(x) = \min(a, \max(b, x))$ )
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Interface for OpenCL tensor.
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias=false, unsigned int num_groups=1)
Calculate the reshaped shape of the weights.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
~ClGemmConvolution()
Default destructor.
int offset_int_vec(int offset)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
quantized, asymmetric fixed-point 8-bit number signed
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
void tune_kernel_static(ICLKernel &kernel)
Tunes OpenCL kernel.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
DataLayout
[DataLayout enum definition]
TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups=1)
Calculate the im2col output shape of a tensor.
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
bool retain_internal_weights() const
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.