54 CpuGemmConvolution::CpuGemmConvolution()
55 : _weights_reshape_kernel(nullptr), _im2col_kernel(), _mm_gemm(), _mm_gemmlowp(), _col2im_kernel(), _reshape_kernel(), _im2col_output(), _weights_reshaped(), _gemm_output(), _gemm_output_3d(),
56 _data_layout(
DataLayout::
NCHW), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _is_prepared(false), _aux_mem(AuxTensorIdx::Count)
62 bool enable_fast_math,
int gemm_3d_depth)
69 gemm_3d_depth, _skip_im2col ,
101 std::tie(type_min, type_max) =
get_min_max(data_type);
102 int32_t min_activation = type_min.get<int32_t>();
103 int32_t max_activation = type_max.get<int32_t>();
105 if(supported_acts.count(act_info.
activation()) != 0)
118 _mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
119 _mm_gemmlowp->configure(&tmp_src, &tmp_weights, biases, dst,
GEMMInfo(
false,
false,
true, gemm_3d_depth, _skip_im2col,
false, output_info,
false, enable_fast_math,
false, act_info));
121 auto mm_mem_req = _mm_gemmlowp->workspace();
122 for(
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
124 _aux_mem[cont] = mm_mem_req[cont];
130 _mm_gemm = std::make_unique<CpuGemm>();
131 _mm_gemm->configure(src, weights, biases, dst, 1.0f, 0.0f, gemm_info);
132 auto mm_mem_req = _mm_gemm->workspace();
133 for(
unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
135 _aux_mem[cont] = mm_mem_req[cont];
141 const ActivationLayerInfo &act_info,
bool enable_fast_math,
int gemm_3d_depth,
bool skip_im2col)
145 const bool is_activation_enabled = act_info.
enabled();
149 gemm_3d_depth, skip_im2col ,
164 std::tie(type_min, type_max) =
get_min_max(data_type);
165 int32_t min_activation = type_min.get<int32_t>();
166 int32_t max_activation = type_max.get<int32_t>();
172 if(is_activation_enabled && supported_acts.count(act_info.
activation()) != 0)
186 std::unique_ptr<ITensorInfo> input_qa = src->
clone();
187 std::unique_ptr<ITensorInfo> weights_qa = weights->
clone();
190 return CpuGemmLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases,
dst,
GEMMInfo(
false,
false,
true, gemm_3d_depth, skip_im2col,
false, output_info,
191 false, enable_fast_math,
false, act_info));
203 const unsigned int mult_y = skip_im2col ? 1
U : gemm_3d_depth;
204 const unsigned int mult_z = skip_im2col ? gemm_3d_depth : 1
U;
211 return validate_mm(&dummy_input_info, &dummy_weights_info,
nullptr, &dummy_output_info, act_info,
false, gemm_3d_depth, skip_im2col);
236 const unsigned int kernel_width = weights->
dimension(idx_width);
237 const unsigned int kernel_height = weights->
dimension(idx_height);
242 _skip_im2col = (data_layout ==
DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.
stride().first == 1 && conv_info.
stride().second == 1);
248 unsigned int conv_w = 0;
249 unsigned int conv_h = 0;
257 "Output shape does not match the expected one");
262 _skip_col2im = bool(validate_gemm3d(src, weights, act_info, conv_h,
true));
266 _skip_im2col =
false;
271 _skip_col2im =
false;
275 unsigned int stride_x = 0;
276 unsigned int stride_y = 0;
277 std::tie(stride_x, stride_y) = conv_info.stride();
279 unsigned int mat_weights_cols = weights->
dimension(idx_kernels);
283 _weights_reshape_kernel = std::make_unique<kernels::CpuWeightsReshapeKernel>();
284 _weights_reshape_kernel->configure(weights,
nullptr, &_weights_reshaped);
291 _im2col_kernel = std::make_unique<kernels::CpuIm2ColKernel>();
292 _im2col_kernel->configure(src, &_im2col_output,
Size2D(kernel_width, kernel_height), conv_info,
false, dilation);
295 gemm_input_to_use = &_im2col_output;
306 shape_gemm.
set(0, mat_weights_cols);
307 shape_gemm.
set(1, conv_w * conv_h);
309 _gemm_output =
TensorInfo(shape_gemm, 1, output_data_type);
314 gemm_output_to_use = &_gemm_output;
323 gemm_output_to_use = &_gemm_output_3d;
328 const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0;
329 configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, act_info, enable_fast_math, gemm_3d_depth);
334 _col2im_kernel = std::make_unique<kernels::CpuCol2ImKernel>();
335 _col2im_kernel->configure(gemm_output_to_use, dst,
Size2D(conv_w, conv_h));
340 _reshape_kernel = std::make_unique<kernels::CpuReshapeKernel>();
341 _reshape_kernel->configure(gemm_output_to_use, dst);
346 bool gemm_trans_wei = _aux_mem[1].size > 0;
347 gemm_trans_wei = _mm_gemm !=
nullptr ? _aux_mem[3].size > 0 : gemm_trans_wei;
348 gemm_trans_wei = _mm_gemmlowp !=
nullptr ? _aux_mem[5].size > 0 : gemm_trans_wei;
352 _aux_mem[WeightsReshaped] =
MemoryInfo(
offset_int_vec(WeightsReshaped), gemm_trans_wei ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _weights_reshaped.
total_size());
373 const unsigned int kernel_width = weights->
dimension(idx_width);
374 const unsigned int kernel_height = weights->
dimension(idx_height);
384 const bool append_bias =
false;
387 bool skip_im2col = (data_layout ==
DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.
stride().first == 1 && conv_info.
stride().second == 1);
390 unsigned int conv_w = 0;
391 unsigned int conv_h = 0;
401 bool skip_col2im =
false;
404 skip_col2im = bool(validate_gemm3d(src, weights, act_info, conv_h,
true));
415 if(!
bool(validate_gemm3d(src, weights, act_info, conv_h, skip_im2col)))
426 if(biases !=
nullptr)
444 unsigned int mat_weights_cols = weights->
dimension(idx_kernels);
449 weights_to_use = &weights_reshaped_info;
456 shape_im2col.
set(0, mat_weights_rows);
457 shape_im2col.
set(1, conv_w * conv_h);
458 shape_im2col.
set(2, 1);
460 im2col_reshaped_info =
TensorInfo(shape_im2col, 1, data_type);
463 gemm_input_to_use = &im2col_reshaped_info;
471 shape_gemm.
set(0, mat_weights_cols);
472 shape_gemm.
set(1, conv_w * conv_h);
473 info_gemm =
TensorInfo(shape_gemm, 1, output_data_type);
480 gemm_output_to_use = &info_gemm;
481 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, act_info, enable_fast_math, skip_col2im ? conv_h : 0, skip_im2col));
498 auto gemm_input_to_use =
src;
504 bool out_has_padding = _skip_col2im && (dst->info()->
padding().
bottom != 0 || dst->info()->
padding().
top != 0);
515 gemm_input_to_use = im2col_output.
get();
519 const ITensor *out_to_use = out_has_padding ? gemm_output.
get() :
dst;
524 auto gemm_output_to_use = gemm_output.
get();
528 gemm_output_to_use = &gemm3d;
530 if(_skip_col2im && !out_has_padding)
532 gemm_output_to_use =
dst;
543 _mm_gemmlowp->run(pack_mm);
548 _mm_gemm->run(pack_mm);
573 else if(out_has_padding)
597 weights->mark_as_unused();
602 _is_quantized ? _mm_gemmlowp->prepare(gemm_pack) : _mm_gemm->prepare(gemm_pack);
unsigned int top
top of the border
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Class describing the value of a pixel for any image format.
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Quantize using a fixed point multiplication.
void soft_init(TensorInfo &input, size_t alignment=0)
Initialize a tensor based on a reference TensorInfo.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors)=0
Runs the kernel in the same thread as the caller synchronously.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CpuGemm.
bool extend_padding(const PaddingSize &padding) override
Update the offset to the first element, the strides and the total size.
bool are_reshaped() const
Flag which specifies if the weights tensor has been reshaped.
1 channel, 1 F32 per channel
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
const DataLayout data_layout
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
~CpuGemmConvolution()
Destructor.
unsigned int bottom
bottom of the border
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
int32_t gemmlowp_max_bound
GEMMLowp max value used to saturate down the output result before converting back to QASYMM8...
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Activation Layer Information class.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
1 channel, 1 F16 per channel
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns expected width and height of output scaled tensor depending on dimensions rounding mode...
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
bool is_quantized_per_channel
GEMMLowp quantized per-channel flag.
Convolution Layer Weights Information class.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
1 channel, 1 S32 per channel
16-bit brain floating-point number
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Quantization information.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &convolved_dims)
Static function to check if given info will lead to a valid configuration.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo & set_data_layout(const DataLayout &data_layout)=0
Set the data layout of the tensor.
std::pair< int32_t, int32_t > get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info)
Returns a pair of minimum and maximum values for a quantized activation.
Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, const QuantizationInfo &wq_info, const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info)
Calculate quantized representation of per-channel multipliers.
quantized, asymmetric fixed-point 8-bit number unsigned
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
const unsigned int num_groups
size_t total_size() const override
Returns the total size of the tensor in bytes.
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Basic implementation of the tensor interface.
Padding and stride information class.
virtual PaddingSize padding() const =0
Padding of tensor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
Num samples, channels, height, width.
src_info set_data_layout(data_layout)
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
quantized, symmetric per channel fixed-point 8-bit number
Lower and Upper Bounded Rectifier ( $f(x) = \min(a, \max(b, x))$ )
void prepare(ITensorPack &tensors) override
Prepare the function for executing.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias=false, unsigned int num_groups=1)
Calculate the reshaped shape of the weights.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Class for specifying the size of an image or rectangle.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status import_memory(void *memory)
Import an existing memory as a tensor's backing memory.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
int offset_int_vec(int offset)
ActivationFunction activation() const
Get the type of activation function.
quantized, asymmetric fixed-point 8-bit number signed
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
int32_t gemmlowp_min_bound
GEMMLowp min value used to saturate down the output result before converting back to QASYMM8...
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
DataLayout
[DataLayout enum definition]
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
bool retain_internal_weights() const
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation=Size2D(1U, 1U), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *dst, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration.