CMSIS-NN  Version 3.0.0
CMSIS NN Software Library
Convolution Functions

Functions

arm_status arm_convolve_1_x_n_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 1xN convolution. More...
 
int32_t arm_convolve_1_x_n_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required additional buffer size for 1xn convolution. More...
 
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 version of 1x1 convolution (non-square shape) More...
 
arm_status arm_convolve_1x1_s8_fast (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 Fast s8 version for 1x1 convolution (non-square shape) More...
 
int32_t arm_convolve_1x1_s8_fast_get_buffer_size (const cmsis_nn_dims *input_dims)
 Get the required buffer size for arm_convolve_1x1_s8_fast. More...
 
arm_status arm_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q15 convolution function. More...
 
arm_status arm_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function. More...
 
arm_status arm_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function (non-square shape) More...
 
arm_status arm_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function. More...
 
arm_status arm_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function (non-square shape) More...
 
arm_status arm_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function. More...
 
arm_status arm_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function (non-square shape) More...
 
arm_status arm_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 convolution function for RGB image. More...
 
arm_status arm_convolve_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 Basic s8 convolution function. More...
 
int32_t arm_convolve_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for s8 convolution function. More...
 
arm_status arm_convolve_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 s8 convolution layer wrapper function that selects and calls the optimal convolution kernel available in CMSIS-NN. More...
 
int32_t arm_convolve_wrapper_s8_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get the required buffer size for arm_convolve_wrapper_s8. More...
 
arm_status arm_depthwise_conv_3x3_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Optimized s8 depthwise convolution function for 3x3 kernel size, with some constraints on the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function argument details. More...
 
static void depthwise_conv_s8_mult_4 (const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
static void depthwise_conv_s8_generic (const q7_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const q7_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
arm_status arm_depthwise_conv_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. More...
 
arm_status arm_depthwise_conv_s8_opt (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Optimized s8 depthwise convolution function with the constraint that in_channel equals out_channel. Refer to arm_depthwise_conv_s8() for function argument details. More...
 
int32_t arm_depthwise_conv_s8_opt_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. More...
 
static void depthwise_conv_u8_mult_4 (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
static void depthwise_conv_u8_generic (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
arm_status arm_depthwise_conv_u8_basic_ver1 (const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t output_shift, const int32_t output_mult)
 uint8 depthwise convolution function with asymmetric quantization. More...
 
arm_status arm_depthwise_conv_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *filter, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Wrapper function to pick the right optimized s8 depthwise convolution function. More...
 
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size (const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() More...
 
arm_status arm_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function. More...
 
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function (non-square shape) More...
 

Description

Collection of convolution and depthwise convolution functions and their variants.

The convolution is implemented in two steps: im2col and GEMM.

im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as a matrix-matrix multiplication.

To reduce the memory footprint, the im2col is performed partially. In each iteration, only a few columns (i.e., patches) are generated and computed with GEMM kernels similar to the CMSIS-DSP arm_mat_mult functions.
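
As a rough illustration of that process, the sketch below builds one column of the im2col matrix for an int8 HWC input. It is only a scalar model under assumed layout; the helper name is hypothetical, and the library's actual kernels are SIMD-optimized and operate on a few columns at a time (for example via arm_nn_mat_mult_kernel_s8_s16).

#include <stdint.h>

static void im2col_patch_example(const int8_t *input, int32_t in_h, int32_t in_w,
                                 int32_t in_ch, int32_t k_h, int32_t k_w,
                                 int32_t top, int32_t left, int32_t input_offset,
                                 int16_t *col /* k_h * k_w * in_ch entries */)
{
    for (int32_t ky = 0; ky < k_h; ky++)
    {
        for (int32_t kx = 0; kx < k_w; kx++)
        {
            for (int32_t c = 0; c < in_ch; c++)
            {
                const int32_t y = top + ky;
                const int32_t x = left + kx;
                if (y < 0 || y >= in_h || x < 0 || x >= in_w)
                {
                    *col++ = 0; /* padded element: the zero point maps to 0 once the offset is applied */
                }
                else
                {
                    /* widen to int16 and add the input offset so the column is ready for the s16 GEMM */
                    *col++ = (int16_t)(input[(y * in_w + x) * in_ch + c] + input_offset);
                }
            }
        }
    }
}

A GEMM kernel then multiplies such columns with the filter matrix, producing one output pixel per column.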

Function Documentation

arm_status arm_convolve_1_x_n_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context that contains the additional buffer if required by the function. arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required
[in] conv_params  Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Optional bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [N, H, W, C_OUT]
[out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SIZE_MISMATCH if the argument constraints fail, or ARM_MATH_SUCCESS on successful completion.
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. input_dims->n equals 1
    2. output_dims->w is a multiple of 4
    3. Explicit constraints (since it is for 1xN convolution):
       - input_dims->h equals 1
       - output_dims->h equals 1
       - filter_dims->h equals 1
      Todo:
      Remove constraint on output_dims->w to make the function generic.
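
A minimal usage sketch under the constraints above (not taken from the library documentation): the dimensions describe a time-series style 1xN input, the scratch area is sized with arm_convolve_1_x_n_s8_get_buffer_size(), and the hypothetical helper assumes the caller prepares conv/quant parameters and data pointers as for arm_convolve_s8(). The cmsis_nn_context .size field and the concrete shapes are assumptions for illustration.

#include "arm_nnfunctions.h"

static arm_status run_conv_1_x_n_example(const cmsis_nn_conv_params *conv_params,
                                         const cmsis_nn_per_channel_quant_params *quant_params,
                                         const q7_t *input_data, const q7_t *filter_data,
                                         const int32_t *bias_data, q7_t *output_data,
                                         void *scratch_buf, int32_t scratch_bytes)
{
    const cmsis_nn_dims input_dims  = {.n = 1, .h = 1, .w = 32, .c = 8};   /* [N, H, W, C_IN]        */
    const cmsis_nn_dims filter_dims = {.n = 16, .h = 1, .w = 3, .c = 8};   /* [C_OUT, 1, WK, C_IN]   */
    const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1, .w = 1, .c = 16};
    const cmsis_nn_dims output_dims = {.n = 1, .h = 1, .w = 32, .c = 16};  /* w is a multiple of 4   */

    if (scratch_bytes < arm_convolve_1_x_n_s8_get_buffer_size(&input_dims, &filter_dims))
    {
        return ARM_MATH_ARGUMENT_ERROR; /* caller-provided scratch area too small */
    }

    cmsis_nn_context ctx = {.buf = scratch_buf, .size = scratch_bytes};
    return arm_convolve_1_x_n_s8(&ctx, conv_params, quant_params, &input_dims, input_data,
                                 &filter_dims, filter_data, &bias_dims, bias_data,
                                 &output_dims, output_data);
}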

References cmsis_nn_conv_params::activation, arm_convolve_s8(), arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), cmsis_nn_dims::c, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8().

int32_t arm_convolve_1_x_n_s8_get_buffer_size ( const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims 
)
Parameters
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension
Returns
The function returns the required buffer size in bytes

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8_get_buffer_size().

arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare ( const q7_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in_x  input tensor dimension x
[in] dim_im_in_y  input tensor dimension y
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel_x  filter kernel size x
[in] dim_kernel_y  filter kernel size y
[in] padding_x  padding size x
[in] padding_y  padding size y
[in] stride_x  convolution stride x
[in] stride_y  convolution stride y
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out_x  output tensor dimension x
[in] dim_im_out_y  output tensor dimension y
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.

[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_status arm_convolve_1x1_s8_fast ( const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context that contains the additional buffer if required by the function. arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required
[in] conv_params  Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Optional bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [N, H, W, C_OUT]
[out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SIZE_MISMATCH if the argument constraints fail, or ARM_MATH_SUCCESS on successful completion.
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. input_dims->c is a multiple of 4
    2. conv_params->padding.w = conv_params->padding.h = 0
    3. conv_params->stride.w = conv_params->stride.h = 1
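
A small sketch of checking these constraints before dispatching to the fast 1x1 kernel; the helper is hypothetical and only restates the documented conditions.

#include <stdbool.h>
#include "arm_nnfunctions.h"

/* Returns true when the documented constraints for arm_convolve_1x1_s8_fast hold,
 * so the caller can fall back to arm_convolve_s8 otherwise. */
static bool can_use_1x1_s8_fast(const cmsis_nn_conv_params *conv_params,
                                const cmsis_nn_dims *input_dims,
                                const cmsis_nn_dims *filter_dims)
{
    return (filter_dims->w == 1) && (filter_dims->h == 1) &&
           (input_dims->c % 4 == 0) &&
           (conv_params->padding.w == 0) && (conv_params->padding.h == 0) &&
           (conv_params->stride.w == 1) && (conv_params->stride.h == 1);
}

In practice arm_convolve_wrapper_s8() performs this kind of kernel selection automatically.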

References cmsis_nn_conv_params::activation, arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_nt_t_s8(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8().

int32_t arm_convolve_1x1_s8_fast_get_buffer_size ( const cmsis_nn_dims *input_dims)
Parameters
[in] input_dims  Input (activation) dimensions
Returns
The function returns the required buffer size in bytes

Referenced by arm_convolve_wrapper_s8_get_buffer_size().

arm_status arm_convolve_HWC_q15_basic ( const q15_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q15_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q15_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns ARM_MATH_SUCCESS

Buffer size:

bufferA size: ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

This basic version is designed to work for any input tensor and weight dimension.
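
For example (values chosen purely for illustration), a 3x3 kernel over a 16-channel Q15 input needs a bufferA of ch_im_in*dim_kernel*dim_kernel = 144 q15_t elements, while bufferB can be NULL:

#include "arm_nnfunctions.h"

#define CH_IM_IN   16
#define DIM_KERNEL 3

/* im2col scratch for arm_convolve_HWC_q15_basic: ch_im_in * dim_kernel * dim_kernel
 * q15_t elements. Pass conv_bufferA as bufferA and NULL as bufferB (size 0). */
static q15_t conv_bufferA[CH_IM_IN * DIM_KERNEL * DIM_KERNEL];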

References arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_status arm_convolve_HWC_q15_fast ( const q15_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q15_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q15_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in is multiple of 2

ch_im_out is multiple of 2

References arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_status arm_convolve_HWC_q15_fast_nonsquare ( const q15_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q15_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q15_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in_x  input tensor dimension x
[in] dim_im_in_y  input tensor dimension y
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel_x  filter kernel size x
[in] dim_kernel_y  filter kernel size y
[in] padding_x  padding size x
[in] padding_y  padding size y
[in] stride_x  convolution stride x
[in] stride_y  convolution stride y
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out_x  output tensor dimension x
[in] dim_im_out_y  output tensor dimension y
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y

bufferB size: 0

Input dimension constraints:

ch_im_in is multiple of 2

ch_im_out is multiple of 2

References arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_status arm_convolve_HWC_q7_basic ( const q7_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns ARM_MATH_SUCCESS

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

This basic version is designed to work for any input tensor and weight dimension.

References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.

arm_status arm_convolve_HWC_q7_basic_nonsquare ( const q7_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)

Basic Q7 convolution function (non-square shape)

Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in_x  input tensor dimension x
[in] dim_im_in_y  input tensor dimension y
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel_x  filter kernel size x
[in] dim_kernel_y  filter kernel size y
[in] padding_x  padding size x
[in] padding_y  padding size y
[in] stride_x  convolution stride x
[in] stride_y  convolution stride y
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out_x  output tensor dimension x
[in] dim_im_out_y  output tensor dimension y
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns ARM_MATH_SUCCESS

References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.

arm_status arm_convolve_HWC_q7_fast ( const q7_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in is a multiple of 4 (because of the SIMD32 read and swap)

ch_im_out is a multiple of 2 (because of the 2x2 mat_mult kernel)

The im2col step converts the Q7 tensor input into Q15 columns, which are stored in bufferA. Reordering happens during this im2col process via arm_q7_to_q15_reordered_no_shift: for every four elements, the second and third elements are swapped.

The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.

To speed up the determination of the padding condition, the computation is split into 3x3 parts, i.e., {top, mid, bottom} x {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.
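
As a scalar illustration of that reordering (a sketch only; the library routine arm_q7_to_q15_reordered_no_shift performs the same effect with SIMD instructions):

#include <stdint.h>

/* Reference model of the reordered q7 -> q15 expansion: within every group of
 * four input elements {a, b, c, d}, the output order is {a, c, b, d}. */
static void q7_to_q15_reordered_ref(const int8_t *src, int16_t *dst, uint32_t blocks_of_4)
{
    for (uint32_t i = 0; i < blocks_of_4; i++)
    {
        dst[4 * i + 0] = src[4 * i + 0];
        dst[4 * i + 1] = src[4 * i + 2]; /* second and third elements swapped */
        dst[4 * i + 2] = src[4 * i + 1];
        dst[4 * i + 3] = src[4 * i + 3];
    }
}

The matching GEMM kernel consumes the columns in the same interleaved order, so the swap has no effect on the final result.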

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_status arm_convolve_HWC_q7_fast_nonsquare ( const q7_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in_x  input tensor dimension x
[in] dim_im_in_y  input tensor dimension y
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel_x  filter kernel size x
[in] dim_kernel_y  filter kernel size y
[in] padding_x  padding size x
[in] padding_y  padding size y
[in] stride_x  convolution stride x
[in] stride_y  convolution stride y
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out_x  output tensor dimension x
[in] dim_im_out_y  output tensor dimension y
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_status arm_convolve_HWC_q7_RGB ( const q7_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)

Q7 version of convolution for RGB image.

Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in equals 3

This kernel is written exclusively for convolutions with ch_im_in equal to 3. This applies to the first layer of CNNs, which has an input image in RGB format.

References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_nn_read_q7x4(), arm_nnword::half_words, NN_ROUND, and arm_nnword::word.

arm_status arm_convolve_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context that contains the additional buffer if required by the function. arm_convolve_s8_get_buffer_size will return the buffer_size if required
[in] conv_params  Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Optional bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [N, H, W, C_OUT]
[out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SUCCESS
  1. Supported framework: TensorFlow Lite Micro
  2. q7_t is used as the data type even though it is s8 data. This is done to be consistent with existing APIs.
  3. Additional memory is required for optimization. Refer to the argument 'ctx' for details.
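
The sketch below shows one way the parameter structures might be filled in for an int8 convolution. All concrete values are illustrative assumptions; the per-channel multiplier/shift arrays (one entry per output channel) come from the model's quantization data, and the offsets are the negated input zero point and the output zero point, respectively.

#include "arm_nnfunctions.h"

static arm_status run_conv_s8_example(const q7_t *input_data, const q7_t *filter_data,
                                      const int32_t *bias_data, q7_t *output_data,
                                      int32_t *output_mult, int32_t *output_shift,
                                      void *scratch_buf, int32_t scratch_bytes)
{
    const cmsis_nn_dims input_dims  = {.n = 1, .h = 16, .w = 16, .c = 8};   /* [N, H, W, C_IN]       */
    const cmsis_nn_dims filter_dims = {.n = 16, .h = 3, .w = 3, .c = 8};    /* [C_OUT, HK, WK, C_IN] */
    const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1, .w = 1, .c = 16};
    const cmsis_nn_dims output_dims = {.n = 1, .h = 16, .w = 16, .c = 16};  /* [N, H, W, C_OUT]      */

    cmsis_nn_conv_params conv_params;
    conv_params.input_offset  = 128;    /* range [-127, 128], assumed input zero point of -128 */
    conv_params.output_offset = -128;   /* range [-128, 127] */
    conv_params.stride.w   = 1; conv_params.stride.h   = 1;
    conv_params.padding.w  = 1; conv_params.padding.h  = 1;
    conv_params.dilation.w = 1; conv_params.dilation.h = 1;
    conv_params.activation.min = -128;
    conv_params.activation.max = 127;

    cmsis_nn_per_channel_quant_params quant_params;
    quant_params.multiplier = output_mult;   /* C_OUT requantization multipliers */
    quant_params.shift      = output_shift;  /* C_OUT requantization shifts      */

    cmsis_nn_context ctx = {.buf = scratch_buf, .size = scratch_bytes};

    return arm_convolve_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
                           &filter_dims, filter_data, &bias_dims, bias_data,
                           &output_dims, output_data);
}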

References cmsis_nn_conv_params::activation, arm_memcpy_q7(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_s8(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_1_x_n_s8(), and arm_convolve_wrapper_s8().

int32_t arm_convolve_s8_get_buffer_size ( const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims 
)
Parameters
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
Returns
The function returns the required buffer size in bytes

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8_get_buffer_size().

arm_status arm_convolve_wrapper_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context that contains the additional buffer if required by the function. arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required
[in] conv_params  Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [N, H, W, C_OUT]
[out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SIZE_MISMATCH if the argument constraints fail, or ARM_MATH_SUCCESS on successful completion.
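
A minimal sketch of the recommended calling pattern, assuming the tensors and parameter structures are prepared as in the arm_convolve_s8() example above; sizing the context with arm_convolve_wrapper_s8_get_buffer_size() lets the wrapper dispatch to whichever specialized kernel applies. The scratch-handling policy is an assumption.

#include "arm_nnfunctions.h"

static arm_status run_conv_wrapper_example(const cmsis_nn_conv_params *conv_params,
                                           const cmsis_nn_per_channel_quant_params *quant_params,
                                           const cmsis_nn_dims *input_dims, const q7_t *input_data,
                                           const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                           const cmsis_nn_dims *bias_dims, const int32_t *bias_data,
                                           const cmsis_nn_dims *output_dims, q7_t *output_data,
                                           void *scratch_buf, int32_t scratch_bytes)
{
    const int32_t needed = arm_convolve_wrapper_s8_get_buffer_size(conv_params, input_dims,
                                                                   filter_dims, output_dims);
    if (needed > scratch_bytes)
    {
        return ARM_MATH_ARGUMENT_ERROR; /* scratch area too small for this layer */
    }

    cmsis_nn_context ctx = {.buf = scratch_buf, .size = needed};
    return arm_convolve_wrapper_s8(&ctx, conv_params, quant_params, input_dims, input_data,
                                   filter_dims, filter_data, bias_dims, bias_data,
                                   output_dims, output_data);
}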

References arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), arm_convolve_s8(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

int32_t arm_convolve_wrapper_s8_get_buffer_size ( const cmsis_nn_conv_params *conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *output_dims 
)
Parameters
[in] conv_params  Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in] input_dims  Input (activation) dimensions. Format: [N, H, W, C_IN]
[in] filter_dims  Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in] output_dims  Output tensor dimensions. Format: [N, H, W, C_OUT]
Returns
The function returns the required buffer size in bytes

References arm_convolve_1_x_n_s8_get_buffer_size(), arm_convolve_1x1_s8_fast_get_buffer_size(), arm_convolve_s8_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

arm_status arm_depthwise_conv_3x3_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Returns
The function returns one of the following:
  ARM_MATH_SIZE_MISMATCH - Unsupported dimension of tensors
  ARM_MATH_ARGUMENT_ERROR - Unsupported pad size along the x axis
  ARM_MATH_SUCCESS - Successful operation
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. Number of input channels equals number of output channels
    2. Filter height and width equal 3
    3. Padding along x is either 0 or 1.

References cmsis_nn_dw_conv_params::activation, arm_nn_read_q7x4(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s8().

arm_status arm_depthwise_conv_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. The optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required.
[in] dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used.
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [1, H, W, C_OUT]
[in,out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SUCCESS
  • Supported framework: TensorFlow Lite
  • q7_t is used as the data type even though it is s8 data. This is done to be consistent with existing APIs.
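
The sketch below fills in cmsis_nn_dw_conv_params for a depth-multiplier-2 layer. Shapes, offsets, and the assumption that the basic (reference) function needs no scratch buffer are illustrative and should be checked against the function definition as noted above.

#include <stddef.h>
#include "arm_nnfunctions.h"

static arm_status run_depthwise_s8_example(const cmsis_nn_per_channel_quant_params *quant_params,
                                           const q7_t *input_data, const q7_t *filter_data,
                                           const int32_t *bias_data, q7_t *output_data)
{
    const cmsis_nn_dims input_dims  = {.n = 1, .h = 16, .w = 16, .c = 8};   /* [1, H, W, C_IN]  */
    const cmsis_nn_dims filter_dims = {.n = 1, .h = 3, .w = 3, .c = 16};    /* [1, H, W, C_OUT] */
    const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1, .w = 1, .c = 16};
    const cmsis_nn_dims output_dims = {.n = 1, .h = 16, .w = 16, .c = 16};  /* [1, H, W, C_OUT] */

    cmsis_nn_dw_conv_params dw_params;
    dw_params.ch_mult       = 2;       /* C_OUT = C_IN * ch_mult */
    dw_params.input_offset  = 128;
    dw_params.output_offset = -128;
    dw_params.stride.w   = 1; dw_params.stride.h   = 1;
    dw_params.padding.w  = 1; dw_params.padding.h  = 1;
    dw_params.dilation.w = 1; dw_params.dilation.h = 1;   /* not used by this function */
    dw_params.activation.min = -128;
    dw_params.activation.max = 127;

    cmsis_nn_context ctx = {.buf = NULL, .size = 0};      /* assumed: no scratch buffer needed here */
    return arm_depthwise_conv_s8(&ctx, &dw_params, quant_params, &input_dims, input_data,
                                 &filter_dims, filter_data, &bias_dims, bias_data,
                                 &output_dims, output_data);
}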

References cmsis_nn_dw_conv_params::activation, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s8_generic(), depthwise_conv_s8_mult_4(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8().

arm_status arm_depthwise_conv_s8_opt ( const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Returns
The function returns one of the following:
  ARM_MATH_SIZE_MISMATCH - input channel != output channel or ch_mult != 1
  ARM_MATH_SUCCESS - Successful operation
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following buffers if MVE optimizations (Arm Helium Technology) are used:
  • Output shift
  • Output multiplier
  • Output bias
  • kernel
  • Supported framework: TensorFlow Lite
  • The following constraints on the arguments apply
    1. Number of input channels equals number of output channels or ch_mult equals 1
  • q7_t is used as the data type even though it is s8 data. This is done to be consistent with existing APIs.
  • Recommended when the number of channels is 4 or greater.
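
A short sketch of providing the scratch buffer that the optimized variant needs; the static upper bound and error handling are assumptions for illustration.

#include "arm_nnfunctions.h"

#define DW_SCRATCH_BYTES 2048                 /* assumed upper bound for this model's layers */
static int8_t dw_scratch[DW_SCRATCH_BYTES];

static arm_status run_depthwise_s8_opt_example(const cmsis_nn_dw_conv_params *dw_params,
                                               const cmsis_nn_per_channel_quant_params *quant_params,
                                               const cmsis_nn_dims *input_dims, const q7_t *input_data,
                                               const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                               const cmsis_nn_dims *bias_dims, const int32_t *bias_data,
                                               const cmsis_nn_dims *output_dims, q7_t *output_data)
{
    /* Constraint from above: number of input channels equals number of output channels. */
    const int32_t needed = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
    if (needed > DW_SCRATCH_BYTES)
    {
        return ARM_MATH_ARGUMENT_ERROR;
    }
    cmsis_nn_context ctx = {.buf = dw_scratch, .size = needed};
    return arm_depthwise_conv_s8_opt(&ctx, dw_params, quant_params, input_dims, input_data,
                                     filter_dims, filter_data, bias_dims, bias_data,
                                     output_dims, output_data);
}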

References cmsis_nn_dw_conv_params::activation, arm_depthwise_conv_s8(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_depthwise_conv_nt_t_padded_s8(), arm_nn_depthwise_conv_nt_t_s8(), arm_nn_read_q15x2(), arm_nn_read_q7x4(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s8().

int32_t arm_depthwise_conv_s8_opt_get_buffer_size ( const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims 
)
Parameters
[in] input_dims  Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used.
[in] filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
Returns
The function returns required buffer size in bytes

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s8_get_buffer_size().

arm_status arm_depthwise_conv_u8_basic_ver1 ( const uint8_t *  input,
const uint16_t  input_x,
const uint16_t  input_y,
const uint16_t  input_ch,
const uint8_t *  kernel,
const uint16_t  kernel_x,
const uint16_t  kernel_y,
const int16_t  ch_mult,
const int16_t  pad_x,
const int16_t  pad_y,
const int16_t  stride_x,
const int16_t  stride_y,
const int16_t  dilation_x,
const int16_t  dilation_y,
const int32_t *  bias,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_offset,
uint8_t *  output,
const uint16_t  output_x,
const uint16_t  output_y,
const int32_t  output_activation_min,
const int32_t  output_activation_max,
const int32_t  output_shift,
const int32_t  output_mult 
)

uint8 depthwise convolution function with asymmetric quantization. Unless specified otherwise, arguments are mandatory.

Parameters
[in] input  Pointer to input tensor
[in] input_x  Width of input tensor
[in] input_y  Height of input tensor
[in] input_ch  Channels in input tensor
[in] kernel  Pointer to kernel weights
[in] kernel_x  Width of kernel
[in] kernel_y  Height of kernel
[in] ch_mult  Channel multiplier, i.e. the number of output channels per input channel
[in] pad_x  Padding size x
[in] pad_y  Padding size y
[in] stride_x  Convolution stride along the width
[in] stride_y  Convolution stride along the height
[in] dilation_x  Dilation along width. Not used and intended for future enhancement.
[in] dilation_y  Dilation along height. Not used and intended for future enhancement.
[in] bias  Pointer to optional bias values. If no bias is available, NULL is expected
[in] input_offset  Input tensor zero offset
[in] filter_offset  Kernel tensor zero offset
[in] output_offset  Output tensor zero offset
[in,out] output  Pointer to output tensor
[in] output_x  Width of output tensor
[in] output_y  Height of output tensor
[in] output_activation_min  Minimum value to clamp the output to. Range : {0, 255}
[in] output_activation_max  Maximum value to clamp the output to. Range : {0, 255}
[in] output_shift  Amount of right-shift for output
[in] output_mult  Output multiplier for requantization
Returns
The function returns one of the following:
  ARM_MATH_SIZE_MISMATCH - Not supported dimension of tensors
  ARM_MATH_SUCCESS - Successful operation
  ARM_MATH_ARGUMENT_ERROR - Implementation not available

References depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().

arm_status arm_depthwise_conv_wrapper_s8 ( const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *output_dims,
q7_t *  output_data 
)
Parameters
[in,out] ctx  Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required.
[in] dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127]
[in] quant_params  Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in] input_dims  Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in] input_data  Input (activation) data pointer. Data type: int8
[in] filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in] filter_data  Filter data pointer. Data type: int8
[in] bias_dims  Bias tensor dimensions. Format: [C_OUT]
[in] bias_data  Bias data pointer. Data type: int32
[in] output_dims  Output tensor dimensions. Format: [1, H, W, C_OUT]
[in,out] output_data  Output data pointer. Data type: int8
Returns
The function returns ARM_MATH_SUCCESS - Successful completion.

References arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt(), cmsis_nn_dw_conv_params::ch_mult, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, and cmsis_nn_dims::w.

int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size ( const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *output_dims 
)
Parameters
[in] dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127]
[in] input_dims  Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in] filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in] output_dims  Output tensor dimensions. Format: [1, H, W, C_OUT]
Returns
Size of additional memory required for optimizations in bytes.

References arm_depthwise_conv_s8_opt_get_buffer_size(), cmsis_nn_dims::c, and cmsis_nn_dims::n.

arm_status arm_depthwise_separable_conv_HWC_q7 ( const q7_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in  input tensor dimension
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel  filter kernel size
[in] padding  padding sizes
[in] stride  convolution stride
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out  output tensor dimension
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in equals ch_im_out

Implementation: There are three nested loops here:
  • Inner loop: calculate each output value with MAC instructions over an accumulator
  • Mid loop: loop over the different output channels
  • Outer loop: loop over the different output positions (x, y)
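
A scalar sketch of that loop structure for the depthwise part, assuming HWC input and weight layout and ignoring padding, bias, rounding, and saturation; it is illustrative pseudocode in C, not the library's optimized implementation.

#include <stdint.h>

static void depthwise_loops_example(const int8_t *in, int8_t *out, const int8_t *wt,
                                    int32_t dim_im, int32_t ch, int32_t dim_kernel,
                                    int32_t stride, int32_t out_shift)
{
    const int32_t dim_out = (dim_im - dim_kernel) / stride + 1; /* "valid" output size */
    for (int32_t oy = 0; oy < dim_out; oy++)
    {
        for (int32_t ox = 0; ox < dim_out; ox++)                /* outer: output (x, y) */
        {
            for (int32_t c = 0; c < ch; c++)                    /* mid: output channel  */
            {
                int32_t acc = 0;
                for (int32_t ky = 0; ky < dim_kernel; ky++)     /* inner: MAC over the  */
                {                                               /* kernel window        */
                    for (int32_t kx = 0; kx < dim_kernel; kx++)
                    {
                        const int32_t iy = oy * stride + ky;
                        const int32_t ix = ox * stride + kx;
                        acc += in[(iy * dim_im + ix) * ch + c] *
                               wt[(ky * dim_kernel + kx) * ch + c];
                    }
                }
                out[(oy * dim_out + ox) * ch + c] = (int8_t)(acc >> out_shift);
            }
        }
    }
}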

References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare ( const q7_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in] Im_in  pointer to input tensor
[in] dim_im_in_x  input tensor dimension x
[in] dim_im_in_y  input tensor dimension y
[in] ch_im_in  number of input tensor channels
[in] wt  pointer to kernel weights
[in] ch_im_out  number of filters, i.e., output tensor channels
[in] dim_kernel_x  filter kernel size x
[in] dim_kernel_y  filter kernel size y
[in] padding_x  padding sizes x
[in] padding_y  padding sizes y
[in] stride_x  convolution stride x
[in] stride_y  convolution stride y
[in] bias  pointer to bias
[in] bias_shift  amount of left-shift for bias
[in] out_shift  amount of right-shift for output
[in,out] Im_out  pointer to output tensor
[in] dim_im_out_x  output tensor dimension x
[in] dim_im_out_y  output tensor dimension y
[in,out] bufferA  pointer to buffer space for input
[in,out] bufferB  pointer to buffer space for output
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.

This function is the version with the full list of optimization tricks, but with the constraint that ch_im_in equals ch_im_out.

References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

static void depthwise_conv_s8_generic ( const q7_t *  input,
const uint16_t  input_batches,
const uint16_t  input_x,
const uint16_t  input_y,
const uint16_t  input_ch,
const q7_t *  kernel,
const uint16_t  output_ch,
const uint16_t  ch_mult,
const uint16_t  kernel_x,
const uint16_t  kernel_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  output,
const int32_t *  output_shift,
const int32_t *  output_mult,
const uint16_t  output_x,
const uint16_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static

References arm_nn_requantize(), MAX, and MIN.

Referenced by arm_depthwise_conv_s8().

static void depthwise_conv_s8_mult_4 ( const int8_t *  input,
const int32_t  input_x,
const int32_t  input_y,
const int32_t  input_ch,
const int8_t *  kernel,
const int32_t  output_ch,
const int32_t  ch_mult,
const int32_t  kernel_x,
const int32_t  kernel_y,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
int8_t *  output,
const int32_t *  output_shift,
const int32_t *  output_mult,
const int32_t  output_x,
const int32_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static

References arm_nn_requantize(), MAX, and MIN.

Referenced by arm_depthwise_conv_s8().

static void depthwise_conv_u8_generic ( const uint8_t *  input,
const int32_t  input_x,
const int32_t  input_y,
const int32_t  input_ch,
const uint8_t *  kernel,
const int32_t  output_ch,
const int32_t  ch_mult,
const int32_t  kernel_x,
const int32_t  kernel_y,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
uint8_t *  output,
const int32_t  output_shift,
const int32_t  output_mult,
const int32_t  output_x,
const int32_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static
static void depthwise_conv_u8_mult_4 ( const uint8_t *  input,
const int32_t  input_x,
const int32_t  input_y,
const int32_t  input_ch,
const uint8_t *  kernel,
const int32_t  output_ch,
const int32_t  ch_mult,
const int32_t  kernel_x,
const int32_t  kernel_y,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
uint8_t *  output,
const int32_t  output_shift,
const int32_t  output_mult,
const int32_t  output_x,
const int32_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static