CMSIS-NN  Version 4.0.0
CMSIS NN Software Library
Convolution Functions

Macros

#define USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims)
 

Functions

arm_cmsis_nn_status arm_convolve_1_x_n_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 1xn convolution More...
 
int32_t arm_convolve_1_x_n_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required additional buffer size for 1xn convolution. More...
 
arm_cmsis_nn_status arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 version of 1x1 convolution (non-square shape) More...
 
arm_cmsis_nn_status arm_convolve_1x1_s8_fast (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 Fast s8 version for 1x1 convolution (non-square shape) More...
 
int32_t arm_convolve_1x1_s8_fast_get_buffer_size (const cmsis_nn_dims *input_dims)
 Get the required buffer size for arm_convolve_1x1_s8_fast. More...
 
arm_cmsis_nn_status arm_convolve_fast_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data)
 Optimized s16 convolution function. More...
 
int32_t arm_convolve_fast_s16_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for fast s16 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q15 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function (non-square shape) More...
 
arm_cmsis_nn_status arm_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function (non-square shape) More...
 
arm_cmsis_nn_status arm_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function (non-square shape) More...
 
arm_cmsis_nn_status arm_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 version of convolution for RGB image. More...
 
arm_cmsis_nn_status arm_convolve_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data)
 Basic s16 convolution function. More...
 
int32_t arm_convolve_s16_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for s16 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 Basic s8 convolution function. More...
 
int32_t arm_convolve_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for s8 convolution function. More...
 
arm_cmsis_nn_status arm_convolve_wrapper_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data)
 s16 convolution layer wrapper function; selects and calls the optimal s16 convolution kernel available in CMSIS-NN. More...
 
int32_t arm_convolve_wrapper_s16_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get the required buffer size for arm_convolve_wrapper_s16. More...
 
arm_cmsis_nn_status arm_convolve_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data)
 s8 convolution layer wrapper function; selects and calls the optimal s8 convolution kernel available in CMSIS-NN. More...
 
int32_t arm_convolve_wrapper_s8_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get the required buffer size for arm_convolve_wrapper_s8. More...
 
arm_cmsis_nn_status arm_depthwise_conv_3x3_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function argument details. More...
 
arm_cmsis_nn_status arm_depthwise_conv_fast_s16 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int64_t *bias, const cmsis_nn_dims *output_dims, q15_t *output)
 Optimized s16 depthwise convolution function with the constraint that in_channel equals out_channel. Refer to arm_depthwise_conv_s16() for function argument details. More...
 
int32_t arm_depthwise_conv_fast_s16_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for optimized s16 depthwise convolution function with constraint that in_channel equals out_channel. More...
 
static void __attribute__ ((unused))
 
static void depthwise_conv_s16_generic_s16 (const int16_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int64_t *bias, int16_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
 
arm_cmsis_nn_status arm_depthwise_conv_s16 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int64_t *bias, const cmsis_nn_dims *output_dims, q15_t *output)
 Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions. More...
 
 __attribute__ ((optimize("no-unroll-loops")))
 
static void depthwise_conv_s8_generic (const q7_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const q7_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
 
arm_cmsis_nn_status arm_depthwise_conv_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. More...
 
arm_cmsis_nn_status arm_depthwise_conv_s8_opt (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Optimized s8 depthwise convolution function with the constraint that in_channel equals out_channel. Refer to arm_depthwise_conv_s8() for function argument details. More...
 
int32_t arm_depthwise_conv_s8_opt_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. More...
 
static void depthwise_conv_u8_mult_4 (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
static void depthwise_conv_u8_generic (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 
arm_cmsis_nn_status arm_depthwise_conv_u8_basic_ver1 (const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t output_shift, const int32_t output_mult)
 uint8 depthwise convolution function with asymmetric quantization More...
 
arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input, const cmsis_nn_dims *filter_dims, const q7_t *filter, const cmsis_nn_dims *bias_dims, const int64_t *bias, const cmsis_nn_dims *output_dims, q15_t *output)
 Wrapper function to pick the right optimized s16 depthwise convolution function. More...
 
int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size (const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get size of additional buffer required by arm_depthwise_conv_wrapper_s16() More...
 
arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *filter, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output)
 Wrapper function to pick the right optimized s8 depthwise convolution function. More...
 
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size (const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims)
 Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() More...
 
arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function. More...
 
arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function (non-square shape) More...
 

Description

Collection of convolution and depthwise convolution functions and their variants.

The convolution is implemented in two steps: im2col and GEMM.

im2col converts each patch of the image into a column. After im2col, the convolution is computed as a matrix-matrix multiplication.

To reduce the memory footprint, im2col is performed partially: in each iteration only a few columns (i.e., patches) are generated and multiplied with GEMM kernels similar to the CMSIS-DSP arm_mat_mult functions.
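
As an illustration of the im2col idea only (not the library's internal code), the following sketch copies the receptive field of a single output position of an NHWC int8 tensor into one contiguous column; a GEMM over such columns against the reshaped filter matrix then produces the convolution output. All names and the zero padding value are assumptions for the example.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical im2col of one output position (out_x, out_y) from an NHWC int8
     * input into a column of kernel_y * kernel_x * ch_in bytes. Padded positions
     * are filled with 0 here (a real kernel would use the input zero point). */
    static void im2col_one_patch(const int8_t *input, int32_t in_x, int32_t in_y, int32_t ch_in,
                                 int32_t kernel_x, int32_t kernel_y, int32_t pad_x, int32_t pad_y,
                                 int32_t stride_x, int32_t stride_y,
                                 int32_t out_x, int32_t out_y, int8_t *column)
    {
        for (int32_t ky = 0; ky < kernel_y; ky++)
        {
            for (int32_t kx = 0; kx < kernel_x; kx++)
            {
                const int32_t y = out_y * stride_y - pad_y + ky;
                const int32_t x = out_x * stride_x - pad_x + kx;
                if (y < 0 || y >= in_y || x < 0 || x >= in_x)
                {
                    memset(column, 0, ch_in); /* padded region */
                }
                else
                {
                    memcpy(column, &input[(y * in_x + x) * ch_in], ch_in);
                }
                column += ch_in;
            }
        }
    }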

Macro Definition Documentation

#define USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims)

Function Documentation

static void __attribute__((unused))
__attribute__((optimize("no-unroll-loops")))

References arm_nn_requantize(), MAX, and MIN.

arm_cmsis_nn_status arm_convolve_1_x_n_s8 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
q7_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int8
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataOptional bias data pointer. Data type: int32
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int8
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR if argument constraints fail, or ARM_CMSIS_NN_SUCCESS on successful completion.
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. input_dims->n equals 1
    2. output_dims->w is a multiple of 4
    3. Explicit constraints (since it is for 1xN convolution):
       - input_dims->h equals 1
       - output_dims->h equals 1
       - filter_dims->h equals 1
      Todo:
      Remove constraint on output_dims->w to make the function generic.
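
For illustration only, a 1xN layer satisfying the constraints above might be described with dimensions along these lines (all values are placeholders, assuming 'same' padding and stride 1):

    /* Hypothetical dimensions for a 1xN (here 1x5) convolution layer. */
    cmsis_nn_dims input_dims  = {.n = 1,  .h = 1, .w = 128, .c = 8};  /* N = 1 and H = 1 (constraints 1 and 3)      */
    cmsis_nn_dims filter_dims = {.n = 16, .h = 1, .w = 5,   .c = 8};  /* [C_OUT, 1, WK, C_IN], filter height = 1    */
    cmsis_nn_dims output_dims = {.n = 1,  .h = 1, .w = 128, .c = 16}; /* output W is a multiple of 4 (constraint 2) */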

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_convolve_s8(), arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_dims::h, MAX, MIN, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8().

int32_t arm_convolve_1_x_n_s8_get_buffer_size (const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims)
Parameters
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension
Returns
The function returns the required buffer size (bytes)

References arm_convolve_s8_get_buffer_size().

Referenced by arm_convolve_wrapper_s8_get_buffer_size().

arm_cmsis_nn_status arm_convolve_1x1_HWC_q7_fast_nonsquare ( const q7_t Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimension x
[in]dim_im_in_yinput tensor dimension y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR if argument constraints fail, or ARM_CMSIS_NN_SUCCESS on successful completion.

This function implements convolution with a 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets after depthwise separable convolution.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_1x1_s8_fast (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
q7_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int8
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataOptional bias data pointer. Data type: int32
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int8
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR if argument constraints fail, or ARM_CMSIS_NN_SUCCESS on successful completion.
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. input_dims->c is a multiple of 4
    2. conv_params->padding.w = conv_params->padding.h = 0
    3. conv_params->stride.w = conv_params->stride.h = 1
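
A minimal sketch, using a hypothetical helper name, of testing these constraints before taking the 1x1 fast path; the fallback to the generic arm_convolve_s8() mirrors what arm_convolve_wrapper_s8() is documented to do:

    /* Hypothetical eligibility check for the 1x1 fast path, based on the constraints above. */
    static int can_use_1x1_s8_fast(const cmsis_nn_conv_params *conv_params,
                                   const cmsis_nn_dims *input_dims,
                                   const cmsis_nn_dims *filter_dims)
    {
        return (input_dims->c % 4 == 0) &&
               (filter_dims->w == 1) && (filter_dims->h == 1) &&
               (conv_params->padding.w == 0) && (conv_params->padding.h == 0) &&
               (conv_params->stride.w == 1) && (conv_params->stride.h == 1);
    }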

References cmsis_nn_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_nt_t_s8(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s8().

int32_t arm_convolve_1x1_s8_fast_get_buffer_size (const cmsis_nn_dims *input_dims)
Parameters
[in]input_dimsInput (activation) dimensions
Returns
The function returns the required buffer size in bytes

Referenced by arm_convolve_wrapper_s8_get_buffer_size().

arm_cmsis_nn_status arm_convolve_fast_s16 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q15_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
q15_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int16
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not exceed 512
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataOptional bias data pointer. Data type: int64
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int16
Returns
The function returns ARM_CMSIS_NN_SUCCESS
 1. Supported framework: TensorFlow Lite micro
 2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
 3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
 4. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
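
The kernel-volume limit in note 4 is what separates this kernel from the generic arm_convolve_s16(). Below is a hedged sketch of that selection (which arm_convolve_wrapper_s16() is documented to perform); ctx, the parameter structs and the data pointers are assumed to have been set up elsewhere.

    /* Illustrative selection between the fast and the generic s16 convolution kernels. */
    const int32_t kernel_volume = filter_dims.w * filter_dims.h * input_dims.c;
    arm_cmsis_nn_status status;

    if (kernel_volume < 512) /* documented limit for arm_convolve_fast_s16() */
    {
        status = arm_convolve_fast_s16(&ctx, &conv_params, &quant_params, &input_dims, input_data,
                                       &filter_dims, filter_data, &bias_dims, bias_data,
                                       &output_dims, output_data);
    }
    else
    {
        status = arm_convolve_s16(&ctx, &conv_params, &quant_params, &input_dims, input_data,
                                  &filter_dims, filter_data, &bias_dims, bias_data,
                                  &output_dims, output_data);
    }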

References cmsis_nn_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_convolve_s8_get_buffer_size(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_mat_mult_kernel_s16(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_nn_requantize_s64(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, REDUCE_MULTIPLIER, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s16().

int32_t arm_convolve_fast_s16_get_buffer_size (const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims)
Parameters
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
Returns
The function returns the required buffer size (bytes)

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s16_get_buffer_size().

arm_cmsis_nn_status arm_convolve_HWC_q15_basic ( const q15_t Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q15_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q15_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t Im_out,
const uint16_t  dim_im_out,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimension
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns ARM_CMSIS_NN_SUCCESS

References ARM_CMSIS_NN_SUCCESS, arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q15_fast ( const q15_t Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q15_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q15_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t Im_out,
const uint16_t  dim_im_out,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimension
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of checking the input argument constraints.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 2, ch_im_out is a multiple of 2, and dim_im_out is a multiple of 2.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q15_fast_nonsquare ( const q15_t Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q15_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q15_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q15_t Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimension x
[in]dim_im_in_yinput tensor dimension y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of checking the input argument constraints.

Buffer size:

bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y

bufferB size: 0

Input dimension constraints:

ch_im_in is a multiple of 2

ch_im_out is a multiple of 2
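
Based on the buffer sizes stated above, a caller would typically provide scratch memory along these lines (the channel and kernel values are placeholders):

    /* Illustrative scratch buffers for the non-square Q15 convolution above. */
    #define CH_IM_IN     32
    #define DIM_KERNEL_X 3
    #define DIM_KERNEL_Y 3

    static q15_t bufferA[2 * CH_IM_IN * DIM_KERNEL_X * DIM_KERNEL_Y]; /* im2col scratch, sized as stated above */
    static q7_t *bufferB = NULL;                                      /* documented as size 0                  */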

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_read_q15x2_ia(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q7_basic ( const q7_t Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimension
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns ARM_CMSIS_NN_SUCCESS

References ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q7_basic_nonsquare ( const q7_t Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimension x
[in]dim_im_in_yinput tensor dimension y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns ARM_CMSIS_NN_SUCCESS

References ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q7_fast ( const q7_t Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimension
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of checking the input argument constraints.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q7_fast_nonsquare ( const q7_t Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimension x
[in]dim_im_in_yinput tensor dimension y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of checking the input argument constraints.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4 and ch_im_out is a multiple of 2.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

arm_cmsis_nn_status arm_convolve_HWC_q7_RGB ( const q7_t Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t Im_out,
const uint16_t  dim_im_out,
q15_t bufferA,
q7_t bufferB 
)
Parameters
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimension
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of checking the input argument constraints.

This kernel is written exclusively for convolutions where ch_im_in equals 3, which applies to the first layer of CNNs whose input is an RGB image.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_memcpy_q7(), arm_memset_q7(), arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_nn_read_q7x4(), arm_nnword::half_words, NN_ROUND, and arm_nnword::word.

arm_cmsis_nn_status arm_convolve_s16 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q15_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
q15_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int16
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataOptional bias data pointer. Data type: int64
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int16
Returns
The function returns ARM_CMSIS_NN_SUCCESS
  1. Supported framework: TensorFlow Lite micro
  2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
  3. Additional memory is required for optimization. Refer to argument 'ctx' for details.

References cmsis_nn_conv_params::activation, ARM_CMSIS_NN_SUCCESS, arm_nn_requantize_s64(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, REDUCE_MULTIPLIER, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_wrapper_s16().

int32_t arm_convolve_s16_get_buffer_size (const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims)
Parameters
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
Returns
The function returns the required buffer size (bytes)

Referenced by arm_convolve_wrapper_s16_get_buffer_size().

arm_cmsis_nn_status arm_convolve_s8 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
q7_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int8
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataOptional bias data pointer. Data type: int32
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int8
Returns
The function returns ARM_CMSIS_NN_SUCCESS
  1. Supported framework: TensorFlow Lite micro
  2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
  3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
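
To make the tensor formats above concrete, here is a hedged example of how the dimension structures and the scratch size from note 3 might be set up; all numeric values are placeholders.

    /* Illustrative dimension setup for arm_convolve_s8(); 3x3 kernel, no padding, stride 1. */
    const cmsis_nn_dims input_dims  = {.n = 1, .h = 32, .w = 32, .c = 16}; /* [N, H, W, C_IN]       */
    const cmsis_nn_dims filter_dims = {.n = 8, .h = 3,  .w = 3,  .c = 16}; /* [C_OUT, HK, WK, C_IN] */
    const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1,  .w = 1,  .c = 8};  /* [C_OUT]               */
    const cmsis_nn_dims output_dims = {.n = 1, .h = 30, .w = 30, .c = 8};  /* [N, H, W, C_OUT]      */

    /* The additional memory mentioned in note 3 is provided by the caller through ctx. */
    cmsis_nn_context ctx;
    ctx.size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);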

References cmsis_nn_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_convolve_s8_get_buffer_size(), arm_memcpy_q7(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_s8(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_convolve_1_x_n_s8(), and arm_convolve_wrapper_s8().

int32_t arm_convolve_s8_get_buffer_size (const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims)
Parameters
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
Returns
The function returns the required buffer size (bytes)

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_convolve_1_x_n_s8_get_buffer_size(), arm_convolve_fast_s16(), arm_convolve_s8(), and arm_convolve_wrapper_s8_get_buffer_size().

arm_cmsis_nn_status arm_convolve_wrapper_s16 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q15_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
q15_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int16
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataBias data pointer. Data type: int64
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int16
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR if argument constraints fail, or ARM_CMSIS_NN_SUCCESS on successful completion.

References arm_convolve_fast_s16(), arm_convolve_s16(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_tile::w, and cmsis_nn_dims::w.

int32_t arm_convolve_wrapper_s16_get_buffer_size (const cmsis_nn_conv_params *conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *output_dims)
Parameters
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used
[in]input_dimsInput (activation) dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
Returns
The function returns the required buffer size (bytes)

References arm_convolve_fast_s16_get_buffer_size(), arm_convolve_s16_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_tile::w, and cmsis_nn_dims::w.

arm_cmsis_nn_status arm_convolve_wrapper_s8 (const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
q7_t *output_data)
Parameters
[in,out]ctxFunction context that contains the additional buffer if required by the function. arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer ,if applicable, for security reasons.
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in]quant_paramsPer-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]input_dimsInput (activation) tensor dimensions. Format: [N, H, W, C_IN]
[in]input_dataInput (activation) data pointer. Data type: int8
[in]filter_dimsFilter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]filter_dataFilter data pointer. Data type: int8
[in]bias_dimsBias tensor dimensions. Format: [C_OUT]
[in]bias_dataBias data pointer. Data type: int32
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
[out]output_dataOutput data pointer. Data type: int8
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR if argument constraints fail, or ARM_CMSIS_NN_SUCCESS on successful completion.
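
As a usage illustration (not taken from the library's own examples), the sketch below queries the wrapper's scratch requirement and then invokes it. conv_params, quant_params, the dims structs and the data pointers are assumed to be populated as described in the parameter table above; malloc/free require <stdlib.h>.

    /* Hedged usage sketch for the s8 convolution wrapper. */
    cmsis_nn_context ctx;
    ctx.size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
    ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;

    arm_cmsis_nn_status status = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params,
                                                         &input_dims, input_data,
                                                         &filter_dims, filter_data,
                                                         &bias_dims, bias_data,
                                                         &output_dims, output_data);
    if (status != ARM_CMSIS_NN_SUCCESS)
    {
        /* handle ARM_CMSIS_NN_ARG_ERROR */
    }
    free(ctx.buf);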

References arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), arm_convolve_s8(), cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

int32_t arm_convolve_wrapper_s8_get_buffer_size (const cmsis_nn_conv_params *conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *output_dims)
Parameters
[in]conv_paramsConvolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127]
[in]input_dimsInput (activation) dimensions. Format: [N, H, W, C_IN]
[in]filter_dimsFilter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions
[in]output_dimsOutput tensor dimensions. Format: [N, H, W, C_OUT]
Returns
The function returns the required buffer size (bytes)

References arm_convolve_1_x_n_s8_get_buffer_size(), arm_convolve_1x1_s8_fast_get_buffer_size(), arm_convolve_s8_get_buffer_size(), cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

arm_cmsis_nn_status arm_depthwise_conv_3x3_s8 (const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q7_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
q7_t *output_data)
Returns
The function returns one of the following:
  • ARM_CMSIS_NN_ARG_ERROR - Unsupported dimension of tensors, or unsupported pad size along the x axis
  • ARM_CMSIS_NN_SUCCESS - Successful operation
  • Supported framework : TensorFlow Lite Micro
  • The following constraints on the arguments apply
    1. Number of input channels equals number of output channels
    2. Filter height and width equal 3
    3. Padding along x is either 0 or 1.
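
A minimal sketch, using a hypothetical helper name, of checking these constraints before calling the 3x3 kernel; the actual selection is performed by arm_depthwise_conv_wrapper_s8():

    /* Hypothetical eligibility check for the specialized 3x3 s8 depthwise kernel. */
    static int can_use_dw_3x3_s8(const cmsis_nn_dw_conv_params *dw_conv_params,
                                 const cmsis_nn_dims *input_dims,
                                 const cmsis_nn_dims *filter_dims,
                                 const cmsis_nn_dims *output_dims)
    {
        return (input_dims->c == output_dims->c) &&              /* in_channel == out_channel */
               (filter_dims->w == 3) && (filter_dims->h == 3) && /* 3x3 kernel                */
               (dw_conv_params->padding.w <= 1);                 /* pad along x is 0 or 1     */
    }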

References cmsis_nn_dw_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_read_q7x4(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s8().

arm_cmsis_nn_status arm_depthwise_conv_fast_s16 (const cmsis_nn_context *ctx,
const cmsis_nn_dw_conv_params *dw_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const q15_t *input_data,
const cmsis_nn_dims *filter_dims,
const q7_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
q15_t *output_data)
Returns
The function returns one of the following:
  • ARM_CMSIS_NN_ARG_ERROR - ctx->buf == NULL while arm_depthwise_conv_fast_s16_get_buffer_size() > 0, or input channel != output channel, or ch_mult != 1
  • ARM_CMSIS_NN_SUCCESS - Successful operation

  • Supported framework: TensorFlow Lite
  • The following constraints on the arguments apply
    1. Number of input channels equals number of output channels or ch_mult equals 1
  • q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
  • Recommended when the number of channels is 4 or greater.
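
A hedged sketch of providing the scratch buffer and falling back to the generic arm_depthwise_conv_s16() when the channel constraint is not met (this selection is what arm_depthwise_conv_wrapper_s16() is documented to perform); all variable names are illustrative and assumed to be initialized elsewhere, and malloc requires <stdlib.h>.

    /* Illustrative scratch allocation and kernel selection for s16 depthwise convolution. */
    cmsis_nn_context ctx;
    ctx.size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
    ctx.buf  = (ctx.size > 0) ? malloc(ctx.size) : NULL;

    arm_cmsis_nn_status status;
    if (input_dims.c == output_dims.c) /* constraint of the fast kernel */
    {
        status = arm_depthwise_conv_fast_s16(&ctx, &dw_conv_params, &quant_params, &input_dims, input_data,
                                             &filter_dims, filter_data, &bias_dims, bias_data,
                                             &output_dims, output_data);
    }
    else
    {
        status = arm_depthwise_conv_s16(&ctx, &dw_conv_params, &quant_params, &input_dims, input_data,
                                        &filter_dims, filter_data, &bias_dims, bias_data,
                                        &output_dims, output_data);
    }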

References cmsis_nn_dw_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_depthwise_conv_fast_s16_get_buffer_size(), arm_depthwise_conv_s16(), arm_memcpy_q15(), arm_nn_depthwise_conv_nt_t_s16(), arm_nn_read_q15x2(), arm_nn_read_q7x4(), arm_nn_requantize_s64(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, REDUCE_MULTIPLIER, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s16().

int32_t arm_depthwise_conv_fast_s16_get_buffer_size (const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims)
Parameters
[in]input_dimsInput (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used.
[in]filter_dimsFilter tensor dimensions. Format: [1, H, W, C_OUT]
Returns
The function returns required buffer size in bytes

References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_fast_s16(), and arm_depthwise_conv_wrapper_s16_get_buffer_size().

arm_cmsis_nn_status arm_depthwise_conv_s16 ( const cmsis_nn_context *  ctx,
const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const cmsis_nn_dims *  input_dims,
const q15_t *  input_data,
const cmsis_nn_dims *  filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *  bias_dims,
const int64_t *  bias_data,
const cmsis_nn_dims *  output_dims,
q15_t *  output_data 
)
Parameters
[in,out]  ctx             Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons.
[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->input_offset : Not used. dw_conv_params->output_offset : Not used
[in]      quant_params    Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]      input_dims      Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used.
[in]      input_data      Input (activation) data pointer. Data type: int16
[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]      filter_data     Filter data pointer. Data type: int8
[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
[in]      bias_data       Bias data pointer. Data type: int64
[in]      output_dims     Output tensor dimensions. Format: [N, H, W, C_OUT]
[in,out]  output_data     Output data pointer. Data type: int16
Returns
The function returns ARM_CMSIS_NN_SUCCESS
  • Supported framework: TensorFlow Lite
  • q15 is used as data type even though it is s16 data. It is done so to be consistent with existing APIs. (A minimal call sketch follows below.)
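A minimal call sketch, assuming the dimension and parameter structures are prepared elsewhere; the names run_dw_s16, out_mult, out_shift and DW_S16_OUT_CH are illustrative only. Since no _get_buffer_size() helper is listed for this variant, the sketch passes an empty context.

    #include "arm_nnfunctions.h"

    #define DW_S16_OUT_CH 8 /* placeholder output-channel count */

    static int32_t out_mult[DW_S16_OUT_CH];  /* one requantization multiplier per output channel */
    static int32_t out_shift[DW_S16_OUT_CH]; /* one requantization shift per output channel */

    arm_cmsis_nn_status run_dw_s16(const cmsis_nn_dw_conv_params *dw_conv_params,
                                   const cmsis_nn_dims *input_dims, const q15_t *input_data,
                                   const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                   const cmsis_nn_dims *bias_dims, const int64_t *bias_data,
                                   const cmsis_nn_dims *output_dims, q15_t *output_data)
    {
        const cmsis_nn_per_channel_quant_params quant_params = {.multiplier = out_mult,
                                                                .shift = out_shift};

        /* No additional scratch buffer is requested for this variant in this reference. */
        cmsis_nn_context ctx = {.buf = NULL, .size = 0};

        return arm_depthwise_conv_s16(&ctx, dw_conv_params, &quant_params, input_dims,
                                      input_data, filter_dims, filter_data, bias_dims,
                                      bias_data, output_dims, output_data);
    }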

References cmsis_nn_dw_conv_params::activation, ARM_CMSIS_NN_SUCCESS, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s16_generic_s16(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_fast_s16(), and arm_depthwise_conv_wrapper_s16().

arm_cmsis_nn_status arm_depthwise_conv_s8 ( const cmsis_nn_context *  ctx,
const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const cmsis_nn_dims *  input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *  filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *  bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *  output_dims,
q7_t *  output_data 
)
Parameters
[in,out]  ctx             Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons.
[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128]. Range of dw_conv_params->output_offset : [-128, 127]
[in]      quant_params    Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]      input_dims      Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used.
[in]      input_data      Input (activation) data pointer. Data type: int8
[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]      filter_data     Filter data pointer. Data type: int8
[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
[in]      bias_data       Bias data pointer. Data type: int32
[in]      output_dims     Output tensor dimensions. Format: [N, H, W, C_OUT]
[in,out]  output_data     Output data pointer. Data type: int8
Returns
The function returns ARM_CMSIS_NN_SUCCESS
  • Supported framework: TensorFlow Lite
  • q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs. (A setup sketch follows below.)
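The sketch below illustrates the ch_mult relationship (output channels = input channels * ch_mult) and the offset ranges documented above. All sizes, offsets and quantization arrays are placeholders, and run_dw_s8_example is an invented helper name; dilation is left at its zero-initialized value because this function does not use it.

    #include "arm_nnfunctions.h"

    /* Hypothetical 8x8 input with 4 channels and a channel multiplier of 2 -> 8 output channels. */
    static q7_t input_data[8 * 8 * 4];
    static q7_t filter_data[3 * 3 * 8]; /* depthwise weights laid out as [1, H, W, C_OUT] */
    static int32_t bias_data[8];
    static q7_t output_data[8 * 8 * 8];
    static int32_t out_mult[8];         /* one entry per output channel */
    static int32_t out_shift[8];

    arm_cmsis_nn_status run_dw_s8_example(void)
    {
        const cmsis_nn_dims input_dims  = {.n = 1, .h = 8, .w = 8, .c = 4};
        const cmsis_nn_dims filter_dims = {.n = 1, .h = 3, .w = 3, .c = 8};
        const cmsis_nn_dims bias_dims   = {.n = 1, .h = 1, .w = 1, .c = 8};
        const cmsis_nn_dims output_dims = {.n = 1, .h = 8, .w = 8, .c = 8};

        cmsis_nn_dw_conv_params dw_conv_params = {0};
        dw_conv_params.ch_mult = 2;          /* C_OUT = ch_mult * C_IN */
        dw_conv_params.stride.w = 1;
        dw_conv_params.stride.h = 1;
        dw_conv_params.padding.w = 1;
        dw_conv_params.padding.h = 1;
        dw_conv_params.input_offset = 128;   /* within the documented range [-127, 128] */
        dw_conv_params.output_offset = -128; /* within the documented range [-128, 127] */
        dw_conv_params.activation.min = -128;
        dw_conv_params.activation.max = 127;

        const cmsis_nn_per_channel_quant_params quant_params = {.multiplier = out_mult,
                                                                .shift = out_shift};
        cmsis_nn_context ctx = {.buf = NULL, .size = 0};

        return arm_depthwise_conv_s8(&ctx, &dw_conv_params, &quant_params, &input_dims, input_data,
                                     &filter_dims, filter_data, &bias_dims, bias_data, &output_dims,
                                     output_data);
    }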

References cmsis_nn_dw_conv_params::activation, ARM_CMSIS_NN_SUCCESS, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s8_generic(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8().

arm_cmsis_nn_status arm_depthwise_conv_s8_opt ( const cmsis_nn_context *  ctx,
const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const cmsis_nn_dims *  input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *  filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *  bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *  output_dims,
q7_t *  output_data 
)
Returns
The function returns one of the following:
  • ARM_CMSIS_NN_ARG_ERROR - input channels != output channels or ch_mult != 1
  • ARM_CMSIS_NN_SUCCESS - Successful operation
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following if MVE optimizations (Arm Helium Technology) are used:
  • Output shift
  • Output multiplier
  • Output bias
  • kernel
  • Supported framework: TensorFlow Lite
  • The following constraints on the arguments apply (see the buffer-handling sketch below)
    1. Number of input channels equals number of output channels or ch_mult equals 1
  • q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
  • Recommended when the number of channels is 4 or greater.
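A hedged sketch of the scratch-buffer handshake for this optimized kernel, using a statically sized arena; the arena capacity, the helper name run_dw_s8_opt, and the use of ARM_CMSIS_NN_ARG_ERROR as a local "arena too small" sentinel are choices made for this example only.

    #include <stdint.h>
    #include "arm_nnfunctions.h"

    #define DW_SCRATCH_BYTES 2048 /* placeholder capacity for this sketch */
    static uint8_t dw_scratch[DW_SCRATCH_BYTES];

    arm_cmsis_nn_status run_dw_s8_opt(const cmsis_nn_dw_conv_params *dw_conv_params,
                                      const cmsis_nn_per_channel_quant_params *quant_params,
                                      const cmsis_nn_dims *input_dims, const q7_t *input_data,
                                      const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                      const cmsis_nn_dims *bias_dims, const int32_t *bias_data,
                                      const cmsis_nn_dims *output_dims, q7_t *output_data)
    {
        /* Ask the library how much scratch memory these shapes require. */
        const int32_t buf_size =
            arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
        if (buf_size > (int32_t)sizeof(dw_scratch))
        {
            return ARM_CMSIS_NN_ARG_ERROR; /* sentinel: scratch arena too small for these shapes */
        }

        cmsis_nn_context ctx = {.buf = (buf_size > 0) ? dw_scratch : NULL, .size = buf_size};

        return arm_depthwise_conv_s8_opt(&ctx, dw_conv_params, quant_params, input_dims,
                                         input_data, filter_dims, filter_data, bias_dims,
                                         bias_data, output_dims, output_data);
    }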

References cmsis_nn_dw_conv_params::activation, ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt_get_buffer_size(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_depthwise_conv_nt_t_padded_s8(), arm_nn_depthwise_conv_nt_t_s8(), arm_nn_read_q15x2(), arm_nn_read_q7x4(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, CH_IN_BLOCK_MVE, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_wrapper_s8().

int32_t arm_depthwise_conv_s8_opt_get_buffer_size ( const cmsis_nn_dims *  input_dims,
const cmsis_nn_dims *  filter_dims 
)
Parameters
[in]  input_dims   Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used.
[in]  filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
Returns
The function returns required buffer size in bytes

References cmsis_nn_dims::c, CH_IN_BLOCK_MVE, cmsis_nn_dims::h, and cmsis_nn_dims::w.

Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8_get_buffer_size().

arm_cmsis_nn_status arm_depthwise_conv_u8_basic_ver1 ( const uint8_t *  input,
const uint16_t  input_x,
const uint16_t  input_y,
const uint16_t  input_ch,
const uint8_t *  kernel,
const uint16_t  kernel_x,
const uint16_t  kernel_y,
const int16_t  ch_mult,
const int16_t  pad_x,
const int16_t  pad_y,
const int16_t  stride_x,
const int16_t  stride_y,
const int16_t  dilation_x,
const int16_t  dilation_y,
const int32_t *  bias,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_offset,
uint8_t *  output,
const uint16_t  output_x,
const uint16_t  output_y,
const int32_t  output_activation_min,
const int32_t  output_activation_max,
const int32_t  output_shift,
const int32_t  output_mult 
)

uint8 depthwise convolution function with asymmetric quantization. Unless specified otherwise, arguments are mandatory.

Parameters
[in]      input                  Pointer to input tensor
[in]      input_x                Width of input tensor
[in]      input_y                Height of input tensor
[in]      input_ch               Channels in input tensor
[in]      kernel                 Pointer to kernel weights
[in]      kernel_x               Width of kernel
[in]      kernel_y               Height of kernel
[in]      ch_mult                Channel multiplier
[in]      pad_x                  Padding size x
[in]      pad_y                  Padding size y
[in]      stride_x               Convolution stride along the width
[in]      stride_y               Convolution stride along the height
[in]      dilation_x             Dilation along width. Not used and intended for future enhancement.
[in]      dilation_y             Dilation along height. Not used and intended for future enhancement.
[in]      bias                   Pointer to optional bias values. If no bias is available, NULL is expected
[in]      input_offset           Input tensor zero offset
[in]      filter_offset          Kernel tensor zero offset
[in]      output_offset          Output tensor zero offset
[in,out]  output                 Pointer to output tensor
[in]      output_x               Width of output tensor
[in]      output_y               Height of output tensor
[in]      output_activation_min  Minimum value to clamp the output to. Range : {0, 255}
[in]      output_activation_max  Maximum value to clamp the output to. Range : {0, 255}
[in]      output_shift           Amount of right-shift for output
[in]      output_mult            Output multiplier for requantization
Returns
The function returns ARM_CMSIS_NN_SUCCESS - Successful operation
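Because this legacy API takes a long flat argument list, a call sketch can help with ordering. Every numeric value below (shapes, offsets, shift and multiplier) is a placeholder and does not represent a validated quantization; run_dw_u8_example is an invented helper name.

    #include <stdint.h>
    #include "arm_nnfunctions.h"

    /* Placeholder 16x16 uint8 input, 8 channels, 3x3 kernel, ch_mult 1, stride 1, pad 1. */
    static uint8_t input[16 * 16 * 8];
    static uint8_t kernel[3 * 3 * 8];
    static int32_t bias[8];
    static uint8_t output[16 * 16 * 8];

    arm_cmsis_nn_status run_dw_u8_example(void)
    {
        return arm_depthwise_conv_u8_basic_ver1(input,
                                                16, 16, 8,   /* input_x, input_y, input_ch */
                                                kernel,
                                                3, 3,        /* kernel_x, kernel_y */
                                                1,           /* ch_mult */
                                                1, 1,        /* pad_x, pad_y */
                                                1, 1,        /* stride_x, stride_y */
                                                1, 1,        /* dilation_x, dilation_y (not used) */
                                                bias,        /* pass NULL if no bias is available */
                                                -128,        /* input_offset (placeholder) */
                                                -128,        /* filter_offset (placeholder) */
                                                128,         /* output_offset (placeholder) */
                                                output,
                                                16, 16,      /* output_x, output_y */
                                                0, 255,      /* output_activation_min, _max */
                                                8,           /* output_shift (placeholder) */
                                                1073741824); /* output_mult (placeholder, 2^30) */
    }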

References ARM_CMSIS_NN_SUCCESS, depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().

arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16 ( const cmsis_nn_context *  ctx,
const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const cmsis_nn_dims *  input_dims,
const q15_t *  input_data,
const cmsis_nn_dims *  filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *  bias_dims,
const int64_t *  bias_data,
const cmsis_nn_dims *  output_dims,
q15_t *  output_data 
)
Parameters
[in,out]  ctx             Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons.
[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : Not used. Range of dw_conv_params->output_offset : Not used
[in]      quant_params    Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in]      input_data      Input (activation) data pointer. Data type: int16
[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]      filter_data     Filter data pointer. Data type: int8
[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
[in]      bias_data       Bias data pointer. Data type: int64
[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
[in,out]  output_data     Output data pointer. Data type: int16
Returns
The function returns ARM_CMSIS_NN_SUCCESS - Successful completion.
  • Supported framework: TensorFlow Lite
  • Picks one of the following functions
    1. arm_depthwise_conv_s16()
    2. arm_depthwise_conv_fast_s16() - Cortex-M CPUs with DSP extension only
  • q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs. (A buffer-handling sketch follows below.)
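Because the wrapper selects the kernel internally, the caller only needs to provision whatever scratch memory the matching _get_buffer_size() reports. A hedged sketch of that handshake follows; the helper name and the heap allocation are illustrative only.

    #include <stdlib.h>
    #include "arm_nnfunctions.h"

    arm_cmsis_nn_status run_dw_wrapper_s16(const cmsis_nn_dw_conv_params *dw_conv_params,
                                           const cmsis_nn_per_channel_quant_params *quant_params,
                                           const cmsis_nn_dims *input_dims, const q15_t *input_data,
                                           const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                           const cmsis_nn_dims *bias_dims, const int64_t *bias_data,
                                           const cmsis_nn_dims *output_dims, q15_t *output_data)
    {
        cmsis_nn_context ctx = {.buf = NULL, .size = 0};

        /* The wrapper's buffer-size helper accounts for whichever kernel it will dispatch to. */
        const int32_t buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(
            dw_conv_params, input_dims, filter_dims, output_dims);
        if (buf_size > 0)
        {
            ctx.buf = malloc((size_t)buf_size);
            ctx.size = buf_size;
            if (ctx.buf == NULL)
            {
                return ARM_CMSIS_NN_ARG_ERROR; /* sentinel for allocation failure in this sketch */
            }
        }

        const arm_cmsis_nn_status status = arm_depthwise_conv_wrapper_s16(
            &ctx, dw_conv_params, quant_params, input_dims, input_data, filter_dims, filter_data,
            bias_dims, bias_data, output_dims, output_data);
        free(ctx.buf);
        return status;
    }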

References ARM_CMSIS_NN_SUCCESS, arm_depthwise_conv_fast_s16(), arm_depthwise_conv_s16(), and USE_FAST_DW_CONV_FUNCTION.

int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size ( const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_dims *  input_dims,
const cmsis_nn_dims *  filter_dims,
const cmsis_nn_dims *  output_dims 
)
Parameters
[in]  dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). Range of dw_conv_params->input_offset : Not used. Range of dw_conv_params->output_offset : Not used
[in]  input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in]  filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]  output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
Returns
Size of additional memory required for optimizations in bytes.

References arm_depthwise_conv_fast_s16_get_buffer_size(), and USE_FAST_DW_CONV_FUNCTION.

arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8 ( const cmsis_nn_context *  ctx,
const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const cmsis_nn_dims *  input_dims,
const q7_t *  input_data,
const cmsis_nn_dims *  filter_dims,
const q7_t *  filter_data,
const cmsis_nn_dims *  bias_dims,
const int32_t *  bias_data,
const cmsis_nn_dims *  output_dims,
q7_t *  output_data 
)
Parameters
[in,out]  ctx             Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons.
[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128]. Range of dw_conv_params->output_offset : [-128, 127]
[in]      quant_params    Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel
[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in]      input_data      Input (activation) data pointer. Data type: int8
[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]      filter_data     Filter data pointer. Data type: int8
[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
[in]      bias_data       Bias data pointer. Data type: int32
[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
[in,out]  output_data     Output data pointer. Data type: int8
Returns
The function returns ARM_CMSIS_NN_SUCCESS - Successful completion.
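The s8 wrapper dispatches internally to the 3x3, optimized or generic s8 kernels, so only the scratch buffer needs to be provisioned up front. The sketch below uses a statically sized arena; the arena capacity, the helper name run_dw_wrapper_s8, and the use of ARM_CMSIS_NN_ARG_ERROR as an "arena too small" sentinel are placeholders for illustration.

    #include <stdint.h>
    #include "arm_nnfunctions.h"

    static uint8_t scratch_arena[4096]; /* placeholder capacity for this sketch */

    arm_cmsis_nn_status run_dw_wrapper_s8(const cmsis_nn_dw_conv_params *dw_conv_params,
                                          const cmsis_nn_per_channel_quant_params *quant_params,
                                          const cmsis_nn_dims *input_dims, const q7_t *input_data,
                                          const cmsis_nn_dims *filter_dims, const q7_t *filter_data,
                                          const cmsis_nn_dims *bias_dims, const int32_t *bias_data,
                                          const cmsis_nn_dims *output_dims, q7_t *output_data)
    {
        cmsis_nn_context ctx;
        ctx.size = arm_depthwise_conv_wrapper_s8_get_buffer_size(dw_conv_params, input_dims,
                                                                 filter_dims, output_dims);
        if (ctx.size > (int32_t)sizeof(scratch_arena))
        {
            return ARM_CMSIS_NN_ARG_ERROR; /* sentinel: arena too small for these shapes */
        }
        ctx.buf = (ctx.size > 0) ? scratch_arena : NULL;

        return arm_depthwise_conv_wrapper_s8(&ctx, dw_conv_params, quant_params, input_dims,
                                             input_data, filter_dims, filter_data, bias_dims,
                                             bias_data, output_dims, output_data);
    }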

References ARM_CMSIS_NN_SUCCESS, arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt(), cmsis_nn_dw_conv_params::ch_mult, cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, cmsis_nn_tile::w, and cmsis_nn_dims::w.

int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size ( const cmsis_nn_dw_conv_params *  dw_conv_params,
const cmsis_nn_dims *  input_dims,
const cmsis_nn_dims *  filter_dims,
const cmsis_nn_dims *  output_dims 
)
Parameters
[in]  dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...). Range of dw_conv_params->input_offset : [-127, 128]. Range of dw_conv_params->output_offset : [-128, 127]
[in]  input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1.
[in]  filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
[in]  output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
Returns
Size of additional memory required for optimizations in bytes.

References arm_depthwise_conv_s8_opt_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::n, and cmsis_nn_tile::w.

arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7 ( const q7_t *  Im_in,
const uint16_t  dim_im_in,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel,
const uint16_t  padding,
const uint16_t  stride,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in]      Im_in       pointer to input tensor
[in]      dim_im_in   input tensor dimension
[in]      ch_im_in    number of input tensor channels
[in]      wt          pointer to kernel weights
[in]      ch_im_out   number of filters, i.e., output tensor channels
[in]      dim_kernel  filter kernel size
[in]      padding     padding sizes
[in]      stride      convolution stride
[in]      bias        pointer to bias
[in]      bias_shift  amount of left-shift for bias
[in]      out_shift   amount of right-shift for output
[in,out]  Im_out      pointer to output tensor
[in]      dim_im_out  output tensor dimension
[in,out]  bufferA     pointer to buffer space for input
[in,out]  bufferB     pointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of the input-argument constraint checks.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 2 and ch_im_out is a multiple of 2 (see the sketch below).
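A hedged sketch of calling this legacy kernel with both channel counts a multiple of 2. The bufferA sizing (2 * ch_im_in * dim_kernel * dim_kernel q15 elements) and passing NULL for the unused bufferB are assumptions carried over from the usual conventions of the legacy HWC kernels; verify against arm_nnfunctions.h for your library version. The shift and padding values are placeholders, and run_dw_separable_q7_example is an invented helper name.

    #include <stddef.h>
    #include "arm_nnfunctions.h"

    #define DW_CH 32     /* ch_im_in == ch_im_out, both multiples of 2 */
    #define DW_DIM 16    /* square input/output dimension */
    #define DW_KERNEL 3

    static q7_t Im_in[DW_DIM * DW_DIM * DW_CH];
    static q7_t wt[DW_KERNEL * DW_KERNEL * DW_CH]; /* one 3x3 filter per channel */
    static q7_t bias[DW_CH];
    static q7_t Im_out[DW_DIM * DW_DIM * DW_CH];
    static q15_t bufferA[2 * DW_CH * DW_KERNEL * DW_KERNEL]; /* assumed scratch size */

    arm_cmsis_nn_status run_dw_separable_q7_example(void)
    {
        return arm_depthwise_separable_conv_HWC_q7(Im_in, DW_DIM, DW_CH, wt, DW_CH, DW_KERNEL,
                                                   1 /* padding */, 1 /* stride */,
                                                   bias, 0 /* bias_shift */, 7 /* out_shift */,
                                                   Im_out, DW_DIM, bufferA,
                                                   NULL /* bufferB: assumed unused */);
    }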

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7_nonsquare ( const q7_t *  Im_in,
const uint16_t  dim_im_in_x,
const uint16_t  dim_im_in_y,
const uint16_t  ch_im_in,
const q7_t *  wt,
const uint16_t  ch_im_out,
const uint16_t  dim_kernel_x,
const uint16_t  dim_kernel_y,
const uint16_t  padding_x,
const uint16_t  padding_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const q7_t *  bias,
const uint16_t  bias_shift,
const uint16_t  out_shift,
q7_t *  Im_out,
const uint16_t  dim_im_out_x,
const uint16_t  dim_im_out_y,
q15_t *  bufferA,
q7_t *  bufferB 
)
Parameters
[in]      Im_in         pointer to input tensor
[in]      dim_im_in_x   input tensor dimension x
[in]      dim_im_in_y   input tensor dimension y
[in]      ch_im_in      number of input tensor channels
[in]      wt            pointer to kernel weights
[in]      ch_im_out     number of filters, i.e., output tensor channels
[in]      dim_kernel_x  filter kernel size x
[in]      dim_kernel_y  filter kernel size y
[in]      padding_x     padding sizes x
[in]      padding_y     padding sizes y
[in]      stride_x      convolution stride x
[in]      stride_y      convolution stride y
[in]      bias          pointer to bias
[in]      bias_shift    amount of left-shift for bias
[in]      out_shift     amount of right-shift for output
[in,out]  Im_out        pointer to output tensor
[in]      dim_im_out_x  output tensor dimension x
[in]      dim_im_out_y  output tensor dimension y
[in,out]  bufferA       pointer to buffer space for input
[in,out]  bufferB       pointer to buffer space for output
Returns
The function returns either ARM_CMSIS_NN_ARG_ERROR or ARM_CMSIS_NN_SUCCESS based on the outcome of the input-argument constraint checks.

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 2 and ch_im_out is a multiple of 2.

References ARM_CMSIS_NN_ARG_ERROR, ARM_CMSIS_NN_SUCCESS, arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

static void depthwise_conv_s16_generic_s16 ( const int16_t *  input,
const uint16_t  input_batches,
const uint16_t  input_x,
const uint16_t  input_y,
const uint16_t  input_ch,
const int8_t *  kernel,
const uint16_t  ch_mult,
const uint16_t  kernel_x,
const uint16_t  kernel_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int64_t *  bias,
int16_t *  output,
const int32_t *  output_shift,
const int32_t *  output_mult,
const uint16_t  output_x,
const uint16_t  output_y,
const int32_t  output_activation_min,
const int32_t  output_activation_max,
const uint16_t  dilation_x,
const uint16_t  dilation_y 
)
static
static void depthwise_conv_s8_generic ( const q7_t *  input,
const uint16_t  input_batches,
const uint16_t  input_x,
const uint16_t  input_y,
const uint16_t  input_ch,
const q7_t *  kernel,
const uint16_t  output_ch,
const uint16_t  ch_mult,
const uint16_t  kernel_x,
const uint16_t  kernel_y,
const uint16_t  pad_x,
const uint16_t  pad_y,
const uint16_t  stride_x,
const uint16_t  stride_y,
const int32_t *  bias,
q7_t *  output,
const int32_t *  output_shift,
const int32_t *  output_mult,
const uint16_t  output_x,
const uint16_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max,
const uint16_t  dilation_x,
const uint16_t  dilation_y 
)
static

References arm_nn_requantize(), MAX, and MIN.

Referenced by arm_depthwise_conv_s8().

static void depthwise_conv_u8_generic ( const uint8_t *  input,
const int32_t  input_x,
const int32_t  input_y,
const int32_t  input_ch,
const uint8_t *  kernel,
const int32_t  output_ch,
const int32_t  ch_mult,
const int32_t  kernel_x,
const int32_t  kernel_y,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
uint8_t *  output,
const int32_t  output_shift,
const int32_t  output_mult,
const int32_t  output_x,
const int32_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static
static void depthwise_conv_u8_mult_4 ( const uint8_t *  input,
const int32_t  input_x,
const int32_t  input_y,
const int32_t  input_ch,
const uint8_t *  kernel,
const int32_t  output_ch,
const int32_t  ch_mult,
const int32_t  kernel_x,
const int32_t  kernel_y,
const int32_t  pad_x,
const int32_t  pad_y,
const int32_t  stride_x,
const int32_t  stride_y,
const int32_t *  bias,
uint8_t *  output,
const int32_t  output_shift,
const int32_t  output_mult,
const int32_t  output_x,
const int32_t  output_y,
const int32_t  output_offset,
const int32_t  input_offset,
const int32_t  filter_offset,
const int32_t  output_activation_min,
const int32_t  output_activation_max 
)
static