CMSIS-NN
Version 3.0.0
CMSIS NN Software Library
|
Functions | |
arm_status | arm_convolve_1_x_n_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
1xn convolution More... | |
int32_t | arm_convolve_1_x_n_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required additional buffer size for 1xn convolution. More... | |
arm_status | arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 version of 1x1 convolution (non-square shape) More... | |
arm_status | arm_convolve_1x1_s8_fast (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
Fast s8 version for 1x1 convolution (non-square shape) More... | |
int32_t | arm_convolve_1x1_s8_fast_get_buffer_size (const cmsis_nn_dims *input_dims) |
Get the required buffer size for arm_convolve_1x1_s8_fast. More... | |
arm_status | arm_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q15 convolution function. More... | |
arm_status | arm_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function. More... | |
arm_status | arm_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function. More... | |
arm_status | arm_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function. More... | |
arm_status | arm_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 convolution function for RGB image. More... | |
arm_status | arm_convolve_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
Basic s8 convolution function. More... | |
int32_t | arm_convolve_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for s8 convolution function. More... | |
arm_status | arm_convolve_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in cmsis-nn to perform the convolution. More... | |
int32_t | arm_convolve_wrapper_s8_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims) |
Get the required buffer size for arm_convolve_wrapper_s8. More... | |
arm_status | arm_depthwise_conv_3x3_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function argument details. More... | |
static void | depthwise_conv_s8_mult_4 (const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
static void | depthwise_conv_s8_generic (const q7_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const q7_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
arm_status | arm_depthwise_conv_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
arm_status | arm_depthwise_conv_s8_opt (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. Refer to arm_depthwise_conv_s8() for function argument details. More... | |
int32_t | arm_depthwise_conv_s8_opt_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. More... | |
static void | depthwise_conv_u8_mult_4 (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
static void | depthwise_conv_u8_generic (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
arm_status | arm_depthwise_conv_u8_basic_ver1 (const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t output_shift, const int32_t output_mult) |
uint8 depthwise convolution function with asymmetric quantization More... | |
arm_status | arm_depthwise_conv_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *filter, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Wrapper function to pick the right optimized s8 depthwise convolution function. More... | |
int32_t | arm_depthwise_conv_wrapper_s8_get_buffer_size (const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims) |
Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() More... | |
arm_status | arm_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function. More... | |
arm_status | arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function (non-square shape) More... | |
Collection of convolution, depthwise convolution functions and their variants.
The convolution is implemented in 2 steps: im2col and GEMM
im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as matrix-matrix multiplication.
To reduce the memory footprint, the im2col is performed partially. Each iteration, only a few columns (i.e., patches) are generated and computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
arm_status arm_convolve_1_x_n_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail, or ARM_MATH_SUCCESS
on successful completion. References cmsis_nn_conv_params::activation, arm_convolve_s8(), arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), cmsis_nn_dims::c, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8().
int32_t arm_convolve_1_x_n_s8_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.
This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2.
[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_1x1_s8_fast | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail, or ARM_MATH_SUCCESS
on successful completion. References cmsis_nn_conv_params::activation, arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_nt_t_s8(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8().
int32_t arm_convolve_1x1_s8_fast_get_buffer_size | ( | const cmsis_nn_dims * | input_dims | ) |
[in] | input_dims | Input (activation) dimensions |
Referenced by arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_HWC_q15_basic | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
Buffer size:
bufferA size: ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q15_fast | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q15_fast_nonsquare | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_basic | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_basic_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Basic Q7 convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_fast | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 4 ( because of the SIMD32 read and swap )
ch_im_out is multiple of 2 ( because of the 2x2 mat_mult kernel )
The im2col converts the Q7 tensor input into Q15 column, which is stored in bufferA. There is reordering happening during this im2col process with arm_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped.
The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.
To speed-up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is multiple of 4; ch_im_out is multiple of 2.
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_RGB | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Q7 version of convolution for RGB image.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals 3
This kernel is written exclusively for convolution with ch_im_in equal to 3. This applies to the first layer of CNNs, which has an input image in RGB format.
References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_nn_read_q7x4(), arm_nnword::half_words, NN_ROUND, and arm_nnword::word.
arm_status arm_convolve_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS
References cmsis_nn_conv_params::activation, arm_memcpy_q7(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_s8(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_1_x_n_s8(), and arm_convolve_wrapper_s8().
int32_t arm_convolve_s8_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_wrapper_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail, or ARM_MATH_SUCCESS
on successful completion. References arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), arm_convolve_s8(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
int32_t arm_convolve_wrapper_s8_get_buffer_size | ( | const cmsis_nn_conv_params * | conv_params, |
const cmsis_nn_dims * | input_dims, | ||
const cmsis_nn_dims * | filter_dims, | ||
const cmsis_nn_dims * | output_dims | ||
) |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | input_dims | Input (activation) dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
References arm_convolve_1_x_n_s8_get_buffer_size(), arm_convolve_1x1_s8_fast_get_buffer_size(), arm_convolve_s8_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
arm_status arm_depthwise_conv_3x3_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
ARM_MATH_SIZE_MISMATCH
- Unsupported dimension of tensors ARM_MATH_ARGUMENT_ERROR
- Unsupported pad size along the x axis ARM_MATH_SUCCESS
- Successful operation References cmsis_nn_dw_conv_params::activation, arm_nn_read_q7x4(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_wrapper_s8().
arm_status arm_depthwise_conv_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS
References cmsis_nn_dw_conv_params::activation, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s8_generic(), depthwise_conv_s8_mult_4(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8().
arm_status arm_depthwise_conv_s8_opt | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
ARM_MATH_SIZE_MISMATCH
- input channel != output channel or ch_mult != 1 ARM_MATH_SUCCESS
- Successful operation References cmsis_nn_dw_conv_params::activation, arm_depthwise_conv_s8(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_depthwise_conv_nt_t_padded_s8(), arm_nn_depthwise_conv_nt_t_s8(), arm_nn_read_q15x2(), arm_nn_read_q7x4(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_wrapper_s8().
int32_t arm_depthwise_conv_s8_opt_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used. |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_wrapper_s8_get_buffer_size().
arm_status arm_depthwise_conv_u8_basic_ver1 | ( | const uint8_t * | input, |
const uint16_t | input_x, | ||
const uint16_t | input_y, | ||
const uint16_t | input_ch, | ||
const uint8_t * | kernel, | ||
const uint16_t | kernel_x, | ||
const uint16_t | kernel_y, | ||
const int16_t | ch_mult, | ||
const int16_t | pad_x, | ||
const int16_t | pad_y, | ||
const int16_t | stride_x, | ||
const int16_t | stride_y, | ||
const int16_t | dilation_x, | ||
const int16_t | dilation_y, | ||
const int32_t * | bias, | ||
const int32_t | input_offset, | ||
const int32_t | filter_offset, | ||
const int32_t | output_offset, | ||
uint8_t * | output, | ||
const uint16_t | output_x, | ||
const uint16_t | output_y, | ||
const int32_t | output_activation_min, | ||
const int32_t | output_activation_max, | ||
const int32_t | output_shift, | ||
const int32_t | output_mult | ||
) |
uint8 depthwise convolution function with asymmetric quantization. Unless specified otherwise, arguments are mandatory.
[in] | input | Pointer to input tensor |
[in] | input_x | Width of input tensor |
[in] | input_y | Height of input tensor |
[in] | input_ch | Channels in input tensor |
[in] | kernel | Pointer to kernel weights |
[in] | kernel_x | Width of kernel |
[in] | kernel_y | Height of kernel |
[in] | ch_mult | Number of channel multiplier |
[in] | pad_x | Padding sizes x |
[in] | pad_y | Padding sizes y |
[in] | stride_x | Convolution stride along the width |
[in] | stride_y | Convolution stride along the height |
[in] | dilation_x | Dilation along width. Not used and intended for future enhancement. |
[in] | dilation_y | Dilation along height. Not used and intended for future enhancement. |
[in] | bias | Pointer to optional bias values. If no bias is available, NULL is expected |
[in] | input_offset | Input tensor zero offset |
[in] | filter_offset | Kernel tensor zero offset |
[in] | output_offset | Output tensor zero offset |
[in,out] | output | Pointer to output tensor |
[in] | output_x | Width of output tensor |
[in] | output_y | Height of output tensor |
[in] | output_activation_min | Minimum value to clamp the output to. Range : {0, 255} |
[in] | output_activation_max | Maximum value to clamp the output to. Range : {0, 255} |
[in] | output_shift | Amount of right-shift for output |
[in] | output_mult | Output multiplier for requantization |
ARM_MATH_SIZE_MISMATCH
- Not supported dimension of tensors ARM_MATH_SUCCESS
- Successful operation ARM_MATH_ARGUMENT_ERROR
- Implementation not available References depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().
arm_status arm_depthwise_conv_wrapper_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS
- Successful completion. References arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt(), cmsis_nn_dw_conv_params::ch_mult, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, and cmsis_nn_dims::w.
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size | ( | const cmsis_nn_dw_conv_params * | dw_conv_params, |
const cmsis_nn_dims * | input_dims, | ||
const cmsis_nn_dims * | filter_dims, | ||
const cmsis_nn_dims * | output_dims | ||
) |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | input_dims | Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
References arm_depthwise_conv_s8_opt_get_buffer_size(), cmsis_nn_dims::c, and cmsis_nn_dims::n.
arm_status arm_depthwise_separable_conv_HWC_q7 | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals ch_im_out
Implementation: There are 3 nested loops here: Inner loop: calculate each output value with MAC instruction over an accumulator; Mid loop: loop over different output channel; Outer loop: loop over different output (x, y).
References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding sizes x |
[in] | padding_y | padding sizes y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is equal to ch_im_out.
References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_s8().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_s8().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_u8_basic_ver1().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_u8_basic_ver1().