CMSIS-NN
Version 3.1.0
CMSIS NN Software Library
|
Functions | |
arm_status | arm_convolve_1_x_n_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
1xn convolution More... | |
int32_t | arm_convolve_1_x_n_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required additional buffer size for 1xn convolution. More... | |
arm_status | arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 version of 1x1 convolution (non-square shape) More... | |
arm_status | arm_convolve_1x1_s8_fast (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
Fast s8 version for 1x1 convolution (non-square shape) More... | |
int32_t | arm_convolve_1x1_s8_fast_get_buffer_size (const cmsis_nn_dims *input_dims) |
Get the required buffer size for arm_convolve_1x1_s8_fast. More... | |
arm_status | arm_convolve_fast_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data) |
Optimized s16 convolution function. More... | |
int32_t | arm_convolve_fast_s16_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for fast s16 convolution function. More... | |
arm_status | arm_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q15 convolution function. More... | |
arm_status | arm_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function. More... | |
arm_status | arm_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q15 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function. More... | |
arm_status | arm_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Basic Q7 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function. More... | |
arm_status | arm_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Fast Q7 convolution function (non-square shape) More... | |
arm_status | arm_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 convolution function for RGB image. More... | |
arm_status | arm_convolve_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data) |
Basic s16 convolution function. More... | |
int32_t | arm_convolve_s16_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for s16 convolution function. More... | |
arm_status | arm_convolve_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
Basic s8 convolution function. More... | |
int32_t | arm_convolve_s8_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for s8 convolution function. More... | |
arm_status | arm_convolve_wrapper_s16 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int64_t *bias_data, const cmsis_nn_dims *output_dims, q15_t *output_data) |
s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in cmsis-nn to perform the convolution. More... | |
int32_t | arm_convolve_wrapper_s16_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims) |
Get the required buffer size for arm_convolve_wrapper_s16. More... | |
arm_status | arm_convolve_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input_data, const cmsis_nn_dims *filter_dims, const q7_t *filter_data, const cmsis_nn_dims *bias_dims, const int32_t *bias_data, const cmsis_nn_dims *output_dims, q7_t *output_data) |
s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in cmsis-nn to perform the convolution. More... | |
int32_t | arm_convolve_wrapper_s8_get_buffer_size (const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims) |
Get the required buffer size for arm_convolve_wrapper_s8. More... | |
arm_status | arm_depthwise_conv_3x3_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to arm_depthwise_conv_s8() for function argument details. More... | |
static void | __attribute__ ((unused)) |
static void | depthwise_conv_s16_generic_s16 (const int16_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int64_t *bias, int16_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y) |
arm_status | arm_depthwise_conv_s16 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q15_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int64_t *bias, const cmsis_nn_dims *output_dims, q15_t *output) |
Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
static void | depthwise_conv_s8_mult_4 (const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
static void | depthwise_conv_s8_generic (const q7_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const q7_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y) |
arm_status | arm_depthwise_conv_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. More... | |
arm_status | arm_depthwise_conv_s8_opt (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *kernel, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. Refer to arm_depthwise_conv_s8() for function argument details. More... | |
int32_t | arm_depthwise_conv_s8_opt_get_buffer_size (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. More... | |
static void | depthwise_conv_u8_mult_4 (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
static void | depthwise_conv_u8_generic (const uint8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const uint8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, uint8_t *output, const int32_t output_shift, const int32_t output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t filter_offset, const int32_t output_activation_min, const int32_t output_activation_max) |
arm_status | arm_depthwise_conv_u8_basic_ver1 (const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t output_shift, const int32_t output_mult) |
uint8 depthwise convolution function with asymmetric quantization More... | |
arm_status | arm_depthwise_conv_wrapper_s8 (const cmsis_nn_context *ctx, const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const cmsis_nn_dims *input_dims, const q7_t *input, const cmsis_nn_dims *filter_dims, const q7_t *filter, const cmsis_nn_dims *bias_dims, const int32_t *bias, const cmsis_nn_dims *output_dims, q7_t *output) |
Wrapper function to pick the right optimized s8 depthwise convolution function. More... | |
int32_t | arm_depthwise_conv_wrapper_s8_get_buffer_size (const cmsis_nn_dw_conv_params *dw_conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *output_dims) |
Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() More... | |
arm_status | arm_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function. More... | |
arm_status | arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB) |
Q7 depthwise separable convolution function (non-square shape) More... | |
Collection of convolution, depthwise convolution functions and their variants.
The convolution is implemented in 2 steps: im2col and GEMM
im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as matrix-matrix multiplication.
To reduce the memory footprint, the im2col is performed partially. In each iteration, only a few columns (i.e., patches) are generated and computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
|
static |
References arm_nn_requantize_s64(), MAX, MIN, and REDUCE_MULTIPLIER.
arm_status arm_convolve_1_x_n_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail, or ARM_MATH_SUCCESS
on successful completion.References cmsis_nn_conv_params::activation, arm_convolve_s8(), arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), cmsis_nn_dims::c, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8().
int32_t arm_convolve_1_x_n_s8_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.
This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2.
[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_1x1_s8_fast | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail, or ARM_MATH_SUCCESS
on successful completion.References cmsis_nn_conv_params::activation, arm_nn_mat_mul_core_1x_s8(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_nt_t_s8(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s8().
int32_t arm_convolve_1x1_s8_fast_get_buffer_size | ( | const cmsis_nn_dims * | input_dims | ) |
[in] | input_dims | Input (activation) dimensions |
Referenced by arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_fast_s16 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q15_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q15_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not exceed 512 |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int16 |
ARM_MATH_SUCCESS
1. Supported framework: TensorFlow Lite micro. 2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs. 3. Additional memory is required for optimization. Refer to argument 'ctx' for details. 4. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
References cmsis_nn_conv_params::activation, arm_convolve_s8_get_buffer_size(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_mat_mult_kernel_s16(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_nn_requantize_s64(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, REDUCE_MULTIPLIER, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s16().
int32_t arm_convolve_fast_s16_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s16_get_buffer_size().
arm_status arm_convolve_HWC_q15_basic | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
Buffer size:
bufferA size: ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q15_fast | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
dim_im_out is a multiple of 2
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q15_fast_nonsquare | ( | const q15_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q15_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q15_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q15_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 2
ch_im_out is multiple of 2
References arm_nn_read_q15x2_ia(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_basic | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
This basic version is designed to work for any input tensor and weight dimension.
References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_basic_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Basic Q7 convolution function (non-square shape)
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SUCCESS
References arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_fast | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in is multiple of 4 ( because of the SIMD32 read and swap )
ch_im_out is multiple of 2 ( because of the 2x2 mat_mult kernel )
The im2col converts the Q7 tensor input into Q15 column, which is stored in bufferA. There is reordering happening during this im2col process with arm_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped.
The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.
To speed-up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_fast_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding size x |
[in] | padding_y | padding size y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2.
References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_read_q15x2_ia(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.
arm_status arm_convolve_HWC_q7_RGB | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
Q7 version of convolution for RGB image.
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals 3
This kernel is written exclusively for convolution with ch_im_in equals 3. This applies on the first layer of CNNs which has input image with RGB format.
References arm_memcpy_q7(), arm_memset_q7(), arm_nn_mat_mult_kernel_q7_q15(), arm_nn_read_q15x2_ia(), arm_nn_read_q7x4(), arm_nnword::half_words, NN_ROUND, and arm_nnword::word.
arm_status arm_convolve_s16 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q15_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q15_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_s16_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int16 |
ARM_MATH_SUCCESS
References cmsis_nn_conv_params::activation, arm_nn_requantize_s64(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, REDUCE_MULTIPLIER, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_wrapper_s16().
int32_t arm_convolve_s16_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
Referenced by arm_convolve_wrapper_s16_get_buffer_size().
arm_status arm_convolve_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Optional bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS
References cmsis_nn_conv_params::activation, arm_convolve_s8_get_buffer_size(), arm_memcpy_q7(), arm_nn_mat_mul_core_4x_s8(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_s8(), arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_conv_params::output_offset, cmsis_nn_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_convolve_1_x_n_s8(), and arm_convolve_wrapper_s8().
int32_t arm_convolve_s8_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_convolve_fast_s16(), arm_convolve_s8(), and arm_convolve_wrapper_s8_get_buffer_size().
arm_status arm_convolve_wrapper_s16 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q15_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q15_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int16 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail. or, ARM_MATH_SUCCESS
on successful completion. References arm_convolve_fast_s16(), arm_convolve_s16(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_tile::w, and cmsis_nn_dims::w.
int32_t arm_convolve_wrapper_s16_get_buffer_size | ( | const cmsis_nn_conv_params * | conv_params, |
const cmsis_nn_dims * | input_dims, | ||
const cmsis_nn_dims * | filter_dims, | ||
const cmsis_nn_dims * | output_dims | ||
) |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). conv_params->input_offset : Not used conv_params->output_offset : Not used |
[in] | input_dims | Input (activation) dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
References arm_convolve_fast_s16_get_buffer_size(), arm_convolve_s16_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_tile::w, and cmsis_nn_dims::w.
arm_status arm_convolve_wrapper_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_conv_params * | conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context that contains the additional buffer if required by the function. arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SIZE_MISMATCH
if argument constraints fail. or, ARM_MATH_SUCCESS
on successful completion. References arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), arm_convolve_s8(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
int32_t arm_convolve_wrapper_s8_get_buffer_size | ( | const cmsis_nn_conv_params * | conv_params, |
const cmsis_nn_dims * | input_dims, | ||
const cmsis_nn_dims * | filter_dims, | ||
const cmsis_nn_dims * | output_dims | ||
) |
[in] | conv_params | Convolution parameters (e.g. strides, dilations, pads,...). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] |
[in] | input_dims | Input (activation) dimensions. Format: [N, H, W, C_IN] |
[in] | filter_dims | Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
References arm_convolve_1_x_n_s8_get_buffer_size(), arm_convolve_1x1_s8_fast_get_buffer_size(), arm_convolve_s8_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_conv_params::padding, cmsis_nn_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
arm_status arm_depthwise_conv_3x3_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
ARM_MATH_SIZE_MISMATCH
- Unsupported dimension of tensors ARM_MATH_ARGUMENT_ERROR
- Unsupported pad size along the x axis ARM_MATH_SUCCESS
- Successful operation. References cmsis_nn_dw_conv_params::activation, arm_nn_read_q7x4(), arm_nn_requantize(), cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_wrapper_s8().
arm_status arm_depthwise_conv_s16 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q15_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int64_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q15_t * | output_data | ||
) |
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->input_offset : Not used dw_conv_params->output_offset : Not used |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. |
[in] | input_data | Input (activation) data pointer. Data type: int16 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int64 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int16 |
ARM_MATH_SUCCESS
References cmsis_nn_dw_conv_params::activation, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s16_generic_s16(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
arm_status arm_depthwise_conv_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [N, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS
References cmsis_nn_dw_conv_params::activation, cmsis_nn_dims::c, cmsis_nn_dw_conv_params::ch_mult, depthwise_conv_s8_generic(), depthwise_conv_s8_mult_4(), cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8().
arm_status arm_depthwise_conv_s8_opt | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
ARM_MATH_SIZE_MISMATCH
- input channel != output channel or ch_mult != 1 ARM_MATH_SUCCESS
- Successful operation. References cmsis_nn_dw_conv_params::activation, arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt_get_buffer_size(), arm_memcpy_q7(), arm_memset_q7(), arm_nn_depthwise_conv_nt_t_padded_s8(), arm_nn_depthwise_conv_nt_t_s8(), arm_nn_read_q15x2(), arm_nn_read_q7x4(), arm_nn_requantize(), arm_q7_to_q15_with_offset(), cmsis_nn_context::buf, cmsis_nn_dims::c, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dw_conv_params::input_offset, MAX, cmsis_nn_activation::max, MIN, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_dw_conv_params::output_offset, cmsis_nn_dw_conv_params::padding, cmsis_nn_per_channel_quant_params::shift, cmsis_nn_dw_conv_params::stride, cmsis_nn_tile::w, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_wrapper_s8().
int32_t arm_depthwise_conv_s8_opt_get_buffer_size | ( | const cmsis_nn_dims * | input_dims, |
const cmsis_nn_dims * | filter_dims | ||
) |
[in] | input_dims | Input (activation) tensor dimensions. Format: [1, H, W, C_IN] Batch argument N is not used. |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
References cmsis_nn_dims::c, cmsis_nn_dims::h, and cmsis_nn_dims::w.
Referenced by arm_depthwise_conv_s8_opt(), and arm_depthwise_conv_wrapper_s8_get_buffer_size().
arm_status arm_depthwise_conv_u8_basic_ver1 | ( | const uint8_t * | input, |
const uint16_t | input_x, | ||
const uint16_t | input_y, | ||
const uint16_t | input_ch, | ||
const uint8_t * | kernel, | ||
const uint16_t | kernel_x, | ||
const uint16_t | kernel_y, | ||
const int16_t | ch_mult, | ||
const int16_t | pad_x, | ||
const int16_t | pad_y, | ||
const int16_t | stride_x, | ||
const int16_t | stride_y, | ||
const int16_t | dilation_x, | ||
const int16_t | dilation_y, | ||
const int32_t * | bias, | ||
const int32_t | input_offset, | ||
const int32_t | filter_offset, | ||
const int32_t | output_offset, | ||
uint8_t * | output, | ||
const uint16_t | output_x, | ||
const uint16_t | output_y, | ||
const int32_t | output_activation_min, | ||
const int32_t | output_activation_max, | ||
const int32_t | output_shift, | ||
const int32_t | output_mult | ||
) |
uint8 depthwise convolution function with asymmetric quantization. Unless specified otherwise, arguments are mandatory.
[in] | input | Pointer to input tensor |
[in] | input_x | Width of input tensor |
[in] | input_y | Height of input tensor |
[in] | input_ch | Channels in input tensor |
[in] | kernel | Pointer to kernel weights |
[in] | kernel_x | Width of kernel |
[in] | kernel_y | Height of kernel |
[in] | ch_mult | Number of channel multiplier |
[in] | pad_x | Padding sizes x |
[in] | pad_y | Padding sizes y |
[in] | stride_x | Convolution stride along the width |
[in] | stride_y | Convolution stride along the height |
[in] | dilation_x | Dilation along width. Not used and intended for future enhancement. |
[in] | dilation_y | Dilation along height. Not used and intended for future enhancement. |
[in] | bias | Pointer to optional bias values. If no bias is available, NULL is expected |
[in] | input_offset | Input tensor zero offset |
[in] | filter_offset | Kernel tensor zero offset |
[in] | output_offset | Output tensor zero offset |
[in,out] | output | Pointer to output tensor |
[in] | output_x | Width of output tensor |
[in] | output_y | Height of output tensor |
[in] | output_activation_min | Minimum value to clamp the output to. Range : {0, 255} |
[in] | output_activation_max | Maximum value to clamp the output to. Range : {0, 255} |
[in] | output_shift | Amount of right-shift for output |
[in] | output_mult | Output multiplier for requantization |
ARM_MATH_SIZE_MISMATCH
- Not supported dimension of tensors ARM_MATH_SUCCESS
- Successful operation ARM_MATH_ARGUMENT_ERROR
- Implementation not available References depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().
arm_status arm_depthwise_conv_wrapper_s8 | ( | const cmsis_nn_context * | ctx, |
const cmsis_nn_dw_conv_params * | dw_conv_params, | ||
const cmsis_nn_per_channel_quant_params * | quant_params, | ||
const cmsis_nn_dims * | input_dims, | ||
const q7_t * | input_data, | ||
const cmsis_nn_dims * | filter_dims, | ||
const q7_t * | filter_data, | ||
const cmsis_nn_dims * | bias_dims, | ||
const int32_t * | bias_data, | ||
const cmsis_nn_dims * | output_dims, | ||
q7_t * | output_data | ||
) |
[in,out] | ctx | Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | quant_params | Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel |
[in] | input_dims | Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | input_data | Input (activation) data pointer. Data type: int8 |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | filter_data | Filter data pointer. Data type: int8 |
[in] | bias_dims | Bias tensor dimensions. Format: [C_OUT] |
[in] | bias_data | Bias data pointer. Data type: int32 |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
[in,out] | output_data | Output data pointer. Data type: int8 |
ARM_MATH_SUCCESS - Successful completion.
References arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8(), arm_depthwise_conv_s8_opt(), cmsis_nn_dw_conv_params::ch_mult, cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::h, cmsis_nn_dims::n, cmsis_nn_dw_conv_params::padding, cmsis_nn_tile::w, and cmsis_nn_dims::w.
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size | ( | const cmsis_nn_dw_conv_params * | dw_conv_params, |
const cmsis_nn_dims * | input_dims, | ||
const cmsis_nn_dims * | filter_dims, | ||
const cmsis_nn_dims * | output_dims | ||
) |
[in] | dw_conv_params | Depthwise convolution parameters (e.g. strides, dilations, pads,...) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] |
[in] | input_dims | Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. |
[in] | filter_dims | Filter tensor dimensions. Format: [1, H, W, C_OUT] |
[in] | output_dims | Output tensor dimensions. Format: [1, H, W, C_OUT] |
References arm_depthwise_conv_s8_opt_get_buffer_size(), cmsis_nn_dims::c, cmsis_nn_dw_conv_params::dilation, cmsis_nn_tile::h, cmsis_nn_dims::n, and cmsis_nn_tile::w.
arm_status arm_depthwise_separable_conv_HWC_q7 | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | convolution stride |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. Buffer size:
bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
bufferB size: 0
Input dimension constraints:
ch_im_in equals ch_im_out
Implementation: There are 3 nested loops here. Inner loop: calculate each output value with MAC instruction over an accumulator. Mid loop: loop over different output channels. Outer loop: loop over different output (x, y).
References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare | ( | const q7_t * | Im_in, |
const uint16_t | dim_im_in_x, | ||
const uint16_t | dim_im_in_y, | ||
const uint16_t | ch_im_in, | ||
const q7_t * | wt, | ||
const uint16_t | ch_im_out, | ||
const uint16_t | dim_kernel_x, | ||
const uint16_t | dim_kernel_y, | ||
const uint16_t | padding_x, | ||
const uint16_t | padding_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | stride_y, | ||
const q7_t * | bias, | ||
const uint16_t | bias_shift, | ||
const uint16_t | out_shift, | ||
q7_t * | Im_out, | ||
const uint16_t | dim_im_out_x, | ||
const uint16_t | dim_im_out_y, | ||
q15_t * | bufferA, | ||
q7_t * | bufferB | ||
) |
[in] | Im_in | pointer to input tensor |
[in] | dim_im_in_x | input tensor dimension x |
[in] | dim_im_in_y | input tensor dimension y |
[in] | ch_im_in | number of input tensor channels |
[in] | wt | pointer to kernel weights |
[in] | ch_im_out | number of filters, i.e., output tensor channels |
[in] | dim_kernel_x | filter kernel size x |
[in] | dim_kernel_y | filter kernel size y |
[in] | padding_x | padding sizes x |
[in] | padding_y | padding sizes y |
[in] | stride_x | convolution stride x |
[in] | stride_y | convolution stride y |
[in] | bias | pointer to bias |
[in] | bias_shift | amount of left-shift for bias |
[in] | out_shift | amount of right-shift for output |
[in,out] | Im_out | pointer to output tensor |
[in] | dim_im_out_x | output tensor dimension x |
[in] | dim_im_out_y | output tensor dimension y |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | bufferB | pointer to buffer space for output |
ARM_MATH_SIZE_MISMATCH
or ARM_MATH_SUCCESS
based on the outcome of size checking. This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is equal to ch_im_out
References arm_nn_read_q7x4(), arm_nnword::bytes, NN_ROUND, and arm_nnword::word.
|
static |
References arm_nn_requantize_s64(), MAX, MIN, and REDUCE_MULTIPLIER.
Referenced by arm_depthwise_conv_s16().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_s8().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_s8().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_u8_basic_ver1().
|
static |
References arm_nn_requantize(), MAX, and MIN.
Referenced by arm_depthwise_conv_u8_basic_ver1().