|
void | arm_q7_to_q15_with_offset (const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset) |
| Converts the elements from an s8 vector to an s16 vector with an added offset.
|
|
int32_t | arm_depthwise_conv_s8_opt_get_buffer_size_mve (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
| Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. This is for processors with MVE extension. Refer to arm_depthwise_conv_s8_opt_get_buffer_size() for function argument details.
|
|
int32_t | arm_depthwise_conv_s8_opt_get_buffer_size_dsp (const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) |
| Get the required buffer size for optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. This is for processors with DSP extension. Refer to arm_depthwise_conv_s8_opt_get_buffer_size() for function argument details.
|
|
int8_t * | arm_nn_depthwise_conv_s8_core (const int8_t *row, const int16_t *col, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t kernel_size, const int32_t *const output_bias, int8_t *out) |
| Depthwise conv on an im2col buffer where the input channel equals output channel.
|
|
int8_t * | arm_nn_mat_mult_s8 (const int8_t *input_row, const int8_t *input_col, const uint16_t output_ch, const uint16_t col_batches, const int32_t *output_shift, const int32_t *output_mult, const int32_t out_offset, const int32_t col_offset, const int32_t row_offset, const int16_t out_activation_min, const int16_t out_activation_max, const uint16_t row_len, const int32_t *const bias, int8_t *out) |
| General Matrix-multiplication function with per-channel requantization.
|
|
int16_t * | arm_nn_mat_mult_kernel_s16 (const int8_t *input_a, const int16_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const int32_t num_col_a, const cmsis_nn_bias_data *const bias_data, int16_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution.
|
|
arm_cmsis_nn_status | arm_nn_mat_mul_core_1x_s8 (int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output) |
| General Vector by Matrix multiplication with requantization and storage of result.
|
|
arm_cmsis_nn_status | arm_nn_mat_mul_core_1x_s4 (int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output) |
| General Vector by Matrix multiplication with requantization, storage of result and int4 weights packed into an int8 buffer.
|
|
int8_t * | arm_nn_mat_mul_core_4x_s8 (const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output) |
| Matrix-multiplication with requantization & activation function for four rows and one column.
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_t_s4 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t lhs_cols_offset) |
| General Matrix-multiplication function with per-channel requantization. This function assumes:
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_interleaved_t_even_s4 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t lhs_cols_offset) |
| General Matrix-multiplication function with per-channel requantization. This function assumes:
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t row_address_offset, const int32_t lhs_cols_offset) |
| General Matrix-multiplication function with per-channel requantization. This function assumes:
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_t_s16 (const int16_t *lhs, const int8_t *rhs, const cmsis_nn_bias_data *bias_data, int16_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t activation_min, const int32_t activation_max) |
| General Matrix-multiplication function with per-channel requantization and int16 input (LHS) and output. This function assumes:
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_t_s8_s32 (const int8_t *lhs, const int8_t *rhs, int32_t *dst, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_idx_offset) |
| General Matrix-multiplication function with int8 input and int32 output. This function assumes:
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s4 (const int8_t *lhs, const int8_t *packed_rhs, const int32_t *bias, int8_t *dst, const int32_t lhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s4 Vector by Matrix (transposed) multiplication
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *kernel_sum, const int32_t *bias, int8_t *dst, const int32_t lhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset, const int32_t rhs_offset) |
| s8 Vector by Matrix (transposed) multiplication
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_per_ch_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *kernel_sum, const int32_t *bias, int8_t *dst, const int32_t lhs_offset, const int32_t dst_offset, const int32_t *dst_multiplier, const int32_t *dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset, const int32_t rhs_offset) |
| s8 Vector by Matrix (transposed) multiplication using per channel quantization for output
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s16 (const int16_t *lhs, const int8_t *rhs, const int64_t *bias, int16_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s16 Vector by s8 Matrix (transposed) multiplication
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s16_s16 (const int16_t *lhs, const int16_t *rhs, const int64_t *bias, int16_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s16 Vector by s16 Matrix (transposed) multiplication
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_svdf_s8 (const int8_t *lhs, const int8_t *rhs, int16_t *dst, const int32_t lhs_offset, const int32_t scatter_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s8 Vector by Matrix (transposed) multiplication with s16 output
|
|
arm_cmsis_nn_status | arm_nn_depthwise_conv_nt_t_padded_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t lhs_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -lhs_offset (Range: int8). Dimensions are the same for lhs and rhs.
|
|
arm_cmsis_nn_status | arm_nn_depthwise_conv_nt_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t lhs_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.
|
|
arm_cmsis_nn_status | arm_nn_depthwise_conv_nt_t_s4 (const int8_t *lhs, const int8_t *rhs, const int32_t lhs_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. rhs consists of packed int4 data. Dimensions are the same for lhs and rhs.
|
|
int16_t * | arm_nn_depthwise_conv_nt_t_s16 (const int16_t *lhs, const int8_t *rhs, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int64_t *const output_bias, int16_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.
|
|
arm_cmsis_nn_status | arm_nn_transpose_conv_row_s8_s32 (const int8_t *lhs, const int8_t *rhs, int32_t *output_start, const int32_t output_index, const int32_t output_max, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t input_channels, const int32_t output_channels, const int32_t lhs_offset, const int32_t row_offset, const int32_t input_x, const int32_t stride_x, const int32_t skip_row_top, const int32_t skip_row_bottom) |
| Row of s8 scalars multiplied with an s8 matrix and accumulated into an s32 rolling scratch buffer. Helper function for transposed convolution.
|
|
int8_t * | arm_nn_mat_mult_kernel_s4_s16 (const int8_t *input_a, const int16_t *input_b, const uint16_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const int32_t num_col_a, const int32_t *const output_bias, int8_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization and 4 bit weights.
|
|
int8_t * | arm_nn_mat_mult_kernel_s8_s16 (const int8_t *input_a, const int16_t *input_b, const uint16_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int32_t aligned_num_col_a, const int32_t *const output_bias, int8_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization.
|
|
int8_t * | arm_nn_mat_mult_kernel_row_offset_s8_s16 (const int8_t *input_a, const int16_t *input_b, const uint16_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int32_t aligned_num_col_a, const int32_t *const output_bias, const int32_t row_address_offset, int8_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset between rows.
|
|
void | arm_nn_softmax_common_s8 (const int8_t *input, const int32_t num_rows, const int32_t row_size, const int32_t mult, const int32_t shift, const int32_t diff_min, const bool int16_output, void *output) |
| Common softmax function for s8 input and s8 or s16 output.
|
|
arm_cmsis_nn_status | arm_nn_lstm_step_s8 (const int8_t *data_in, const int8_t *hidden_in, int8_t *hidden_out, const cmsis_nn_lstm_params *params, cmsis_nn_lstm_context *buffers, const int32_t batch_offset) |
| Update LSTM function for an iteration step using s8 input and output, and s16 internally.
|
|
arm_cmsis_nn_status | arm_nn_lstm_step_s16 (const int16_t *data_in, const int16_t *hidden_in, int16_t *hidden_out, const cmsis_nn_lstm_params *params, cmsis_nn_lstm_context *buffers, const int32_t batch_offset) |
| Update LSTM function for an iteration step using s16 input and output, and s16 internally.
|
|
arm_cmsis_nn_status | arm_nn_lstm_calculate_gate_s8_s16 (const int8_t *data_in, const int8_t *hidden_in, const cmsis_nn_lstm_gate *gate_data, const cmsis_nn_lstm_params *params, int16_t *output, const int32_t batch_offset) |
| Updates a LSTM gate for an iteration step of LSTM function, int8x8_16 version.
|
|
arm_cmsis_nn_status | arm_nn_lstm_calculate_gate_s16 (const int16_t *data_in, const int16_t *hidden_in, const cmsis_nn_lstm_gate *gate_data, const cmsis_nn_lstm_params *params, int16_t *output, const int32_t batch_offset) |
| Updates a LSTM gate for an iteration step of LSTM function, int16x8_16 version.
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mul_result_acc_s8_s16 (const int8_t *lhs, const int8_t *rhs, const int32_t *effective_bias, int16_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t batches, const int32_t batch_offset) |
| The result of the multiplication is accumulated to the passed result buffer. Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch dimension composed by input vectors independent from each other).
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mul_result_acc_s16 (const int16_t *lhs, const int8_t *rhs, const int64_t *effective_bias, int16_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t batches, const int32_t batch_offset) |
| The result of the multiplication is accumulated to the passed result buffer. Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch dimension composed by input vectors independent from each other).
|
|
arm_cmsis_nn_status | arm_elementwise_mul_s16_s8 (const int16_t *input_1_vect, const int16_t *input_2_vect, int8_t *output, const int32_t out_offset, const int32_t out_mult, const int32_t out_shift, const int32_t block_size, const int32_t batch_size, const int32_t batch_offset) |
| s16 elementwise multiplication with s8 output
|
|
arm_cmsis_nn_status | arm_elementwise_mul_s16_batch_offset (const int16_t *input_1_vect, const int16_t *input_2_vect, int16_t *output, const int32_t out_offset, const int32_t out_mult, const int32_t out_shift, const int32_t block_size, const int32_t batch_size, const int32_t batch_offset) |
| s16 elementwise multiplication with s16 output
|
|
arm_cmsis_nn_status | arm_elementwise_mul_acc_s16 (const int16_t *input_1_vect, const int16_t *input_2_vect, const int32_t input_1_offset, const int32_t input_2_offset, int16_t *output, const int32_t out_offset, const int32_t out_mult, const int32_t out_shift, const int32_t out_activation_min, const int32_t out_activation_max, const int32_t block_size) |
| s16 elementwise multiplication. The result of the multiplication is accumulated to the passed result buffer.
|
|