|
void | arm_q7_to_q15_with_offset (const int8_t *src, int16_t *dst, uint32_t block_size, int16_t offset) |
| Converts the elements from a s8 vector to a s16 vector with an added offset. More...
|
|
int8_t * | arm_nn_depthwise_conv_s8_core (const int8_t *row, const int16_t *col, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t kernel_size, const int32_t *const output_bias, int8_t *out) |
| Depthwise conv on an im2col buffer where the input channel equals output channel. More...
|
|
int8_t * | arm_nn_mat_mult_s8 (const int8_t *input_row, const int8_t *input_col, const uint16_t output_ch, const uint16_t col_batches, const int32_t *output_shift, const int32_t *output_mult, const int32_t out_offset, const int32_t col_offset, const int32_t row_offset, const int16_t out_activation_min, const int16_t out_activation_max, const uint16_t row_len, const int32_t *const bias, int8_t *out) |
| General Matrix-multiplication function with per-channel requantization. More...
|
|
int16_t * | arm_nn_mat_mult_kernel_s16 (const int8_t *input_a, const int16_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int64_t *const output_bias, int16_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution. More...
|
|
arm_cmsis_nn_status | arm_nn_mat_mul_core_1x_s8 (int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output) |
| General Vector by Matrix multiplication with requantization and storage of result. More...
|
|
int8_t * | arm_nn_mat_mul_core_4x_s8 (const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output) |
| Matrix-multiplication with requantization & activation function for four rows and one column. More...
|
|
arm_cmsis_nn_status | arm_nn_mat_mult_nt_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t rhs_cols_offset) |
| General Matrix-multiplication function with per-channel requantization. This function assumes: More...
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t lhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset) |
| s8 Vector by Matrix (transposed) multiplication. More...
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_s16 (const int16_t *lhs, const int8_t *rhs, const int64_t *bias, int16_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s16 Vector by Matrix (transposed) multiplication. More...
|
|
arm_cmsis_nn_status | arm_nn_vec_mat_mult_t_svdf_s8 (const int8_t *lhs, const int8_t *rhs, int16_t *dst, const int32_t lhs_offset, const int32_t scatter_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max) |
| s8 Vector by Matrix (transposed) multiplication with s16 output. More...
|
|
arm_cmsis_nn_status | arm_nn_depthwise_conv_nt_t_padded_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t lhs_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -lhs_offset (Range: int8). Dimensions are the same for lhs and rhs. More...
|
|
arm_cmsis_nn_status | arm_nn_depthwise_conv_nt_t_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t lhs_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs. More...
|
|
int16_t * | arm_nn_depthwise_conv_nt_t_s16 (const int16_t *lhs, const int8_t *rhs, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int64_t *const output_bias, int16_t *out) |
| Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs. More...
|
|
int32_t | arm_nn_read_q15x2_ia (const int16_t **in_q15) |
| Read 2 s16 elements and post increment pointer. More...
|
|
int32_t | arm_nn_read_s8x4_ia (const int8_t **in_s8) |
| Read 4 s8 from s8 pointer and post increment pointer. More...
|
|
int32_t | arm_nn_read_s16x2 (const int16_t *in) |
| Read 2 int16 values from int16 pointer. More...
|
|
int32_t | arm_nn_read_s8x4 (const int8_t *in_s8) |
| Read 4 s8 values. More...
|
|
void | arm_nn_write_s8x4_ia (int8_t **in, int32_t value) |
| Write four s8 to s8 pointer and increment pointer afterwards. More...
|
|
void | arm_memset_s8 (int8_t *dst, const int8_t val, uint32_t block_size) |
| memset optimized for MVE. More...
|
|
int8_t * | arm_nn_mat_mult_kernel_s8_s16 (const int8_t *input_a, const int16_t *input_b, const uint16_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int16_t activation_min, const int16_t activation_max, const uint16_t num_col_a, const int32_t *const output_bias, int8_t *out_0) |
| Matrix-multiplication function for convolution with per-channel requantization. More...
|
|
void | arm_nn_softmax_common_s8 (const int8_t *input, const int32_t num_rows, const int32_t row_size, const int32_t mult, const int32_t shift, const int32_t diff_min, const bool int16_output, void *output) |
| Common softmax function for s8 input and s8 or s16 output. More...
|
|
int32_t | arm_nn_doubling_high_mult (const int32_t m1, const int32_t m2) |
| Saturating doubling high multiply. Result matches NEON instruction VQRDMULH. More...
|
|
int32_t | arm_nn_doubling_high_mult_no_sat (const int32_t m1, const int32_t m2) |
| Doubling high multiply without saturation. This is intended for requantization where the scale is a positive integer. More...
|
|
int32_t | arm_nn_divide_by_power_of_two (const int32_t dividend, const int32_t exponent) |
| Rounding divide by power of two. More...
|
|
int32_t | arm_nn_requantize (const int32_t val, const int32_t multiplier, const int32_t shift) |
| Requantize a given value. More...
|
|
int32_t | arm_nn_requantize_s64 (const int64_t val, const int32_t reduced_multiplier, const int32_t shift) |
| Requantize a given 64 bit value. More...
|
|
void | arm_memcpy_s8 (int8_t *__RESTRICT dst, const int8_t *__RESTRICT src, uint32_t block_size) |
| memcpy optimized for MVE. More...
|
|
void | arm_memcpy_q15 (int16_t *__RESTRICT dst, const int16_t *__RESTRICT src, uint32_t block_size) |
| memcpy wrapper for int16. More...
|
|
int32_t | arm_nn_exp_on_negative_values (int32_t val) |
|
int32_t | arm_nn_mult_by_power_of_two (const int32_t val, const int32_t exp) |
|
int32_t | arm_nn_one_over_one_plus_x_for_x_in_0_1 (int32_t val) |
|
void | arm_nn_write_q15x2_ia (int16_t **dest_q15, int32_t src_q31) |
| Write 2 s16 elements and post increment pointer. More...
|
|
arm_cmsis_nn_status | arm_nn_lstm_step_s8_s16 (const int8_t *input, const int8_t *input_to_input_weight, const int8_t *input_to_forget_weight, const int8_t *input_to_cell_weight, const int8_t *input_to_output_weight, const int8_t *recurrent_to_input_weight, const int8_t *recurrent_to_forget_weight, const int8_t *recurrent_to_cell_weight, const int8_t *recurrent_to_output_weight, const cmsis_nn_lstm_params *lstm, const int n_batch, const int n_cell, const int n_input, const int n_output, int8_t *output_state, int16_t *cell_state, int8_t *output, cmsis_nn_lstm_context *scratch_buffers) |
| Update LSTM function for an iteration step. More...
|
|
void | arm_nn_lstm_calculate_gate_s8_s16 (const int8_t *input, const int8_t *input_to_gate_weights, const int32_t *input_to_gate_bias, const cmsis_nn_scaling input_to_gate_scaling, const int8_t *output_state, const int8_t *recurrent_to_gate_weights, const int32_t *recurrent_to_gate_bias, const cmsis_nn_scaling recurrent_to_gate_scaling, const int32_t n_batch, const int32_t n_input, const int32_t n_output, const int32_t n_cell, const arm_nn_activation_type activation_type, int16_t *gate) |
| Updates a LSTM gate for an iteration step of LSTM function, int8x8_16 version. More...
|
|
void | arm_nn_lstm_update_cell_state_s16 (const int32_t n_block, const int32_t cell_state_scale, int16_t *cell_state, const int16_t *input_gate, const int16_t *forget_gate, const int16_t *cell_gate) |
| Update cell state for a single LSTM iteration step, int8x8_16 version. More...
|
|
void | arm_nn_lstm_update_output_s8_s16 (const int n_batch, const int n_cell, const int n_output, int16_t *cell_state, const int32_t cell_state_scale, const int16_t *output_gate, const cmsis_nn_scaling hidden_scale, const int32_t hidden_offset, int8_t *output_state, int16_t *scratch0, int8_t *scratch1) |
| Calculate the output state tensor of an LSTM step, s8 input/output and s16 weight version. More...
|
|
void | arm_nn_vec_mat_mul_result_acc_s8 (const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int16_t *dst, const int32_t dst_offset, const int32_t multiplier, const int32_t shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t batch) |
| The result of the multiplication is accumulated to the passed result buffer. Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch dimension composed by input vectors independent from each other). More...
|
|
arm_cmsis_nn_status | arm_elementwise_mul_s16_s8 (const int16_t *input_1_vect, const int16_t *input_2_vect, int8_t *output, const int32_t out_offset, const int32_t out_mult, const int32_t out_shift, const int32_t block_size) |
| s16 elementwise multiplication with s8 output. More...
|
|