Data Structures
union	arm_nnword
	Union for SIMD access of q31/q15/q7 types. More...

struct	arm_nn_double
	Struct holding the low and high words of a long long. More...

union	arm_nn_long_long

Macros
#define	LEFT_SHIFT(_shift)

#define	RIGHT_SHIFT(_shift)

#define	MASK_IF_ZERO(x)

#define	MASK_IF_NON_ZERO(x)

#define	SELECT_USING_MASK(mask, a, b)

#define	MAX(A, B)

#define	MIN(A, B)

#define	CLAMP(x, h, l)

#define	REDUCE_MULTIPLIER(_mult)

#define	PACK_Q7x4_32x1(v0, v1, v2, v3)
	definition to pack four 8 bit values. More...

#define	NN_ROUND(out_shift)
	macro for adding rounding offset More...

#define	MUL_SAT(a, b)

#define	MUL_SAT_MVE(a, b)

#define	MUL_POW2(a, b)

#define	DIV_POW2(a, b)

#define	DIV_POW2_MVE(a, b)

#define	EXP_ON_NEG(x)

#define	ONE_OVER1(x)

#define	SELECT_IF_NON_ZERO(x)

Functions
void	arm_q7_to_q15_no_shift (const q7_t *pSrc, q15_t *pDst, uint32_t blockSize)
	Converts the elements of the q7 vector to q15 vector without left-shift. More...

void	arm_nn_add_q7 (const q7_t *input, q31_t *output, uint32_t block_size)
	Non-saturating addition of elements of a q7 vector. More...

void	arm_q7_to_q15_reordered_no_shift (const q7_t *pSrc, q15_t *pDst, uint32_t blockSize)
	Converts the elements of the q7 vector to reordered q15 vector without left-shift. More...

void	arm_q7_to_q15_with_offset (const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset)
	Converts the elements from a q7 vector to a q15 vector with an added offset. More...

void	arm_q7_to_q15_reordered_with_offset (const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset)
	Converts the elements of the q7 vector to reordered q15 vector with an added offset. More...

void	arm_nn_accumulate_q7_to_q15 (q15_t *dst, const q7_t *src, uint32_t block_size)
	Converts the elements from a q7 vector and accumulate to a q15 vector. More...

q7_t *	arm_nn_depthwise_conv_s8_core (const q7_t *row, const q15_t *col, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t kernel_size, const int32_t *const output_bias, q7_t *out)
	Depthwise conv on an im2col buffer where the input channel equals output channel. More...

q7_t *	arm_nn_mat_mult_s8 (const q7_t *input_row, const q7_t *input_col, const uint16_t output_ch, const uint16_t col_batches, const int32_t *output_shift, const int32_t *output_mult, const int32_t out_offset, const int32_t col_offset, const int32_t row_offset, const int16_t out_activation_min, const int16_t out_activation_max, const uint16_t row_len, const int32_t *const bias, q7_t *out)
	General Matrix-multiplication function with per-channel requantization. More...

q15_t *	arm_nn_mat_mult_kernel_s16 (const q7_t *input_a, const q15_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int64_t *const output_bias, q15_t *out_0)
	Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution. More...

arm_status	arm_nn_mat_mul_core_1x_s8 (int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)
	General Matrix-multiplication without requantization for one row & one column. More...

int8_t *	arm_nn_mat_mul_core_4x_s8 (const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
	Matrix-multiplication with requantization & activation function for four rows and one column. More...

arm_status	arm_nn_mat_mult_nt_t_s8 (const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)
	General Matrix-multiplication function with per-channel requantization. This function assumes: More...

arm_status	arm_nn_vec_mat_mult_t_s8 (const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)
	s8 Vector by Matrix (transposed) multiplication More...

arm_status	arm_nn_vec_mat_mult_t_s16 (const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
	s16 Vector by Matrix (transposed) multiplication More...

arm_status	arm_nn_vec_mat_mult_t_svdf_s8 (const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t scatter_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
	s8 Vector by Matrix (transposed) multiplication with s16 output More...

q7_t *	arm_nn_depthwise_conv_nt_t_padded_s8 (const q7_t *lhs, const q7_t *rhs, const int32_t lhs_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
	Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs. More...

q7_t *	arm_nn_depthwise_conv_nt_t_s8 (const q7_t *lhs, const q7_t *rhs, const int32_t lhs_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
	Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs. More...

q7_t *	arm_nn_mat_mult_kernel_q7_q15_reordered (const q7_t *pA, const q15_t *pInBuffer, const uint16_t ch_im_out, const uint16_t numCol_A, const uint16_t bias_shift, const uint16_t out_shift, const q7_t *bias, q7_t *pOut)
	Matrix-multiplication function for convolution with reordered columns. More...

__STATIC_FORCEINLINE q31_t	arm_nn_read_q15x2_ia (const q15_t **in_q15)
	Read 2 q15 elements and post increment pointer. More...

__STATIC_FORCEINLINE q31_t	arm_nn_read_q7x4_ia (const q7_t **in_q7)
	Read 4 q7 from q7 pointer and post increment pointer. More...

__STATIC_FORCEINLINE q31_t	arm_nn_read_q15x2 (const q15_t *in_q15)
	Read 2 q15 from q15 pointer. More...

__STATIC_FORCEINLINE q31_t	arm_nn_read_q7x4 (const q7_t *in_q7)
	Read 4 q7 values. More...

__STATIC_FORCEINLINE void	arm_nn_write_q7x4_ia (q7_t **in, q31_t value)
	Write four q7 to q7 pointer and increment pointer afterwards. More...

__STATIC_FORCEINLINE void	arm_memset_q7 (q7_t *dst, const q7_t val, uint32_t block_size)
	memset optimized for MVE More...

void	arm_nn_mult_q15 (q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
	q15 vector multiplication with variable output shifts More...

void	arm_nn_mult_q7 (q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
	q7 vector multiplication with variable output shifts More...

q7_t *	arm_nn_mat_mult_kernel_s8_s16 (const q7_t *input_a, const q15_t *input_b, const uint16_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int16_t activation_min, const int16_t activation_max, const uint16_t num_col_a, const int32_t *const output_bias, q7_t *out_0)
	Matrix-multiplication function for convolution with per-channel requantization. More...

void	arm_nn_softmax_common_s8 (const int8_t *input, const int32_t num_rows, const int32_t row_size, const int32_t mult, const int32_t shift, const int32_t diff_min, const bool int16_output, void *output)
	Common softmax function for s8 input and s8 or s16 output. More...

__STATIC_FORCEINLINE q31_t	arm_nn_doubling_high_mult (const q31_t m1, const q31_t m2)
	Saturating doubling high multiply. Result matches NEON instruction VQRDMULH. More...

__STATIC_FORCEINLINE q31_t	arm_nn_doubling_high_mult_no_sat (const q31_t m1, const q31_t m2)
	Doubling high multiply without saturation. This is intended for requantization where the scale is a positive integer. More...

__STATIC_FORCEINLINE q31_t	arm_nn_divide_by_power_of_two (const q31_t dividend, const q31_t exponent)
	Rounding divide by power of two. More...

__STATIC_FORCEINLINE q31_t	arm_nn_requantize (const q31_t val, const q31_t multiplier, const q31_t shift)
	Requantize a given value. More...

__STATIC_FORCEINLINE q31_t	arm_nn_requantize_s64 (const q63_t val, const q31_t reduced_multiplier, const q31_t shift)
	Requantize a given 64 bit value. More...

__STATIC_FORCEINLINE void	arm_memcpy_q7 (q7_t *__RESTRICT dst, const q7_t *__RESTRICT src, uint32_t block_size)
	memcpy optimized for MVE More...

__STATIC_FORCEINLINE int32_t	arm_nn_exp_on_negative_values (int32_t val)

__STATIC_FORCEINLINE q31_t	arm_nn_mult_by_power_of_two (const int32_t val, const int32_t exp)

__STATIC_FORCEINLINE int32_t	arm_nn_one_over_one_plus_x_for_x_in_0_1 (int32_t val)

__STATIC_FORCEINLINE void	arm_nn_write_q15x2_ia (q15_t **dest_q15, q31_t src_q31)
	Write 2 q15 elements and post increment pointer. More...

Macro Definition Documentation

#define CLAMP	(	x,
		h,
		l
	)

Referenced by arm_nn_softmax_common_s8(), arm_softmax_s8(), arm_softmax_u8(), arm_svdf_s8(), and arm_svdf_state_s16_s8().

#define DIV_POW2	(	a,
		b
	)

Referenced by arm_nn_exp_on_negative_values(), arm_nn_softmax_common_s8(), arm_softmax_s8(), and arm_softmax_u8().

#define DIV_POW2_MVE	(	a,
		b
	)

Referenced by arm_softmax_s8().

#define EXP_ON_NEG ( x )

Referenced by arm_nn_softmax_common_s8(), arm_softmax_s8(), and arm_softmax_u8().

#define LEFT_SHIFT ( _shift )

Referenced by arm_nn_requantize().

#define MASK_IF_NON_ZERO ( x )

Referenced by arm_nn_mult_by_power_of_two().

#define MASK_IF_ZERO ( x )

Referenced by arm_nn_exp_on_negative_values().

#define MAX	(	A,
		B
	)

Referenced by __attribute__(), arm_avgpool_s16(), arm_avgpool_s8(), arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), arm_convolve_fast_s16(), arm_convolve_s16(), arm_convolve_s8(), arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8_opt(), arm_elementwise_add_s16(), arm_elementwise_add_s8(), arm_elementwise_mul_s16(), arm_elementwise_mul_s8(), arm_max_pool_s16(), arm_max_pool_s8(), arm_nn_mat_mult_kernel_s16(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_nt_t_s8(), arm_nn_mat_mult_s8(), arm_nn_softmax_common_s8(), arm_nn_vec_mat_mult_t_s16(), arm_nn_vec_mat_mult_t_s8(), arm_nn_vec_mat_mult_t_svdf_s8(), arm_relu6_s8(), arm_softmax_s16(), arm_softmax_u8(), clamp_output(), depthwise_conv_s16_generic_s16(), depthwise_conv_s8_generic(), depthwise_conv_s8_mult_4(), depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().

#define MIN	(	A,
		B
	)

#define MUL_POW2	(	a,
		b
	)

Referenced by arm_nn_one_over_one_plus_x_for_x_in_0_1().

#define MUL_SAT	(	a,
		b
	)

Referenced by arm_nn_exp_on_negative_values(), arm_nn_one_over_one_plus_x_for_x_in_0_1(), arm_nn_softmax_common_s8(), arm_softmax_s8(), and arm_softmax_u8().

#define MUL_SAT_MVE	(	a,
		b
	)

Referenced by arm_softmax_s8().

#define ONE_OVER1 ( x )

Referenced by arm_nn_softmax_common_s8(), arm_softmax_s8(), and arm_softmax_u8().

#define PACK_Q7x4_32x1	(	v0,
		v1,
		v2,
		v3
	)

Referenced by arm_elementwise_add_s8(), and arm_elementwise_mul_s8().

#define REDUCE_MULTIPLIER ( _mult )

Referenced by __attribute__(), arm_convolve_fast_s16(), arm_convolve_s16(), arm_fully_connected_s16(), arm_nn_mat_mult_kernel_s16(), and depthwise_conv_s16_generic_s16().

#define RIGHT_SHIFT ( _shift )

Referenced by arm_nn_requantize().

#define SELECT_IF_NON_ZERO ( x )

Referenced by arm_nn_exp_on_negative_values().

#define SELECT_USING_MASK	(	mask,
		a,
		b
	)

Referenced by arm_nn_exp_on_negative_values(), and arm_nn_mult_by_power_of_two().

Function Documentation

__STATIC_FORCEINLINE void arm_memcpy_q7	(	q7_t *__RESTRICT	dst,
		const q7_t *__RESTRICT	src,
		uint32_t	block_size
	)

Parameters

[in,out]	dst	Destination pointer
[in]	src	Source pointer.
[in]	block_size	Number of bytes to copy.

Referenced by arm_concatenation_s8_w(), arm_concatenation_s8_x(), arm_concatenation_s8_y(), arm_concatenation_s8_z(), arm_convolve_fast_s16(), arm_convolve_HWC_q7_RGB(), arm_convolve_s8(), arm_depthwise_conv_s8_opt(), arm_max_pool_s8(), and arm_reshape_s8().

__STATIC_FORCEINLINE void arm_memset_q7	(	q7_t *	dst,
		const q7_t	val,
		uint32_t	block_size
	)

Parameters

[in,out]	dst	Destination pointer
[in]	val	Value to set
[in]	block_size	Number of bytes to set.

Referenced by arm_convolve_fast_s16(), arm_convolve_HWC_q7_RGB(), and arm_depthwise_conv_s8_opt().

q7_t* arm_nn_depthwise_conv_s8_core	(	const q7_t *	row,
		const q15_t *	col,
		const uint16_t	num_ch,
		const int32_t *	out_shift,
		const int32_t *	out_mult,
		const int32_t	out_offset,
		const int32_t	activation_min,
		const int32_t	activation_max,
		const uint16_t	kernel_size,
		const int32_t *const	output_bias,
		q7_t *	out
	)

Parameters

[in]	row	pointer to row
[in]	col	pointer to im2col buffer, always consists of 2 columns.
[in]	num_ch	number of channels
[in]	out_shift	pointer to per output channel requantization shift parameter.
[in]	out_mult	pointer to per output channel requantization multiplier parameter.
[in]	out_offset	output tensor offset.
[in]	activation_min	minimum value to clamp the output to. Range : int8
[in]	activation_max	maximum value to clamp the output to. Range : int8
[in]	kernel_size	number of elements in one column.
[in]	output_bias	per output channel bias. Range : int32
[out]	out	pointer to output

Returns

The function returns one of the two

The incremented output pointer for a successful operation or
NULL if implementation is not available.

Supported framework: TensorFlow Lite micro.

__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two	(	const q31_t	dividend,
		const q31_t	exponent
	)

Parameters

[in]	dividend	- Dividend
[in]	exponent	- Divisor = power(2, exponent) Range: [0, 31]

Returns: Rounded result of division. Midpoint is rounded away from zero.

Referenced by arm_nn_requantize().

__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult	(	const q31_t	m1,
		const q31_t	m2
	)

Parameters

[in]	m1	Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
[in]	m2	Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX}

Returns: Result of multiplication.

References NN_Q31_MAX, and NN_Q31_MIN.

__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult_no_sat	(	const q31_t	m1,
		const q31_t	m2
	)

Parameters

[in]	m1	Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
[in]	m2	Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX}

Returns: Result of multiplication.

Note: The result of this matches that of the NEON instruction VQRDMULH for m1 in range {NN_Q31_MIN, NN_Q31_MAX} and m2 in range {NN_Q31_MIN + 1, NN_Q31_MAX}. Saturation occurs when m1 equals m2 equals NN_Q31_MIN and that is not handled by this function.

References arm_nn_double::high, arm_nn_long_long::long_long, arm_nn_double::low, and arm_nn_long_long::word.

Referenced by arm_nn_requantize().

__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values ( int32_t val )

References DIV_POW2, MASK_IF_ZERO, MUL_SAT, NN_Q31_MAX, SELECT_IF_NON_ZERO, and SELECT_USING_MASK.

q7_t* arm_nn_mat_mult_kernel_q7_q15_reordered	(	const q7_t *	pA,
		const q15_t *	pInBuffer,
		const uint16_t	ch_im_out,
		const uint16_t	numCol_A,
		const uint16_t	bias_shift,
		const uint16_t	out_shift,
		const q7_t *	bias,
		q7_t *	pOut
	)

Parameters

[in]	pA	pointer to operand A
[in]	pInBuffer	pointer to operand B, always consists of 2 vectors
[in]	ch_im_out	numRow of A
[in]	numCol_A	numCol of A
[in]	bias_shift	amount of left-shift for bias
[in]	out_shift	amount of right-shift for output
[in]	bias	the bias
[in,out]	pOut	pointer to output

Returns: The function returns the incremented output pointer

This function assumes that data in pInBuffer are reordered

Matrix-multiplication function for convolution with reordered columns.

Refer to header file for details.

References arm_nn_read_q15x2_ia(), and NN_ROUND.

Referenced by arm_convolve_1x1_HWC_q7_fast_nonsquare(), arm_convolve_HWC_q7_fast(), and arm_convolve_HWC_q7_fast_nonsquare().

q15_t* arm_nn_mat_mult_kernel_s16	(	const q7_t *	input_a,
		const q15_t *	input_b,
		const int32_t	output_ch,
		const int32_t *	out_shift,
		const int32_t *	out_mult,
		const int16_t	activation_min,
		const int16_t	activation_max,
		const int32_t	num_col_a,
		const int64_t *const	output_bias,
		q15_t *	out_0
	)

Parameters

[in]	input_a	pointer to operand A
[in]	input_b	pointer to operand B, always consists of 2 vectors.
[in]	output_ch	number of rows of A
[in]	out_shift	pointer to per output channel requantization shift parameter.
[in]	out_mult	pointer to per output channel requantization multiplier parameter.
[in]	activation_min	minimum value to clamp the output to. Range : int16
[in]	activation_max	maximum value to clamp the output to. Range : int16
[in]	num_col_a	number of columns of A
[in]	output_bias	per output channel bias. Range : int64
[in,out]	out_0	pointer to output

Returns

The function returns one of the two

The incremented output pointer for a successful operation or
NULL if implementation is not available.

This function does the matrix multiplication of weight matrix for all output channels with 2 columns from im2col and produces two elements/output_channel. The outputs are clamped in the range provided by activation min and max. Supported framework: TensorFlow Lite micro.

References arm_nn_read_q15x2_ia(), arm_nn_requantize(), arm_nn_requantize_s64(), MAX, MIN, and REDUCE_MULTIPLIER.

Referenced by arm_convolve_fast_s16().

q7_t* arm_nn_mat_mult_kernel_s8_s16	(	const q7_t *	input_a,
		const q15_t *	input_b,
		const uint16_t	output_ch,
		const int32_t *	out_shift,
		const int32_t *	out_mult,
		const int32_t	out_offset,
		const int16_t	activation_min,
		const int16_t	activation_max,
		const uint16_t	num_col_a,
		const int32_t *const	output_bias,
		q7_t *	out_0
	)

Parameters

[in]	input_a	pointer to operand A
[in]	input_b	pointer to operand B, always consists of 2 vectors.
[in]	output_ch	number of rows of A
[in]	out_shift	pointer to per output channel requantization shift parameter.
[in]	out_mult	pointer to per output channel requantization multiplier parameter.
[in]	out_offset	output tensor offset.
[in]	activation_min	minimum value to clamp the output to. Range : int8
[in]	activation_max	maximum value to clamp the output to. Range : int8
[in]	num_col_a	number of columns of A
[in]	output_bias	per output channel bias. Range : int32
[in,out]	out_0	pointer to output

Returns

The function returns one of the two

The incremented output pointer for a successful operation or
NULL if implementation is not available.

This function does the matrix multiplication of weight matrix for all output channels with 2 columns from im2col and produces two elements/output_channel. The outputs are clamped in the range provided by activation min and max. Supported framework: TensorFlow Lite micro.

References arm_nn_read_q15x2_ia(), arm_nn_requantize(), MAX, and MIN.

Referenced by arm_convolve_s8().

q7_t* arm_nn_mat_mult_s8	(	const q7_t *	input_row,
		const q7_t *	input_col,
		const uint16_t	output_ch,
		const uint16_t	col_batches,
		const int32_t *	output_shift,
		const int32_t *	output_mult,
		const int32_t	out_offset,
		const int32_t	col_offset,
		const int32_t	row_offset,
		const int16_t	out_activation_min,
		const int16_t	out_activation_max,
		const uint16_t	row_len,
		const int32_t *const	bias,
		q7_t *	out
	)

Parameters

[in]	input_row	pointer to row operand
[in]	input_col	pointer to col operand
[in]	output_ch	number of rows of input_row
[in]	col_batches	number of column batches. Range: 1 to 4
[in]	output_shift	pointer to per output channel requantization shift parameter.
[in]	output_mult	pointer to per output channel requantization multiplier parameter.
[in]	out_offset	output tensor offset.
[in]	col_offset	input tensor(col) offset.
[in]	row_offset	kernel offset(row). Not used.
[in]	out_activation_min	minimum value to clamp the output to. Range : int8
[in]	out_activation_max	maximum value to clamp the output to. Range : int8
[in]	row_len	number of elements in each row
[in]	bias	per output channel bias. Range : int32
[in,out]	out	pointer to output

Returns

The function returns one of the two

The incremented output pointer for a successful operation or
NULL if implementation is not available.

Supported framework: TensorFlow Lite

References arm_nn_requantize(), MAX, and MIN.

Referenced by arm_convolve_s8().

__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two	(	const int32_t	val,
		const int32_t	exp
	)

References MASK_IF_NON_ZERO, NN_Q31_MAX, NN_Q31_MIN, and SELECT_USING_MASK.

__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1 ( int32_t val )

References MUL_POW2, MUL_SAT, and NN_Q31_MAX.

__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2 ( const q15_t * in_q15 )

Parameters

[in] in_q15 pointer to address of input.

Returns: q31 value

Referenced by arm_depthwise_conv_s8_opt(), arm_nn_accumulate_q7_to_q15(), clamp_output(), and compare_and_replace_if_larger().

__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia ( const q15_t ** in_q15 )

Parameters

[in] in_q15 Pointer to pointer that holds address of input.

Returns: q31 value

Referenced by arm_convolve_1x1_HWC_q7_fast_nonsquare(), arm_convolve_fast_s16(), arm_convolve_HWC_q15_basic(), arm_convolve_HWC_q15_fast(), arm_convolve_HWC_q15_fast_nonsquare(), arm_convolve_HWC_q7_basic(), arm_convolve_HWC_q7_basic_nonsquare(), arm_convolve_HWC_q7_fast(), arm_convolve_HWC_q7_fast_nonsquare(), arm_convolve_HWC_q7_RGB(), arm_convolve_s8(), arm_fully_connected_mat_q7_vec_q15(), arm_fully_connected_mat_q7_vec_q15_opt(), arm_fully_connected_q15(), arm_fully_connected_q15_opt(), arm_fully_connected_q7(), arm_fully_connected_q7_opt(), arm_nn_mat_mult_kernel_q7_q15(), arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_nn_mat_mult_kernel_s16(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_vec_mat_mult_t_s16(), arm_relu_q15(), arm_svdf_state_s16_s8(), and compare_and_replace_if_larger().

__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4 ( const q7_t * in_q7 )

Parameters

[in] in_q7 pointer to address of input.

Returns: q31 value

Referenced by arm_convolve_HWC_q7_RGB(), arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8_opt(), arm_depthwise_separable_conv_HWC_q7(), arm_depthwise_separable_conv_HWC_q7_nonsquare(), arm_nn_mat_mult_nt_t_s8(), clamp_output(), and compare_and_replace_if_larger_q7().

__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia ( const q7_t ** in_q7 )

Parameters

[in] in_q7 Pointer to pointer that holds address of input.

Returns: q31 value

Referenced by arm_fully_connected_mat_q7_vec_q15_opt(), arm_fully_connected_q7_opt(), arm_nn_accumulate_q7_to_q15(), arm_nn_add_q7(), arm_nn_mat_mult_nt_t_s8(), arm_nn_vec_mat_mult_t_s8(), arm_nn_vec_mat_mult_t_svdf_s8(), arm_q7_to_q15_no_shift(), arm_q7_to_q15_reordered_no_shift(), arm_q7_to_q15_reordered_with_offset(), arm_q7_to_q15_with_offset(), arm_relu_q7(), and compare_and_replace_if_larger_q7().

__STATIC_FORCEINLINE q31_t arm_nn_requantize	(	const q31_t	val,
		const q31_t	multiplier,
		const q31_t	shift
	)

Parameters

[in]	val	Value to be requantized
[in]	multiplier	multiplier. Range {NN_Q31_MIN + 1, NN_Q31_MAX}
[in]	shift	left or right shift for 'val * multiplier'

Returns: (val * multiplier)/(2 ^ shift)

References arm_nn_divide_by_power_of_two(), arm_nn_doubling_high_mult_no_sat(), LEFT_SHIFT, and RIGHT_SHIFT.

Referenced by arm_convolve_1x1_s8_fast(), arm_convolve_fast_s16(), arm_convolve_s8(), arm_depthwise_conv_3x3_s8(), arm_depthwise_conv_s8_opt(), arm_elementwise_add_s16(), arm_elementwise_add_s8(), arm_elementwise_mul_s16(), arm_elementwise_mul_s8(), arm_nn_mat_mult_kernel_s16(), arm_nn_mat_mult_kernel_s8_s16(), arm_nn_mat_mult_nt_t_s8(), arm_nn_mat_mult_s8(), arm_nn_vec_mat_mult_t_s8(), arm_nn_vec_mat_mult_t_svdf_s8(), arm_softmax_s16(), arm_svdf_s8(), arm_svdf_state_s16_s8(), depthwise_conv_s8_generic(), depthwise_conv_s8_mult_4(), depthwise_conv_u8_generic(), and depthwise_conv_u8_mult_4().

__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64	(	const q63_t	val,
		const q31_t	reduced_multiplier,
		const q31_t	shift
	)

Parameters

[in]	val	Value to be requantized in the range {-(1<<47)} to {(1<<47) - 1}
[in]	reduced_multiplier	Multiplier reduced from the range {NN_Q31_MIN + 1, NN_Q31_MAX} to the range {Q16_MIN + 1, Q16_MAX}
[in]	shift	Left or right shift for 'val * multiplier' in the range {-31} to {7}

Returns: (val * multiplier)/(2 ^ shift)

Referenced by __attribute__(), arm_convolve_fast_s16(), arm_convolve_s16(), arm_nn_mat_mult_kernel_s16(), arm_nn_vec_mat_mult_t_s16(), and depthwise_conv_s16_generic_s16().

__STATIC_FORCEINLINE void arm_nn_write_q15x2_ia	(	q15_t **	dest_q15,
		q31_t	src_q31
	)

Parameters

[in]	dest_q15	Pointer to pointer that holds address of destination.
[in]	src_q31	Input value to be written.

Referenced by arm_nn_accumulate_q7_to_q15(), arm_q7_to_q15_no_shift(), arm_q7_to_q15_reordered_with_offset(), arm_q7_to_q15_with_offset(), arm_relu_q15(), clamp_output(), and compare_and_replace_if_larger().

__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia	(	q7_t **	in,
		q31_t	value
	)

Parameters

[in,out]	in	Pointer to the q7 destination pointer; the pointer is incremented after the write
[in]	value	Four bytes to copy

Referenced by arm_elementwise_add_s8(), arm_elementwise_mul_s8(), arm_q7_to_q15_reordered_no_shift(), arm_relu_q7(), clamp_output(), and compare_and_replace_if_larger_q7().

Data Structures

Macros

Functions

Macro Definition Documentation

Function Documentation