Data Structures
struct	vctpq< float >
	vctpq for this datatype More...

struct	vctpq< float16_t >
	vctpq for Helium and f16 More...

struct	vload1_gen_stride
	Load with generalized stride (gather load) More...

struct	vload1_gen_stride< 0, 1, 2, 3 >
	Load with generalized stride specialized for <0,1,2,3> More...

struct	vload1_gen_stride_z
	Load with generalized stride (gather load) and tail predicate. More...

struct	vload1_gen_stride_z< 0, 1, 2, 3 >
	Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3> More...

struct	vstore1_gen_stride
	Generalized store with strides. More...

struct	vstore1_gen_stride< 0, 1, 2, 3 >
	Generalized store with stride (Specialized for <0,1,2,3>) More...

struct	vstore1_gen_stride_z
	Store with generalized strides and tail predicate. More...

struct	vstore1_gen_stride_z< 0, 1, 2, 3 >
	Scatter store with tail predicate (specialized for <0,1,2,3>) More...

Functions
Q15DSPVector	vconst (Q15 val)
	Vector const.

Q< 33, 30 >	vreduce (const Q< 33, 30 > sum)
	Reduce accumulation value.

float32x4_t	vconst (const float v)
	Vector constant.

float32x4_t	vconst_tail (const float v, const mve_pred16_t p0)
	Vector constant with tail.

float32x4_t	vneg (const float32x4_t a)
	Vector negate.

float32x4_t	vneg (const float32x4_t a, const mve_pred16_t p0)
	Vector negate with tail.

float32x4_t	vadd (const float32x4_t a, const float32x4_t b)
	Vector + Vector.

float32x4_t	vadd (const float32x4_t a, const float b)
	Vector + Scalar.

float32x4_t	vadd (const float a, const float32x4_t b)
	Scalar + Vector.

float32x4_t	vadd (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
	Vector + Vector with tail.

float32x4_t	vadd (const float32x4_t a, const float b, const mve_pred16_t p0)
	Vector + scalar with tail.

float32x4_t	vadd (const float a, const float32x4_t b, const mve_pred16_t p0)
	Scalar + vector with tail predicate.

float32x4_t	vsub (const float32x4_t a, const float32x4_t b)
	Vector - Vector.

float32x4_t	vsub (const float32x4_t a, const float b)
	Vector - Scalar.

float32x4_t	vsub (const float a, const float32x4_t b)
	Scalar - Vector.

float32x4_t	vsub (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
	Vector - Vector with predicate.

float32x4_t	vsub (const float32x4_t a, const float b, const mve_pred16_t p0)
	Vector - Scalar with predicate.

float32x4_t	vsub (const float a, const float32x4_t b, const mve_pred16_t p0)
	Scalar - Vector with predicate.

float32x4_t	vmul (const float32x4_t a, const float32x4_t b)
	Vector * Vector.

float32x4_t	vmul (const float32x4_t a, const float b)
	Vector * Scalar.

float32x4_t	vmul (const float a, const float32x4_t b)
	Scalar * Vector.

float32x4_t	vmul (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
	Vector * Vector with predicate.

float32x4_t	vmul (const float32x4_t a, const float b, const mve_pred16_t p0)
	Vector * Scalar with predicate.

float32x4_t	vmul (const float a, const float32x4_t b, const mve_pred16_t p0)
	Scalar * Vector with predicate.

float32x4_t	vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b)
	Multiply accumulate (Vector * Vector)

float32x4_t	vmacc (const float32x4_t acc, const float32x4_t a, const float_t b)
	Multiply accumulate (Vector * Scalar)

float32x4_t	vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
	Multiply accumulate with predicate (Vector * Vector)

float	vreduce (const float32x4_t in)
	Vector reduce.

template<int S, typename std::enable_if< S==1, bool >::type = true>
float32x4_t	vload1 (const float32_t *p)
	Vector load with stride.

float32x4_t	vload1 (const float32_t *p, const index_t stride)
	Vector load with dynamic stride.

template<int S, typename std::enable_if< S==1, bool >::type = true>
float32x4_t	vload1_z (const float32_t *p, const std::size_t nb, const mve_pred16_t p0)
	Vector load with stride and predicate.

float32x4_t	vload1_z (const float32_t *p, const index_t stride, const std::size_t nb, const mve_pred16_t p0)
	Vector load with dynamic stride and loop predication.

template<int S, typename std::enable_if< S==1, bool >::type = true>
void	vstore1 (float32_t *p, const float32x4_t val)
	Store with stride.

void	vstore1 (float32_t *p, const index_t stride, const float32x4_t val)
	Store with dynamic stride.

template<int S, typename std::enable_if< S==1, bool >::type = true>
void	vstore1_z (float32_t *p, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0)
	Store with stride and tail predicate.

void	vstore1_z (float32_t *p, const index_t stride, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0)
	Store with dynamic stride and tail predicate.

float16x8_t	vconst (float16_t v)
	Vector const.

float16x8_t	vconst_tail (const float16_t v, const mve_pred16_t p0)
	Vector of const with tail predicate.

float16x8_t	vneg (const float16x8_t a)
	Vector negate.

float16x8_t	vneg (const float16x8_t a, const mve_pred16_t p0)
	Vector negate with tail predicate.

float16x8_t	vadd (const float16x8_t a, const float16x8_t b)
	Vector + Vector.

float16x8_t	vadd (const float16x8_t a, const float16_t b)
	Vector + Scalar.

float16x8_t	vadd (const float16_t a, const float16x8_t b)
	Scalar + Vector.

float16x8_t	vadd (const float16x8_t a, const float16x8_t b, const mve_pred16_t p0)
	Vector + Vector with tail predicate.

float16x8_t	vadd (const float16x8_t a, const float16_t b, const mve_pred16_t p0)
	Vector + Scalar with tail predicate.

float16x8_t	vadd (const float16_t a, const float16x8_t b, const mve_pred16_t p0)
	Scalar + Vector with tail predicate.

template<int S, typename std::enable_if< S==1, bool >::type = true>
int16x8_t	vload1 (const Q15 *p)
	Vector load with stride.

Q< 33, 30 >	vmacc (const Q< 33, 30 > sum, const int16x8_t vala, const int16x8_t valb)
	Vector accumulate into scalar.

double	from_accumulator (const double a)
	Convert from accumulator representation.

double	mac (const double acc, const double a, const double b)
	Multiply and accumulate for this datatype.

void	accumulate (double &a, const double &b)
	Accumulate.

double	mult (double &a, const double &b)
	Multiply.

float	from_accumulator (const float a)
	Convert from accumulator representation.

float	mac (const float acc, const float a, const float b)
	Scalar multiply and accumulate.

void	accumulate (float &a, const float &b)
	Scalar accumulate.

float	mult (float &a, const float &b)
	Scalar multiply.

template<typename A , typename V , std::size_t... Ns>
A	vmacc_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >)
	Vector accumulate for tuples of vectors.

template<typename A , typename ... E>
A	vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b)
	Vector accumulate for tuples of vectors.

template<typename A , typename V , typename B , std::size_t... Ns>
A	vmacc_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >)
	Predicated vector accumulate for tuple.

template<typename A , typename B , typename ... E>
A	vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0)
	Predicated vector accumulate for tuples.

template<typename A , std::size_t... Ns>
auto	vreduce_impl (const A &acc, std::index_sequence< Ns... >)
	Reduce function for tuple.

template<typename ... E>
auto	vreduce (const std::tuple< E... > &acc)
	Reduce function for tuples.

template<typename A , std::size_t... Ns>
auto	from_accumulator_impl (const A &acc, std::index_sequence< Ns... >)
	Convert from accumulator value.

template<typename ... E>
auto	from_accumulator (const std::tuple< E... > &acc)
	Convert from tuple of accumulator values.

template<typename A , typename V , std::size_t... Ns>
A	mac_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >)
	Multiply accumulate for tuple of scalar.

template<typename A , typename ... E>
A	mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b)
	Multiply accumulate.

template<typename A , typename V , typename B , std::size_t... Ns>
A	mac_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >)
	Multiply accumulate for tuple of scalar.

template<typename A , typename B , typename ... E>
A	mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0)
	Multiply accumulate.

float16_t	from_accumulator (const float16_t a)
	Convert from accumulator datatype.

float16_t	mac (const float16_t acc, const float16_t a, const float16_t b)
	Multiply and accumulate.

void	accumulate (float16_t &a, const float16_t &b)
	Accumulate.

float16_t	mult (float16_t &a, const float16_t &b)
	Multiply.

Q15	from_accumulator (const Q< 33, 30 > a)
	Convert from accumulator type.

Q< 33, 30 >	mac (const Q< 33, 30 > acc, const Q15 a, const Q15 b)
	Multiply and accumulate.

Q31	from_accumulator (const Q< 15, 48 > a)
	Convert from accumulator (with no saturation)

Q< 15, 48 >	mac (const Q< 15, 48 > acc, const Q31 a, const Q31 b)
	Multiply and accumulate.

Q7	from_accumulator (const Q< 17, 14 > a)
	Convert from accumulator with saturation.

Q< 17, 14 >	mac (const Q< 17, 14 > acc, const Q7 a, const Q7 b)
	Multiply and accumulate.

Detailed Description

Inner implementation of Helium intrinsics

Inner implementation of generic intrinsics

Function Documentation

◆ accumulate() [1/3]

void accumulate	(	double &	a,
		const double &	b
	)

Accumulate.

Parameters

	a	Accumulator
[in]	b	Value to be added

◆ accumulate() [2/3]

void accumulate	(	float &	a,
		const float &	b
	)

Scalar accumulate.

Parameters

	a	Accumulator
[in]	b	Operand

◆ accumulate() [3/3]

void accumulate	(	float16_t &	a,
		const float16_t &	b
	)

Accumulate.

Parameters

	a	Accumulator
[in]	b	Value to accumulate

◆ from_accumulator() [1/7]

double from_accumulator ( const double a )

Convert from accumulator representation.

Parameters

[in] a Value

Returns: Accumulator value converted to current datatype

◆ from_accumulator() [2/7]

float from_accumulator ( const float a )

Convert from accumulator representation.

Parameters

[in] a Value

Returns: Accumulator value converted to current datatype

◆ from_accumulator() [3/7]

float16_t from_accumulator ( const float16_t a )

Convert from accumulator datatype.

Parameters

[in] a Value

Returns: Converted from accumulator datatype

◆ from_accumulator() [4/7]

Q31 from_accumulator ( const Q< 15, 48 > a )

Convert from accumulator (with no saturation)

Parameters

[in] a Accumulator value

Returns: Converted value

◆ from_accumulator() [5/7]

Q7 from_accumulator ( const Q< 17, 14 > a )

Convert from accumulator with saturation.

Parameters

[in] a Accumulator value

Returns: Q7 value

◆ from_accumulator() [6/7]

Q15 from_accumulator ( const Q< 33, 30 > a )

Convert from accumulator type.

Parameters

[in] a The accumulator value

Returns: The converted value (with saturation)

◆ from_accumulator() [7/7]

auto from_accumulator ( const std::tuple< E... > & acc )

Convert from tuple of accumulator values.

Parameters

[in] acc Accumulator

Template Parameters

E	Datatypes for tuple

Returns: Tuples of converted accumulator values

The accumulator may use more bits to avoid saturation. At the end of the accumulation, the final result must be converted to the current datatype (which may imply saturation).

◆ from_accumulator_impl()

auto from_accumulator_impl	(	const A &	acc,
		std::index_sequence< Ns... >
	)

Convert from accumulator value.

Parameters

[in] acc The accumulator

Template Parameters

A	Accumulator datatype
Ns	Tuples indexes

Returns: Tuples of values

◆ mac() [1/8]

A mac	(	const A &	acc,
		const std::tuple< E... > &	a,
		const std::tuple< E... > &	b
	)

Multiply accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Template Parameters

A	Accumulator datatype
E	Datatypes for tuple

Returns: Accumulated values

◆ mac() [2/8]

A mac	(	const A &	acc,
		const std::tuple< E... > &	a,
		const std::tuple< E... > &	b,
		const B	p0
	)

Multiply accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand
[in]	p0	Predicate

Template Parameters

A	Accumulator datatype
B	Predicate datatype
E	Datatypes for tuple

Returns: Accumulated values

◆ mac() [3/8]

double mac	(	const double	acc,
		const double	a,
		const double	b
	)

Multiply and accumulate for this datatype.

Parameters

[in]	acc	The accumulated value
[in]	a	The left hand side
[in]	b	The right hand side

Returns: Return acc + a*b

◆ mac() [4/8]

float mac	(	const float	acc,
		const float	a,
		const float	b
	)

Scalar multiply and accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	Operand
[in]	b	Operand

Returns: acc + a*b

◆ mac() [5/8]

float16_t mac	(	const float16_t	acc,
		const float16_t	a,
		const float16_t	b
	)

Multiply and accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Returns: acc + a*b

◆ mac() [6/8]

Q< 15, 48 > mac	(	const Q< 15, 48 >	acc,
		const Q31	a,
		const Q31	b
	)

Multiply and accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Returns: acc + a*b

◆ mac() [7/8]

Q< 17, 14 > mac	(	const Q< 17, 14 >	acc,
		const Q7	a,
		const Q7	b
	)

Multiply and accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Returns: acc + a*b

◆ mac() [8/8]

Q< 33, 30 > mac	(	const Q< 33, 30 >	acc,
		const Q15	a,
		const Q15	b
	)

Multiply and accumulate.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Returns: acc + a*b

◆ mac_impl() [1/2]

A mac_impl	(	const A &	acc,
		const V &	a,
		const V &	b,
		const B	p0,
		std::index_sequence< Ns... >
	)

Multiply accumulate for tuple of scalar.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand
[in]	p0	Predicate

Template Parameters

A	Accumulator datatype
V	Scalar datatype
B	Predicate datatype
Ns	Tuple indexes

Returns: Tuples of accumulated values

◆ mac_impl() [2/2]

A mac_impl	(	const A &	acc,
		const V &	a,
		const V &	b,
		std::index_sequence< Ns... >
	)

Multiply accumulate for tuple of scalar.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand

Template Parameters

A	Accumulator datatype
V	Scalar datatype
Ns	Tuple indexes

Returns: Tuples of accumulated values

◆ mult() [1/3]

double mult	(	double &	a,
		const double &	b
	)

Multiply.

Parameters

	a	Left hand side
[in]	b	Right hand side

Returns: Return a*b

◆ mult() [2/3]

float mult	(	float &	a,
		const float &	b
	)

Scalar multiply.

Parameters

	a	Operand
[in]	b	Operand

Returns: a*b

◆ mult() [3/3]

float16_t mult	(	float16_t &	a,
		const float16_t &	b
	)

Multiply.

Parameters

	a	First operand
[in]	b	Second operand

Returns: a*b

◆ vadd() [1/12]

float32x4_t vadd	(	const float	a,
		const float32x4_t	b
	)

Scalar + Vector.

Parameters

[in]	a	Scalar
[in]	b	Vector

Returns: a + b

◆ vadd() [2/12]

float32x4_t vadd	(	const float	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Scalar + vector with tail predicate.

Parameters

[in]	a	Scalar
[in]	b	Vector
[in]	p0	Predicate

Returns: a + b with tail predicate

◆ vadd() [3/12]

float16x8_t vadd	(	const float16_t	a,
		const float16x8_t	b
	)

Scalar + Vector.

Parameters

[in]	a	Scalar
[in]	b	Vector

Returns: a + b

◆ vadd() [4/12]

float16x8_t vadd	(	const float16_t	a,
		const float16x8_t	b,
		const mve_pred16_t	p0
	)

Scalar + Vector with tail predicate.

Parameters

[in]	a	Scalar
[in]	b	Vector
[in]	p0	Predicate

Returns: a + b with tail predicate

◆ vadd() [5/12]

float16x8_t vadd	(	const float16x8_t	a,
		const float16_t	b
	)

Vector + Scalar.

Parameters

[in]	a	Vector
[in]	b	Scalar

Returns: a + b

◆ vadd() [6/12]

float16x8_t vadd	(	const float16x8_t	a,
		const float16_t	b,
		const mve_pred16_t	p0
	)

Vector + Scalar with tail predicate.

Parameters

[in]	a	Vector
[in]	b	Scalar
[in]	p0	Predicate

Returns: a + b with tail predicate

◆ vadd() [7/12]

float16x8_t vadd	(	const float16x8_t	a,
		const float16x8_t	b
	)

Vector + Vector.

Parameters

[in]	a	Vector
[in]	b	Vector

Returns: a + b

◆ vadd() [8/12]

float16x8_t vadd	(	const float16x8_t	a,
		const float16x8_t	b,
		const mve_pred16_t	p0
	)

Vector + Vector with tail predicate.

Parameters

[in]	a	Vector
[in]	b	Vector
[in]	p0	predicate

Returns: a + b with tail predicate

◆ vadd() [9/12]

float32x4_t vadd	(	const float32x4_t	a,
		const float	b
	)

Vector + Scalar.

Parameters

[in]	a	Vector
[in]	b	Scalar

Returns: a + b

◆ vadd() [10/12]

float32x4_t vadd	(	const float32x4_t	a,
		const float	b,
		const mve_pred16_t	p0
	)

Vector + scalar with tail.

Parameters

[in]	a	Vector
[in]	b	Scalar
[in]	p0	Predicate

Returns: a + b with tail predicate

◆ vadd() [11/12]

float32x4_t vadd	(	const float32x4_t	a,
		const float32x4_t	b
	)

Vector + Vector.

Parameters

[in]	a	First operand
[in]	b	Second operand

Returns: a + b

◆ vadd() [12/12]

float32x4_t vadd	(	const float32x4_t	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Vector + Vector with tail.

Parameters

[in]	a	Vector
[in]	b	Vector
[in]	p0	Predicate

Returns: a + b with tail predicate

◆ vconst() [1/3]

float32x4_t vconst ( const float v )

Vector constant.

Parameters

[in] v Constant value

Returns: Vector initialized with constant in each lane

◆ vconst() [2/3]

float16x8_t vconst ( float16_t v )

Vector const.

Parameters

[in] v Initialization value

Returns: Vector of const

◆ vconst() [3/3]

int16x8_t vconst ( Q15 val )

Vector const.

Parameters

[in] val The value

Returns: Vector initialized with the constant in each lane

◆ vconst_tail() [1/2]

float32x4_t vconst_tail	(	const float	v,
		const mve_pred16_t	p0
	)

Vector constant with tail.

Parameters

[in]	v	Constant value
[in]	p0	Predicate

Returns: Vector initialized with constant in some lanes depending on the predicate

◆ vconst_tail() [2/2]

float16x8_t vconst_tail	(	const float16_t	v,
		const mve_pred16_t	p0
	)

Vector of const with tail predicate.

Parameters

[in]	v	The initialization parameter
[in]	p0	The predicate

Returns: The initialized vector with const and predicate

◆ vload1() [1/3]

float32x4_t vload1 ( const float32_t * p )

inline

Vector load with stride.

Parameters

[in] p Load address

Template Parameters

S	Stride
<unnamed>	Check stride value

Returns: Loaded vector with stride

◆ vload1() [2/3]

float32x4_t vload1	(	const float32_t *	p,
		const index_t	stride
	)

inline

Vector load with dynamic stride.

Parameters

[in]	p	Load address
[in]	stride	Stride value

Returns: Loaded vector with stride

◆ vload1() [3/3]

int16x8_t vload1 ( const Q15 * p )

inline

Vector load with stride.

Parameters

[in] p Load address

Template Parameters

S	Stride
<unnamed>	Stride check

Returns: Gather load

In q15, a lane is 16 bits wide, so the offset that can be encoded for a gather load cannot be bigger than 65535. With a stride of S, the biggest offset is S*7. So S must be <= 65535/7, i.e. S <= 9362.

For higher stride, the Helium instruction cannot be used and instead a dynamic stride is used.

◆ vload1_z() [1/2]

float32x4_t vload1_z	(	const float32_t *	p,
		const index_t	stride,
		const std::size_t	nb,
		const mve_pred16_t	p0
	)

inline

Vector load with dynamic stride and loop predication.

Parameters

[in]	p	Load address
[in]	stride	Stride value
[in]	nb	Number of remaining loop samples
[in]	p0	Predicate for remaining loop samples

Returns: Loaded vector with stride and loop predicate

◆ vload1_z() [2/2]

float32x4_t vload1_z	(	const float32_t *	p,
		const std::size_t	nb,
		const mve_pred16_t	p0
	)

inline

Vector load with stride and predicate.

Parameters

[in]	p	Load address
[in]	nb	Number of remaining loop samples
[in]	p0	Predicate for remaining loop samples

Template Parameters

S	Stride
<unnamed>	Check stride value

Returns: Loaded vector with stride and loop predication

◆ vmacc() [1/6]

A vmacc	(	const A &	acc,
		const std::tuple< E... > &	a,
		const std::tuple< E... > &	b
	)

Vector accumulate for tuples of vectors.

Parameters

[in]	acc	The accumulator
[in]	a	First operand
[in]	b	Second operand

Template Parameters

A	Accumulator datatype
E	Datatype of tuples elements

Returns: Accumulator result

◆ vmacc() [2/6]

A vmacc	(	const A &	acc,
		const std::tuple< E... > &	a,
		const std::tuple< E... > &	b,
		const B	p0
	)

Predicated vector accumulate for tuples.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand
[in]	p0	Predicate

Template Parameters

A	Accumulator datatype
B	Predicate datatype
E	Datatype of tuples elements

Returns: Tuple of accumulated vectors

◆ vmacc() [3/6]

float32x4_t vmacc	(	const float32x4_t	acc,
		const float32x4_t	a,
		const float32x4_t	b
	)

Multiply accumulate (Vector * Vector)

Parameters

[in]	acc	Accumulator
[in]	a	Vector
[in]	b	Vector

Returns: acc + a * b

◆ vmacc() [4/6]

float32x4_t vmacc	(	const float32x4_t	acc,
		const float32x4_t	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Multiply accumulate with predicate (Vector * Vector)

Parameters

[in]	acc	Accumulator
[in]	a	Vector
[in]	b	Vector
[in]	p0	Predicate

Returns: acc + a*b with predicate

◆ vmacc() [5/6]

float32x4_t vmacc	(	const float32x4_t	acc,
		const float32x4_t	a,
		const float_t	b
	)

Multiply accumulate (Vector * Scalar)

Parameters

[in]	acc	Accumulator
[in]	a	Vector
[in]	b	Scalar

Returns: acc + a * b

◆ vmacc() [6/6]

Q< 33, 30 > vmacc	(	const Q< 33, 30 >	sum,
		const int16x8_t	vala,
		const int16x8_t	valb
	)

Vector accumulate into scalar.

Parameters

[in]	sum	The sum
[in]	vala	First vector operand
[in]	valb	Second vector operand

Returns: vala * valb and accumulated into sum

◆ vmacc_impl() [1/2]

A vmacc_impl	(	const A &	acc,
		const V &	a,
		const V &	b,
		const B	p0,
		std::index_sequence< Ns... >
	)

Predicated vector accumulate for tuple.

Parameters

[in]	acc	Accumulator
[in]	a	First operand
[in]	b	Second operand
[in]	p0	Predicate

Template Parameters

A	Accumulator datatype
V	Vector datatype
B	Predicate datatype
Ns	Tuple indexes

Returns: Tuple of accumulated values

◆ vmacc_impl() [2/2]

A vmacc_impl	(	const A &	acc,
		const V &	a,
		const V &	b,
		std::index_sequence< Ns... >
	)

Vector accumulate for tuples of vectors.

Parameters

[in]	acc	The accumulator
[in]	a	First operand
[in]	b	Second operand

Template Parameters

A	Accumulator datatype
V	Vector datatype
Ns	Tuple index

Returns: tuple of results

◆ vmul() [1/6]

float32x4_t vmul	(	const float	a,
		const float32x4_t	b
	)

Scalar * Vector.

Parameters

[in]	a	Scalar
[in]	b	Vector

Returns: a * b

◆ vmul() [2/6]

float32x4_t vmul	(	const float	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Scalar * Vector with predicate.

Parameters

[in]	a	Scalar
[in]	b	Vector
[in]	p0	Predicate

Returns: a * b with predicate

◆ vmul() [3/6]

float32x4_t vmul	(	const float32x4_t	a,
		const float	b
	)

Vector * Scalar.

Parameters

[in]	a	Vector
[in]	b	Scalar

Returns: a * b

◆ vmul() [4/6]

float32x4_t vmul	(	const float32x4_t	a,
		const float	b,
		const mve_pred16_t	p0
	)

Vector * Scalar with predicate.

Parameters

[in]	a	Vector
[in]	b	Scalar
[in]	p0	Predicate

Returns: a * b with predicate

◆ vmul() [5/6]

float32x4_t vmul	(	const float32x4_t	a,
		const float32x4_t	b
	)

Vector * Vector.

Parameters

[in]	a	Vector
[in]	b	Vector

Returns: a * b

◆ vmul() [6/6]

float32x4_t vmul	(	const float32x4_t	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Vector * Vector with predicate.

Parameters

[in]	a	Vector
[in]	b	Vector
[in]	p0	Predicate

Returns: a * b with predicate

◆ vneg() [1/4]

float16x8_t vneg ( const float16x8_t a )

Vector negate.

Parameters

[in] a Vector

Returns: Negate of vector

◆ vneg() [2/4]

float16x8_t vneg	(	const float16x8_t	a,
		const mve_pred16_t	p0
	)

Vector negate with tail predicate.

Parameters

[in]	a	Vector
[in]	p0	Predicate

Returns: Negate of vector with tail predicate

◆ vneg() [3/4]

float32x4_t vneg ( const float32x4_t a )

Vector negate.

Parameters

[in] a Vector value to negate

Returns: Negated value

◆ vneg() [4/4]

float32x4_t vneg	(	const float32x4_t	a,
		const mve_pred16_t	p0
	)

Vector negate with tail.

Parameters

[in]	a	Value
[in]	p0	Predicate

Returns: Negated value

◆ vreduce() [1/3]

float vreduce ( const float32x4_t in )

Vector reduce.

Parameters

[in] in Vector

Returns: Reduced scalar value

◆ vreduce() [2/3]

Q< 33, 30 > vreduce ( const Q< 33, 30 > sum )

Reduce accumulation value.

Parameters

[in] sum The sum

Returns: Reduced value

Since the Helium instructions can accumulate vector product into a scalar there is no need to reduce the accumulator value. It is already in scalar form.

◆ vreduce() [3/3]

auto vreduce ( const std::tuple< E... > & acc )

Reduce function for tuples.

Parameters

[in] acc The accumulator

Template Parameters

E	Datatypes for tuples

Returns: Tuples of reduced values

Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead, and that vector must be reduced to a scalar at the end of the accumulation loop.

◆ vreduce_impl()

auto vreduce_impl	(	const A &	acc,
		std::index_sequence< Ns... >
	)

Reduce function for tuple.

Parameters

[in] acc Accumulator

Template Parameters

A	Accumulator datatype
Ns	Tuple indexes

Returns: Reduced accumulator values

Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead, and that vector must be reduced to a scalar at the end of the accumulation loop.

◆ vstore1() [1/2]

void vstore1	(	float32_t *	p,
		const float32x4_t	val
	)

inline

Store with stride.

Parameters

	p	Store address
[in]	val	Value to store

Template Parameters

S	Stride
<unnamed>	Check stride value

◆ vstore1() [2/2]

void vstore1	(	float32_t *	p,
		const index_t	stride,
		const float32x4_t	val
	)

inline

Store with dynamic stride.

Parameters

	p	Store address
[in]	stride	Stride value
[in]	val	Value to store

◆ vstore1_z() [1/2]

void vstore1_z	(	float32_t *	p,
		const float32x4_t	val,
		const std::size_t	nb,
		const mve_pred16_t	p0
	)

inline

Store with stride and tail predicate.

Parameters

	p	Store address
[in]	val	Value to store
[in]	nb	Number of remaining loop iterations
[in]	p0	Predicate for loop

Template Parameters

S	Stride
<unnamed>	Check stride value

◆ vstore1_z() [2/2]

void vstore1_z	(	float32_t *	p,
		const index_t	stride,
		const float32x4_t	val,
		const std::size_t	nb,
		const mve_pred16_t	p0
	)

inline

Store with dynamic stride and tail predicate.

Parameters

	p	Store address
[in]	stride	Stride value
[in]	val	Value to store
[in]	nb	Number of remaining loops
[in]	p0	Predicate for loop

◆ vsub() [1/6]

float32x4_t vsub	(	const float	a,
		const float32x4_t	b
	)

Scalar - Vector.

Parameters

[in]	a	Scalar
[in]	b	Vector

Returns: a - b

◆ vsub() [2/6]

float32x4_t vsub	(	const float	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Scalar - Vector with predicate.

Parameters

[in]	a	Scalar
[in]	b	Vector
[in]	p0	predicate

Returns: a - b with predicate

◆ vsub() [3/6]

float32x4_t vsub	(	const float32x4_t	a,
		const float	b
	)

Vector - Scalar.

Parameters

[in]	a	Vector
[in]	b	Scalar

Returns: a - b

◆ vsub() [4/6]

float32x4_t vsub	(	const float32x4_t	a,
		const float	b,
		const mve_pred16_t	p0
	)

Vector - Scalar with predicate.

Parameters

[in]	a	Vector
[in]	b	Scalar
[in]	p0	predicate

Returns: a - b with predicate

◆ vsub() [5/6]

float32x4_t vsub	(	const float32x4_t	a,
		const float32x4_t	b
	)

Vector - Vector.

Parameters

[in]	a	Vector
[in]	b	Vector

Returns: a - b

◆ vsub() [6/6]

float32x4_t vsub	(	const float32x4_t	a,
		const float32x4_t	b,
		const mve_pred16_t	p0
	)

Vector - Vector with predicate.

Parameters

[in]	a	Vector
[in]	b	Vector
[in]	p0	Predicate

Returns: a - b with predicate

Data Structures

Functions

Detailed Description

Function Documentation

◆ accumulate() [1/3]

◆ accumulate() [2/3]

◆ accumulate() [3/3]

◆ from_accumulator() [1/7]

◆ from_accumulator() [2/7]

◆ from_accumulator() [3/7]

◆ from_accumulator() [4/7]

◆ from_accumulator() [5/7]

◆ from_accumulator() [6/7]

◆ from_accumulator() [7/7]

◆ from_accumulator_impl()

◆ mac() [1/8]

◆ mac() [2/8]

◆ mac() [3/8]

◆ mac() [4/8]

◆ mac() [5/8]

◆ mac() [6/8]

◆ mac() [7/8]

◆ mac() [8/8]

◆ mac_impl() [1/2]

◆ mac_impl() [2/2]

◆ mult() [1/3]

◆ mult() [2/3]

◆ mult() [3/3]

◆ vadd() [1/12]

◆ vadd() [2/12]

◆ vadd() [3/12]

◆ vadd() [4/12]

◆ vadd() [5/12]

◆ vadd() [6/12]

◆ vadd() [7/12]

◆ vadd() [8/12]

◆ vadd() [9/12]

◆ vadd() [10/12]

◆ vadd() [11/12]

◆ vadd() [12/12]

◆ vconst() [1/3]

◆ vconst() [2/3]

◆ vconst() [3/3]

◆ vconst_tail() [1/2]

◆ vconst_tail() [2/2]

◆ vload1() [1/3]

◆ vload1() [2/3]

◆ vload1() [3/3]

◆ vload1_z() [1/2]

◆ vload1_z() [2/2]

◆ vmacc() [1/6]

◆ vmacc() [2/6]

◆ vmacc() [3/6]

◆ vmacc() [4/6]

◆ vmacc() [5/6]

◆ vmacc() [6/6]

◆ vmacc_impl() [1/2]

◆ vmacc_impl() [2/2]

◆ vmul() [1/6]

◆ vmul() [2/6]

◆ vmul() [3/6]

◆ vmul() [4/6]

◆ vmul() [5/6]

◆ vmul() [6/6]

◆ vneg() [1/4]

◆ vneg() [2/4]

◆ vneg() [3/4]

◆ vneg() [4/4]

◆ vreduce() [1/3]

◆ vreduce() [2/3]

◆ vreduce() [3/3]

◆ vreduce_impl()

◆ vstore1() [1/2]

◆ vstore1() [2/2]

◆ vstore1_z() [1/2]

◆ vstore1_z() [2/2]

◆ vsub() [1/6]

◆ vsub() [2/6]

◆ vsub() [3/6]

◆ vsub() [4/6]