CMSIS-DSP  
CMSIS DSP Software Library
 
Loading...
Searching...
No Matches

Data Structures

struct  vctpq< float >
 vctpq for this datatype More...
 
struct  vctpq< float16_t >
 vctpq for Helium and f16 More...
 
struct  vload1_gen_stride
 Load with generalized stride (gather load) More...
 
struct  vload1_gen_stride< 0, 1, 2, 3 >
 Load with generalized stride specialized for <0,1,2,3> More...
 
struct  vload1_gen_stride_z
 Load with generalized stride (gather load) and tail predicate. More...
 
struct  vload1_gen_stride_z< 0, 1, 2, 3 >
 Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3> More...
 
struct  vstore1_gen_stride
 Generalized store with strides. More...
 
struct  vstore1_gen_stride< 0, 1, 2, 3 >
 Generalized store with stride (Specialized for <0,1,2,3>) More...
 
struct  vstore1_gen_stride_z
 Store with generalized strides and tail predicate. More...
 
struct  vstore1_gen_stride_z< 0, 1, 2, 3 >
 Scatter store with tail predicate (specialized for <0,1,2,3>) More...
 

Functions

Q15DSPVector vconst (Q15 val)
 Vector const.
 
Q< 33, 30 > vreduce (const Q< 33, 30 > sum)
 Reduce accumulation value.
 
float32x4_t vconst (const float v)
 Vector constant.
 
float32x4_t vconst_tail (const float v, const mve_pred16_t p0)
 Vector constant with tail.
 
float32x4_t vneg (const float32x4_t a)
 Vector negate.
 
float32x4_t vneg (const float32x4_t a, const mve_pred16_t p0)
 Vector negate with tail.
 
float32x4_t vadd (const float32x4_t a, const float32x4_t b)
 Vector + Vector.
 
float32x4_t vadd (const float32x4_t a, const float b)
 Vector + Scalar.
 
float32x4_t vadd (const float a, const float32x4_t b)
 Scalar + Vector.
 
float32x4_t vadd (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
 Vector + Vector with tail.
 
float32x4_t vadd (const float32x4_t a, const float b, const mve_pred16_t p0)
 Vector + scalar with tail.
 
float32x4_t vadd (const float a, const float32x4_t b, const mve_pred16_t p0)
 Scalar + vector with tail predicate.
 
float32x4_t vsub (const float32x4_t a, const float32x4_t b)
 Vector - Vector.
 
float32x4_t vsub (const float32x4_t a, const float b)
 Vector - Scalar.
 
float32x4_t vsub (const float a, const float32x4_t b)
 Scalar - Vector.
 
float32x4_t vsub (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
 Vector - Vector with predicate.
 
float32x4_t vsub (const float32x4_t a, const float b, const mve_pred16_t p0)
 Vector - Scalar with predicate.
 
float32x4_t vsub (const float a, const float32x4_t b, const mve_pred16_t p0)
 Scalar - Vector with predicate.
 
float32x4_t vmul (const float32x4_t a, const float32x4_t b)
 Vector * Vector.
 
float32x4_t vmul (const float32x4_t a, const float b)
 Vector * Scalar.
 
float32x4_t vmul (const float a, const float32x4_t b)
 Scalar * Vector.
 
float32x4_t vmul (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
 Vector * Vector with predicate.
 
float32x4_t vmul (const float32x4_t a, const float b, const mve_pred16_t p0)
 Vector * Scalar with predicate.
 
float32x4_t vmul (const float a, const float32x4_t b, const mve_pred16_t p0)
 Scalar * Vector with predicate.
 
float32x4_t vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b)
 Multiply accumulate (Vector * Vector)
 
float32x4_t vmacc (const float32x4_t acc, const float32x4_t a, const float_t b)
 Multiply accumulate (Vector * Scalar)
 
float32x4_t vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b, const mve_pred16_t p0)
 Multiply accumulate with predicate (Vector * Vector)
 
float vreduce (const float32x4_t in)
 Vector reduce.
 
template<int S, typename std::enable_if< S==1, bool >::type = true>
float32x4_t vload1 (const float32_t *p)
 Vector load with stride.
 
float32x4_t vload1 (const float32_t *p, const index_t stride)
 Vector load with dynamic stride.
 
template<int S, typename std::enable_if< S==1, bool >::type = true>
float32x4_t vload1_z (const float32_t *p, const std::size_t nb, const mve_pred16_t p0)
 Vector load with stride and predicate.
 
float32x4_t vload1_z (const float32_t *p, const index_t stride, const std::size_t nb, const mve_pred16_t p0)
 Vector load with dynamic stride and loop predication.
 
template<int S, typename std::enable_if< S==1, bool >::type = true>
void vstore1 (float32_t *p, const float32x4_t val)
 Store with stride.
 
void vstore1 (float32_t *p, const index_t stride, const float32x4_t val)
 Store with dynamic stride.
 
template<int S, typename std::enable_if< S==1, bool >::type = true>
void vstore1_z (float32_t *p, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0)
 Store with stride and tail predicate.
 
void vstore1_z (float32_t *p, const index_t stride, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0)
 Store with dynamic stride.
 
float16x8_t vconst (float16_t v)
 Vector const.
 
float16x8_t vconst_tail (const float16_t v, const mve_pred16_t p0)
 Vector of const with tail predicate.
 
float16x8_t vneg (const float16x8_t a)
 Vector negate.
 
float16x8_t vneg (const float16x8_t a, const mve_pred16_t p0)
 Vector negate with tail predicate.
 
float16x8_t vadd (const float16x8_t a, const float16x8_t b)
 Vector + Vector.
 
float16x8_t vadd (const float16x8_t a, const float16_t b)
 Vector + Scalar.
 
float16x8_t vadd (const float16_t a, const float16x8_t b)
 Scalar + Vector.
 
float16x8_t vadd (const float16x8_t a, const float16x8_t b, const mve_pred16_t p0)
 Vector + Vector with tail predicate.
 
float16x8_t vadd (const float16x8_t a, const float16_t b, const mve_pred16_t p0)
 Vector + Scalar with tail predicate.
 
float16x8_t vadd (const float16_t a, const float16x8_t b, const mve_pred16_t p0)
 Scalar + Vector with tail predicate.
 
template<int S, typename std::enable_if< S==1, bool >::type = true>
int16x8_t vload1 (const Q15 *p)
 Vector load with stride.
 
Q< 33, 30 > vmacc (const Q< 33, 30 > sum, const int16x8_t vala, const int16x8_t valb)
 Vector accumulate into scalar.
 
double from_accumulator (const double a)
 Convert from accumulator representation.
 
double mac (const double acc, const double a, const double b)
 Multiply and accumulate for this datatype.
 
void accumulate (double &a, const double &b)
 Accumulate.
 
double mult (double &a, const double &b)
 Multiply.
 
float from_accumulator (const float a)
 Convert from accumulator representation.
 
float mac (const float acc, const float a, const float b)
 Scalar multiply and accumulate.
 
void accumulate (float &a, const float &b)
 Scalar accumulate.
 
float mult (float &a, const float &b)
 Scalar multiply.
 
template<typename A , typename V , std::size_t... Ns>
A vmacc_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >)
 Vector accumulate for tuples of vectors.
 
template<typename A , typename ... E>
A vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b)
 Vector accumulate for tuples of vectors.
 
template<typename A , typename V , typename B , std::size_t... Ns>
A vmacc_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >)
 Predicated vector accumulate for tuple.
 
template<typename A , typename B , typename ... E>
A vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0)
 Predicated vector accumulate for tuples.
 
template<typename A , std::size_t... Ns>
auto vreduce_impl (const A &acc, std::index_sequence< Ns... >)
 Reduce function for tuple.
 
template<typename ... E>
auto vreduce (const std::tuple< E... > &acc)
 Reduce function for tuples.
 
template<typename A , std::size_t... Ns>
auto from_accumulator_impl (const A &acc, std::index_sequence< Ns... >)
 Convert from accumulator value.
 
template<typename ... E>
auto from_accumulator (const std::tuple< E... > &acc)
 Convert from tuple of accumulator values.
 
template<typename A , typename V , std::size_t... Ns>
A mac_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >)
 Multiply accumulate for tuple of scalar.
 
template<typename A , typename ... E>
A mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b)
 Multiply accumulate.
 
template<typename A , typename V , typename B , std::size_t... Ns>
A mac_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >)
 Multiply accumulate for tuple of scalar.
 
template<typename A , typename B , typename ... E>
A mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0)
 Multiply accumulate.
 
float16_t from_accumulator (const float16_t a)
 Convert from accumulator datatype.
 
float16_t mac (const float16_t acc, const float16_t a, const float16_t b)
 Multiply and accumulate.
 
void accumulate (float16_t &a, const float16_t &b)
 Accumulate.
 
float16_t mult (float16_t &a, const float16_t &b)
 Multiply.
 
Q15 from_accumulator (const Q< 33, 30 > a)
 Convert from accumulator type.
 
Q< 33, 30 > mac (const Q< 33, 30 > acc, const Q15 a, const Q15 b)
 Multiply and accumulate.
 
Q31 from_accumulator (const Q< 15, 48 > a)
 Convert from accumulator (with no saturation)
 
Q< 15, 48 > mac (const Q< 15, 48 > acc, const Q31 a, const Q31 b)
 Multiply and accumulate.
 
Q7 from_accumulator (const Q< 17, 14 > a)
 Convert from accumulator with saturation.
 
Q< 17, 14 > mac (const Q< 17, 14 > acc, const Q7 a, const Q7 b)
 Multiply and accumulate.
 

Detailed Description

Inner implementation of Helium intrinsics

Inner implementation of generic intrinsics

Function Documentation

◆ accumulate() [1/3]

void accumulate ( double &  a,
const double &  b 
)

Accumulate.

Parameters
aAccumulator
[in]bValue to be added

◆ accumulate() [2/3]

void accumulate ( float &  a,
const float &  b 
)

Scalar accumulate.

Parameters
aAccumulator
[in]bOperand

◆ accumulate() [3/3]

void accumulate ( float16_t &  a,
const float16_t &  b 
)

Accumulate.

Parameters
aAccumulator
[in]bValue to accumulate

◆ from_accumulator() [1/7]

double from_accumulator ( const double  a)

Convert from accumulator representation.

Parameters
[in]aValue
Returns
Accumulator value converted to current datatype

◆ from_accumulator() [2/7]

float from_accumulator ( const float  a)

Convert from accumulator representation.

Parameters
[in]aValue
Returns
Accumulator value converted to current datatype

◆ from_accumulator() [3/7]

float16_t from_accumulator ( const float16_t  a)

Convert from accumulator datatype.

Parameters
[in]aValue
Returns
Converted from accumulator datatype

◆ from_accumulator() [4/7]

Q31 from_accumulator ( const Q< 15, 48 >  a)

Convert from accumulator (with no saturation)

Parameters
[in]aAccumulator value
Returns
Converted value

◆ from_accumulator() [5/7]

Q7 from_accumulator ( const Q< 17, 14 >  a)

Convert from accumulator with saturation.

Parameters
[in]aAccumulator value
Returns
Q7 value

◆ from_accumulator() [6/7]

Q15 from_accumulator ( const Q< 33, 30 >  a)

Convert from accumulator type.

Parameters
[in]aThe accumulator value
Returns
The converted value (with saturation)

◆ from_accumulator() [7/7]

auto from_accumulator ( const std::tuple< E... > &  acc)

Convert from tuple of accumulator values.

Parameters
[in]accAccumulator
Template Parameters
EDatatypes for tuple
Returns
Tuples of converted accumulator values

The accumulator may use more bits to avoid saturation. At the end of the accumulation, the final result must be converted to the current datatype (which may imply saturation).

◆ from_accumulator_impl()

auto from_accumulator_impl ( const A &  acc,
std::index_sequence< Ns... >   
)

Convert from accumulator value.

Parameters
[in]accThe accumulator
Template Parameters
AAccumulator datatype
NsTuples indexes
Returns
Tuples of values

◆ mac() [1/8]

A mac ( const A &  acc,
const std::tuple< E... > &  a,
const std::tuple< E... > &  b 
)

Multiply accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Template Parameters
AAccumulator datatype
EDatatypes for tuple
Returns
Accumulated values

◆ mac() [2/8]

A mac ( const A &  acc,
const std::tuple< E... > &  a,
const std::tuple< E... > &  b,
const B  p0 
)

Multiply accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
[in]p0Predicate
Template Parameters
AAccumulator datatype
BPredicate datatype
EDatatypes for tuple
Returns
Accumulated values

◆ mac() [3/8]

double mac ( const double  acc,
const double  a,
const double  b 
)

Multiply and accumulate for this datatype.

Parameters
[in]accThe accumulated value
[in]aThe left hand side
[in]bThe right hand side
Returns
Return acc + a*b

◆ mac() [4/8]

float mac ( const float  acc,
const float  a,
const float  b 
)

Scalar multiply and accumulate.

Parameters
[in]accAccumulator
[in]aOperand
[in]bOperand
Returns
acc + a*b

◆ mac() [5/8]

float16_t mac ( const float16_t  acc,
const float16_t  a,
const float16_t  b 
)

Multiply and accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Returns
acc + a*b

◆ mac() [6/8]

Q< 15, 48 > mac ( const Q< 15, 48 >  acc,
const Q31  a,
const Q31  b 
)

Multiply and accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Returns
acc + a*b

◆ mac() [7/8]

Q< 17, 14 > mac ( const Q< 17, 14 >  acc,
const Q7  a,
const Q7  b 
)

Multiply and accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Returns
acc + a*b

◆ mac() [8/8]

Q< 33, 30 > mac ( const Q< 33, 30 >  acc,
const Q15  a,
const Q15  b 
)

Multiply and accumulate.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Returns
acc + a*b

◆ mac_impl() [1/2]

A mac_impl ( const A &  acc,
const V &  a,
const V &  b,
const B  p0,
std::index_sequence< Ns... >   
)

Multiply accumulate for tuple of scalar.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
[in]p0Predicate
Template Parameters
AAccumulator datatype
VScalar datatype
BPredicate datatype
NsTuple indexes
Returns
Tuples of accumulated values

◆ mac_impl() [2/2]

A mac_impl ( const A &  acc,
const V &  a,
const V &  b,
std::index_sequence< Ns... >   
)

Multiply accumulate for tuple of scalar.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
Template Parameters
AAccumulator datatype
VScalar datatype
NsTuple indexes
Returns
Tuples of accumulated values

◆ mult() [1/3]

double mult ( double &  a,
const double &  b 
)

Multiply.

Parameters
aLeft hand side
[in]bRight hand side
Returns
Return a*b

◆ mult() [2/3]

float mult ( float &  a,
const float &  b 
)

Scalar multiply.

Parameters
aOperand
[in]bOperand
Returns
a*b

◆ mult() [3/3]

float16_t mult ( float16_t &  a,
const float16_t &  b 
)

Multiply.

Parameters
aFirst operand
[in]bSecond operand
Returns
a*b

◆ vadd() [1/12]

float32x4_t vadd ( const float  a,
const float32x4_t  b 
)

Scalar + Vector.

Parameters
[in]aScalar
[in]bVector
Returns
a + b

◆ vadd() [2/12]

float32x4_t vadd ( const float  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Scalar + vector with tail predicate.

Parameters
[in]aScalar
[in]bVector
[in]p0Predicate
Returns
a + b with tail predicate

◆ vadd() [3/12]

float16x8_t vadd ( const float16_t  a,
const float16x8_t  b 
)

Scalar + Vector.

Parameters
[in]aScalar
[in]bVector
Returns
a + b

◆ vadd() [4/12]

float16x8_t vadd ( const float16_t  a,
const float16x8_t  b,
const mve_pred16_t  p0 
)

Scalar + Vector with tail predicate.

Parameters
[in]aScalar
[in]bVector
[in]p0Predicate
Returns
a + b with tail predicate

◆ vadd() [5/12]

float16x8_t vadd ( const float16x8_t  a,
const float16_t  b 
)

Vector + Scalar.

Parameters
[in]aVector
[in]bScalar
Returns
a + b

◆ vadd() [6/12]

float16x8_t vadd ( const float16x8_t  a,
const float16_t  b,
const mve_pred16_t  p0 
)

Vector + Scalar with tail predicate.

Parameters
[in]aVector
[in]bScalar
[in]p0Predicate
Returns
a + b with tail predicate

◆ vadd() [7/12]

float16x8_t vadd ( const float16x8_t  a,
const float16x8_t  b 
)

Vector + Vector.

Parameters
[in]aVector
[in]bVector
Returns
a + b

◆ vadd() [8/12]

float16x8_t vadd ( const float16x8_t  a,
const float16x8_t  b,
const mve_pred16_t  p0 
)

Vector + Vector with tail predicate.

Parameters
[in]aVector
[in]bVector
[in]p0predicate
Returns
a + b with tail predicate

◆ vadd() [9/12]

float32x4_t vadd ( const float32x4_t  a,
const float  b 
)

Vector + Scalar.

Parameters
[in]aVector
[in]bScalar
Returns
a + b

◆ vadd() [10/12]

float32x4_t vadd ( const float32x4_t  a,
const float  b,
const mve_pred16_t  p0 
)

Vector + scalar with tail.

Parameters
[in]aVector
[in]bScalar
[in]p0Predicate
Returns
a + b with tail predicate

◆ vadd() [11/12]

float32x4_t vadd ( const float32x4_t  a,
const float32x4_t  b 
)

Vector + Vector.

Parameters
[in]aFirst operand
[in]bSecond operand
Returns
a + b

◆ vadd() [12/12]

float32x4_t vadd ( const float32x4_t  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Vector + Vector with tail.

Parameters
[in]aVector
[in]bVector
[in]p0Predicate
Returns
a + b with tail predicate

◆ vconst() [1/3]

float32x4_t vconst ( const float  v)

Vector constant.

Parameters
[in]vConstant value
Returns
Vector initialized with constant in each lane

◆ vconst() [2/3]

float16x8_t vconst ( float16_t  v)

Vector const.

Parameters
[in]vInitialization value
Returns
Vector of const

◆ vconst() [3/3]

int16x8_t vconst ( Q15  val)

Vector const.

Parameters
[in]valThe value
Returns
Vector of const

◆ vconst_tail() [1/2]

float32x4_t vconst_tail ( const float  v,
const mve_pred16_t  p0 
)

Vector constant with tail.

Parameters
[in]vConstant value
[in]p0Predicate
Returns
Vector initialized with constant in some lanes depending on the predicate

◆ vconst_tail() [2/2]

float16x8_t vconst_tail ( const float16_t  v,
const mve_pred16_t  p0 
)

Vector of const with tail predicate.

Parameters
[in]vThe initialization parameter
[in]p0The predicate
Returns
The initialized vector with const and predicate

◆ vload1() [1/3]

float32x4_t vload1 ( const float32_t *  p)
inline

Vector load with stride.

Parameters
[in]pLoad address
Template Parameters
SStride
<unnamed>Check stride value
Returns
Loaded vector with stride

◆ vload1() [2/3]

float32x4_t vload1 ( const float32_t *  p,
const index_t  stride 
)
inline

Vector load with dynamic stride.

Parameters
[in]pLoad address
[in]strideStride value
Returns
Loaded vector with stride

◆ vload1() [3/3]

int16x8_t vload1 ( const Q15 *  p)
inline

Vector load with stride.

Parameters
[in]pLoad address
Template Parameters
SStride
<unnamed>Stride check
Returns
Gather load

In q15, a lane is 16 bits wide, so the offset that can be encoded for a gather load cannot be bigger than 65535. With a stride of S, the biggest offset is S*7, so S must be <= 65535/7, i.e. S <= 9362.

For higher stride, the Helium instruction cannot be used and instead a dynamic stride is used.

◆ vload1_z() [1/2]

float32x4_t vload1_z ( const float32_t *  p,
const index_t  stride,
const std::size_t  nb,
const mve_pred16_t  p0 
)
inline

Vector load with dynamic stride and loop predication.

Parameters
[in]pLoad address
[in]strideStride value
[in]nbNumber of remaining loop samples
[in]p0Predicate for remaining loop samples
Returns
Loaded vector with stride and loop predicate

◆ vload1_z() [2/2]

float32x4_t vload1_z ( const float32_t *  p,
const std::size_t  nb,
const mve_pred16_t  p0 
)
inline

Vector load with stride and predicate.

Parameters
[in]pLoad address
[in]nbNumber of remaining loop samples
[in]p0Predicate for remaining loop samples
Template Parameters
SStride
<unnamed>Check stride value
Returns
Loaded vector with stride and loop predication

◆ vmacc() [1/6]

A vmacc ( const A &  acc,
const std::tuple< E... > &  a,
const std::tuple< E... > &  b 
)

Vector accumulate for tuples of vectors.

Parameters
[in]accThe accumulator
[in]aFirst operand
[in]bSecond operand
Template Parameters
AAccumulator datatype
EDatatype of tuples elements
Returns
Accumulator result

◆ vmacc() [2/6]

A vmacc ( const A &  acc,
const std::tuple< E... > &  a,
const std::tuple< E... > &  b,
const B  p0 
)

Predicated vector accumulate for tuples.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
[in]p0Predicate
Template Parameters
AAccumulator datatype
BPredicate datatype
EDatatype of tuples elements
Returns
Tuple of accumulated vectors

◆ vmacc() [3/6]

float32x4_t vmacc ( const float32x4_t  acc,
const float32x4_t  a,
const float32x4_t  b 
)

Multiply accumulate (Vector * Vector)

Parameters
[in]accAccumulator
[in]aVector
[in]bVector
Returns
acc + a * b

◆ vmacc() [4/6]

float32x4_t vmacc ( const float32x4_t  acc,
const float32x4_t  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Multiply accumulate with predicate (Vector * Vector)

Parameters
[in]accAccumulator
[in]aVector
[in]bVector
[in]p0Predicate
Returns
acc + a*b with predicate

◆ vmacc() [5/6]

float32x4_t vmacc ( const float32x4_t  acc,
const float32x4_t  a,
const float_t  b 
)

Multiply accumulate (Vector * Scalar)

Parameters
[in]accAccumulator
[in]aVector
[in]bScalar
Returns
acc + a * b

◆ vmacc() [6/6]

Q< 33, 30 > vmacc ( const Q< 33, 30 >  sum,
const int16x8_t  vala,
const int16x8_t  valb 
)

Vector accumulate into scalar.

Parameters
[in]sumThe sum
[in]valaThe vala
[in]valbThe valb
Returns
vala * valb and accumulated into sum

◆ vmacc_impl() [1/2]

A vmacc_impl ( const A &  acc,
const V &  a,
const V &  b,
const B  p0,
std::index_sequence< Ns... >   
)

Predicated vector accumulate for tuple.

Parameters
[in]accAccumulator
[in]aFirst operand
[in]bSecond operand
[in]p0Predicate
Template Parameters
AAccumulator datatype
VVector datatype
BPredicate datatype
NsTuple indexes
Returns
Tuple of accumulated values

◆ vmacc_impl() [2/2]

A vmacc_impl ( const A &  acc,
const V &  a,
const V &  b,
std::index_sequence< Ns... >   
)

Vector accumulate for tuples of vectors.

Parameters
[in]accThe accumulator
[in]aFirst operand
[in]bSecond operand
Template Parameters
AAccumulator datatype
VVector datatype
NsTuple index
Returns
tuple of results

◆ vmul() [1/6]

float32x4_t vmul ( const float  a,
const float32x4_t  b 
)

Scalar * Vector.

Parameters
[in]aScalar
[in]bVector
Returns
a * b

◆ vmul() [2/6]

float32x4_t vmul ( const float  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Scalar * Vector with predicate.

Parameters
[in]aScalar
[in]bVector
[in]p0Predicate
Returns
a * b with predicate

◆ vmul() [3/6]

float32x4_t vmul ( const float32x4_t  a,
const float  b 
)

Vector * Scalar.

Parameters
[in]aVector
[in]bScalar
Returns
a * b

◆ vmul() [4/6]

float32x4_t vmul ( const float32x4_t  a,
const float  b,
const mve_pred16_t  p0 
)

Vector * Scalar with predicate.

Parameters
[in]aVector
[in]bScalar
[in]p0Predicate
Returns
a * b with predicate

◆ vmul() [5/6]

float32x4_t vmul ( const float32x4_t  a,
const float32x4_t  b 
)

Vector * Vector.

Parameters
[in]aVector
[in]bVector
Returns
a * b

◆ vmul() [6/6]

float32x4_t vmul ( const float32x4_t  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Vector * Vector with predicate.

Parameters
[in]aVector
[in]bVector
[in]p0Predicate
Returns
a * b

◆ vneg() [1/4]

float16x8_t vneg ( const float16x8_t  a)

Vector negate.

Parameters
[in]aVector
Returns
Negate of vector

◆ vneg() [2/4]

float16x8_t vneg ( const float16x8_t  a,
const mve_pred16_t  p0 
)

Vector negate with tail predicate.

Parameters
[in]aVector
[in]p0Predicate
Returns
Negate of vector with tail predicate

◆ vneg() [3/4]

float32x4_t vneg ( const float32x4_t  a)

Vector negate.

Parameters
[in]aVector value to negate
Returns
Negated value

◆ vneg() [4/4]

float32x4_t vneg ( const float32x4_t  a,
const mve_pred16_t  p0 
)

Vector negate with tail.

Parameters
[in]aValue
[in]p0Predicate
Returns
Negated value

◆ vreduce() [1/3]

float vreduce ( const float32x4_t  in)

Vector reduce.

Parameters
[in]inVector
Returns
Reduced scalar value

◆ vreduce() [2/3]

Q< 33, 30 > vreduce ( const Q< 33, 30 >  sum)

Reduce accumulation value.

Parameters
[in]sumThe sum
Returns
Reduced value

Since the Helium instructions can accumulate vector product into a scalar there is no need to reduce the accumulator value. It is already in scalar form.

◆ vreduce() [3/3]

auto vreduce ( const std::tuple< E... > &  acc)

Reduce function for tuples.

Parameters
[in]accThe accumulator
Template Parameters
EDatatypes for tuples
Returns
Tuples of reduced values

Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead, and that vector must be reduced to a scalar at the end of the accumulation loop.

◆ vreduce_impl()

auto vreduce_impl ( const A &  acc,
std::index_sequence< Ns... >   
)

Reduce function for tuple.

Parameters
[in]accAccumulator
Template Parameters
AAccumulator datatype
NsTuple indexes
Returns
Reduced accumulator values

Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead, and that vector must be reduced to a scalar at the end of the accumulation loop.

◆ vstore1() [1/2]

void vstore1 ( float32_t *  p,
const float32x4_t  val 
)
inline

Store with stride.

Parameters
pStore address
[in]valValue to store
Template Parameters
SStride
<unnamed>Check stride value

◆ vstore1() [2/2]

void vstore1 ( float32_t *  p,
const index_t  stride,
const float32x4_t  val 
)
inline

Store with dynamic stride.

Parameters
pStore address
[in]strideStride value
[in]valValue to store

◆ vstore1_z() [1/2]

void vstore1_z ( float32_t *  p,
const float32x4_t  val,
const std::size_t  nb,
const mve_pred16_t  p0 
)
inline

Store with stride and tail predicate.

Parameters
pStore address
[in]valValue to store
[in]nbNumber of remaining loop iterations
[in]p0Predicate for loop
Template Parameters
SStride
<unnamed>Check stride value

◆ vstore1_z() [2/2]

void vstore1_z ( float32_t *  p,
const index_t  stride,
const float32x4_t  val,
const std::size_t  nb,
const mve_pred16_t  p0 
)
inline

Store with dynamic stride.

Parameters
pStore address
[in]strideStride value
[in]valValue to store
[in]nbNumber of remaining loops
[in]p0Predicate for loop

◆ vsub() [1/6]

float32x4_t vsub ( const float  a,
const float32x4_t  b 
)

Scalar - Vector.

Parameters
[in]aScalar
[in]bVector
Returns
a - b

◆ vsub() [2/6]

float32x4_t vsub ( const float  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Scalar - Vector with predicate.

Parameters
[in]aScalar
[in]bVector
[in]p0predicate
Returns
a - b with predicate

◆ vsub() [3/6]

float32x4_t vsub ( const float32x4_t  a,
const float  b 
)

Vector - Scalar.

Parameters
[in]aVector
[in]bScalar
Returns
a - b

◆ vsub() [4/6]

float32x4_t vsub ( const float32x4_t  a,
const float  b,
const mve_pred16_t  p0 
)

Vector - Scalar with predicate.

Parameters
[in]aVector
[in]bScalar
[in]p0predicate
Returns
a - b with predicate

◆ vsub() [5/6]

float32x4_t vsub ( const float32x4_t  a,
const float32x4_t  b 
)

Vector - Vector.

Parameters
[in]aVector
[in]bVector
Returns
a - b

◆ vsub() [6/6]

float32x4_t vsub ( const float32x4_t  a,
const float32x4_t  b,
const mve_pred16_t  p0 
)

Vector - Vector with predicate.

Parameters
[in]aVector
[in]bVector
[in]p0Predicate
Returns
a - b