Data Structures | |
| struct | vctpq< float > | 
| vctpq for this datatype  More... | |
| struct | vctpq< float16_t > | 
| vctpq for Helium and f16  More... | |
| struct | vload1_gen_stride | 
| Load with generalized stride (gather load)  More... | |
| struct | vload1_gen_stride< 0, 1, 2, 3 > | 
| Load with generalized stride specialized for <0,1,2,3>  More... | |
| struct | vload1_gen_stride_z | 
| Load with generalized stride (gather load) and tail predicate.  More... | |
| struct | vload1_gen_stride_z< 0, 1, 2, 3 > | 
| Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3>  More... | |
| struct | vstore1_gen_stride | 
| Generalized store with strides.  More... | |
| struct | vstore1_gen_stride< 0, 1, 2, 3 > | 
| Generalized store with stride (Specialized for <0,1,2,3>)  More... | |
| struct | vstore1_gen_stride_z | 
| Store with generalized strides and tail predicate.  More... | |
| struct | vstore1_gen_stride_z< 0, 1, 2, 3 > | 
| Scatter store with tail predicate (specialized for <0,1,2,3>)  More... | |
Functions | |
| Q15DSPVector | vconst (Q15 val) | 
| Vector const.   | |
| Q< 33, 30 > | vreduce (const Q< 33, 30 > sum) | 
| Reduce accumulation value.   | |
| float32x4_t | vconst (const float v) | 
| Vector constant.   | |
| float32x4_t | vconst_tail (const float v, const mve_pred16_t p0) | 
| Vector constant with tail.   | |
| float32x4_t | vneg (const float32x4_t a) | 
| Vector negate.   | |
| float32x4_t | vneg (const float32x4_t a, const mve_pred16_t p0) | 
| Vector negate with tail.   | |
| float32x4_t | vadd (const float32x4_t a, const float32x4_t b) | 
| Vector + Vector.   | |
| float32x4_t | vadd (const float32x4_t a, const float b) | 
| Vector + Scalar.   | |
| float32x4_t | vadd (const float a, const float32x4_t b) | 
| Scalar + Vector.   | |
| float32x4_t | vadd (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) | 
| Vector + Vector with tail.   | |
| float32x4_t | vadd (const float32x4_t a, const float b, const mve_pred16_t p0) | 
| Vector + scalar with tail.   | |
| float32x4_t | vadd (const float a, const float32x4_t b, const mve_pred16_t p0) | 
| Scalar + vector with tail predicate.   | |
| float32x4_t | vsub (const float32x4_t a, const float32x4_t b) | 
| Vector - Vector.   | |
| float32x4_t | vsub (const float32x4_t a, const float b) | 
| Vector - Scalar.   | |
| float32x4_t | vsub (const float a, const float32x4_t b) | 
| Scalar - Vector.   | |
| float32x4_t | vsub (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) | 
| Vector - Vector with predicate.   | |
| float32x4_t | vsub (const float32x4_t a, const float b, const mve_pred16_t p0) | 
| Vector - Scalar with predicate.   | |
| float32x4_t | vsub (const float a, const float32x4_t b, const mve_pred16_t p0) | 
| Scalar - Vector with predicate.   | |
| float32x4_t | vmul (const float32x4_t a, const float32x4_t b) | 
| Vector * Vector.   | |
| float32x4_t | vmul (const float32x4_t a, const float b) | 
| Vector * Scalar.   | |
| float32x4_t | vmul (const float a, const float32x4_t b) | 
| Scalar * Vector.   | |
| float32x4_t | vmul (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) | 
| Vector * Vector with predicate.   | |
| float32x4_t | vmul (const float32x4_t a, const float b, const mve_pred16_t p0) | 
| Vector * Scalar with predicate.   | |
| float32x4_t | vmul (const float a, const float32x4_t b, const mve_pred16_t p0) | 
| Scalar * Vector with predicate.   | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b) | 
| Multiply accumulate (Vector * Vector)   | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float_t b) | 
| Multiply accumulate (Vector * Scalar)   | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) | 
| Multiply accumulate with predicate (Vector * Vector)   | |
| float | vreduce (const float32x4_t in) | 
| Vector reduce.   | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| float32x4_t | vload1 (const float32_t *p) | 
| Vector load with stride.   | |
| float32x4_t | vload1 (const float32_t *p, const index_t stride) | 
| Vector load with dynamic stride.   | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| float32x4_t | vload1_z (const float32_t *p, const std::size_t nb, const mve_pred16_t p0) | 
| Vector load with stride and predicate.   | |
| float32x4_t | vload1_z (const float32_t *p, const index_t stride, const std::size_t nb, const mve_pred16_t p0) | 
| Vector load with dynamic stride and loop predication.   | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| void | vstore1 (float32_t *p, const float32x4_t val) | 
| Store with stride.   | |
| void | vstore1 (float32_t *p, const index_t stride, const float32x4_t val) | 
| Store with dynamic stride.   | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| void | vstore1_z (float32_t *p, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) | 
| Store with stride and tail predicate.   | |
| void | vstore1_z (float32_t *p, const index_t stride, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) | 
| Store with dynamic stride and tail predicate.   | |
| float16x8_t | vconst (float16_t v) | 
| Vector const.   | |
| float16x8_t | vconst_tail (const float16_t v, const mve_pred16_t p0) | 
| Vector of const with tail predicate.   | |
| float16x8_t | vneg (const float16x8_t a) | 
| Vector negate.   | |
| float16x8_t | vneg (const float16x8_t a, const mve_pred16_t p0) | 
| Vector negate with tail predicate.   | |
| float16x8_t | vadd (const float16x8_t a, const float16x8_t b) | 
| Vector + Vector.   | |
| float16x8_t | vadd (const float16x8_t a, const float16_t b) | 
| Vector + Scalar.   | |
| float16x8_t | vadd (const float16_t a, const float16x8_t b) | 
| Scalar + Vector.   | |
| float16x8_t | vadd (const float16x8_t a, const float16x8_t b, const mve_pred16_t p0) | 
| Vector + Vector with tail predicate.   | |
| float16x8_t | vadd (const float16x8_t a, const float16_t b, const mve_pred16_t p0) | 
| Vector + Scalar with tail predicate.   | |
| float16x8_t | vadd (const float16_t a, const float16x8_t b, const mve_pred16_t p0) | 
| Scalar + Vector with tail predicate.   | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| int16x8_t | vload1 (const Q15 *p) | 
| Vector load with stride.   | |
| Q< 33, 30 > | vmacc (const Q< 33, 30 > sum, const int16x8_t vala, const int16x8_t valb) | 
| Vector accumulate into scalar.   | |
| double | from_accumulator (const double a) | 
| Convert from accumulator representation.   | |
| double | mac (const double acc, const double a, const double b) | 
| Multiply and accumulate for this datatype.   | |
| void | accumulate (double &a, const double &b) | 
| Accumulate.   | |
| double | mult (double &a, const double &b) | 
| Multiply.   | |
| float | from_accumulator (const float a) | 
| Convert from accumulator representation.   | |
| float | mac (const float acc, const float a, const float b) | 
| Scalar multiply and accumulate.   | |
| void | accumulate (float &a, const float &b) | 
| Scalar accumulate.   | |
| float | mult (float &a, const float &b) | 
| Scalar multiply.   | |
| template<typename A , typename V , std::size_t... Ns> | |
| A | vmacc_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) | 
| Vector accumulate for tuples of vectors.   | |
| template<typename A , typename ... E> | |
| A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) | 
| Vector accumulate for tuples of vectors.   | |
| template<typename A , typename V , typename B , std::size_t... Ns> | |
| A | vmacc_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) | 
| Predicated vector accumulate for tuple.   | |
| template<typename A , typename B , typename ... E> | |
| A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) | 
| Predicated vector accumulate for tuples.   | |
| template<typename A , std::size_t... Ns> | |
| auto | vreduce_impl (const A &acc, std::index_sequence< Ns... >) | 
| Reduce function for tuple.   | |
| template<typename ... E> | |
| auto | vreduce (const std::tuple< E... > &acc) | 
| Reduce function for tuples.   | |
| template<typename A , std::size_t... Ns> | |
| auto | from_accumulator_impl (const A &acc, std::index_sequence< Ns... >) | 
| Convert from accumulator value.   | |
| template<typename ... E> | |
| auto | from_accumulator (const std::tuple< E... > &acc) | 
| Convert from tuple of accumulator values.   | |
| template<typename A , typename V , std::size_t... Ns> | |
| A | mac_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) | 
| Multiply accumulate for tuple of scalar.   | |
| template<typename A , typename ... E> | |
| A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) | 
| Multiply accumulate.   | |
| template<typename A , typename V , typename B , std::size_t... Ns> | |
| A | mac_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) | 
| Multiply accumulate for tuple of scalar.   | |
| template<typename A , typename B , typename ... E> | |
| A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) | 
| Multiply accumulate.   | |
| float16_t | from_accumulator (const float16_t a) | 
| Convert from accumulator datatype.   | |
| float16_t | mac (const float16_t acc, const float16_t a, const float16_t b) | 
| Multiply and accumulate.   | |
| void | accumulate (float16_t &a, const float16_t &b) | 
| Accumulate.   | |
| float16_t | mult (float16_t &a, const float16_t &b) | 
| Multiply.   | |
| Q15 | from_accumulator (const Q< 33, 30 > a) | 
| Convert from accumulator type.   | |
| Q< 33, 30 > | mac (const Q< 33, 30 > acc, const Q15 a, const Q15 b) | 
| Multiply and accumulate.   | |
| Q31 | from_accumulator (const Q< 15, 48 > a) | 
| Convert from accumulator (with no saturation)   | |
| Q< 15, 48 > | mac (const Q< 15, 48 > acc, const Q31 a, const Q31 b) | 
| Multiply and accumulate.   | |
| Q7 | from_accumulator (const Q< 17, 14 > a) | 
| Convert from accumulator with saturation.   | |
| Q< 17, 14 > | mac (const Q< 17, 14 > acc, const Q7 a, const Q7 b) | 
| Multiply and accumulate.   | |
Inner implementation of Helium intrinsics
Inner implementation of generic intrinsics
| void accumulate | ( | double & | a, | 
| const double & | b | ||
| ) | 
Accumulate.
| a | Accumulator | |
| [in] | b | Value to be added | 
| void accumulate | ( | float & | a, | 
| const float & | b | ||
| ) | 
Scalar accumulate.
| a | Accumulator | |
| [in] | b | Operand | 
| void accumulate | ( | float16_t & | a, | 
| const float16_t & | b | ||
| ) | 
Accumulate.
| a | Accumulator | |
| [in] | b | Value to accumulate | 
| double from_accumulator | ( | const double | a | ) | 
Convert from accumulator representation.
| [in] | a | Value | 
| float from_accumulator | ( | const float | a | ) | 
Convert from accumulator representation.
| [in] | a | Value | 
| float16_t from_accumulator | ( | const float16_t | a | ) | 
Convert from accumulator datatype.
| [in] | a | Value | 
| Q31 from_accumulator | ( | const Q< 15, 48 > | a | ) | 
Convert from accumulator (with no saturation)
| [in] | a | Accumulator value | 
| Q7 from_accumulator | ( | const Q< 17, 14 > | a | ) | 
Convert from accumulator with saturation.
| [in] | a | Accumulator value | 
| Q15 from_accumulator | ( | const Q< 33, 30 > | a | ) | 
Convert from accumulator type.
| [in] | a | The accumulator value | 
| auto from_accumulator | ( | const std::tuple< E... > & | acc | ) | 
Convert from tuple of accumulator values.
| [in] | acc | Accumulator | 
| E | Datatypes for tuple | 
The accumulator may use more bits to avoid saturation. At the end of the accumulation, the final result must be converted to the current datatype (which may imply saturation).
| auto from_accumulator_impl | ( | const A & | acc, | 
| std::index_sequence< Ns... > | |||
| ) | 
Convert from accumulator value.
| [in] | acc | The accumulator | 
| A | Accumulator datatype | 
| Ns | Tuples indexes | 
| A mac | ( | const A & | acc, | 
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b | ||
| ) | 
Multiply accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| A | Accumulator datatype | 
| E | Datatypes for tuple | 
| A mac | ( | const A & | acc, | 
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b, | ||
| const B | p0 | ||
| ) | 
Multiply accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| [in] | p0 | Predicate | 
| A | Accumulator datatype | 
| B | Predicate datatype | 
| E | Datatypes for tuple | 
| double mac | ( | const double | acc, | 
| const double | a, | ||
| const double | b | ||
| ) | 
Multiply and accumulate for this datatype.
| [in] | acc | The accumulated value | 
| [in] | a | The left hand side | 
| [in] | b | The right hand side | 
| float mac | ( | const float | acc, | 
| const float | a, | ||
| const float | b | ||
| ) | 
Scalar multiply and accumulate.
| [in] | acc | Accumulator | 
| [in] | a | Operand | 
| [in] | b | Operand | 
| float16_t mac | ( | const float16_t | acc, | 
| const float16_t | a, | ||
| const float16_t | b | ||
| ) | 
Multiply and accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| Q< 15, 48 > mac | ( | const Q< 15, 48 > | acc, | 
| const Q31 | a, | ||
| const Q31 | b | ||
| ) | 
Multiply and accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| Q< 17, 14 > mac | ( | const Q< 17, 14 > | acc, | 
| const Q7 | a, | ||
| const Q7 | b | ||
| ) | 
Multiply and accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| Q< 33, 30 > mac | ( | const Q< 33, 30 > | acc, | 
| const Q15 | a, | ||
| const Q15 | b | ||
| ) | 
Multiply and accumulate.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| A mac_impl | ( | const A & | acc, | 
| const V & | a, | ||
| const V & | b, | ||
| const B | p0, | ||
| std::index_sequence< Ns... > | |||
| ) | 
Multiply accumulate for tuple of scalar.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| [in] | p0 | Predicate | 
| A | Accumulator datatype | 
| V | Scalar datatype | 
| B | Predicate datatype | 
| Ns | Tuple indexes | 
| A mac_impl | ( | const A & | acc, | 
| const V & | a, | ||
| const V & | b, | ||
| std::index_sequence< Ns... > | |||
| ) | 
Multiply accumulate for tuple of scalar.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| A | Accumulator datatype | 
| V | Scalar datatype | 
| Ns | Tuple indexes | 
| double mult | ( | double & | a, | 
| const double & | b | ||
| ) | 
Multiply.
| a | Left hand side | |
| [in] | b | Right hand side | 
| float mult | ( | float & | a, | 
| const float & | b | ||
| ) | 
Scalar multiply.
| a | Operand | |
| [in] | b | Operand | 
| float16_t mult | ( | float16_t & | a, | 
| const float16_t & | b | ||
| ) | 
Multiply.
| a | First operand | |
| [in] | b | Second operand | 
| float32x4_t vadd | ( | const float | a, | 
| const float32x4_t | b | ||
| ) | 
Scalar + Vector.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| float32x4_t vadd | ( | const float | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Scalar + vector with tail predicate.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float16x8_t vadd | ( | const float16_t | a, | 
| const float16x8_t | b | ||
| ) | 
Scalar + Vector.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| float16x8_t vadd | ( | const float16_t | a, | 
| const float16x8_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Scalar + Vector with tail predicate.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float16x8_t vadd | ( | const float16x8_t | a, | 
| const float16_t | b | ||
| ) | 
Vector + Scalar.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| float16x8_t vadd | ( | const float16x8_t | a, | 
| const float16_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector + Scalar with tail predicate.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| [in] | p0 | Predicate | 
| float16x8_t vadd | ( | const float16x8_t | a, | 
| const float16x8_t | b | ||
| ) | 
Vector + Vector.
| [in] | a | Vector | 
| [in] | b | Vector | 
| float16x8_t vadd | ( | const float16x8_t | a, | 
| const float16x8_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector + Vector with tail predicate.
| [in] | a | Vector | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vadd | ( | const float32x4_t | a, | 
| const float | b | ||
| ) | 
Vector + Scalar.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| float32x4_t vadd | ( | const float32x4_t | a, | 
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector + scalar with tail.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| [in] | p0 | Predicate | 
| float32x4_t vadd | ( | const float32x4_t | a, | 
| const float32x4_t | b | ||
| ) | 
Vector + Vector.
| [in] | a | First operand | 
| [in] | b | Second operand | 
| float32x4_t vadd | ( | const float32x4_t | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector + Vector with tail.
| [in] | a | Vector | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vconst | ( | const float | v | ) | 
Vector constant.
| [in] | v | Constant value | 
| float16x8_t vconst | ( | float16_t | v | ) | 
Vector const.
| [in] | v | Initialization value | 
| int16x8_t vconst | ( | Q15 | val | ) | 
Vector const.
| [in] | val | The value | 
| float32x4_t vconst_tail | ( | const float | v, | 
| const mve_pred16_t | p0 | ||
| ) | 
Vector constant with tail.
| [in] | v | Constant value | 
| [in] | p0 | Predicate | 
| float16x8_t vconst_tail | ( | const float16_t | v, | 
| const mve_pred16_t | p0 | ||
| ) | 
Vector of const with tail predicate.
| [in] | v | The initialization parameter | 
| [in] | p0 | The predicate | 
      
  | 
  inline | 
Vector load with stride.
| [in] | p | Load address | 
| S | Stride | 
| <unnamed> | Check stride value | 
      
  | 
  inline | 
Vector load with dynamic stride.
| [in] | p | Load address | 
| [in] | stride | Stride value | 
      
  | 
  inline | 
Vector load with stride.
| [in] | p | Load address | 
| S | Stride | 
| <unnamed> | Stride check | 
In q15, a lane is 16 bits wide, so the offset that can be encoded for a gather load cannot be bigger than 65535. With a stride of S, the largest offset is S*7, so S must satisfy S <= 65535/7, i.e. S <= 9362.
For larger strides, the Helium instruction cannot be used and a dynamic stride is used instead.
      
  | 
  inline | 
Vector load with dynamic stride and loop predication.
| [in] | p | Load address | 
| [in] | stride | Stride value | 
| [in] | nb | Number of remaining loop samples | 
| [in] | p0 | Predicate for remaining loop samples | 
      
  | 
  inline | 
Vector load with stride and predicate.
| [in] | p | Load address | 
| [in] | nb | Number of remaining loop samples | 
| [in] | p0 | Predicate for remaining loop samples | 
| S | Stride | 
| <unnamed> | Check stride value | 
| A vmacc | ( | const A & | acc, | 
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b | ||
| ) | 
Vector accumulate for tuples of vectors.
| [in] | acc | The accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| A | Accumulator datatype | 
| E | Datatype of tuples elements | 
| A vmacc | ( | const A & | acc, | 
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b, | ||
| const B | p0 | ||
| ) | 
Predicated vector accumulate for tuples.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| [in] | p0 | Predicate | 
| A | Accumulator datatype | 
| B | Predicate datatype | 
| E | Datatype of tuples elements | 
| float32x4_t vmacc | ( | const float32x4_t | acc, | 
| const float32x4_t | a, | ||
| const float32x4_t | b | ||
| ) | 
Multiply accumulate (Vector * Vector)
| [in] | acc | Accumulator | 
| [in] | a | Vector | 
| [in] | b | Vector | 
| float32x4_t vmacc | ( | const float32x4_t | acc, | 
| const float32x4_t | a, | ||
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Multiply accumulate with predicate (Vector * Vector)
| [in] | acc | Accumulator | 
| [in] | a | Vector | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vmacc | ( | const float32x4_t | acc, | 
| const float32x4_t | a, | ||
| const float_t | b | ||
| ) | 
Multiply accumulate (Vector * Scalar)
| [in] | acc | Accumulator | 
| [in] | a | Vector | 
| [in] | b | Scalar | 
| Q< 33, 30 > vmacc | ( | const Q< 33, 30 > | sum, | 
| const int16x8_t | vala, | ||
| const int16x8_t | valb | ||
| ) | 
Vector accumulate into scalar.
| [in] | sum | The sum | 
| [in] | vala | The vala | 
| [in] | valb | The valb | 
| A vmacc_impl | ( | const A & | acc, | 
| const V & | a, | ||
| const V & | b, | ||
| const B | p0, | ||
| std::index_sequence< Ns... > | |||
| ) | 
Predicated vector accumulate for tuple.
| [in] | acc | Accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| [in] | p0 | Predicate | 
| A | Accumulator datatype | 
| V | Vector datatype | 
| B | Predicate datatype | 
| Ns | Tuple indexes | 
| A vmacc_impl | ( | const A & | acc, | 
| const V & | a, | ||
| const V & | b, | ||
| std::index_sequence< Ns... > | |||
| ) | 
Vector accumulate for tuples of vectors.
| [in] | acc | The accumulator | 
| [in] | a | First operand | 
| [in] | b | Second operand | 
| A | Accumulator datatype | 
| V | Vector datatype | 
| Ns | Tuple index | 
| float32x4_t vmul | ( | const float | a, | 
| const float32x4_t | b | ||
| ) | 
Scalar * Vector.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| float32x4_t vmul | ( | const float | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Scalar * Vector with predicate.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vmul | ( | const float32x4_t | a, | 
| const float | b | ||
| ) | 
Vector * Scalar.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| float32x4_t vmul | ( | const float32x4_t | a, | 
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector * Scalar with predicate.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| [in] | p0 | Predicate | 
| float32x4_t vmul | ( | const float32x4_t | a, | 
| const float32x4_t | b | ||
| ) | 
Vector * Vector.
| [in] | a | Vector | 
| [in] | b | Vector | 
| float32x4_t vmul | ( | const float32x4_t | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector * Vector with predicate.
| [in] | a | Vector | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float16x8_t vneg | ( | const float16x8_t | a | ) | 
Vector negate.
| [in] | a | Vector | 
| float16x8_t vneg | ( | const float16x8_t | a, | 
| const mve_pred16_t | p0 | ||
| ) | 
Vector negate with tail predicate.
| [in] | a | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vneg | ( | const float32x4_t | a | ) | 
Vector negate.
| [in] | a | Vector value to negate | 
| float32x4_t vneg | ( | const float32x4_t | a, | 
| const mve_pred16_t | p0 | ||
| ) | 
Vector negate with tail.
| [in] | a | Value | 
| [in] | p0 | Predicate | 
| float vreduce | ( | const float32x4_t | in | ) | 
Vector reduce.
| [in] | in | Vector | 
| Q< 33, 30 > vreduce | ( | const Q< 33, 30 > | sum | ) | 
Reduce accumulation value.
| [in] | sum | The sum | 
Since the Helium instructions can accumulate a vector product into a scalar, there is no need to reduce the accumulator value. It is already in scalar form.
| auto vreduce | ( | const std::tuple< E... > & | acc | ) | 
Reduce function for tuples.
| [in] | acc | The accumulator | 
| E | Datatypes for tuples | 
Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
| auto vreduce_impl | ( | const A & | acc, | 
| std::index_sequence< Ns... > | |||
| ) | 
Reduce function for tuple.
| [in] | acc | Accumulator | 
| A | Accumulator datatype | 
| Ns | Tuple indexes | 
Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
      
  | 
  inline | 
Store with stride.
| p | Store address | |
| [in] | val | Value to store | 
| S | Stride | 
| <unnamed> | Check stride value | 
      
  | 
  inline | 
Store with dynamic stride.
| p | Store address | |
| [in] | stride | Stride value | 
| [in] | val | Value to store | 
      
  | 
  inline | 
Store with stride and tail predicate.
| p | Store address | |
| [in] | val | Value to store | 
| [in] | nb | Number of remaining loop iterations | 
| [in] | p0 | Predicate for loop | 
| S | Stride | 
| <unnamed> | Check stride value | 
      
  | 
  inline | 
Store with dynamic stride and tail predicate.
| p | Store address | |
| [in] | stride | Stride value | 
| [in] | val | Value to store | 
| [in] | nb | Number of remaining loops | 
| [in] | p0 | Predicate for loop | 
| float32x4_t vsub | ( | const float | a, | 
| const float32x4_t | b | ||
| ) | 
Scalar - Vector.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| float32x4_t vsub | ( | const float | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Scalar - Vector with predicate.
| [in] | a | Scalar | 
| [in] | b | Vector | 
| [in] | p0 | Predicate | 
| float32x4_t vsub | ( | const float32x4_t | a, | 
| const float | b | ||
| ) | 
Vector - Scalar.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| float32x4_t vsub | ( | const float32x4_t | a, | 
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector - Scalar with predicate.
| [in] | a | Vector | 
| [in] | b | Scalar | 
| [in] | p0 | Predicate | 
| float32x4_t vsub | ( | const float32x4_t | a, | 
| const float32x4_t | b | ||
| ) | 
Vector - Vector.
| [in] | a | Vector | 
| [in] | b | Vector | 
| float32x4_t vsub | ( | const float32x4_t | a, | 
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) | 
Vector - Vector with predicate.
| [in] | a | Vector | 
| [in] | b | Vector | 
| [in] | p0 | Predicate |