Data Structures | |
| struct | vctpq< float > |
| vctpq for this datatype More... | |
| struct | vctpq< float16_t > |
| vctpq for Helium and f16 More... | |
| struct | vload1_gen_stride |
| Load with generalized stride (gather load) More... | |
| struct | vload1_gen_stride< 0, 1, 2, 3 > |
| Load with generalized stride specialized for <0,1,2,3> More... | |
| struct | vload1_gen_stride_z |
| Load with generalized stride (gather load) and tail predicate. More... | |
| struct | vload1_gen_stride_z< 0, 1, 2, 3 > |
| Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3> More... | |
| struct | vstore1_gen_stride |
| Generalized store with strides. More... | |
| struct | vstore1_gen_stride< 0, 1, 2, 3 > |
| Generalized store with stride (Specialized for <0,1,2,3>) More... | |
| struct | vstore1_gen_stride_z |
| Store with generalized strides and tail predicate. More... | |
| struct | vstore1_gen_stride_z< 0, 1, 2, 3 > |
| Scatter store with tail predicate (specialized for <0,1,2,3>) More... | |
Functions | |
| Q15DSPVector | vconst (Q15 val) |
| Vector const. | |
| Q< 33, 30 > | vreduce (const Q< 33, 30 > sum) |
| Reduce accumulation value. | |
| float32x4_t | vconst (const float v) |
| Vector constant. | |
| float32x4_t | vconst_tail (const float v, const mve_pred16_t p0) |
| Vector constant with tail. | |
| float32x4_t | vneg (const float32x4_t a) |
| Vector negate. | |
| float32x4_t | vneg (const float32x4_t a, const mve_pred16_t p0) |
| Vector negate with tail. | |
| float32x4_t | vadd (const float32x4_t a, const float32x4_t b) |
| Vector + Vector. | |
| float32x4_t | vadd (const float32x4_t a, const float b) |
| Vector + Scalar. | |
| float32x4_t | vadd (const float a, const float32x4_t b) |
| Scalar + Vector. | |
| float32x4_t | vadd (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
| Vector + Vector with tail. | |
| float32x4_t | vadd (const float32x4_t a, const float b, const mve_pred16_t p0) |
| Vector + scalar with tail. | |
| float32x4_t | vadd (const float a, const float32x4_t b, const mve_pred16_t p0) |
| Scalar + vector with tail predicate. | |
| float32x4_t | vsub (const float32x4_t a, const float32x4_t b) |
| Vector - Vector. | |
| float32x4_t | vsub (const float32x4_t a, const float b) |
| Vector - Scalar. | |
| float32x4_t | vsub (const float a, const float32x4_t b) |
| Scalar - Vector. | |
| float32x4_t | vsub (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
| Vector - Vector with predicate. | |
| float32x4_t | vsub (const float32x4_t a, const float b, const mve_pred16_t p0) |
| Vector - Scalar with predicate. | |
| float32x4_t | vsub (const float a, const float32x4_t b, const mve_pred16_t p0) |
| Scalar - Vector with predicate. | |
| float32x4_t | vmul (const float32x4_t a, const float32x4_t b) |
| Vector * Vector. | |
| float32x4_t | vmul (const float32x4_t a, const float b) |
| Vector * Scalar. | |
| float32x4_t | vmul (const float a, const float32x4_t b) |
| Scalar * Vector. | |
| float32x4_t | vmul (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
| Vector * Vector with predicate. | |
| float32x4_t | vmul (const float32x4_t a, const float b, const mve_pred16_t p0) |
| Vector * Scalar with predicate. | |
| float32x4_t | vmul (const float a, const float32x4_t b, const mve_pred16_t p0) |
| Scalar * Vector with predicate. | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b) |
| Multiply accumulate (Vector * Vector) | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float_t b) |
| Multiply accumulate (Vector * Scalar) | |
| float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
| Multiply accumulate with predicate (Vector * Vector) | |
| float | vreduce (const float32x4_t in) |
| Vector reduce. | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| float32x4_t | vload1 (const float32_t *p) |
| Vector load with stride. | |
| float32x4_t | vload1 (const float32_t *p, const index_t stride) |
| Vector load with dynamic stride. | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| float32x4_t | vload1_z (const float32_t *p, const std::size_t nb, const mve_pred16_t p0) |
| Vector load with stride and predicate. | |
| float32x4_t | vload1_z (const float32_t *p, const index_t stride, const std::size_t nb, const mve_pred16_t p0) |
| Vector load with dynamic stride and loop predication. | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| void | vstore1 (float32_t *p, const float32x4_t val) |
| Store with stride. | |
| void | vstore1 (float32_t *p, const index_t stride, const float32x4_t val) |
| Store with dynamic stride. | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| void | vstore1_z (float32_t *p, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) |
| Store with stride and tail predicate. | |
| void | vstore1_z (float32_t *p, const index_t stride, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) |
| Store with dynamic stride and tail predicate. | |
| float16x8_t | vconst (float16_t v) |
| Vector const. | |
| float16x8_t | vconst_tail (const float16_t v, const mve_pred16_t p0) |
| Vector of const with tail predicate. | |
| float16x8_t | vneg (const float16x8_t a) |
| Vector negate. | |
| float16x8_t | vneg (const float16x8_t a, const mve_pred16_t p0) |
| Vector negate with tail predicate. | |
| float16x8_t | vadd (const float16x8_t a, const float16x8_t b) |
| Vector + Vector. | |
| float16x8_t | vadd (const float16x8_t a, const float16_t b) |
| Vector + Scalar. | |
| float16x8_t | vadd (const float16_t a, const float16x8_t b) |
| Scalar + Vector. | |
| float16x8_t | vadd (const float16x8_t a, const float16x8_t b, const mve_pred16_t p0) |
| Vector + Vector with tail predicate. | |
| float16x8_t | vadd (const float16x8_t a, const float16_t b, const mve_pred16_t p0) |
| Vector + Scalar with tail predicate. | |
| float16x8_t | vadd (const float16_t a, const float16x8_t b, const mve_pred16_t p0) |
| Scalar + Vector with tail predicate. | |
| template<int S, typename std::enable_if< S==1, bool >::type = true> | |
| int16x8_t | vload1 (const Q15 *p) |
| Vector load with stride. | |
| Q< 33, 30 > | vmacc (const Q< 33, 30 > sum, const int16x8_t vala, const int16x8_t valb) |
| Vector accumulate into scalar. | |
| double | from_accumulator (const double a) |
| Convert from accumulator representation. | |
| double | mac (const double acc, const double a, const double b) |
| Multiply and accumulate for this datatype. | |
| void | accumulate (double &a, const double &b) |
| Accumulate. | |
| double | mult (double &a, const double &b) |
| Multiply. | |
| float | from_accumulator (const float a) |
| Convert from accumulator representation. | |
| float | mac (const float acc, const float a, const float b) |
| Scalar multiply and accumulate. | |
| void | accumulate (float &a, const float &b) |
| Scalar accumulate. | |
| float | mult (float &a, const float &b) |
| Scalar multiply. | |
| template<typename A , typename V , std::size_t... Ns> | |
| A | vmacc_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) |
| Vector accumulate for tuples of vectors. | |
| template<typename A , typename ... E> | |
| A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) |
| Vector accumulate for tuples of vectors. | |
| template<typename A , typename V , typename B , std::size_t... Ns> | |
| A | vmacc_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) |
| Predicated vector accumulate for tuple. | |
| template<typename A , typename B , typename ... E> | |
| A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) |
| Predicated vector accumulate for tuples. | |
| template<typename A , std::size_t... Ns> | |
| auto | vreduce_impl (const A &acc, std::index_sequence< Ns... >) |
| Reduce function for tuple. | |
| template<typename ... E> | |
| auto | vreduce (const std::tuple< E... > &acc) |
| Reduce function for tuples. | |
| template<typename A , std::size_t... Ns> | |
| auto | from_accumulator_impl (const A &acc, std::index_sequence< Ns... >) |
| Convert from accumulator value. | |
| template<typename ... E> | |
| auto | from_accumulator (const std::tuple< E... > &acc) |
| Convert from tuple of accumulator values. | |
| template<typename A , typename V , std::size_t... Ns> | |
| A | mac_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) |
| Multiply accumulate for tuple of scalar. | |
| template<typename A , typename ... E> | |
| A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) |
| Multiply accumulate. | |
| template<typename A , typename V , typename B , std::size_t... Ns> | |
| A | mac_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) |
| Multiply accumulate for tuple of scalar. | |
| template<typename A , typename B , typename ... E> | |
| A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) |
| Multiply accumulate. | |
| float16_t | from_accumulator (const float16_t a) |
| Convert from accumulator datatype. | |
| float16_t | mac (const float16_t acc, const float16_t a, const float16_t b) |
| Multiply and accumulate. | |
| void | accumulate (float16_t &a, const float16_t &b) |
| Accumulate. | |
| float16_t | mult (float16_t &a, const float16_t &b) |
| Multiply. | |
| Q15 | from_accumulator (const Q< 33, 30 > a) |
| Convert from accumulator type. | |
| Q< 33, 30 > | mac (const Q< 33, 30 > acc, const Q15 a, const Q15 b) |
| Multiply and accumulate. | |
| Q31 | from_accumulator (const Q< 15, 48 > a) |
| Convert from accumulator (with no saturation) | |
| Q< 15, 48 > | mac (const Q< 15, 48 > acc, const Q31 a, const Q31 b) |
| Multiply and accumulate. | |
| Q7 | from_accumulator (const Q< 17, 14 > a) |
| Convert from accumulator with saturation. | |
| Q< 17, 14 > | mac (const Q< 17, 14 > acc, const Q7 a, const Q7 b) |
| Multiply and accumulate. | |
Inner implementation of Helium intrinsics
Inner implementation of generic intrinsics
| void accumulate | ( | double & | a, |
| const double & | b | ||
| ) |
Accumulate.
| a | Accumulator | |
| [in] | b | Value to be added |
| void accumulate | ( | float & | a, |
| const float & | b | ||
| ) |
Scalar accumulate.
| a | Accumulator | |
| [in] | b | Operand |
| void accumulate | ( | float16_t & | a, |
| const float16_t & | b | ||
| ) |
Accumulate.
| a | Accumulator | |
| [in] | b | Value to accumulate |
| double from_accumulator | ( | const double | a | ) |
Convert from accumulator representation.
| [in] | a | Value |
| float from_accumulator | ( | const float | a | ) |
Convert from accumulator representation.
| [in] | a | Value |
| float16_t from_accumulator | ( | const float16_t | a | ) |
Convert from accumulator datatype.
| [in] | a | Value |
| Q31 from_accumulator | ( | const Q< 15, 48 > | a | ) |
Convert from accumulator (with no saturation)
| [in] | a | Accumulator value |
| Q7 from_accumulator | ( | const Q< 17, 14 > | a | ) |
Convert from accumulator with saturation.
| [in] | a | Accumulator value |
| Q15 from_accumulator | ( | const Q< 33, 30 > | a | ) |
Convert from accumulator type.
| [in] | a | The accumulator value |
| auto from_accumulator | ( | const std::tuple< E... > & | acc | ) |
Convert from tuple of accumulator values.
| [in] | acc | Accumulator |
| E | Datatypes for tuple |
The accumulator may use more bits to avoid saturation. At the end of the accumulation, the final result must be converted to the current datatype (which may imply saturation).
| auto from_accumulator_impl | ( | const A & | acc, |
| std::index_sequence< Ns... > | |||
| ) |
Convert from accumulator value.
| [in] | acc | The accumulator |
| A | Accumulator datatype |
| Ns | Tuples indexes |
| A mac | ( | const A & | acc, |
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b | ||
| ) |
Multiply accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| A | Accumulator datatype |
| E | Datatypes for tuple |
| A mac | ( | const A & | acc, |
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b, | ||
| const B | p0 | ||
| ) |
Multiply accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| [in] | p0 | Predicate |
| A | Accumulator datatype |
| B | Predicate datatype |
| E | Datatypes for tuple |
| double mac | ( | const double | acc, |
| const double | a, | ||
| const double | b | ||
| ) |
Multiply and accumulate for this datatype.
| [in] | acc | The accumulated value |
| [in] | a | The left hand side |
| [in] | b | The right hand side |
| float mac | ( | const float | acc, |
| const float | a, | ||
| const float | b | ||
| ) |
Scalar multiply and accumulate.
| [in] | acc | Accumulator |
| [in] | a | Operand |
| [in] | b | Operand |
| float16_t mac | ( | const float16_t | acc, |
| const float16_t | a, | ||
| const float16_t | b | ||
| ) |
Multiply and accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| Q< 15, 48 > mac | ( | const Q< 15, 48 > | acc, |
| const Q31 | a, | ||
| const Q31 | b | ||
| ) |
Multiply and accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| Q< 17, 14 > mac | ( | const Q< 17, 14 > | acc, |
| const Q7 | a, | ||
| const Q7 | b | ||
| ) |
Multiply and accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| Q< 33, 30 > mac | ( | const Q< 33, 30 > | acc, |
| const Q15 | a, | ||
| const Q15 | b | ||
| ) |
Multiply and accumulate.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| A mac_impl | ( | const A & | acc, |
| const V & | a, | ||
| const V & | b, | ||
| const B | p0, | ||
| std::index_sequence< Ns... > | |||
| ) |
Multiply accumulate for tuple of scalar.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| [in] | p0 | Predicate |
| A | Accumulator datatype |
| V | Scalar datatype |
| B | Predicate datatype |
| Ns | Tuple indexes |
| A mac_impl | ( | const A & | acc, |
| const V & | a, | ||
| const V & | b, | ||
| std::index_sequence< Ns... > | |||
| ) |
Multiply accumulate for tuple of scalar.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| A | Accumulator datatype |
| V | Scalar datatype |
| Ns | Tuple indexes |
| double mult | ( | double & | a, |
| const double & | b | ||
| ) |
Multiply.
| a | Left hand side | |
| [in] | b | Right hand side |
| float mult | ( | float & | a, |
| const float & | b | ||
| ) |
Scalar multiply.
| a | Operand | |
| [in] | b | Operand |
| float16_t mult | ( | float16_t & | a, |
| const float16_t & | b | ||
| ) |
Multiply.
| a | First operand | |
| [in] | b | Second operand |
| float32x4_t vadd | ( | const float | a, |
| const float32x4_t | b | ||
| ) |
Scalar + Vector.
| [in] | a | Scalar |
| [in] | b | Vector |
| float32x4_t vadd | ( | const float | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Scalar + vector with tail predicate.
| [in] | a | Scalar |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float16x8_t vadd | ( | const float16_t | a, |
| const float16x8_t | b | ||
| ) |
Scalar + Vector.
| [in] | a | Scalar |
| [in] | b | Vector |
| float16x8_t vadd | ( | const float16_t | a, |
| const float16x8_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Scalar + Vector with tail predicate.
| [in] | a | Scalar |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float16x8_t vadd | ( | const float16x8_t | a, |
| const float16_t | b | ||
| ) |
Vector + Scalar.
| [in] | a | Vector |
| [in] | b | Scalar |
| float16x8_t vadd | ( | const float16x8_t | a, |
| const float16_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector + Scalar with tail predicate.
| [in] | a | Vector |
| [in] | b | Scalar |
| [in] | p0 | Predicate |
| float16x8_t vadd | ( | const float16x8_t | a, |
| const float16x8_t | b | ||
| ) |
Vector + Vector.
| [in] | a | Vector |
| [in] | b | Vector |
| float16x8_t vadd | ( | const float16x8_t | a, |
| const float16x8_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector + Vector with tail predicate.
| [in] | a | Vector |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float32x4_t vadd | ( | const float32x4_t | a, |
| const float | b | ||
| ) |
Vector + Scalar.
| [in] | a | Vector |
| [in] | b | Scalar |
| float32x4_t vadd | ( | const float32x4_t | a, |
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector + scalar with tail.
| [in] | a | Vector |
| [in] | b | Scalar |
| [in] | p0 | Predicate |
| float32x4_t vadd | ( | const float32x4_t | a, |
| const float32x4_t | b | ||
| ) |
Vector + Vector.
| [in] | a | First operand |
| [in] | b | Second operand |
| float32x4_t vadd | ( | const float32x4_t | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector + Vector with tail.
| [in] | a | Vector |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float32x4_t vconst | ( | const float | v | ) |
Vector constant.
| [in] | v | Constant value |
| float16x8_t vconst | ( | float16_t | v | ) |
Vector const.
| [in] | v | Initialization value |
| int16x8_t vconst | ( | Q15 | val | ) |
Vector const.
| [in] | val | The value |
| float32x4_t vconst_tail | ( | const float | v, |
| const mve_pred16_t | p0 | ||
| ) |
Vector constant with tail.
| [in] | v | Constant value |
| [in] | p0 | Predicate |
| float16x8_t vconst_tail | ( | const float16_t | v, |
| const mve_pred16_t | p0 | ||
| ) |
Vector of const with tail predicate.
| [in] | v | The initialization parameter |
| [in] | p0 | The predicate |
|
inline |
Vector load with stride.
| [in] | p | Load address |
| S | Stride |
| <unnamed> | Check stride value |
|
inline |
Vector load with dynamic stride.
| [in] | p | Load address |
| [in] | stride | Stride value |
|
inline |
Vector load with stride.
| [in] | p | Load address |
| S | Stride |
| <unnamed> | Stride check |
In q15, a lane is 16 bits wide, so the offset that can be encoded for a gather load cannot be bigger than 65535. With a stride of S, the biggest offset is S*7, so S must be <= 65535/7, i.e. S <= 9362.
For higher strides, the Helium instruction cannot be used and a dynamic stride is used instead.
|
inline |
Vector load with dynamic stride and loop predication.
| [in] | p | Load address |
| [in] | stride | Stride value |
| [in] | nb | Number of remaining loop samples |
| [in] | p0 | Predicate for remaining loop samples |
|
inline |
Vector load with stride and predicate.
| [in] | p | Load address |
| [in] | nb | Number of remaining loop samples |
| [in] | p0 | Predicate for remaining loop samples |
| S | Stride |
| <unnamed> | Check stride value |
| A vmacc | ( | const A & | acc, |
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b | ||
| ) |
Vector accumulate for tuples of vectors.
| [in] | acc | The accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| A | Accumulator datatype |
| E | Datatype of tuples elements |
| A vmacc | ( | const A & | acc, |
| const std::tuple< E... > & | a, | ||
| const std::tuple< E... > & | b, | ||
| const B | p0 | ||
| ) |
Predicated vector accumulate for tuples.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| [in] | p0 | Predicate |
| A | Accumulator datatype |
| B | Predicate datatype |
| E | Datatype of tuples elements |
| float32x4_t vmacc | ( | const float32x4_t | acc, |
| const float32x4_t | a, | ||
| const float32x4_t | b | ||
| ) |
Multiply accumulate (Vector * Vector)
| [in] | acc | Accumulator |
| [in] | a | Vector |
| [in] | b | Vector |
| float32x4_t vmacc | ( | const float32x4_t | acc, |
| const float32x4_t | a, | ||
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Multiply accumulate with predicate (Vector * Vector)
| [in] | acc | Accumulator |
| [in] | a | Vector |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float32x4_t vmacc | ( | const float32x4_t | acc, |
| const float32x4_t | a, | ||
| const float_t | b | ||
| ) |
Multiply accumulate (Vector * Scalar)
| [in] | acc | Accumulator |
| [in] | a | Vector |
| [in] | b | Scalar |
| Q< 33, 30 > vmacc | ( | const Q< 33, 30 > | sum, |
| const int16x8_t | vala, | ||
| const int16x8_t | valb | ||
| ) |
Vector accumulate into scalar.
| [in] | sum | The sum |
| [in] | vala | First vector operand |
| [in] | valb | Second vector operand |
| A vmacc_impl | ( | const A & | acc, |
| const V & | a, | ||
| const V & | b, | ||
| const B | p0, | ||
| std::index_sequence< Ns... > | |||
| ) |
Predicated vector accumulate for tuple.
| [in] | acc | Accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| [in] | p0 | Predicate |
| A | Accumulator datatype |
| V | Vector datatype |
| B | Predicate datatype |
| Ns | Tuple indexes |
| A vmacc_impl | ( | const A & | acc, |
| const V & | a, | ||
| const V & | b, | ||
| std::index_sequence< Ns... > | |||
| ) |
Vector accumulate for tuples of vectors.
| [in] | acc | The accumulator |
| [in] | a | First operand |
| [in] | b | Second operand |
| A | Accumulator datatype |
| V | Vector datatype |
| Ns | Tuple index |
| float32x4_t vmul | ( | const float | a, |
| const float32x4_t | b | ||
| ) |
Scalar * Vector.
| [in] | a | Scalar |
| [in] | b | Vector |
| float32x4_t vmul | ( | const float | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Scalar * Vector with predicate.
| [in] | a | Scalar |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float32x4_t vmul | ( | const float32x4_t | a, |
| const float | b | ||
| ) |
Vector * Scalar.
| [in] | a | Vector |
| [in] | b | Scalar |
| float32x4_t vmul | ( | const float32x4_t | a, |
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector * Scalar with predicate.
| [in] | a | Vector |
| [in] | b | Scalar |
| [in] | p0 | Predicate |
| float32x4_t vmul | ( | const float32x4_t | a, |
| const float32x4_t | b | ||
| ) |
Vector * Vector.
| [in] | a | Vector |
| [in] | b | Vector |
| float32x4_t vmul | ( | const float32x4_t | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector * Vector with predicate.
| [in] | a | Vector |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float16x8_t vneg | ( | const float16x8_t | a | ) |
Vector negate.
| [in] | a | Vector |
| float16x8_t vneg | ( | const float16x8_t | a, |
| const mve_pred16_t | p0 | ||
| ) |
Vector negate with tail predicate.
| [in] | a | Vector |
| [in] | p0 | Predicate |
| float32x4_t vneg | ( | const float32x4_t | a | ) |
Vector negate.
| [in] | a | Vector value to negate |
| float32x4_t vneg | ( | const float32x4_t | a, |
| const mve_pred16_t | p0 | ||
| ) |
Vector negate with tail.
| [in] | a | Value |
| [in] | p0 | Predicate |
| float vreduce | ( | const float32x4_t | in | ) |
Vector reduce.
| [in] | in | Vector |
| Q< 33, 30 > vreduce | ( | const Q< 33, 30 > | sum | ) |
Reduce accumulation value.
| [in] | sum | The sum |
Since the Helium instructions can accumulate a vector product into a scalar, there is no need to reduce the accumulator value: it is already in scalar form.
| auto vreduce | ( | const std::tuple< E... > & | acc | ) |
Reduce function for tuples.
| [in] | acc | The accumulator |
| E | Datatypes for tuples |
Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
| auto vreduce_impl | ( | const A & | acc, |
| std::index_sequence< Ns... > | |||
| ) |
Reduce function for tuple.
| [in] | acc | Accumulator |
| A | Accumulator datatype |
| Ns | Tuple indexes |
Some vector instruction sets cannot accumulate vectors into a scalar. They accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
|
inline |
Store with stride.
| p | Store address | |
| [in] | val | Value to store |
| S | Stride |
| <unnamed> | Check stride value |
|
inline |
Store with dynamic stride.
| p | Store address | |
| [in] | stride | Stride value |
| [in] | val | Value to store |
|
inline |
Store with stride and tail predicate.
| p | Store address | |
| [in] | val | Value to store |
| [in] | nb | Number of remaining loop iterations |
| [in] | p0 | Predicate for loop |
| S | Stride |
| <unnamed> | Check stride value |
|
inline |
Store with dynamic stride and tail predicate.
| p | Store address | |
| [in] | stride | Stride value |
| [in] | val | Value to store |
| [in] | nb | Number of remaining loops |
| [in] | p0 | Predicate for loop |
| float32x4_t vsub | ( | const float | a, |
| const float32x4_t | b | ||
| ) |
Scalar - Vector.
| [in] | a | Scalar |
| [in] | b | Vector |
| float32x4_t vsub | ( | const float | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Scalar - Vector with predicate.
| [in] | a | Scalar |
| [in] | b | Vector |
| [in] | p0 | Predicate |
| float32x4_t vsub | ( | const float32x4_t | a, |
| const float | b | ||
| ) |
Vector - Scalar.
| [in] | a | Vector |
| [in] | b | Scalar |
| float32x4_t vsub | ( | const float32x4_t | a, |
| const float | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector - Scalar with predicate.
| [in] | a | Vector |
| [in] | b | Scalar |
| [in] | p0 | Predicate |
| float32x4_t vsub | ( | const float32x4_t | a, |
| const float32x4_t | b | ||
| ) |
Vector - Vector.
| [in] | a | Vector |
| [in] | b | Vector |
| float32x4_t vsub | ( | const float32x4_t | a, |
| const float32x4_t | b, | ||
| const mve_pred16_t | p0 | ||
| ) |
Vector - Vector with predicate.
| [in] | a | Vector |
| [in] | b | Vector |
| [in] | p0 | Predicate |