Data Structures | |
struct | vctpq< float > |
vctpq for this datatype More... | |
struct | vctpq< float16_t > |
vctpq for Helium and f16 More... | |
struct | vload1_gen_stride |
Load with generalized stride (gather load) More... | |
struct | vload1_gen_stride< 0, 1, 2, 3 > |
Load with generalized stride specialized for <0,1,2,3> More... | |
struct | vload1_gen_stride_z |
Load with generalized stride (gather load) and tail predicate. More... | |
struct | vload1_gen_stride_z< 0, 1, 2, 3 > |
Load with generalized stride (gather load) and tail predicate specialized for <0,1,2,3> More... | |
struct | vstore1_gen_stride |
Generalized store with strides. More... | |
struct | vstore1_gen_stride< 0, 1, 2, 3 > |
Generalized store with stride (Specialized for <0,1,2,3>) More... | |
struct | vstore1_gen_stride_z |
Store with generalized strides and tail predicate. More... | |
struct | vstore1_gen_stride_z< 0, 1, 2, 3 > |
Scatter store with tail predicate (specialized for <0,1,2,3>) More... | |
Functions | |
Q15DSPVector | vconst (Q15 val) |
Vector const. | |
Q< 33, 30 > | vreduce (const Q< 33, 30 > sum) |
Reduce accumulation value. | |
float32x4_t | vconst (const float v) |
Vector constant. | |
float32x4_t | vconst_tail (const float v, const mve_pred16_t p0) |
Vector constant with tail. | |
float32x4_t | vneg (const float32x4_t a) |
Vector negate. | |
float32x4_t | vneg (const float32x4_t a, const mve_pred16_t p0) |
Vector negate with tail. | |
float32x4_t | vadd (const float32x4_t a, const float32x4_t b) |
Vector + Vector. | |
float32x4_t | vadd (const float32x4_t a, const float b) |
Vector + Scalar. | |
float32x4_t | vadd (const float a, const float32x4_t b) |
Scalar + Vector. | |
float32x4_t | vadd (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
Vector + Vector with tail. | |
float32x4_t | vadd (const float32x4_t a, const float b, const mve_pred16_t p0) |
Vector + scalar with tail. | |
float32x4_t | vadd (const float a, const float32x4_t b, const mve_pred16_t p0) |
Scalar + vector with tail predicate. | |
float32x4_t | vsub (const float32x4_t a, const float32x4_t b) |
Vector - Vector. | |
float32x4_t | vsub (const float32x4_t a, const float b) |
Vector - Scalar. | |
float32x4_t | vsub (const float a, const float32x4_t b) |
Scalar - Vector. | |
float32x4_t | vsub (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
Vector - Vector with predicate. | |
float32x4_t | vsub (const float32x4_t a, const float b, const mve_pred16_t p0) |
Vector - Scalar with predicate. | |
float32x4_t | vsub (const float a, const float32x4_t b, const mve_pred16_t p0) |
Scalar - Vector with predicate. | |
float32x4_t | vmul (const float32x4_t a, const float32x4_t b) |
Vector * Vector. | |
float32x4_t | vmul (const float32x4_t a, const float b) |
Vector * Scalar. | |
float32x4_t | vmul (const float a, const float32x4_t b) |
Scalar * Vector. | |
float32x4_t | vmul (const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
Vector * Vector with predicate. | |
float32x4_t | vmul (const float32x4_t a, const float b, const mve_pred16_t p0) |
Vector * Scalar with predicate. | |
float32x4_t | vmul (const float a, const float32x4_t b, const mve_pred16_t p0) |
Scalar * Vector with predicate. | |
float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b) |
Multiply accumulate (Vector * Vector) | |
float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float_t b) |
Multiply accumulate (Vector * Scalar) | |
float32x4_t | vmacc (const float32x4_t acc, const float32x4_t a, const float32x4_t b, const mve_pred16_t p0) |
Multiply accumulate with predicate (Vector * Vector) | |
float | vreduce (const float32x4_t in) |
Vector reduce. | |
template<int S, typename std::enable_if< S==1, bool >::type = true> | |
float32x4_t | vload1 (const float32_t *p) |
Vector load with stride. | |
float32x4_t | vload1 (const float32_t *p, const index_t stride) |
Vector load with dynamic stride. | |
template<int S, typename std::enable_if< S==1, bool >::type = true> | |
float32x4_t | vload1_z (const float32_t *p, const std::size_t nb, const mve_pred16_t p0) |
Vector load with stride and predicate. | |
float32x4_t | vload1_z (const float32_t *p, const index_t stride, const std::size_t nb, const mve_pred16_t p0) |
Vector load with dynamic stride and loop predication. | |
template<int S, typename std::enable_if< S==1, bool >::type = true> | |
void | vstore1 (float32_t *p, const float32x4_t val) |
Store with stride. | |
void | vstore1 (float32_t *p, const index_t stride, const float32x4_t val) |
Store with dynamic stride. | |
template<int S, typename std::enable_if< S==1, bool >::type = true> | |
void | vstore1_z (float32_t *p, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) |
Store with stride and tail predicate. | |
void | vstore1_z (float32_t *p, const index_t stride, const float32x4_t val, const std::size_t nb, const mve_pred16_t p0) |
Store with dynamic stride and tail predicate. | |
float16x8_t | vconst (float16_t v) |
Vector const. | |
float16x8_t | vconst_tail (const float16_t v, const mve_pred16_t p0) |
Vector of const with tail predicate. | |
float16x8_t | vneg (const float16x8_t a) |
Vector negate. | |
float16x8_t | vneg (const float16x8_t a, const mve_pred16_t p0) |
Vector negate with tail predicate. | |
float16x8_t | vadd (const float16x8_t a, const float16x8_t b) |
Vector + Vector. | |
float16x8_t | vadd (const float16x8_t a, const float16_t b) |
Vector + Scalar. | |
float16x8_t | vadd (const float16_t a, const float16x8_t b) |
Scalar + Vector. | |
float16x8_t | vadd (const float16x8_t a, const float16x8_t b, const mve_pred16_t p0) |
Vector + Vector with tail predicate. | |
float16x8_t | vadd (const float16x8_t a, const float16_t b, const mve_pred16_t p0) |
Vector + Scalar with tail predicate. | |
float16x8_t | vadd (const float16_t a, const float16x8_t b, const mve_pred16_t p0) |
Scalar + Vector with tail predicate. | |
template<int S, typename std::enable_if< S==1, bool >::type = true> | |
int16x8_t | vload1 (const Q15 *p) |
Vector load with stride. | |
Q< 33, 30 > | vmacc (const Q< 33, 30 > sum, const int16x8_t vala, const int16x8_t valb) |
Vector accumulate into scalar. | |
double | from_accumulator (const double a) |
Convert from accumulator representation. | |
double | mac (const double acc, const double a, const double b) |
Multiply and accumulate for this datatype. | |
void | accumulate (double &a, const double &b) |
Accumulate. | |
double | mult (double &a, const double &b) |
Multiply. | |
float | from_accumulator (const float a) |
Convert from accumulator representation. | |
float | mac (const float acc, const float a, const float b) |
Scalar multiply and accumulate. | |
void | accumulate (float &a, const float &b) |
Scalar accumulate. | |
float | mult (float &a, const float &b) |
Scalar multiply. | |
template<typename A , typename V , std::size_t... Ns> | |
A | vmacc_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) |
Vector accumulate for tuples of vectors. | |
template<typename A , typename ... E> | |
A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) |
Vector accumulate for tuples of vectors. | |
template<typename A , typename V , typename B , std::size_t... Ns> | |
A | vmacc_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) |
Predicated vector accumulate for tuple. | |
template<typename A , typename B , typename ... E> | |
A | vmacc (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) |
Predicated vector accumulate for tuples. | |
template<typename A , std::size_t... Ns> | |
auto | vreduce_impl (const A &acc, std::index_sequence< Ns... >) |
Reduce function for tuple. | |
template<typename ... E> | |
auto | vreduce (const std::tuple< E... > &acc) |
Reduce function for tuples. | |
template<typename A , std::size_t... Ns> | |
auto | from_accumulator_impl (const A &acc, std::index_sequence< Ns... >) |
Convert from accumulator value. | |
template<typename ... E> | |
auto | from_accumulator (const std::tuple< E... > &acc) |
Convert from tuple of accumulator values. | |
template<typename A , typename V , std::size_t... Ns> | |
A | mac_impl (const A &acc, const V &a, const V &b, std::index_sequence< Ns... >) |
Multiply accumulate for tuple of scalar. | |
template<typename A , typename ... E> | |
A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b) |
Multiply accumulate. | |
template<typename A , typename V , typename B , std::size_t... Ns> | |
A | mac_impl (const A &acc, const V &a, const V &b, const B p0, std::index_sequence< Ns... >) |
Multiply accumulate for tuple of scalar. | |
template<typename A , typename B , typename ... E> | |
A | mac (const A &acc, const std::tuple< E... > &a, const std::tuple< E... > &b, const B p0) |
Multiply accumulate. | |
float16_t | from_accumulator (const float16_t a) |
Convert from accumulator datatype. | |
float16_t | mac (const float16_t acc, const float16_t a, const float16_t b) |
Multiply and accumulate. | |
void | accumulate (float16_t &a, const float16_t &b) |
Accumulate. | |
float16_t | mult (float16_t &a, const float16_t &b) |
Multiply. | |
Q15 | from_accumulator (const Q< 33, 30 > a) |
Convert from accumulator type. | |
Q< 33, 30 > | mac (const Q< 33, 30 > acc, const Q15 a, const Q15 b) |
Multiply and accumulate. | |
Q31 | from_accumulator (const Q< 15, 48 > a) |
Convert from accumulator (with no saturation) | |
Q< 15, 48 > | mac (const Q< 15, 48 > acc, const Q31 a, const Q31 b) |
Multiply and accumulate. | |
Q7 | from_accumulator (const Q< 17, 14 > a) |
Convert from accumulator with saturation. | |
Q< 17, 14 > | mac (const Q< 17, 14 > acc, const Q7 a, const Q7 b) |
Multiply and accumulate. | |
Inner implementation of Helium intrinsics
Inner implementation of generic intrinsics
void accumulate | ( | double & | a, |
const double & | b | ||
) |
Accumulate.
a | Accumulator | |
[in] | b | Value to be added |
void accumulate | ( | float & | a, |
const float & | b | ||
) |
Scalar accumulate.
a | Accumulator | |
[in] | b | Operand |
void accumulate | ( | float16_t & | a, |
const float16_t & | b | ||
) |
Accumulate.
a | Accumulator | |
[in] | b | Value to accumulate |
double from_accumulator | ( | const double | a | ) |
Convert from accumulator representation.
[in] | a | Value |
float from_accumulator | ( | const float | a | ) |
Convert from accumulator representation.
[in] | a | Value |
float16_t from_accumulator | ( | const float16_t | a | ) |
Convert from accumulator datatype.
[in] | a | Value |
Q31 from_accumulator | ( | const Q< 15, 48 > | a | ) |
Convert from accumulator (with no saturation)
[in] | a | Accumulator value |
Q7 from_accumulator | ( | const Q< 17, 14 > | a | ) |
Convert from accumulator with saturation.
[in] | a | Accumulator value |
Q15 from_accumulator | ( | const Q< 33, 30 > | a | ) |
Convert from accumulator type.
[in] | a | The accumulator value |
auto from_accumulator | ( | const std::tuple< E... > & | acc | ) |
Convert from tuple of accumulator values.
[in] | acc | Accumulator |
E | Datatypes for tuple |
The accumulator may use more bits to avoid saturation. At the end of the accumulation, the final result must be converted to the current datatype (which may imply saturation).
auto from_accumulator_impl | ( | const A & | acc, |
std::index_sequence< Ns... > | |||
) |
Convert from accumulator value.
[in] | acc | The accumulator |
A | Accumulator datatype |
Ns | Tuples indexes |
A mac | ( | const A & | acc, |
const std::tuple< E... > & | a, | ||
const std::tuple< E... > & | b | ||
) |
Multiply accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
A | Accumulator datatype |
E | Datatypes for tuple |
A mac | ( | const A & | acc, |
const std::tuple< E... > & | a, | ||
const std::tuple< E... > & | b, | ||
const B | p0 | ||
) |
Multiply accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
[in] | p0 | Predicate |
A | Accumulator datatype |
B | Predicate datatype |
E | Datatypes for tuple |
double mac | ( | const double | acc, |
const double | a, | ||
const double | b | ||
) |
Multiply and accumulate for this datatype.
[in] | acc | The accumulated value |
[in] | a | The left hand side |
[in] | b | The right hand side |
float mac | ( | const float | acc, |
const float | a, | ||
const float | b | ||
) |
Scalar multiply and accumulate.
[in] | acc | Accumulator |
[in] | a | Operand |
[in] | b | Operand |
float16_t mac | ( | const float16_t | acc, |
const float16_t | a, | ||
const float16_t | b | ||
) |
Multiply and accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
Q< 15, 48 > mac | ( | const Q< 15, 48 > | acc, |
const Q31 | a, | ||
const Q31 | b | ||
) |
Multiply and accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
Q< 17, 14 > mac | ( | const Q< 17, 14 > | acc, |
const Q7 | a, | ||
const Q7 | b | ||
) |
Multiply and accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
Q< 33, 30 > mac | ( | const Q< 33, 30 > | acc, |
const Q15 | a, | ||
const Q15 | b | ||
) |
Multiply and accumulate.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
A mac_impl | ( | const A & | acc, |
const V & | a, | ||
const V & | b, | ||
const B | p0, | ||
std::index_sequence< Ns... > | |||
) |
Multiply accumulate for tuple of scalar.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
[in] | p0 | Predicate |
A | Accumulator datatype |
V | Scalar datatype |
B | Predicate datatype |
Ns | Tuple indexes |
A mac_impl | ( | const A & | acc, |
const V & | a, | ||
const V & | b, | ||
std::index_sequence< Ns... > | |||
) |
Multiply accumulate for tuple of scalar.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
A | Accumulator datatype |
V | Scalar datatype |
Ns | Tuple indexes |
double mult | ( | double & | a, |
const double & | b | ||
) |
Multiply.
a | Left hand side | |
[in] | b | Right hand side |
float mult | ( | float & | a, |
const float & | b | ||
) |
Scalar multiply.
a | Operand | |
[in] | b | Operand |
float16_t mult | ( | float16_t & | a, |
const float16_t & | b | ||
) |
Multiply.
a | First operand | |
[in] | b | Second operand |
float32x4_t vadd | ( | const float | a, |
const float32x4_t | b | ||
) |
Scalar + Vector.
[in] | a | Scalar |
[in] | b | Vector |
float32x4_t vadd | ( | const float | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Scalar + vector with tail predicate.
[in] | a | Scalar |
[in] | b | Vector |
[in] | p0 | Predicate |
float16x8_t vadd | ( | const float16_t | a, |
const float16x8_t | b | ||
) |
Scalar + Vector.
[in] | a | Scalar |
[in] | b | Vector |
float16x8_t vadd | ( | const float16_t | a, |
const float16x8_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Scalar + Vector with tail predicate.
[in] | a | Scalar |
[in] | b | Vector |
[in] | p0 | Predicate |
float16x8_t vadd | ( | const float16x8_t | a, |
const float16_t | b | ||
) |
Vector + Scalar.
[in] | a | Vector |
[in] | b | Scalar |
float16x8_t vadd | ( | const float16x8_t | a, |
const float16_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector + Scalar with tail predicate.
[in] | a | Vector |
[in] | b | Scalar |
[in] | p0 | Predicate |
float16x8_t vadd | ( | const float16x8_t | a, |
const float16x8_t | b | ||
) |
Vector + Vector.
[in] | a | Vector |
[in] | b | Vector |
float16x8_t vadd | ( | const float16x8_t | a, |
const float16x8_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector + Vector with tail predicate.
[in] | a | Vector |
[in] | b | Vector |
[in] | p0 | Predicate |
float32x4_t vadd | ( | const float32x4_t | a, |
const float | b | ||
) |
Vector + Scalar.
[in] | a | Vector |
[in] | b | Scalar |
float32x4_t vadd | ( | const float32x4_t | a, |
const float | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector + scalar with tail.
[in] | a | Vector |
[in] | b | Scalar |
[in] | p0 | Predicate |
float32x4_t vadd | ( | const float32x4_t | a, |
const float32x4_t | b | ||
) |
Vector + Vector.
[in] | a | First operand |
[in] | b | Second operand |
float32x4_t vadd | ( | const float32x4_t | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector + Vector with tail.
[in] | a | Vector |
[in] | b | Vector |
[in] | p0 | Predicate |
float32x4_t vconst | ( | const float | v | ) |
Vector constant.
[in] | v | Constant value |
float16x8_t vconst | ( | float16_t | v | ) |
Vector const.
[in] | v | Initialization value |
int16x8_t vconst | ( | Q15 | val | ) |
Vector const.
[in] | val | The value |
float32x4_t vconst_tail | ( | const float | v, |
const mve_pred16_t | p0 | ||
) |
Vector constant with tail.
[in] | v | Constant value |
[in] | p0 | Predicate |
float16x8_t vconst_tail | ( | const float16_t | v, |
const mve_pred16_t | p0 | ||
) |
Vector of const with tail predicate.
[in] | v | The initialization parameter |
[in] | p0 | The predicate |
|
inline |
Vector load with stride.
[in] | p | Load address |
S | Stride |
<unnamed> | Check stride value |
|
inline |
Vector load with dynamic stride.
[in] | p | Load address |
[in] | stride | Stride value |
|
inline |
Vector load with stride.
[in] | p | Load address |
S | Stride |
<unnamed> | Stride check |
In q15, a lane is 16 bits wide, so the offset that can be encoded for a gather load cannot be bigger than 65535. With a stride of S, the largest offset is S*7. So S must satisfy S <= 65535/7, i.e. S <= 9362.
For higher strides, the Helium instruction cannot be used and a dynamic stride is used instead.
|
inline |
Vector load with dynamic stride and loop predication.
[in] | p | Load address |
[in] | stride | Stride value |
[in] | nb | Number of remaining loop samples |
[in] | p0 | Predicate for remaining loop samples |
|
inline |
Vector load with stride and predicate.
[in] | p | Load address |
[in] | nb | Number of remaining loop samples |
[in] | p0 | Predicate for remaining loop samples |
S | Stride |
<unnamed> | Check stride value |
A vmacc | ( | const A & | acc, |
const std::tuple< E... > & | a, | ||
const std::tuple< E... > & | b | ||
) |
Vector accumulate for tuples of vectors.
[in] | acc | The accumulator |
[in] | a | First operand |
[in] | b | Second operand |
A | Accumulator datatype |
E | Datatype of tuples elements |
A vmacc | ( | const A & | acc, |
const std::tuple< E... > & | a, | ||
const std::tuple< E... > & | b, | ||
const B | p0 | ||
) |
Predicated vector accumulate for tuples.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
[in] | p0 | Predicate |
A | Accumulator datatype |
B | Predicate datatype |
E | Datatype of tuple elements |
float32x4_t vmacc | ( | const float32x4_t | acc, |
const float32x4_t | a, | ||
const float32x4_t | b | ||
) |
Multiply accumulate (Vector * Vector)
[in] | acc | Accumulator |
[in] | a | Vector |
[in] | b | Vector |
float32x4_t vmacc | ( | const float32x4_t | acc, |
const float32x4_t | a, | ||
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Multiply accumulate with predicate (Vector * Vector)
[in] | acc | Accumulator |
[in] | a | Vector |
[in] | b | Vector |
[in] | p0 | Predicate |
float32x4_t vmacc | ( | const float32x4_t | acc, |
const float32x4_t | a, | ||
const float_t | b | ||
) |
Multiply accumulate (Vector * Scalar)
[in] | acc | Accumulator |
[in] | a | Vector |
[in] | b | Scalar |
Q< 33, 30 > vmacc | ( | const Q< 33, 30 > | sum, |
const int16x8_t | vala, | ||
const int16x8_t | valb | ||
) |
Vector accumulate into scalar.
[in] | sum | The sum |
[in] | vala | The vala |
[in] | valb | The valb |
A vmacc_impl | ( | const A & | acc, |
const V & | a, | ||
const V & | b, | ||
const B | p0, | ||
std::index_sequence< Ns... > | |||
) |
Predicated vector accumulate for tuple.
[in] | acc | Accumulator |
[in] | a | First operand |
[in] | b | Second operand |
[in] | p0 | Predicate |
A | Accumulator datatype |
V | Vector datatype |
B | Predicate datatype |
Ns | Tuple indexes |
A vmacc_impl | ( | const A & | acc, |
const V & | a, | ||
const V & | b, | ||
std::index_sequence< Ns... > | |||
) |
Vector accumulate for tuples of vectors.
[in] | acc | The accumulator |
[in] | a | First operand |
[in] | b | Second operand |
A | Accumulator datatype |
V | Vector datatype |
Ns | Tuple index |
float32x4_t vmul | ( | const float | a, |
const float32x4_t | b | ||
) |
Scalar * Vector.
[in] | a | Scalar |
[in] | b | Vector |
float32x4_t vmul | ( | const float | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Scalar * Vector with predicate.
[in] | a | Scalar |
[in] | b | Vector |
[in] | p0 | Predicate |
float32x4_t vmul | ( | const float32x4_t | a, |
const float | b | ||
) |
Vector * Scalar.
[in] | a | Vector |
[in] | b | Scalar |
float32x4_t vmul | ( | const float32x4_t | a, |
const float | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector * Scalar with predicate.
[in] | a | Vector |
[in] | b | Scalar |
[in] | p0 | Predicate |
float32x4_t vmul | ( | const float32x4_t | a, |
const float32x4_t | b | ||
) |
Vector * Vector.
[in] | a | Vector |
[in] | b | Vector |
float32x4_t vmul | ( | const float32x4_t | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector * Vector with predicate.
[in] | a | Vector |
[in] | b | Vector |
[in] | p0 | Predicate |
float16x8_t vneg | ( | const float16x8_t | a | ) |
Vector negate.
[in] | a | Vector |
float16x8_t vneg | ( | const float16x8_t | a, |
const mve_pred16_t | p0 | ||
) |
Vector negate with tail predicate.
[in] | a | Vector |
[in] | p0 | Predicate |
float32x4_t vneg | ( | const float32x4_t | a | ) |
Vector negate.
[in] | a | Vector value to negate |
float32x4_t vneg | ( | const float32x4_t | a, |
const mve_pred16_t | p0 | ||
) |
Vector negate with tail.
[in] | a | Value |
[in] | p0 | Predicate |
float vreduce | ( | const float32x4_t | in | ) |
Vector reduce.
[in] | in | Vector |
Q< 33, 30 > vreduce | ( | const Q< 33, 30 > | sum | ) |
Reduce accumulation value.
[in] | sum | The sum |
Since the Helium instructions can accumulate vector products into a scalar, there is no need to reduce the accumulator value. It is already in scalar form.
auto vreduce | ( | const std::tuple< E... > & | acc | ) |
Reduce function for tuples.
[in] | acc | The accumulator |
E | Datatypes for tuples |
Some vector instruction sets cannot accumulate vectors into a scalar; they accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
auto vreduce_impl | ( | const A & | acc, |
std::index_sequence< Ns... > | |||
) |
Reduce function for tuple.
[in] | acc | Accumulator |
A | Accumulator datatype |
Ns | Tuple indexes |
Some vector instruction sets cannot accumulate vectors into a scalar; they accumulate into a vector instead. This vector must be reduced to a scalar at the end of the accumulation loop.
|
inline |
Store with stride.
p | Store address | |
[in] | val | Value to store |
S | Stride |
<unnamed> | Check stride value |
|
inline |
Store with dynamic stride.
p | Store address | |
[in] | stride | Stride value |
[in] | val | Value to store |
|
inline |
Store with stride and tail predicate.
p | Store address | |
[in] | val | Value to store |
[in] | nb | Number of remaining loop iterations |
[in] | p0 | Predicate for loop |
S | Stride |
<unnamed> | Check stride value |
|
inline |
Store with dynamic stride and tail predicate.
p | Store address | |
[in] | stride | Stride value |
[in] | val | Value to store |
[in] | nb | Number of remaining loops |
[in] | p0 | Predicate for loop |
float32x4_t vsub | ( | const float | a, |
const float32x4_t | b | ||
) |
Scalar - Vector.
[in] | a | Scalar |
[in] | b | Vector |
float32x4_t vsub | ( | const float | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Scalar - Vector with predicate.
[in] | a | Scalar |
[in] | b | Vector |
[in] | p0 | Predicate |
float32x4_t vsub | ( | const float32x4_t | a, |
const float | b | ||
) |
Vector - Scalar.
[in] | a | Vector |
[in] | b | Scalar |
float32x4_t vsub | ( | const float32x4_t | a, |
const float | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector - Scalar with predicate.
[in] | a | Vector |
[in] | b | Scalar |
[in] | p0 | Predicate |
float32x4_t vsub | ( | const float32x4_t | a, |
const float32x4_t | b | ||
) |
Vector - Vector.
[in] | a | Vector |
[in] | b | Vector |
float32x4_t vsub | ( | const float32x4_t | a, |
const float32x4_t | b, | ||
const mve_pred16_t | p0 | ||
) |
Vector - Vector with predicate.
[in] | a | Vector |
[in] | b | Vector |
[in] | p0 | Predicate |