24 #ifndef SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H
25 #define SRC_CORE_SVE_KERNELS_ELEMENTWISE_LIST_H
38 template <
typename VectorType>
41 return svpow_z(pg, a,
b);
44 template <
typename VectorType>
47 return svdiv_z(pg, a,
b);
50 template <u
int32_t
bytew
idth>
53 const auto all_false = svpfalse();
58 pg = svuzp1_b32(pg, all_false);
61 pg = svuzp1_b16(pg, all_false);
64 pg = svuzp1_b8(pg, all_false);
72 template <
typename VectorType>
81 res = svmax_z(pg, a,
b);
84 res = svmin_z(pg, a,
b);
88 const auto tmp = svsub_z(pg, a,
b);
89 res = svmul_z(pg, tmp, tmp);
94 const auto zero = svdup_n(ScalarType(0));
95 const auto tmp = svmul_z(pg, a,
b);
96 const auto gt = svcmpgt(pg, a, zero);
97 res = svsel(gt, a, tmp);
117 template <
typename InputVectorType,
typename OutputVectorType>
121 svbool_t selection_vector{};
126 selection_vector = svcmpeq(pg, a,
b);
129 selection_vector = svcmpne(pg, a,
b);
132 selection_vector = svcmpgt(pg, a,
b);
135 selection_vector = svcmpge(pg, a,
b);
138 selection_vector = svcmplt(pg, a,
b);
141 selection_vector = svcmple(pg, a,
b);
148 selection_vector = narrow_to_byte_predicate<sizeof(InputScalarType)>(selection_vector);
151 const auto false_vector = svdup_n(
static_cast<OutputScalarType
>((uint32_t)0));
152 const auto true_vector = svdup_n(
static_cast<OutputScalarType
>(~(uint32_t)0));
153 auto ret = svsel(selection_vector, true_vector, false_vector);
158 template <
typename ScalarType>
162 template <
typename ScalarType,
typename OutputScalarType = u
int8_t>