// Fragment of a routine templated on ScalarType that reduces the elements in
// [window_start_x, window_end_x) to a single maximum using SVE-style predicated intrinsics.
// NOTE(review): the function signature, the loop header and several braces are not
// visible in this excerpt, and each statement starts with a stray number that looks like
// a line number from the listing this was extracted from -- confirm against the full file.
36 template <
typename ScalarType>
// Predicate from svptrue; used below as the governing predicate for the loop test and
// for the final horizontal reduction.
39 const auto all_true_pg = wrapper::svptrue<ScalarType>();
// Start and end of the window along x, converted to int.
40 const auto window_start_x =
static_cast<int>(window.
x().
start());
41 const auto window_end_x =
static_cast<int>(window.
x().
end());
// Typed views of the input and output buffers at the current position.
53 const auto in_ptr =
reinterpret_cast<const ScalarType *
>(
input.ptr());
54 const auto out_ptr =
reinterpret_cast<ScalarType *
>(output.
ptr());
// Running per-lane maximum, seeded from support::cpp11::lowest (presumably the lowest
// representable ScalarType value -- verify against the helper).
57 auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>());
59 int x = window_start_x;
// Predicate for the first chunk, built by svwhilelt from x and window_end_x.
60 svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
// Predicated load of the current chunk.
63 const auto current_value = svld1(pg, in_ptr + x);
// "_m" form of svmax governed by pg; per ACLE merging predication, lanes inactive in pg
// keep their previous vec_max value.
64 vec_max = svmax_m(pg, vec_max, current_value);
// Advance by the count returned by svcnt and rebuild the predicate for the next chunk.
66 x += wrapper::svcnt<ScalarType>();
67 pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
68 }
// Continue while at least one lane of pg is still active.
while (svptest_any(all_true_pg, pg));
// Horizontal reduction of the per-lane maxima to one scalar.
// NOTE(review): the statement that consumes max_val (and out_ptr) is not visible here.
70 auto max_val = svmaxv(all_true_pg, vec_max);
// Fragment of a routine templated on ScalarType that, for one row: scales (x - max) by
// beta, exponentiates, accumulates a sum, stores intermediates in tmp, then normalises
// the stored values into the output. This looks like a softmax / log-softmax kernel --
// TODO confirm against the full file.
// NOTE(review): the signature, loop headers and the conditions selecting between the
// alternative statements below are not visible in this excerpt; start_x, tmp, beta,
// input_width, x and sum are declared outside of it. The leading numbers on each
// statement look like line numbers from the listing this was extracted from.
77 template <
typename ScalarType>
// Predicate from svptrue; used for the loop tests and the horizontal sum.
93 const auto all_true_pg = wrapper::svptrue<ScalarType>();
// Typed input/output pointers, both offset by start_x.
100 const auto in_ptr =
reinterpret_cast<const ScalarType *
>(in_it.
ptr()) + start_x;
101 const auto out_ptr =
reinterpret_cast<ScalarType *
>(out_it.
ptr()) + start_x;
// Scratch buffer: written in the first loop and read back in the second.
102 const auto tmp_ptr =
reinterpret_cast<ScalarType *
>(tmp);
// Scalar read through max_it (presumably the row maximum computed earlier -- verify),
// broadcast together with beta to all lanes.
109 const auto max_val = *
reinterpret_cast<const ScalarType *
>(max_it.
ptr());
110 const auto vec_max = wrapper::svdup_n(max_val);
111 const auto vec_beta = wrapper::svdup_n(
static_cast<ScalarType
>(beta));
// Per-lane running sum, starting at zero.
114 auto vec_sum = wrapper::svdup_n(
static_cast<ScalarType
>(0));
// First pass: predicate for the first chunk of [x, input_width).
118 svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
121 auto vec_elements = svld1(pg, in_ptr + x);
// (element - max) * beta under predicate pg.
122 vec_elements = svmul_z(pg, svsub_z(pg, vec_elements, vec_max), vec_beta);
// Exponentiate in place and accumulate; svadd_m is the merging form, so per ACLE lanes
// inactive in pg keep their previous vec_sum value.
// NOTE(review): this pair and the accumulation after the store below both add an
// exponential to vec_sum; presumably they sit in mutually exclusive branches whose
// conditions are not visible -- confirm.
125 vec_elements = wrapper::svexp_z(pg, vec_elements);
126 vec_sum = svadd_m(pg, vec_sum, vec_elements);
// Store the (possibly exponentiated) chunk to the scratch buffer.
128 svst1(pg, tmp_ptr + x, vec_elements);
// Accumulate exp(vec_elements) without overwriting vec_elements.
132 vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements));
// Advance by the count returned by svcnt and rebuild the predicate.
135 x += wrapper::svcnt<ScalarType>();
136 pg = wrapper::svwhilelt<ScalarType>(x, input_width);
137 }
// Continue while at least one lane of pg is still active.
while (svptest_any(all_true_pg, pg));
// Horizontal reduction of the per-lane sums to one scalar.
140 sum = svaddv(all_true_pg, vec_sum);
// Two alternative finalisations of sum appear here: its logarithm, or its reciprocal.
// NOTE(review): the branch choosing between them is not visible in this excerpt.
144 sum =
static_cast<ScalarType
>(std::log(sum));
148 sum = ScalarType(1) / sum;
// Second pass: read the scratch buffer back and write normalised values to the output.
156 svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
159 auto vec_in = svld1(pg, tmp_ptr + x);
160 auto normalized_value = wrapper::svdup_n(
static_cast<ScalarType
>(0));
// Two alternative normalisations: subtract sum, or multiply by sum. Presumably these
// pair with the log and reciprocal finalisations above respectively -- confirm.
163 normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(
static_cast<ScalarType
>(sum)));
167 normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(
static_cast<ScalarType
>(sum)));
169 svst1(pg, out_ptr + x, normalized_value);
171 x += wrapper::svcnt<ScalarType>();
172 pg = wrapper::svwhilelt<ScalarType>(x, input_width);
173 }
while (svptest_any(all_true_pg, pg));
// Trailing arguments of a call whose beginning is not visible in this excerpt.
176 in_it, max_it, out_it);