32 template <
typename ScalarType>
35 const auto all_true_pg = wrapper::svptrue<ScalarType>();
36 const auto window_start_x =
static_cast<int>(window.
x().
start());
37 const auto window_end_x =
static_cast<int>(window.
x().
end());
47 const auto in_ptr =
reinterpret_cast<const ScalarType *
>(input.
ptr());
48 const auto out_ptr =
reinterpret_cast<ScalarType *
>(output.
ptr());
51 auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>());
53 int x = window_start_x;
54 svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
57 const auto current_value = svld1(pg, in_ptr + x);
58 vec_max = svmax_m(pg, vec_max, current_value);
60 x += wrapper::svcnt<ScalarType>();
61 pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
63 while(svptest_any(all_true_pg, pg));
65 auto max_val = svmaxv(all_true_pg, vec_max);
72 template <
typename ScalarType>
74 ITensor *out,
const float beta,
bool is_log,
const Window &window)
83 const auto all_true_pg = wrapper::svptrue<ScalarType>();
88 const auto in_ptr =
reinterpret_cast<const ScalarType *
>(in_it.
ptr()) + start_x;
89 const auto out_ptr =
reinterpret_cast<ScalarType *
>(out_it.
ptr()) + start_x;
90 const auto tmp_ptr =
reinterpret_cast<ScalarType *
>(tmp);
97 const auto max_val = *
reinterpret_cast<const ScalarType *
>(max_it.
ptr());
98 const auto vec_max = wrapper::svdup_n(max_val);
101 auto vec_sum = wrapper::svdup_n(static_cast<ScalarType>(0));
105 svbool_t pg = wrapper::svwhilelt<ScalarType>(x,
input_width);
108 auto vec_elements = svld1(pg, in_ptr + x);
109 vec_elements = svsub_z(pg, vec_elements, vec_max);
112 vec_elements = svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta)));
113 vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements));
117 vec_elements = wrapper::svexp_z(pg, svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta))));
118 vec_sum = svadd_m(pg, vec_sum, vec_elements);
120 svst1(pg, tmp_ptr + x, vec_elements);
122 x += wrapper::svcnt<ScalarType>();
123 pg = wrapper::svwhilelt<ScalarType>(x,
input_width);
125 while(svptest_any(all_true_pg, pg));
128 sum = svaddv(all_true_pg, vec_sum);
132 sum =
static_cast<ScalarType
>(std::log(sum));
136 sum = ScalarType(1) / sum;
144 svbool_t pg = wrapper::svwhilelt<ScalarType>(x,
input_width);
147 auto vec_in = svld1(pg, tmp_ptr + x);
148 auto normalized_value = wrapper::svdup_n(static_cast<ScalarType>(0));
151 normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
155 normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
157 svst1(pg, out_ptr + x, normalized_value);
159 x += wrapper::svcnt<ScalarType>();
160 pg = wrapper::svwhilelt<ScalarType>(x,
input_width);
162 while(svptest_any(all_true_pg, pg));
165 in_it, max_it, out_it);
174 ITensor *out,
const float beta,
bool is_log,
const Window &window);
176 ITensor *out,
const float beta,
bool is_log,
const Window &window);
void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, bool is_log, const Window &window)
template void sve_logits_1d_max< float16_t >(const ITensor *in, ITensor *out, const Window &window)
template void sve_logits_1d_max< float >(const ITensor *in, ITensor *out, const Window &window)
TensorShape shape
Shape of the valid region.
template void sve_softmax_logits_1d_float< float >(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, bool is_log, const Window &window)
Describe one of the image's dimensions with a start, end and step.
Interface for CPU tensor.
Copyright (c) 2017-2022 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
T x() const
Alias to access the size of the first dimension.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
template void sve_logits_1d_max< qasymm8_signed_t >(const ITensor *in, ITensor *out, const Window &window)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
template void sve_softmax_logits_1d_float< float16_t >(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, bool is_log, const Window &window)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
template void sve_logits_1d_max< qasymm8_t >(const ITensor *in, ITensor *out, const Window &window)
Coordinates anchor
Anchor for the start of the valid region.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.