24.04
|
Go to the documentation of this file.
24 #ifndef SRC_CORE_NEON_KERNELS_L2NORMLAYER_LIST_H
25 #define SRC_CORE_NEON_KERNELS_L2NORMLAYER_LIST_H
39 template <
typename T,
int S>
45 const auto window_start_x =
static_cast<int>(window.
x().
start());
46 const auto window_end_x =
static_cast<int>(window.
x().
end());
51 Iterator input_it(in, win_collapsed);
53 Iterator output_it(out, win_collapsed);
59 const auto in_ptr =
reinterpret_cast<const T *
>(input_it.
ptr());
60 const auto out_ptr =
reinterpret_cast<T *
>(output_it.
ptr());
62 const T sum_value = *
reinterpret_cast<const T *
>(sum_it.
ptr());
63 const T norm_value =
static_cast<T
>(1.f) / std::sqrt(std::max(sum_value,
static_cast<T
>(
epsilon)));
64 const auto vec_norm_value =
wrapper::vdup_n(norm_value, ExactTagType{});
67 int x = window_start_x;
68 for (; x <= (window_end_x - window_step_x); x += window_step_x)
74 for (; x < window_end_x; ++x)
76 out_ptr[x] = in_ptr[x] * norm_value;
79 input_it, sum_it, output_it);
82 template <
typename T,
int S>
89 const auto window_start_x =
static_cast<int>(window.
x().
start());
90 const auto window_end_x =
static_cast<int>(window.
x().
end());
108 const auto in_ptr =
reinterpret_cast<const T *
>(input_it.
ptr());
109 const auto sum_ptr =
reinterpret_cast<const T *
>(sum_it.
ptr());
110 const auto out_ptr =
reinterpret_cast<T *
>(output_it.
ptr());
113 int x = window_start_x;
114 for (; x <= (window_end_x - window_step_x); x += window_step_x)
121 for (; x < window_end_x; ++x)
123 const T norm_value =
static_cast<T
>(1.f) / std::sqrt(std::max(sum_ptr[x],
static_cast<T
>(
epsilon)));
124 out_ptr[x] = in_ptr[x] * norm_value;
127 input_it, sum_it, output_it);
131 #endif //SRC_CORE_NEON_KERNELS_L2NORMLAYER_LIST_H
constexpr int start() const
Return the start of the dimension.
float32x2_t vinvsqrt(const float32x2_t &a)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Interface for CPU tensor.
uint8x16_t vloadq(const uint8_t *ptr)
Includes all wrapper headers at once.
Create the appropriate SIMD vector given its type and size in terms of elements.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
void l2_normalize_yz(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window, size_t axis)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
uint8x8_t vmax(const uint8x8_t &a, const uint8x8_t &b)
void l2_normalize_x(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
constexpr int end() const
Return the end of the dimension.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)