32 template <
typename ScalarType,
int size>
41 const int window_step_x = size;
42 const auto window_start_x =
static_cast<int>(window.
x().
start());
43 const auto window_end_x =
static_cast<int>(window.
x().
end());
50 int x = window_start_x;
51 auto in_ptr =
reinterpret_cast<const ScalarType *
>(input_itr.
ptr());
52 auto out_ptr =
reinterpret_cast<ScalarType *
>(output_itr.
ptr());
54 auto sum_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
55 auto sum_sq_vec =
wrapper::vdup_n(static_cast<ScalarType>(0.f), ExactTagType{});
57 for(; x <= (window_end_x - window_step_x); x += window_step_x)
66 for(
int i = 0; i < size / 4; ++i)
69 sum_sq_carry_res =
wrapper::vpadd(sum_sq_carry_res, sum_sq_carry_res);
76 for(; x < window_end_x; ++x)
78 ScalarType data = *(in_ptr + x);
80 sum_sq += data * data;
84 ScalarType var = (sum_sq / input->
info()->
dimension(0)) - (mean * mean);
85 ScalarType stddev_inv = 1.f / sqrt(var + epsilon);
89 for(x = window_start_x; x <= (window_end_x - window_step_x); x += window_step_x)
96 for(; x < window_end_x; ++x)
98 *(out_ptr + x) = (*(in_ptr + x) - mean) * stddev_inv;
101 input_itr, output_itr);
105 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
107 #endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
uint8x16_t vloadq(const uint8_t *ptr)
template void mean_stddev_normalization< float, 4 >(ITensor *input, ITensor *output, float epsilon, const Window &window)
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Describe one of the image's dimensions with a start, end and step.
Interface for CPU tensor.
Copyright (c) 2017-2022 Arm Limited.
uint8x8_t vpadd(const uint8x8_t &a, const uint8x8_t &b)
uint8_t vgetlane(const uint8x8_t vector, const unsigned int lane)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Create the appropriate SIMD vector given its type and size in terms of elements.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
uint8x8_t vgetlow(const uint8x16_t val)
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
uint8x8_t vgethigh(const uint8x16_t val)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
void vstore(uint8_t *ptr, uint8x8_t val)
void mean_stddev_normalization(ITensor *input, ITensor *output, float epsilon, const Window &window)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each window element.
Includes all wrapper headers at once.
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.