23.05.1
|
Go to the documentation of this file.
33 template <
typename InputType,
typename AccType>
34 void vector_float_sum(AccType &result, AccType &result_square,
const InputType &inputs)
40 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
42 inline void vector_float_sum(float32x4_t &result, float32x4_t &result_square,
const float16x8_t &inputs)
48 inline float16x8_t
vector_float_norm(
const float16x8_t &inputs,
const float32x4_t &vec_mean,
const float32x4_t &vec_multip,
const float32x4_t &vec_beta)
52 const auto result_low = wrapper::vcvt<float16_t>(
vector_float_norm(input_low, vec_mean, vec_multip, vec_beta));
53 const auto result_high = wrapper::vcvt<float16_t>(
vector_float_norm(input_high, vec_mean, vec_multip, vec_beta));
58 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
60 template <
typename InputType,
typename AccType>
61 InputType
vector_float_norm(
const InputType &inputs,
const AccType &vec_mean,
const AccType &vec_multip,
const AccType &vec_beta)
66 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
68 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
69 template <
typename T,
typename AccType>
80 constexpr
int window_step_x = 16 /
sizeof(T);
81 const unsigned int elements_plane =
input->info()->dimension(0) * output->
info()->
dimension(1);
92 Iterator output_plane_it(output, win_plane);
94 auto sum_h_w =
static_cast<AccType
>(0.f);
95 auto sum_squares_h_w =
static_cast<AccType
>(0.f);
99 const auto input_ptr =
reinterpret_cast<const T *
>(input_plane_it.
ptr());
101 auto vec_sum_h_w =
wrapper::vdup_n(
static_cast<AccType
>(0.f), ExactTagType{});
102 auto vec_sum_squares_h_w =
wrapper::vdup_n(
static_cast<AccType
>(0.f), ExactTagType{});
105 int x = window.
x().
start();
106 for(; x <= (window.
x().
end() - window_step_x); x += window_step_x)
116 vec2_sum_squares_h_w =
wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
122 for(; x < window.
x().
end(); ++x)
124 const auto value =
static_cast<AccType
>(*(input_ptr + x));
126 sum_squares_h_w += value * value;
129 input_plane_it, output_plane_it);
131 const auto mean_h_w = sum_h_w / elements_plane;
132 const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
134 const auto multip_h_w = gamma / std::sqrt(var_h_w +
epsilon);
135 const auto vec_mean_h_w =
wrapper::vdup_n(
static_cast<AccType
>(mean_h_w), ExactTagType{});
136 const auto vec_multip_h_w =
wrapper::vdup_n(
static_cast<AccType
>(multip_h_w), ExactTagType{});
137 const auto vec_beta =
wrapper::vdup_n(
static_cast<AccType
>(beta), ExactTagType{});
141 auto input_ptr =
reinterpret_cast<T *
>(input_plane_it.
ptr());
142 auto output_ptr =
reinterpret_cast<T *
>(output_plane_it.
ptr());
145 int x = window.
x().
start();
147 for(; x <= (window.
x().
end() - window_step_x); x += window_step_x)
150 const auto normalized_vec =
vector_float_norm(vec_val, vec_mean_h_w, vec_multip_h_w, vec_beta);
155 for(; x < window.
x().
end(); ++x)
157 const auto val =
static_cast<AccType
>(*(input_ptr + x));
158 *(output_ptr + x) =
static_cast<T
>((val - mean_h_w) * multip_h_w + beta);
161 input_plane_it, output_plane_it);
167 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
170 #endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
constexpr int start() const
Return the start of the dimension.
void instance_normalization_nchw(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
uint8_t vgetlane(const uint8x8_t vector, const unsigned int lane)
Interface for CPU tensor.
uint8x16_t vloadq(const uint8_t *ptr)
Includes all wrapper headers at once.
uint8x8_t vpadd(const uint8x8_t &a, const uint8x8_t &b)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
Iterator updated by execute_window_loop for each window element.
template void instance_normalization_nchw< float >(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window)
uint8x16_t vcombine(const uint8x8_t &a, const uint8x8_t &b)
InputType vector_float_norm(const InputType &inputs, const AccType &vec_mean, const AccType &vec_multip, const AccType &vec_beta)
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
uint8x8_t vgetlow(const uint8x16_t val)
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
typename neon_bitvector< T, BW >::tag_type neon_bitvector_tag_t
Helper type template to get the tag type of a neon vector.
Copyright (c) 2017-2023 Arm Limited.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
void end(TokenStream &in, bool &valid)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
constexpr int end() const
Return the end of the dimension.
uint8x8_t vgethigh(const uint8x16_t val)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
void vector_float_sum(AccType &result, AccType &result_square, const InputType &inputs)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)