42 inline float32x4_t mask_float_vector(
const float32x4_t &in,
const uint32x4_t &mask)
44 auto int_in = vreinterpretq_u32_f32(in);
55 constexpr
int window_step_x = 4;
56 const auto window_start_x =
static_cast<int>(window.
x().
start());
57 const auto window_end_x =
static_cast<int>(window.
x().
end());
70 const auto delta =
wrapper::vdup_n(static_cast<float>(1e-24), ExactTagType {});
72 const auto const_1 =
wrapper::vdup_n(static_cast<float>(1.f), ExactTagType {});
73 const auto const_0 =
wrapper::vdup_n(static_cast<float>(0.f), ExactTagType{});
74 const auto const_6 =
wrapper::vdup_n(static_cast<float>(6.f), ExactTagType{});
75 const auto const_3 =
wrapper::vdup_n(static_cast<float>(3.f), ExactTagType{});
76 const auto const_inv_6 =
wrapper::vdup_n(static_cast<float>(0.166666667f), ExactTagType{});
78 constexpr
float soft_relu_thresh = 12.f;
79 const auto vsoft_relu_thresh =
wrapper::vdup_n(static_cast<float>(soft_relu_thresh), ExactTagType{});
81 const auto va =
wrapper::vdup_n(static_cast<float>(act_info.
a()), ExactTagType{});
82 const auto vb =
wrapper::vdup_n(static_cast<float>(act_info.
b()), ExactTagType{});
83 const auto a =
static_cast<float>(act_info.
a());
84 const auto b =
static_cast<float>(act_info.
b());
87 const auto input_ptr =
reinterpret_cast<const float *
>(input.
ptr());
88 const auto output_ptr =
reinterpret_cast<float *
>(output.
ptr());
93 int x = window_start_x;
94 for(; x <= (window_end_x - window_step_x); x += window_step_x)
128 tmp = wrapper::vsqrt(vin);
156 for(; x < window_end_x; ++x)
158 const float in = *(
reinterpret_cast<const float *
>(input_ptr + x));
169 tmp =
static_cast<float>(1) / (static_cast<float>(1) + std::exp(-in));
172 tmp = std::max<float>(
static_cast<float>(0), in);
175 tmp = std::min<float>(a, std::max(static_cast<float>(0), in));
178 tmp = std::min<float>(a, std::max<float>(
b, in));
181 tmp = (in > 0) ? in : a * in;
184 tmp = (in > soft_relu_thresh) ? in : std::log(static_cast<float>(1) + std::exp(in));
187 tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
196 tmp = a * std::tanh(b * in);
202 tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
207 *(output_ptr + x) = tmp;
float32x4_t vlog(const float32x4_t &a)
float32x4_t vtanh(const float32x4_t &a)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
float a() const
Get the alpha value.
float32x2_t vinvsqrt(const float32x2_t &a)
uint8x16_t vloadq(const uint8_t *ptr)
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
float32x2_t vinv(const float32x2_t &a)
int8x8_t vabs(const int8x8_t &a)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Describe one of the image's dimensions with a start, end and step.
Activation Layer Information class.
Interface for Neon tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
ActivationFunction
Available activation functions.
typename neon_bitvector< T, BW >::tag_type neon_bitvector_tag_t
Helper type template to get the tag type of a neon vector.
Exponential Linear Unit ( $f(x) = \begin{cases} a\,(e^{x} - 1) & \text{if } x < 0 \\ x & \text{if } x \geq 0 \end{cases}$ )
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
uint8x8_t vnot(const uint8x8_t &a)
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
uint8x8_t vmin(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vand(const uint8x8_t &a, const uint8x8_t &b)
void fp32_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
int8x8_t vneg(const int8x8_t &a)
Lower and Upper Bounded Rectifier ( $f(x) = \min(a, \max(b, x))$ )
uint8x8_t vcgt(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vbsl(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
Upper Bounded Rectifier ( $f(x) = \min(a, \max(0, x))$ )
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
void vstore(uint8_t *ptr, uint8x8_t val)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each window element.
typename neon_bitvector< T, BW >::type neon_bitvector_t
Helper type template to get the type of a neon vector.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
Includes all wrapper headers at once.
uint8x8_t vmla(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
constexpr int end() const
Return the end of the dimension.
uint8x8_t vcge(const uint8x8_t &a, const uint8x8_t &b)
Iterator updated by execute_window_loop for each window element.
uint8x8_t vmax(const uint8x8_t &a, const uint8x8_t &b)
constexpr int start() const
Return the start of the dimension.
float32x4_t vexpq(const float32x4_t &a)
Describe a multidimensional execution window.
uint8x8_t vceq(const uint8x8_t &a, const uint8x8_t &b)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.