Arm Compute Library 24.02.1, source listing: NEON floating-point activation kernel (fp_neon_activation_impl). Copyright (c) 2017-2024 Arm Limited.
// Constant parameters needed by the activation implementation.
struct ActFpImplParams
{
    float delta;  // Minimum delta needed to avoid NaN on corner-cases of elementary functions.
    int   step_x; // Window step at the x dimension.
};

// Zero out lanes of a float vector by reinterpreting it as an integer vector and
// ANDing it with a lane mask (NEON has no bitwise AND for floating-point types).
inline float32x4_t mask_float_vector(const float32x4_t &in, const uint32x4_t &mask)
{
    auto int_in = vreinterpretq_u32_f32(in);
    return vreinterpretq_f32_u32(wrapper::vand(int_in, mask));
}

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
inline float16x8_t mask_float_vector(const float16x8_t &in, const uint16x8_t &mask)
{
    auto int_in = vreinterpretq_u16_f16(in);
    return vreinterpretq_f16_u16(wrapper::vand(int_in, mask));
}
#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
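The same reinterpret-and-mask trick can be reproduced with raw NEON intrinsics. A minimal, self-contained sketch (the helper name and test values are illustrative, not from the library):

#include <arm_neon.h>
#include <cstdio>

// Keep lanes of `v` where `mask` is all-ones; zero the rest (illustrative helper).
static inline float32x4_t keep_masked_lanes(float32x4_t v, uint32x4_t mask)
{
    return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v), mask));
}

int main()
{
    const float in[4] = {1.5f, -2.0f, 3.25f, -0.5f};
    float32x4_t v     = vld1q_f32(in);
    uint32x4_t  pos   = vcgtq_f32(v, vdupq_n_f32(0.0f)); // all-ones where v > 0
    float       out[4];
    vst1q_f32(out, keep_masked_lanes(v, pos));
    std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // prints: 1.5 0 3.25 0
    return 0;
}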
template <typename T, const ActFpImplParams &P>
void fp_neon_activation_impl(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
    // 128-bit NEON vector tag for T, used to pick the right vdup_n overload.
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

    constexpr int window_step_x  = P.step_x;
    const auto    window_start_x = static_cast<int>(window.x().start());
    const auto    window_end_x   = static_cast<int>(window.x().end());

    const ActivationLayerInfo::ActivationFunction act = act_info.activation();

    // Collapse outer dimensions where possible and walk the x dimension manually.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(src, win_collapsed);
    Iterator output(dst, win_collapsed);
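    // The x dimension is then walked in two phases (see below): a vectorized main
    // loop consuming window_step_x elements per iteration, and a scalar tail loop
    // for whatever remains.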
    // Splatted constants used by the vector paths below.
    const auto delta            = wrapper::vdup_n(static_cast<T>(P.delta), ExactTagType{}); // keeps sqrt(0) lanes away from NaN
    const auto const_inv_2      = wrapper::vdup_n(static_cast<T>(0.5f), ExactTagType{});
    const auto const_inv_sqrt_2 = wrapper::vdup_n(static_cast<T>(0.70710678118f), ExactTagType{}); // 1/sqrt(2), for GELU
    const auto const_1          = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
    const auto const_0          = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
    const auto const_6          = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
    const auto const_3          = wrapper::vdup_n(static_cast<T>(3.f), ExactTagType{});
    const auto const_inv_6      = wrapper::vdup_n(static_cast<T>(0.166666667f), ExactTagType{}); // 1/6, for HARD_SWISH

    constexpr float soft_relu_thresh  = 12.f;
    const auto      vsoft_relu_thresh = wrapper::vdup_n(static_cast<T>(soft_relu_thresh), ExactTagType{});
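    // For reference, the closed forms these constants implement (standard textbook
    // definitions, not quoted from the library docs):
    //   gelu(x)      = 0.5 * x * (1 + erf(x / sqrt(2)))   -> const_inv_2, const_inv_sqrt_2
    //   hardswish(x) = x * min(max(x + 3, 0), 6) / 6      -> const_3, const_6, const_inv_6
    //   softrelu(x)  = log(1 + exp(x)); for x > 12 the correction log(1 + exp(-x))
    //   is below ~7e-6, so the kernel returns x directly and avoids exp() overflow.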
    // Splatted and scalar copies of the activation parameters a and b.
    const auto va = wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{});
    const auto vb = wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{});
    const auto a  = static_cast<T>(act_info.a());
    const auto b  = static_cast<T>(act_info.b());

    execute_window_loop(
        win_collapsed,
        [&](const Coordinates &)
        {
            const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
            const auto output_ptr = reinterpret_cast<T *>(output.ptr());
            wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

            // Vectorized main loop: window_step_x elements per iteration.
            int x = window_start_x;
            for (; x <= (window_end_x - window_step_x); x += window_step_x)
            {
                const auto vin = wrapper::vloadq(input_ptr + x);
                switch (act)
                {
                    case ActivationLayerInfo::ActivationFunction::ABS:
                        tmp = wrapper::vabs(vin);
                        break;
                    case ActivationLayerInfo::ActivationFunction::LINEAR:
                        tmp = wrapper::vmla(vb, va, vin); // a * x + b
                        break;
                    case ActivationLayerInfo::ActivationFunction::LOGISTIC:
                        tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
                        break;
                    case ActivationLayerInfo::ActivationFunction::RELU:
                        tmp = wrapper::vmax(const_0, vin);
                        break;
                    case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
                        tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
                        break;
                    case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
                        tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
                        break;
                    case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
                        tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
                        break;
                    case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
                        tmp = wrapper::vbsl(wrapper::vcgt(vin, vsoft_relu_thresh), vin,
                                            wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin))));
                        break;
                    case ActivationLayerInfo::ActivationFunction::ELU:
                        tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin,
                                            wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
                        break;
                    case ActivationLayerInfo::ActivationFunction::SQRT:
                        // Builds without a reliable vector sqrt first nudge zeroed lanes by
                        // `delta` (via mask_float_vector with vceq/vnot masks and vinvsqrt)
                        // to avoid NaN; the direct form is shown here.
                        tmp = wrapper::vsqrt(vin);
                        break;
                    case ActivationLayerInfo::ActivationFunction::SQUARE:
                        tmp = wrapper::vmul(vin, vin);
                        break;
                    case ActivationLayerInfo::ActivationFunction::TANH:
                        tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
                        break;
                    case ActivationLayerInfo::ActivationFunction::IDENTITY:
                        tmp = vin;
                        break;
                    case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
                        tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
                        break;
                    case ActivationLayerInfo::ActivationFunction::SWISH:
                        tmp = wrapper::vmul(vin, wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(wrapper::vmul(va, vin))))));
                        break;
                    case ActivationLayerInfo::ActivationFunction::GELU:
                        tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_2, wrapper::vadd(const_1, wrapper::verf(wrapper::vmul(vin, const_inv_sqrt_2)))));
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Unsupported activation function");
                }
                wrapper::vstore(output_ptr + x, tmp);
            }
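            // SIMD lanes cannot branch independently, so the predicated cases above
            // (LEAKY_RELU, SOFT_RELU, ELU) evaluate both sides and blend per lane
            // with wrapper::vbsl, a bit-select on the comparison mask.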
            // Scalar tail: process any leftover elements one by one.
            for (; x < window_end_x; ++x)
            {
                const T in = *(reinterpret_cast<const T *>(input_ptr + x));
                T       tmp;
                switch (act)
                {
                    case ActivationLayerInfo::ActivationFunction::ABS:
                        tmp = std::abs(in);
                        break;
                    case ActivationLayerInfo::ActivationFunction::LINEAR:
                        tmp = a * in + b;
                        break;
                    case ActivationLayerInfo::ActivationFunction::LOGISTIC:
                        tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
                        break;
                    case ActivationLayerInfo::ActivationFunction::RELU:
                        tmp = std::max<T>(static_cast<T>(0), in);
                        break;
                    case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
                        tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
                        break;
                    case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
                        tmp = std::min<T>(a, std::max<T>(b, in));
                        break;
                    case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
                        tmp = (in > 0) ? in : a * in;
                        break;
                    case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
                        tmp = (in > soft_relu_thresh) ? in : std::log(static_cast<T>(1) + std::exp(in));
                        break;
                    case ActivationLayerInfo::ActivationFunction::ELU:
                        tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
                        break;
                    case ActivationLayerInfo::ActivationFunction::SQRT:
                        tmp = std::sqrt(in);
                        break;
                    case ActivationLayerInfo::ActivationFunction::SQUARE:
                        tmp = in * in;
                        break;
                    case ActivationLayerInfo::ActivationFunction::TANH:
                        tmp = a * std::tanh(b * in);
                        break;
                    case ActivationLayerInfo::ActivationFunction::IDENTITY:
                        tmp = in;
                        break;
                    case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
                        tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
                        break;
                    case ActivationLayerInfo::ActivationFunction::SWISH:
                        tmp = in / (static_cast<T>(1) + std::exp(-a * in));
                        break;
                    case ActivationLayerInfo::ActivationFunction::GELU:
                        tmp = in * static_cast<T>(0.5f * (1.0f + erff(static_cast<float>(in) / 1.41421356237f)));
                        break;
                    default:
                        ARM_COMPUTE_ERROR("Unsupported activation function");
                }
                *(output_ptr + x) = tmp;
            }
        },
        input, output);
}
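For context, a concrete kernel would instantiate the template with per-type parameters. A sketch for fp32 (the fields match the ActFpImplParams struct documented above; treat the exact delta value and the wrapper function name as illustrative, not as the library's definitive code):

// Illustrative fp32 instantiation: tiny delta for the sqrt(0) workaround,
// and 4 floats per 128-bit NEON vector as the x-dimension step.
constexpr ActFpImplParams Fp32Params = {static_cast<float>(1e-24), 4};

void neon_fp32_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
    fp_neon_activation_impl<float, Fp32Params>(src, dst, act_info, window);
}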
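Finally, for intuition about the per-lane predication the vector switch relies on, a standalone sketch of LEAKY_RELU using raw intrinsics instead of the library's wrapper layer (the helper name and test data are illustrative):

#include <arm_neon.h>
#include <cstdio>

// leaky_relu(x) = x > 0 ? x : alpha * x, computed branch-free across all lanes.
static float32x4_t leaky_relu_f32x4(float32x4_t x, float alpha)
{
    const uint32x4_t  positive = vcgtq_f32(x, vdupq_n_f32(0.0f));  // all-ones mask where x > 0
    const float32x4_t scaled   = vmulq_f32(x, vdupq_n_f32(alpha)); // alpha * x for every lane
    return vbslq_f32(positive, x, scaled);                         // per-lane bit-select
}

int main()
{
    const float in[4] = {2.0f, -4.0f, 0.5f, -1.0f};
    float       out[4];
    vst1q_f32(out, leaky_relu_f32x4(vld1q_f32(in), 0.1f));
    std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // prints: 2 -0.4 0.5 -0.1
    return 0;
}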