41 constexpr
int window_step_x = 16;
42 const auto window_start_x =
static_cast<int>(window.
x().
start());
43 const auto window_end_x =
static_cast<int>(window.
x().
end());
60 const auto vconst_1 = vdupq_n_f32(1.f);
62 const auto vconst_0_f32 = vdupq_n_f32(0);
64 const float32x4_t va_f32 = vdupq_n_f32(act_info.
a());
65 const float32x4_t vb_f32 = vdupq_n_f32(act_info.
b());
66 const float a_f32 = act_info.
a();
67 const float b_f32 = act_info.
b();
68 const auto const_6_f32 = vdupq_n_f32(6.f);
69 const auto const_0_f32 = vdupq_n_f32(0.f);
70 const auto const_3_f32 = vdupq_n_f32(3.f);
71 const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
76 float32x4_t vs = vdupq_n_f32(s);
77 float32x4_t vo = vdupq_n_f32(o);
81 const auto input_ptr =
reinterpret_cast<const qasymm8_t *
>(input.
ptr());
82 const auto output_ptr =
reinterpret_cast<qasymm8_t *
>(output.
ptr());
87 int x = window_start_x;
88 for(; x <= (window_end_x - window_step_x); x += window_step_x)
94 tmp = vmaxq_u8(vconst_0, vin);
101 tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
108 tmp = vminq_u8(va, vmaxq_u8(vb, vin));
117 const float32x4x4_t tmp_dep =
134 const float32x4x4_t tmp_dep =
151 const float32x4x4_t tmp_dep =
168 const uint32x4x4_t pos_mask =
171 wrapper::vcgtz(vin_deq.val[0]),
172 wrapper::vcgtz(vin_deq.val[1]),
173 wrapper::vcgtz(vin_deq.val[2]),
174 wrapper::vcgtz(vin_deq.val[3]),
178 const uint32x4x4_t pos_mask =
187 #endif // __aarch64__
189 const float32x4x4_t tmp_dep =
209 for(; x < window_end_x; ++x)
215 tmp = std::max(const_0, in);
216 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
220 tmp = std::min(a, std::max(const_0, in));
221 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
225 tmp = std::min(a, std::max(b, in));
226 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
231 tmp_f = 1.f / (1.f + std::exp(-tmp_f));
237 tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
243 tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
249 tmp_f = tmp_f > 0 ? tmp_f : tmp_f * a_f32;
256 *(output_ptr + x) = tmp;
float32x2_t vdiv(const float32x2_t &a, const float32x2_t &b)
float32x4_t vtanh(const float32x4_t &a)
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
float a() const
Get the alpha value.
uint8x16_t vloadq(const uint8_t *ptr)
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
Describe one of the image's dimensions with a start, end and step.
Activation Layer Information class.
Interface for CPU tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
ActivationFunction
Available activation functions.
uint8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo)
Perform a multiply-accumulate on all 16 components of a QASYMM8 vector.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
uint8x8_t vmin(const uint8x8_t &a, const uint8x8_t &b)
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void qasymm8_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
int8x8_t vneg(const int8x8_t &a)
Lower and Upper Bounded Rectifier ( f(x) = min(a, max(b, x)) )
uint8x8_t vcgt(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vbsl(const uint8x8_t &a, const uint8x8_t &b, const uint8x8_t &c)
Upper Bounded Rectifier ( f(x) = min(a, max(0, x)) )
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
Quantize a neon vector holding 8 floating point values.
void vstore(uint8_t *ptr, uint8x8_t val)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
typename neon_bitvector< T, BW >::type neon_bitvector_t
Helper type template to get the type of a neon vector.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
Includes all wrapper headers at once.
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
uint8x8_t vmax(const uint8x8_t &a, const uint8x8_t &b)
constexpr int start() const
Return the start of the dimension.
float32x4_t vexpq(const float32x4_t &a)
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
uint8x16_t qasymm8x16_t
8 bit quantized asymmetric vector with 16 elements