24.02.1
|
Go to the documentation of this file.
24 #ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
25 #define SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
37 template <
typename ScalarType>
61 template <
typename ScalarType,
typename VectorType>
85 template <
typename ScalarType>
88 const int window_step_x = 16 /
sizeof(ScalarType);
89 const auto window_start_x =
static_cast<int>(window.
x().
start());
90 const auto window_end_x =
static_cast<int>(window.
x().
end());
102 auto output_ptr =
reinterpret_cast<ScalarType *
>(output.
ptr());
103 const auto input_ptr =
reinterpret_cast<const ScalarType *
>(
input.ptr());
105 int x = window_start_x;
106 for (; x <= window_end_x - window_step_x; x += window_step_x)
110 for (; x < window_end_x; ++x)
121 const int window_step_x = 16;
122 const auto window_start_x =
static_cast<int>(window.
x().
start());
123 const auto window_end_x =
static_cast<int>(window.
x().
end());
126 const auto min_clamped_value = vdupq_n_f32((-128 - qi_out.
offset) * qi_out.
scale);
127 const auto max_clamped_value = vdupq_n_f32((127 - qi_out.
offset) * qi_out.
scale);
139 auto output_ptr =
reinterpret_cast<int8_t *
>(output.
ptr());
140 const auto input_ptr =
reinterpret_cast<const int8_t *
>(
input.ptr());
141 const auto vconst_0_f32 = vdupq_n_f32(0);
144 int x = window_start_x;
145 for (; x <= window_end_x - window_step_x; x += window_step_x)
153 float32x4x4_t vtmp_deq = {{
154 elementwise_op_imp<float>(op, vin_deq.val[0]),
155 elementwise_op_imp<float>(op, vin_deq.val[1]),
156 elementwise_op_imp<float>(op, vin_deq.val[2]),
157 elementwise_op_imp<float>(op, vin_deq.val[3]),
163 vbslq_f32(vcleq_f32(vin_deq.val[0], vconst_0_f32), clamped_value, vtmp_deq.val[0]);
165 vbslq_f32(vcleq_f32(vin_deq.val[1], vconst_0_f32), clamped_value, vtmp_deq.val[1]);
167 vbslq_f32(vcleq_f32(vin_deq.val[2], vconst_0_f32), clamped_value, vtmp_deq.val[2]);
169 vbslq_f32(vcleq_f32(vin_deq.val[3], vconst_0_f32), clamped_value, vtmp_deq.val[3]);
176 for (; x < window_end_x; ++x)
193 tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
198 tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
205 *(output_ptr + x) = tmp;
213 const int window_step_x = 16;
214 const auto window_start_x =
static_cast<int>(window.
x().
start());
215 const auto window_end_x =
static_cast<int>(window.
x().
end());
218 const auto vconst_0_f32 = vdupq_n_f32(0);
219 const auto min_clamped_value = vdupq_n_f32((0 - qi_out.
offset) * qi_out.
scale);
220 const auto max_clamped_value = vdupq_n_f32((255 - qi_out.
offset) * qi_out.
scale);
233 auto output_ptr =
reinterpret_cast<uint8_t *
>(output.
ptr());
234 const auto input_ptr =
reinterpret_cast<const uint8_t *
>(
input.ptr());
235 int x = window_start_x;
236 for (; x <= window_end_x - window_step_x; x += window_step_x)
244 float32x4x4_t vtmp_deq = {{
245 elementwise_op_imp<float>(op, vin_deq.val[0]),
246 elementwise_op_imp<float>(op, vin_deq.val[1]),
247 elementwise_op_imp<float>(op, vin_deq.val[2]),
248 elementwise_op_imp<float>(op, vin_deq.val[3]),
253 vbslq_f32(vcleq_f32(vin_deq.val[0], vconst_0_f32), clamped_value, vtmp_deq.val[0]);
255 vbslq_f32(vcleq_f32(vin_deq.val[1], vconst_0_f32), clamped_value, vtmp_deq.val[1]);
257 vbslq_f32(vcleq_f32(vin_deq.val[2], vconst_0_f32), clamped_value, vtmp_deq.val[2]);
259 vbslq_f32(vcleq_f32(vin_deq.val[3], vconst_0_f32), clamped_value, vtmp_deq.val[3]);
266 for (; x < window_end_x; ++x)
283 tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
288 tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
291 *(output_ptr + x) = tmp;
300 #endif // SRC_CORE_NEON_KERNELS_ELEMENTWISE_UNARY_LIST_H
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
constexpr int start() const
Return the start of the dimension.
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a)
float32x2_t vinvsqrt(const float32x2_t &a)
void elementwise_op< int8_t >(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
float32x4_t vexpq(const float32x4_t &a)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, OutputScalarType(*scalar_func)(const InputScalarType &, const InputScalarType &), int(*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, OutputScalarType *, const bool), int(*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, OutputScalarType *))
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Interface for CPU tensor.
float32x4_t vlog(const float32x4_t &a)
uint8x16_t vloadq(const uint8_t *ptr)
@ RSQRT
Reciprocal square root.
float32x4_t vsin(const float32x4_t &a)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
RoundingPolicy
Rounding method.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a)
T nearbyint(T value)
Rounds the floating-point argument value to an integer value in floating-point format, using the current rounding mode.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
float32x4_t vround(const float32x4_t &a)
int8x8_t vneg(const int8x8_t &a)
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
int8_t qasymm8_signed_t
8 bit signed quantized asymmetric scalar value
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
Quantize a neon vector holding 8 floating point values.
Copyright (c) 2017-2024 Arm Limited.
@ TO_ZERO
Truncates the least significant values that are lost in operations.
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
constexpr int end() const
Return the end of the dimension.
void elementwise_op< uint8_t >(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
int8x8_t vquantize_signed(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
Quantize a neon vector holding 8 floating point values.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
ElementWiseUnary
Available element wise unary operations.
int8x8_t vabs(const int8x8_t &a)