24 #ifndef SRC_CORE_NEON_KERNELS_SUB_LIST_H 25 #define SRC_CORE_NEON_KERNELS_SUB_LIST_H 35 #define DECLARE_SUB_KERNEL(func_name) \ 36 void func_name(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) 45 #undef DECLARE_SUB_KERNEL 63 constexpr
int window_step_x = 16 /
sizeof(T);
64 const auto window_start_x = static_cast<int>(window.
x().
start());
65 const auto window_end_x = static_cast<int>(window.
x().
end());
72 if(is_broadcast_across_x)
74 const bool is_broadcast_input_2 = input2_win.
x().
step() == 0;
75 Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
76 Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
77 const ITensor *broadcast_tensor = is_broadcast_input_2 ? src1 : src0;
78 const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? src1 : src0;
83 Iterator broadcast_input(broadcast_tensor, broadcast_win);
84 Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
89 const auto non_broadcast_input_ptr = reinterpret_cast<const T *>(non_broadcast_input.
ptr());
90 const auto output_ptr = reinterpret_cast<T *>(output.
ptr());
92 const T broadcast_value = *reinterpret_cast<const T *>(broadcast_input.
ptr());
93 const auto broadcast_value_vec =
wrapper::vdup_n(broadcast_value, ExactTagType{});
96 int x = window_start_x;
97 for(; x <= (window_end_x - window_step_x); x += window_step_x)
99 const auto non_broadcast_v =
wrapper::vloadq(non_broadcast_input_ptr + x);
101 if(is_broadcast_input_2)
109 for(; x < window_end_x; ++x)
111 const auto non_broadcast_v = *(non_broadcast_input_ptr + x);
112 auto res = is_sat ?
wrapper::sub_sat(broadcast_value, non_broadcast_v) : broadcast_value - non_broadcast_v;
113 if(is_broadcast_input_2)
115 res = static_cast<T>(-1) * res;
118 *(output_ptr + x) = res;
121 broadcast_input, non_broadcast_input, output);
135 const auto input1_ptr = reinterpret_cast<const T *>(input1.
ptr());
136 const auto input2_ptr = reinterpret_cast<const T *>(input2.
ptr());
137 const auto output_ptr = reinterpret_cast<T *>(output.
ptr());
140 int x = window_start_x;
141 for(; x <= (window_end_x - window_step_x); x += window_step_x)
150 for(; x < window_end_x; ++x)
152 const auto val1 = *(input1_ptr + x);
153 const auto val2 = *(input2_ptr + x);
157 input1, input2, output);
162 #endif // SRC_CORE_NEON_KERNELS_SUB_LIST_H void sub_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
constexpr int step() const
Return the step of the dimension.
uint8x16_t vloadq(const uint8_t *ptr)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Describe one of the image's dimensions with a start, end and step.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
typename neon_bitvector< T, BW >::tag_type neon_bitvector_tag_t
Helper type template to get the tag type of a neon vector.
T x() const
Alias to access the size of the first dimension.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less than or equal to 1.
void sub_qasymm8_signed_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void sub_u8_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
uint8x8_t vmul(const uint8x8_t &a, const uint8x8_t &b)
void sub_qsymm16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void sub_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
uint8x8_t vqsub(const uint8x8_t &a, const uint8x8_t &b)
#define DECLARE_SUB_KERNEL(func_name)
void vstore(uint8_t *ptr, uint8x8_t val)
uint8x8_t vdup_n(uint8_t value, traits::vector_64_tag)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling lambda_function for each window element.
uint8_t sub_sat(const uint8_t &a, const uint8_t &b)
void sub_u8_s16_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Includes all wrapper headers at once.
void sub_s16_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
ConvertPolicy
Policy to handle overflow.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.