// Subtracts an unsigned 8-bit tensor and a signed 16-bit tensor element-wise
// along X and writes signed 16-bit results to dst.
//
// src0       - tensor whose data is read through a `const int16_t *`.
// src1       - tensor whose data is read through a `const uint8_t *`.
// dst        - tensor whose data is written through an `int16_t *`.
// policy     - overflow handling policy; the code that branches on it is not
//              part of this listing (presumably it selects between the
//              plain and the sub_sat loop pairs below - TODO confirm).
// window     - execution window; its X start/end bound the inner loops.
// is_swapped - false: result = src0 - src1; true: result = src1 - src0.
//
// NOTE(review): the leading numbers are line numbers of the original file;
// gaps in that numbering mean lines (braces, lambda header, vector loop
// bodies) are missing from this listing.
37 void sub_s16_u8_s16_impl(
const ITensor *src0,
const ITensor *src1, ITensor *
dst,
const ConvertPolicy &policy,
const Window &window,
bool is_swapped)
// Per-input windows that do not advance along dimensions whose shape is <= 1.
41 Window input1_win = window.broadcast_if_dimension_le_one(src0->info()->tensor_shape());
42 Window input2_win = window.broadcast_if_dimension_le_one(src1->info()->tensor_shape());
// Collapse X to a single step for both inputs: the X range is walked by the
// explicit `x` loops below using window.x().start()/end().
46 input1_win.set(
Window::DimX, Window::Dimension(0, 1, 1));
47 input2_win.set(
Window::DimX, Window::Dimension(0, 1, 1));
49 Iterator input1(src0, input1_win);
50 Iterator input2(src1, input2_win);
// NOTE(review): `win` is not declared in the visible lines (original lines
// 38-40 and 43-45 are elided) - presumably a copy of `window` with X
// collapsed like the input windows; confirm against the full file.
51 Iterator output(dst, win);
// The first loop of each pair advances x by this many elements per iteration.
53 const int window_step_x = 8;
54 const auto window_start_x =
static_cast<int>(window.x().start());
55 const auto window_end_x =
static_cast<int>(window.x().end());
// Typed views of the current iterator positions: S16 input, U8 input, S16 output.
59 const auto input1_ptr =
reinterpret_cast<const int16_t *
>(input1.ptr());
60 const auto input2_ptr =
reinterpret_cast<const uint8_t *
>(input2.ptr());
61 const auto output_ptr =
reinterpret_cast<int16_t *
>(output.ptr());
// --- Loop pair using wrapper::vsub and the scalar `-` operator ---
66 int x = window_start_x;
// Blocks of window_step_x elements; the statements that produce vin1/vin2 and
// store `res` are elided from this listing.
67 for(; x <= (window_end_x - window_step_x); x += window_step_x)
// Operand order follows is_swapped.
71 const auto res = is_swapped ?
wrapper::vsub(vin2, vin1) : wrapper::
vsub(vin1, vin2);
// Leftover elements, one at a time; the U8 value is cast to int16_t before
// the subtraction.
76 for(; x < window_end_x; ++x)
78 const auto res = is_swapped ?
static_cast<int16_t
>(*(input2_ptr + x)) - *(input1_ptr + x) : *(input1_ptr + x) -
static_cast<int16_t
>(*(input2_ptr + x));
79 *(output_ptr + x) = res;
// --- Loop pair using wrapper::sub_sat for the leftover elements ---
85 int x = window_start_x;
// Block loop; its body (original lines 87-94) is elided from this listing.
86 for(; x <= (window_end_x - window_step_x); x += window_step_x)
// Leftover elements, one at a time, via wrapper::sub_sat with the same
// is_swapped operand ordering as above.
95 for(; x < window_end_x; ++x)
97 const auto res = is_swapped ?
wrapper::sub_sat(static_cast<int16_t>(*(input2_ptr + x)), *(input1_ptr + x)) : wrapper::
sub_sat(*(input1_ptr + x), static_cast<int16_t>(*(input2_ptr + x)));
98 *(output_ptr + x) = res;
// Trailing arguments of a call whose start is elided - presumably
// execute_window_loop(win, <lambda>, input1, input2, output); TODO confirm.
102 input1, input2, output);
108 sub_s16_u8_s16_impl(src1, src0, dst, policy, window,
false);
114 sub_s16_u8_s16_impl(src1, src0, dst, policy, window,
true);
133 const int window_step_x = 8;
134 const auto window_start_x =
static_cast<int>(window.
x().
start());
135 const auto window_end_x =
static_cast<int>(window.
x().
end());
139 const auto input1_ptr =
reinterpret_cast<const uint8_t *
>(input1.
ptr());
140 const auto input2_ptr =
reinterpret_cast<const uint8_t *
>(input2.
ptr());
141 const auto output_ptr =
reinterpret_cast<int16_t *
>(output.
ptr());
146 int x = window_start_x;
147 for(; x <= (window_end_x - window_step_x); x += window_step_x)
155 for(; x < window_end_x; ++x)
157 *(output_ptr + x) = static_cast<int16_t>(*(input1_ptr + x)) - static_cast<int16_t>(*(input2_ptr + x));
163 int x = window_start_x;
164 for(; x <= (window_end_x - window_step_x); x += window_step_x)
172 for(; x < window_end_x; ++x)
174 *(output_ptr + x) =
wrapper::sub_sat(static_cast<int16_t>(*(input1_ptr + x)),
175 static_cast<int16_t
>(*(input2_ptr + x)));
179 input1, input2, output);
uint8x16_t vloadq(const uint8_t *ptr)
uint8x8_t vsub(const uint8x8_t &a, const uint8x8_t &b)
Describe one of the image's dimensions with a start, end and step.
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less than or equal to 1.
void sub_u8_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
uint8x8_t vqsub(const uint8x8_t &a, const uint8x8_t &b)
uint8x8_t vload(const uint8_t *ptr)
void vstore(uint8_t *ptr, uint8x8_t val)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
uint8_t sub_sat(const uint8_t &a, const uint8_t &b)
void sub_u8_s16_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Includes all wrapper headers at once.
void sub_s16_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
uint16x8_t vmovl(const uint8x8_t &a)
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
ConvertPolicy
Policy to handle overflow.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.