42 void abs_diff_U8_U8_U8(
const ITensor *in1,
const ITensor *in2, ITensor *out,
const Window &window)
44 Iterator input1(in1, window);
45 Iterator input2(in2, window);
46 Iterator output(out, window);
50 const uint8x16_t input1_val = vld1q_u8(input1.ptr());
51 const uint8x16_t input2_val = vld1q_u8(input2.ptr());
53 vst1q_u8(output.ptr(), vabdq_u8(input1_val, input2_val));
55 input1, input2, output);
58 inline int16x8x2_t vqabd2q_s16(
const int16x8x2_t &v1,
const int16x8x2_t &v2)
60 const int16x8x2_t res =
63 vqabsq_s16(vqsubq_s16(v1.val[0], v2.val[0])),
64 vqabsq_s16(vqsubq_s16(v1.val[1], v2.val[1]))
71 void abs_diff_S16_S16_S16(
const ITensor *in1,
const ITensor *in2, ITensor *out,
const Window &window)
73 Iterator input1(in1, window);
74 Iterator input2(in2, window);
75 Iterator output(out, window);
79 int16x8x2_t input1_val = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
80 int16x8x2_t input2_val = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
81 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqabd2q_s16(input1_val, input2_val));
83 input1, input2, output);
86 void abs_diff_U8_S16_S16(
const ITensor *in1,
const ITensor *in2, ITensor *out,
const Window &window)
88 Iterator input1(in1, window);
89 Iterator input2(in2, window);
90 Iterator output(out, window);
94 const uint8x16_t input1_val = vld1q_u8(input1.ptr());
95 const int16x8x2_t input2_val =
98 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr())),
99 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8)
103 const int16x8x2_t out_val =
106 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input1_val))), input2_val.val[0])),
107 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(input1_val))), input2_val.val[1]))
111 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out_val.val[0]);
112 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, out_val.val[1]);
115 input1, input2, output);
118 void abs_diff_S16_U8_S16(
const ITensor *in1,
const ITensor *in2, ITensor *out,
const Window &window)
120 abs_diff_U8_S16_S16(in2, in1, out, window);
125 : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
149 "The output image can only be U8 if both input images are U8");
158 if(input1_data_type == input2_data_type)
162 _func = &abs_diff_U8_U8_U8;
166 _func = &abs_diff_S16_S16_S16;
173 _func = &abs_diff_U8_S16_S16;
177 _func = &abs_diff_S16_U8_S16;
195 output_access.set_valid_region(win, valid_region);
197 INEKernel::configure(win);
207 _func(_input1, _input2, _output, window);
bool set_format_if_unknown(ITensorInfo &info, Format format)
Set the format, data type and number of channels to the specified value if the current data type is unknown.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
1 channel, 1 U8 per channel
virtual DataType data_type() const =0
Data type used for each element of the tensor.
NEAbsoluteDifferenceKernel()
Default constructor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const ValidRegion valid_region
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Class to describe a number of elements in each dimension.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Implementation of a row access pattern.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
ValidRegion intersect_valid_regions(const Ts &... regions)
Intersect multiple valid regions.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function.
void configure(const ITensor *input1, const ITensor *input2, ITensor *output)
Set the inputs and output tensors.
Container for valid region of a window.
DataType
Available data types.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)