// Excerpt from a source listing: the leading numbers (53, 54, 55, 66) are the
// original listing line numbers, and listing lines 56-65 are not part of this
// excerpt, so how these constants feed the window setup cannot be seen here.

// Input elements covered per iteration: 16, the width of the uint8x16_t loads
// (vld1q_u8) performed in the processing loop.
53 constexpr
unsigned int num_elems_read_per_iteration = 16;
// Output elements produced per iteration: 8, the width of the single vst1_u8
// store performed in the processing loop.
54 constexpr
unsigned int num_elems_written_per_iteration = 8;
// Input rows touched per iteration: 3 (the top, mid and bot loads).
55 constexpr
unsigned int num_rows_read_per_iteration = 3;
// Pass the window `win` to the base-class configure. NOTE(review): `win` is
// created in the omitted listing lines - confirm against the full source.
66 INEKernel::configure(win);
// Excerpt from the kernel's processing code. The leading numbers are listing
// line numbers; several listing lines are missing from this excerpt (for
// example 96-99, 106 and 115), including the declarations of p0 and p1 and
// some p1 assignments. Comments below describe only what is visible.

// Byte stride along dimension 1 of the input; used below to reach the loads
// named top_data (in_ptr - in_stride) and bot_data (in_ptr + in_stride).
78 const size_t in_stride = _input->info()->strides_in_bytes()[1];
// Start one byte before the iterator's current position so the loaded vector
// also contains the element to the left of the current one.
82 uint8_t *in_ptr = in.
ptr() - 1;
// Load 16 bytes from each of three rows: one stride before, at, and one stride
// after in_ptr.
83 const uint8x16_t top_data = vld1q_u8(in_ptr - in_stride);
84 const uint8x16_t mid_data = vld1q_u8(in_ptr);
85 const uint8x16_t bot_data = vld1q_u8(in_ptr + in_stride);
// Split every 16-byte row into its high and low 8-byte halves so shifted
// 8-lane views can be built with vext_u8.
87 uint8x8_t top_high_data = vget_high_u8(top_data);
88 uint8x8_t top_low_data = vget_low_u8(top_data);
90 uint8x8_t mid_high_data = vget_high_u8(mid_data);
91 uint8x8_t mid_low_data = vget_low_u8(mid_data);
93 uint8x8_t bot_high_data = vget_high_u8(bot_data);
94 uint8x8_t bot_low_data = vget_low_u8(bot_data);
// Top row: vext_u8(low, high, n) takes 8 lanes starting at lane n of the
// low:high pair; each shifted view is folded into p0 with a per-lane minimum.
// NOTE(review): the initial value of p0 is set in listing lines not shown here.
100 p1 = vext_u8(top_low_data, top_high_data, 1);
101 p0 = vmin_u8(p0, p1);
103 p1 = vext_u8(top_low_data, top_high_data, 2);
104 p0 = vmin_u8(p0, p1);
// NOTE(review): listing line 106 (not shown) presumably reassigns p1 before
// this minimum - confirm against the full source.
107 p0 = vmin_u8(p0, p1);
// Middle row, views shifted by 1 and 2 lanes.
109 p1 = vext_u8(mid_low_data, mid_high_data, 1);
110 p0 = vmin_u8(p0, p1);
112 p1 = vext_u8(mid_low_data, mid_high_data, 2);
113 p0 = vmin_u8(p0, p1);
// NOTE(review): listing line 115 (not shown) presumably reassigns p1 before
// this minimum - confirm against the full source.
116 p0 = vmin_u8(p0, p1);
// Bottom row, views shifted by 1 and 2 lanes.
118 p1 = vext_u8(bot_low_data, bot_high_data, 1);
119 p0 = vmin_u8(p0, p1);
121 p1 = vext_u8(bot_low_data, bot_high_data, 2);
122 p0 = vmin_u8(p0, p1);
// Write the 8 accumulated minimum lanes to the output iterator's position.
124 vst1_u8(out.
ptr(), p0);
unsigned int top
top of the border
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Container for 2D border size.
1 channel, 1 U8 per channel
Interface for Neon tensor.
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
BorderSize border_size() const override
The size of the border for that kernel.
Implementation of a rectangular access pattern.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Class to describe a number of elements in each dimension.
Implementation of a row access pattern.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
unsigned int left
left of the border
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
void configure(const ITensor *input, ITensor *output, bool border_undefined)
Set the source, destination and border mode of the kernel.
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
Iterator updated by execute_window_loop for each window element.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)