Compute Library
 21.02
NESobel5x5VertKernel Class Reference

Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. More...

#include <NESobel5x5Kernel.h>

Collaboration diagram for NESobel5x5VertKernel:
[legend]

Public Member Functions

const char * name () const override
 Name of the kernel. More...
 
 NESobel5x5VertKernel ()
 Default constructor. More...
 
 NESobel5x5VertKernel (const NESobel5x5VertKernel &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
NESobel5x5VertKernel & operator= (const NESobel5x5VertKernel &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
 NESobel5x5VertKernel (NESobel5x5VertKernel &&)=default
 Allow instances of this class to be moved. More...
 
NESobel5x5VertKernel & operator= (NESobel5x5VertKernel &&)=default
 Allow instances of this class to be moved. More...
 
 ~NESobel5x5VertKernel ()=default
 Default destructor. More...
 
void configure (ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined)
 Initialise the kernel's source, destination and border mode. More...
 
void run (const Window &window, const ThreadInfo &info) override
 Execute the kernel on the passed window. More...
 
BorderSize border_size () const override
 The size of the border for that kernel. More...
 
- Public Member Functions inherited from ICPPKernel
virtual ~ICPPKernel ()=default
 Default destructor. More...
 
virtual void run_nd (const Window &window, const ThreadInfo &info, const Window &thread_locator)
 Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version. More...
 
virtual void run_op (ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 Execute the kernel on the passed window. More...
 
- Public Member Functions inherited from IKernel
 IKernel ()
 Constructor. More...
 
virtual ~IKernel ()=default
 Destructor. More...
 
virtual bool is_parallelisable () const
 Indicates whether or not the kernel is parallelisable. More...
 
const Window & window () const
 The maximum window the kernel can be executed on. More...
 

Detailed Description

Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor.

Definition at line 83 of file NESobel5x5Kernel.h.

Constructor & Destructor Documentation

◆ NESobel5x5VertKernel() [1/3]

Default constructor.

Definition at line 209 of file NESobel5x5Kernel.cpp.

210  : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false)
211 {
212 }

◆ NESobel5x5VertKernel() [2/3]

Prevent instances of this class from being copied (As this class contains pointers)

◆ NESobel5x5VertKernel() [3/3]

Allow instances of this class to be moved.

◆ ~NESobel5x5VertKernel()

~NESobel5x5VertKernel ( )
default

Default destructor.

Member Function Documentation

◆ border_size()

BorderSize border_size ( ) const
override virtual

The size of the border for that kernel.

Returns
The width in number of elements of the border.

Reimplemented from IKernel.

Definition at line 214 of file NESobel5x5Kernel.cpp.

Referenced by NESobel5x5VertKernel::configure().

215 {
216  return BorderSize{ 2, 0 };
217 }
Container for 2D border size.
Definition: Types.h:273

◆ configure()

void configure ( ITensor * input_x,
ITensor * input_y,
ITensor * output_x,
ITensor * output_y,
bool  border_undefined 
)

Initialise the kernel's source, destination and border mode.

Parameters
[in] input_x: Input for X (X output of hor pass). Data type supported: S16.
[in] input_y: Input for Y (Y output of hor pass). Data type supported: S16.
[out] output_x: Destination tensor for the X gradient. Data type supported: S16.
[out] output_y: Destination tensor for the Y gradient. Data type supported: S16.
[in] border_undefined: True if the border mode is undefined. False if it's replicate or constant.

Definition at line 219 of file NESobel5x5Kernel.cpp.

References ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN, NESobel5x5VertKernel::border_size(), arm_compute::calculate_max_window(), ITensor::info(), arm_compute::test::validation::input, num_elems_processed_per_iteration, arm_compute::S16, BorderSize::top, arm_compute::update_window_and_padding(), and ITensorInfo::valid_region().

220 {
221  ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
222 
223  _run_sobel_x = output_x != nullptr;
224  _run_sobel_y = output_y != nullptr;
225 
226  if(_run_sobel_x)
227  {
230  }
231 
232  if(_run_sobel_y)
233  {
236  }
237 
238  _input_x = input_x;
239  _input_y = input_y;
240  _output_x = output_x;
241  _output_y = output_y;
242 
243  const ITensor *const input = _run_sobel_x ? input_x : input_y;
244 
245  // Configure kernel window
246  constexpr unsigned int num_elems_processed_per_iteration = 16;
247  constexpr unsigned int num_elems_read_per_iteration = 16;
248  constexpr unsigned int num_elems_written_per_iteration = 16;
249  constexpr unsigned int num_rows_read_per_iteration = 5;
250 
251  Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
252  AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
253  AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
254 
256  AccessWindowRectangle(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration),
257  AccessWindowRectangle(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration),
258  output_x_access,
259  output_y_access);
260 
261  output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
262  output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
263 
264  INEKernel::configure(win);
265 }
unsigned int top
top of the border
Definition: Types.h:375
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t,...)
Definition: Validate.h:643
Interface for Neon tensor.
Definition: ITensor.h:36
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
Implementation of a rectangular access pattern.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
BorderSize border_size() const override
The size of the border for that kernel.
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Implementation of a row access pattern.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
1 channel, 1 S16 per channel
unsigned int num_elems_processed_per_iteration
Describe a multidimensional execution window.
Definition: Window.h:39

◆ name()

const char* name ( ) const
inline override virtual

Name of the kernel.

Returns
Kernel name

Implements ICPPKernel.

Definition at line 86 of file NESobel5x5Kernel.h.

References NESobel5x5HorKernel::border_size(), NESobel5x5HorKernel::configure(), arm_compute::test::validation::info, NESobel5x5HorKernel::operator=(), NESobel5x5HorKernel::run(), and IKernel::window().

87  {
88  return "NESobel5x5VertKernel";
89  }

◆ operator=() [1/2]

NESobel5x5VertKernel& operator= ( const NESobel5x5VertKernel & )
delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ operator=() [2/2]

NESobel5x5VertKernel& operator= ( NESobel5x5VertKernel &&  )
default

Allow instances of this class to be moved.

◆ run()

void run ( const Window & window,
const ThreadInfo & info 
)
override virtual

Execute the kernel on the passed window.

Warning
If is_parallelisable() returns false then the passed window must be equal to window()
Note
The window has to be a region within the window returned by the window() method
The width of the window has to be a multiple of num_elems_processed_per_iteration().
Parameters
[in] window: Region on which to execute the kernel. (Must be a region of the window returned by window())
[in] info: Info about executing thread and CPU.

Reimplemented from ICPPKernel.

Definition at line 267 of file NESobel5x5Kernel.cpp.

References ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW, ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL, ARM_COMPUTE_UNUSED, arm_compute::execute_window_loop(), Iterator::offset(), Iterator::ptr(), ITensor::ptr_to_element(), and IKernel::window().

268 {
269  ARM_COMPUTE_UNUSED(info);
272 
273  Iterator input_x;
274  Iterator input_y;
275  Iterator output_x;
276  Iterator output_y;
277 
278  const int16_t *input_x_low2_ptr = nullptr;
279  const int16_t *input_x_low_ptr = nullptr;
280  const int16_t *input_x_mid_ptr = nullptr;
281  const int16_t *input_x_top_ptr = nullptr;
282  const int16_t *input_x_top2_ptr = nullptr;
283 
284  const int16_t *input_y_low2_ptr = nullptr;
285  const int16_t *input_y_low_ptr = nullptr;
286  const int16_t *input_y_top_ptr = nullptr;
287  const int16_t *input_y_top2_ptr = nullptr;
288 
289  if(_run_sobel_x)
290  {
291  input_x = Iterator(_input_x, window);
292  output_x = Iterator(_output_x, window);
293  input_x_top2_ptr = reinterpret_cast<const int16_t *>(_input_x->ptr_to_element(Coordinates(0, -2)));
294  input_x_top_ptr = reinterpret_cast<const int16_t *>(_input_x->ptr_to_element(Coordinates(0, -1)));
295  input_x_mid_ptr = reinterpret_cast<const int16_t *>(_input_x->ptr_to_element(Coordinates(0, 0)));
296  input_x_low_ptr = reinterpret_cast<const int16_t *>(_input_x->ptr_to_element(Coordinates(0, 1)));
297  input_x_low2_ptr = reinterpret_cast<const int16_t *>(_input_x->ptr_to_element(Coordinates(0, 2)));
298  }
299 
300  if(_run_sobel_y)
301  {
302  input_y = Iterator(_input_y, window);
303  output_y = Iterator(_output_y, window);
304  input_y_top2_ptr = reinterpret_cast<const int16_t *>(_input_y->ptr_to_element(Coordinates(0, -2)));
305  input_y_top_ptr = reinterpret_cast<const int16_t *>(_input_y->ptr_to_element(Coordinates(0, -1)));
306  input_y_low_ptr = reinterpret_cast<const int16_t *>(_input_y->ptr_to_element(Coordinates(0, 1)));
307  input_y_low2_ptr = reinterpret_cast<const int16_t *>(_input_y->ptr_to_element(Coordinates(0, 2)));
308  }
309 
310  static const int16x8_t six = vdupq_n_s16(6);
311  static const int16x8_t four = vdupq_n_s16(4);
312  static const int16x8_t two = vdupq_n_s16(2);
313  static const int16x8_t minustwo = vdupq_n_s16(-2);
314 
315  if(_run_sobel_x)
316  {
317  execute_window_loop(window, [&](const Coordinates &)
318  {
319  // Convert offset from uint8_t* to uint16_t*
320  const size_t input_offset_high_s16 = input_x.offset() / 2;
321  const size_t input_offset_low_s16 = input_offset_high_s16 + 8;
322 
323  //HIGH DATA
324  //top2
325  int16x8_t data_high = vld1q_s16(input_x_top2_ptr + input_offset_high_s16);
326  int16x8_t out_high = data_high;
327  //top
328  data_high = vld1q_s16(input_x_top_ptr + input_offset_high_s16);
329  out_high = vmlaq_s16(out_high, data_high, four);
330  //mid
331  data_high = vld1q_s16(input_x_mid_ptr + input_offset_high_s16);
332  out_high = vmlaq_s16(out_high, data_high, six);
333  //low
334  data_high = vld1q_s16(input_x_low_ptr + input_offset_high_s16);
335  out_high = vmlaq_s16(out_high, data_high, four);
336  //low2
337  data_high = vld1q_s16(input_x_low2_ptr + input_offset_high_s16);
338  out_high = vaddq_s16(out_high, data_high);
339 
340  vst1q_s16((reinterpret_cast<int16_t *>(output_x.ptr())), out_high);
341 
342  //LOW DATA
343  //top2
344  int16x8_t data_low = vld1q_s16(input_x_top2_ptr + input_offset_low_s16);
345  int16x8_t out_low = data_low;
346  //top
347  data_low = vld1q_s16(input_x_top_ptr + input_offset_low_s16);
348  out_low = vmlaq_s16(out_low, data_low, four);
349  //mid
350  data_low = vld1q_s16(input_x_mid_ptr + input_offset_low_s16);
351  out_low = vmlaq_s16(out_low, data_low, six);
352  //low
353  data_low = vld1q_s16(input_x_low_ptr + input_offset_low_s16);
354  out_low = vmlaq_s16(out_low, data_low, four);
355  //low2
356  data_low = vld1q_s16(input_x_low2_ptr + input_offset_low_s16);
357  out_low = vaddq_s16(out_low, data_low);
358 
359  vst1q_s16((reinterpret_cast<int16_t *>(output_x.ptr())) + 8, out_low);
360  },
361  input_x, output_x);
362  }
363 
364  if(_run_sobel_y)
365  {
366  execute_window_loop(window, [&](const Coordinates &)
367  {
368  // Convert offset from uint8_t* to uint16_t*
369  const size_t input_offset_high_s16 = input_y.offset() / 2;
370  const size_t input_offset_low_s16 = input_offset_high_s16 + 8;
371 
372  //HIGH DATA
373  //top2
374  int16x8_t data_high = vld1q_s16(input_y_top2_ptr + input_offset_high_s16);
375  int16x8_t out_high = vnegq_s16(data_high);
376  //top
377  data_high = vld1q_s16(input_y_top_ptr + input_offset_high_s16);
378  out_high = vmlaq_s16(out_high, data_high, minustwo);
379  //low
380  data_high = vld1q_s16(input_y_low_ptr + input_offset_high_s16);
381  out_high = vmlaq_s16(out_high, data_high, two);
382  //low2
383  data_high = vld1q_s16(input_y_low2_ptr + input_offset_high_s16);
384  out_high = vaddq_s16(out_high, data_high);
385 
386  vst1q_s16((reinterpret_cast<int16_t *>(output_y.ptr())), out_high);
387 
388  //LOW DATA
389  //top2
390  int16x8_t data_low = vld1q_s16(input_y_top2_ptr + input_offset_low_s16);
391  int16x8_t out_low = vnegq_s16(data_low);
392  //top
393  data_low = vld1q_s16(input_y_top_ptr + input_offset_low_s16);
394  out_low = vmlaq_s16(out_low, data_low, minustwo);
395  //low
396  data_low = vld1q_s16(input_y_low_ptr + input_offset_low_s16);
397  out_low = vmlaq_s16(out_low, data_low, two);
398  //low2
399  data_low = vld1q_s16(input_y_low2_ptr + input_offset_low_s16);
400  out_low = vaddq_s16(out_low, data_low);
401 
402  vst1q_s16((reinterpret_cast<int16_t *>(output_y.ptr())) + 8, out_low);
403  },
404  input_y, output_y);
405  }
406 }
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
Definition: ITensor.h:63
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
Coordinates of an item.
Definition: Coordinates.h:37
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
constexpr size_t offset() const
Return the offset in bytes from the first element to the current position of the iterator.
Definition: Helpers.inl:134
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205

The documentation for this class was generated from the following files: