Compute Library
 21.02
NEWarpKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
27 #include "arm_compute/core/Error.h"
37 
38 #include <cstddef>
39 
40 using namespace arm_compute;
41 
namespace
{
/** Fetch the U8 pixel located at integer coordinates (x, y) relative to @p in_ptr.
 *
 * @param[in] in_ptr Pointer to the pixel treated as coordinate (0, 0).
 * @param[in] x      Horizontal offset, in elements of one byte each.
 * @param[in] y      Vertical offset, in rows.
 * @param[in] stride Distance in bytes between the starts of two consecutive rows.
 *
 * @return The byte stored at in_ptr + x + y * stride.
 */
inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
{
    // Build the offset in a signed type. Writing "x + y * stride" directly converts the int
    // operands to size_t, so a negative coordinate would turn into a wrapped, huge unsigned index.
    const auto offset = static_cast<std::ptrdiff_t>(x) + static_cast<std::ptrdiff_t>(y) * static_cast<std::ptrdiff_t>(stride);
    return in_ptr[offset];
}
} // namespace
49 
51  : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix()
52 {
53 }
54 
56 {
57  return BorderSize(1);
58 }
59 
61 {
62  ARM_COMPUTE_UNUSED(info);
65  ARM_COMPUTE_ERROR_ON(_func == nullptr);
66 
67  (this->*_func)(window);
68 }
69 
70 void INEWarpKernel::configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value)
71 {
74 
75  _matrix = matrix;
76  _constant_border_value = constant_border_value;
77 
78  switch(border_mode)
79  {
81  _func = &INEWarpKernel::warp_undefined;
82  break;
84  _func = &INEWarpKernel::warp_constant;
85  break;
87  _func = &INEWarpKernel::warp_replicate;
88  break;
89  default:
90  ARM_COMPUTE_ERROR("Border mode not supported");
91  break;
92  }
93 
94  _input = input;
95  _output = output;
96 
97  // Configure kernel window
98  Window win = calculate_max_window(*output->info(), Steps(1U));
99 
100  const ValidRegion &input_valid_region = input->info()->valid_region();
101 
102  // Reads can occur within the valid region of the input
103  AccessWindowStatic input_access(input->info(),
104  input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
105  input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
106  input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
107  AccessWindowHorizontal output_access(output->info(), 0, 1);
108 
109  update_window_and_padding(win, input_access, output_access);
110 
111  output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
112 
113  INEKernel::configure(win);
114 }
115 
116 template <InterpolationPolicy interpolation>
118 {
119  // Don't increment in X and Y direction for the input tensor
120  // A pointer to the start of this plane is needed as base for the precomputed offsets
121  Window win_in(window);
122  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
123  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
124 
125  Iterator in(_input, win_in);
126  Iterator out(_output, window);
127 
128  const int min_x = _input->info()->valid_region().anchor[0];
129  const int max_x = min_x + _input->info()->valid_region().shape[0];
130  const int min_y = _input->info()->valid_region().anchor[1];
131  const int max_y = min_y + _input->info()->valid_region().shape[1];
132  const size_t stride = _input->info()->strides_in_bytes()[1];
133 
134  // x0 = M00 * x + M01 * y + M02
135  // y0 = M10 * x + M11 * y + M12
136  const float M00 = _matrix[0];
137  const float M10 = _matrix[1];
138  const float M01 = _matrix[0 + 1 * 2];
139  const float M11 = _matrix[1 + 1 * 2];
140  const float M02 = _matrix[0 + 2 * 2];
141  const float M12 = _matrix[1 + 2 * 2];
142 
143  // "M00 * x" and "M10 * x", when x = window.x.start
144  const float start_x0 = M00 * window.x().start();
145  const float start_y0 = M10 * window.x().start();
146 
147  // Current row
148  int y_cur = window.y().start();
149  int z_cur = window.z().start();
150  int d3_cur = window[3].start();
151  int d4_cur = window[4].start();
152  int d5_cur = window[5].start();
153 
154  // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
155  float const_x0 = M01 * y_cur + M02;
156  float const_y0 = M11 * y_cur + M12;
157 
158  // Affine warp coordinates
159  float x0 = start_x0 + const_x0;
160  float y0 = start_y0 + const_y0;
161 
162  execute_window_loop(window, [&](const Coordinates & id)
163  {
164  // Check if we are processing a new row. If so, update the cached indices (y_cur, z_cur, d3_cur, d4_cur, d5_cur) and recompute x0 and y0
165  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
166  {
167  y_cur = id.y();
168  z_cur = id.z();
169  d3_cur = id[3];
170  d4_cur = id[4];
171  d5_cur = id[5];
172 
173  const_x0 = M01 * y_cur + M02;
174  const_y0 = M11 * y_cur + M12;
175 
176  x0 = start_x0 + const_x0;
177  y0 = start_y0 + const_y0;
178  }
179 
180  // Only write to output if x0 and y0 are within the valid region.
181  // Otherwise the read value would be undefined.
182  if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
183  {
184  switch(interpolation)
185  {
187  *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
188  break;
190  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
191  break;
192  default:
193  ARM_COMPUTE_ERROR("Interpolation not supported");
194  }
195  }
196 
197  x0 += M00;
198  y0 += M10;
199  },
200  in, out);
201 }
202 
203 template <InterpolationPolicy interpolation>
205 {
206  // Don't increment in X and Y direction for the input tensor
207  // A pointer to the start of this plane is needed as base for the precomputed offsets
208  Window win_in(window);
209  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
210  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
211 
212  Iterator in(_input, win_in);
213  Iterator out(_output, window);
214 
215  const int min_x = _input->info()->valid_region().anchor[0];
216  const int max_x = min_x + _input->info()->valid_region().shape[0];
217  const int min_y = _input->info()->valid_region().anchor[1];
218  const int max_y = min_y + _input->info()->valid_region().shape[1];
219  const size_t stride = _input->info()->strides_in_bytes()[1];
220 
221  // x0 = M00 * x + M01 * y + M02
222  // y0 = M10 * x + M11 * y + M12
223  const float M00 = _matrix[0];
224  const float M10 = _matrix[1];
225  const float M01 = _matrix[0 + 1 * 2];
226  const float M11 = _matrix[1 + 1 * 2];
227  const float M02 = _matrix[0 + 2 * 2];
228  const float M12 = _matrix[1 + 2 * 2];
229 
230  // "M00 * x" and "M10 * x", when x = window.x.start
231  const float start_x0 = M00 * window.x().start();
232  const float start_y0 = M10 * window.x().start();
233 
234  // Current row
235  int y_cur = window.y().start();
236  int z_cur = window.z().start();
237  int d3_cur = window[3].start();
238  int d4_cur = window[4].start();
239  int d5_cur = window[5].start();
240 
241  // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
242  float const_x0 = M01 * y_cur + M02;
243  float const_y0 = M11 * y_cur + M12;
244 
245  // Affine warp coordinates
246  float x0 = start_x0 + const_x0;
247  float y0 = start_y0 + const_y0;
248 
249  execute_window_loop(window, [&](const Coordinates & id)
250  {
251  // Check if we are processing a new row. If so, update the cached indices (y_cur, z_cur, d3_cur, d4_cur, d5_cur) and recompute x0 and y0
252  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
253  {
254  y_cur = id.y();
255  z_cur = id.z();
256  d3_cur = id[3];
257  d4_cur = id[4];
258  d5_cur = id[5];
259 
260  const_x0 = M01 * y_cur + M02;
261  const_y0 = M11 * y_cur + M12;
262 
263  x0 = start_x0 + const_x0;
264  y0 = start_y0 + const_y0;
265  }
266 
267  // Only use input values if x0 and y0 are within the valid region.
268  // Otherwise write the constant border value.
269  if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
270  {
271  switch(interpolation)
272  {
274  *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
275  break;
277  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
278  break;
279  default:
280  ARM_COMPUTE_ERROR("Interpolation not supported");
281  }
282  }
283  else
284  {
285  switch(interpolation)
286  {
288  *out.ptr() = _constant_border_value;
289  break;
291  {
292  const auto xi = utility::clamp<int>(std::floor(x0), min_x - 1, max_x);
293  const auto yi = utility::clamp<int>(std::floor(y0), min_y - 1, max_y);
294  const auto xi_1 = utility::clamp<int>(std::floor(x0 + 1), min_x - 1, max_x);
295  const auto yi_1 = utility::clamp<int>(std::floor(y0 + 1), min_y - 1, max_y);
296 
297  const float dx = x0 - std::floor(x0);
298  const float dy = y0 - std::floor(y0);
299  const float dx1 = 1.0f - dx;
300  const float dy1 = 1.0f - dy;
301 
302  const float a00 = *(in.ptr() + xi + yi * stride);
303  const float a01 = *(in.ptr() + xi_1 + yi * stride);
304  const float a10 = *(in.ptr() + xi + yi_1 * stride);
305  const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
306 
307  *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
308  }
309  break;
310  default:
311  ARM_COMPUTE_ERROR("Interpolation not supported");
312  }
313  }
314 
315  x0 += M00;
316  y0 += M10;
317  },
318  in, out);
319 }
320 
321 template <InterpolationPolicy interpolation>
323 {
324  // Don't increment in X and Y direction for the input tensor
325  // A pointer to the start of this plane is needed as base for the precomputed offsets
326  Window win_in(window);
327  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
328  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
329 
330  Iterator in(_input, win_in);
331  Iterator out(_output, window);
332 
333  const int min_x = _input->info()->valid_region().anchor[0];
334  const int max_x = min_x + _input->info()->valid_region().shape[0];
335  const int min_y = _input->info()->valid_region().anchor[1];
336  const int max_y = min_y + _input->info()->valid_region().shape[1];
337  const size_t stride = _input->info()->strides_in_bytes()[1];
338 
339  // Current row
340  int y_cur = window.y().start();
341  int z_cur = window.z().start();
342  int d3_cur = window[3].start();
343  int d4_cur = window[4].start();
344  int d5_cur = window[5].start();
345 
346  const float M00 = _matrix[0];
347  const float M10 = _matrix[1];
348  const float M01 = _matrix[0 + 1 * 2];
349  const float M11 = _matrix[1 + 1 * 2];
350  const float M02 = _matrix[0 + 2 * 2];
351  const float M12 = _matrix[1 + 2 * 2];
352 
353  // "M00 * x" and "M10 * x", when x = window.x.start
354  const float start_x0 = M00 * window.x().start();
355  const float start_y0 = M10 * window.x().start();
356 
357  // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
358  float const_x0 = M01 * y_cur + M02;
359  float const_y0 = M11 * y_cur + M12;
360 
361  float x0 = start_x0 + const_x0;
362  float y0 = start_y0 + const_y0;
363 
364  execute_window_loop(window, [&](const Coordinates & id)
365  {
366  // Check if we are processing a new row. If so, update the cached indices (y_cur, z_cur, d3_cur, d4_cur, d5_cur) and recompute x0 and y0
367  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
368  {
369  y_cur = id.y();
370  z_cur = id.z();
371  d3_cur = id[3];
372  d4_cur = id[4];
373  d5_cur = id[5];
374 
375  const_x0 = M01 * y_cur + M02;
376  const_y0 = M11 * y_cur + M12;
377 
378  x0 = start_x0 + const_x0;
379  y0 = start_y0 + const_y0;
380  }
381 
382  // Only load from (x0, y0) if the point is within the valid region.
383  // Otherwise load from the edge of the valid region.
384  if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
385  {
386  switch(interpolation)
387  {
389  *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
390  break;
392  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
393  break;
394  default:
395  ARM_COMPUTE_ERROR("Interpolation not supported");
396  }
397  }
398  else
399  {
400  // Clamp coordinates
401  const auto xi = utility::clamp<int>(std::floor(x0), min_x, max_x - 1);
402  const auto yi = utility::clamp<int>(std::floor(y0), min_y, max_y - 1);
403  switch(interpolation)
404  {
406  *out.ptr() = *(in.ptr() + xi + yi * stride);
407  break;
409  {
410  const auto xi_1 = utility::clamp<int>(std::floor(x0 + 1), min_x, max_x - 1);
411  const auto yi_1 = utility::clamp<int>(std::floor(y0 + 1), min_y, max_y - 1);
412 
413  const float dx = x0 - std::floor(x0);
414  const float dy = y0 - std::floor(y0);
415  const float dx1 = 1.0f - dx;
416  const float dy1 = 1.0f - dy;
417 
418  const float a00 = *(in.ptr() + xi + yi * stride);
419  const float a01 = *(in.ptr() + xi_1 + yi * stride);
420  const float a10 = *(in.ptr() + xi + yi_1 * stride);
421  const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
422 
423  *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
424  }
425  break;
426  default:
427  ARM_COMPUTE_ERROR("Interpolation not supported");
428  }
429  }
430 
431  x0 += M00;
432  y0 += M10;
433  },
434  in, out);
435 }
436 
437 template <InterpolationPolicy interpolation>
439 {
440  // Don't increment in X and Y direction for the input tensor
441  // A pointer to the start of this plane is needed as base for the precomputed offsets
442  Window win_in(window);
443  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
444  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
445 
446  Iterator in(_input, win_in);
447  Iterator out(_output, window);
448 
449  const int min_x = _input->info()->valid_region().anchor[0];
450  const int max_x = min_x + _input->info()->valid_region().shape[0];
451  const int min_y = _input->info()->valid_region().anchor[1];
452  const int max_y = min_y + _input->info()->valid_region().shape[1];
453  const size_t stride = _input->info()->strides_in_bytes()[1];
454 
455  // x0 = M00 * x + M01 * y + M02
456  // y0 = M10 * x + M11 * y + M12
457  // z0 = M20 * x + M21 * y + M22
458  // xn = x0 / z0
459  // yn = y0 / z0
460  const float M00 = _matrix[0];
461  const float M10 = _matrix[1];
462  const float M20 = _matrix[2];
463  const float M01 = _matrix[0 + 1 * 3];
464  const float M11 = _matrix[1 + 1 * 3];
465  const float M21 = _matrix[2 + 1 * 3];
466  const float M02 = _matrix[0 + 2 * 3];
467  const float M12 = _matrix[1 + 2 * 3];
468  const float M22 = _matrix[2 + 2 * 3];
469 
470  // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
471  const float start_x0 = M00 * window.x().start();
472  const float start_y0 = M10 * window.x().start();
473  const float start_z0 = M20 * window.x().start();
474 
475  // Current row
476  int y_cur = window.y().start();
477  int z_cur = window.z().start();
478  int d3_cur = window[3].start();
479  int d4_cur = window[4].start();
480  int d5_cur = window[5].start();
481 
482  // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
483  float const_x0 = M01 * y_cur + M02;
484  float const_y0 = M11 * y_cur + M12;
485  float const_z0 = M21 * y_cur + M22;
486 
487  // Perspective warp coordinates
488  float x0 = start_x0 + const_x0;
489  float y0 = start_y0 + const_y0;
490  float z0 = start_z0 + const_z0;
491 
492  execute_window_loop(window, [&](const Coordinates & id)
493  {
494  // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
495  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
496  {
497  y_cur = id.y();
498  z_cur = id.z();
499  d3_cur = id[3];
500  d4_cur = id[4];
501  d5_cur = id[5];
502 
503  const_x0 = M01 * y_cur + M02;
504  const_y0 = M11 * y_cur + M12;
505  const_z0 = M21 * y_cur + M22;
506 
507  x0 = start_x0 + const_x0;
508  y0 = start_y0 + const_y0;
509  z0 = start_z0 + const_z0;
510  }
511 
512  const float xn = x0 / z0;
513  const float yn = y0 / z0;
514 
515  // Only write to output if xn and yn are within the valid region.
516  // Otherwise the read value would be undefined.
517  if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
518  {
519  switch(interpolation)
520  {
522  *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
523  break;
525  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
526  break;
527  default:
528  ARM_COMPUTE_ERROR("Interpolation not supported");
529  }
530  }
531 
532  x0 += M00;
533  y0 += M10;
534  z0 += M20;
535  },
536  in, out);
537 }
538 
539 template <InterpolationPolicy interpolation>
541 {
542  // Don't increment in X and Y direction for the input tensor
543  // A pointer to the start of this plane is needed as base for the precomputed offsets
544  Window win_in(window);
545  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
546  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
547 
548  Iterator in(_input, win_in);
549  Iterator out(_output, window);
550 
551  const int min_x = _input->info()->valid_region().anchor[0];
552  const int max_x = min_x + _input->info()->valid_region().shape[0];
553  const int min_y = _input->info()->valid_region().anchor[1];
554  const int max_y = min_y + _input->info()->valid_region().shape[1];
555  const size_t stride = _input->info()->strides_in_bytes()[1];
556 
557  // x0 = M00 * x + M01 * y + M02
558  // y0 = M10 * x + M11 * y + M12
559  // z0 = M20 * x + M21 * y + M22
560  // xn = x0 / z0
561  // yn = y0 / z0
562  const float M00 = _matrix[0];
563  const float M10 = _matrix[1];
564  const float M20 = _matrix[2];
565  const float M01 = _matrix[0 + 1 * 3];
566  const float M11 = _matrix[1 + 1 * 3];
567  const float M21 = _matrix[2 + 1 * 3];
568  const float M02 = _matrix[0 + 2 * 3];
569  const float M12 = _matrix[1 + 2 * 3];
570  const float M22 = _matrix[2 + 2 * 3];
571 
572  // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
573  const float start_x0 = M00 * window.x().start();
574  const float start_y0 = M10 * window.x().start();
575  const float start_z0 = M20 * window.x().start();
576 
577  // Current row
578  int y_cur = window.y().start();
579  int z_cur = window.z().start();
580  int d3_cur = window[3].start();
581  int d4_cur = window[4].start();
582  int d5_cur = window[5].start();
583 
584  // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
585  float const_x0 = M01 * y_cur + M02;
586  float const_y0 = M11 * y_cur + M12;
587  float const_z0 = M21 * y_cur + M22;
588 
589  // Perspective warp coordinates
590  float x0 = start_x0 + const_x0;
591  float y0 = start_y0 + const_y0;
592  float z0 = start_z0 + const_z0;
593 
594  execute_window_loop(window, [&](const Coordinates & id)
595  {
596  // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
597  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
598  {
599  y_cur = id.y();
600  z_cur = id.z();
601  d3_cur = id[3];
602  d4_cur = id[4];
603  d5_cur = id[5];
604 
605  const_x0 = M01 * y_cur + M02;
606  const_y0 = M11 * y_cur + M12;
607  const_z0 = M21 * y_cur + M22;
608 
609  x0 = start_x0 + const_x0;
610  y0 = start_y0 + const_y0;
611  z0 = start_z0 + const_z0;
612  }
613 
614  const float xn = x0 / z0;
615  const float yn = y0 / z0;
616 
617  // Only use input values if xn and yn are within the valid region.
618  if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
619  {
620  switch(interpolation)
621  {
623  *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
624  break;
626  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
627  break;
628  default:
629  ARM_COMPUTE_ERROR("Interpolation not supported");
630  }
631  }
632  else
633  {
634  switch(interpolation)
635  {
637  *out.ptr() = _constant_border_value;
638  break;
640  {
641  const auto xi = utility::clamp<int>(std::floor(xn), min_x - 1, max_x);
642  const auto yi = utility::clamp<int>(std::floor(yn), min_y - 1, max_y);
643  const auto xi_1 = utility::clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
644  const auto yi_1 = utility::clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
645 
646  const float dx = xn - std::floor(xn);
647  const float dy = yn - std::floor(yn);
648  const float dx1 = 1.0f - dx;
649  const float dy1 = 1.0f - dy;
650 
651  const float a00 = *(in.ptr() + xi + yi * stride);
652  const float a01 = *(in.ptr() + xi_1 + yi * stride);
653  const float a10 = *(in.ptr() + xi + yi_1 * stride);
654  const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
655 
656  *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
657  }
658  break;
659  default:
660  ARM_COMPUTE_ERROR("Interpolation not supported");
661  }
662  }
663 
664  x0 += M00;
665  y0 += M10;
666  z0 += M20;
667  },
668  in, out);
669 }
670 
671 template <InterpolationPolicy interpolation>
673 {
674  // Don't increment in X and Y direction for the input tensor
675  // A pointer to the start of this plane is needed as base for the precomputed offsets
676  Window win_in(window);
677  win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
678  win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
679 
680  Iterator in(_input, win_in);
681  Iterator out(_output, window);
682 
683  const int min_x = _input->info()->valid_region().anchor[0];
684  const int max_x = min_x + _input->info()->valid_region().shape[0];
685  const int min_y = _input->info()->valid_region().anchor[1];
686  const int max_y = min_y + _input->info()->valid_region().shape[1];
687  const size_t stride = _input->info()->strides_in_bytes()[1];
688 
689  // Current row
690  int y_cur = window.y().start();
691  int z_cur = window.z().start();
692  int d3_cur = window[3].start();
693  int d4_cur = window[4].start();
694  int d5_cur = window[5].start();
695 
696  // x0 = M00 * x + M01 * y + M02
697  // y0 = M10 * x + M11 * y + M12
698  // z0 = M20 * x + M21 * y + M22
699  // xn = x0 / z0
700  // yn = y0 / z0
701  const float M00 = _matrix[0];
702  const float M10 = _matrix[1];
703  const float M20 = _matrix[2];
704  const float M01 = _matrix[0 + 1 * 3];
705  const float M11 = _matrix[1 + 1 * 3];
706  const float M21 = _matrix[2 + 1 * 3];
707  const float M02 = _matrix[0 + 2 * 3];
708  const float M12 = _matrix[1 + 2 * 3];
709  const float M22 = _matrix[2 + 2 * 3];
710 
711  // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
712  const float start_x0 = M00 * window.x().start();
713  const float start_y0 = M10 * window.x().start();
714  const float start_z0 = M20 * window.x().start();
715 
716  // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
717  float const_x0 = M01 * y_cur + M02;
718  float const_y0 = M11 * y_cur + M12;
719  float const_z0 = M21 * y_cur + M22;
720 
721  // Perspective warp coordinates
722  float x0 = start_x0 + const_x0;
723  float y0 = start_y0 + const_y0;
724  float z0 = start_z0 + const_z0;
725 
726  execute_window_loop(window, [&](const Coordinates & id)
727  {
728  // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
729  if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
730  {
731  y_cur = id.y();
732  z_cur = id.z();
733  d3_cur = id[3];
734  d4_cur = id[4];
735  d5_cur = id[5];
736 
737  const_x0 = M01 * y_cur + M02;
738  const_y0 = M11 * y_cur + M12;
739  const_z0 = M21 * y_cur + M22;
740 
741  x0 = start_x0 + const_x0;
742  y0 = start_y0 + const_y0;
743  z0 = start_z0 + const_z0;
744  }
745 
746  const float xn = x0 / z0;
747  const float yn = y0 / z0;
748 
749  // Only load from (xn, yn) if the point is within the valid region.
750  if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
751  {
752  switch(interpolation)
753  {
755  *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
756  break;
758  *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
759  break;
760  default:
761  ARM_COMPUTE_ERROR("Interpolation not supported");
762  }
763  }
764  else
765  {
766  // Clamp coordinates
767  const auto xi = utility::clamp<int>(std::floor(xn), min_x, max_x - 1);
768  const auto yi = utility::clamp<int>(std::floor(yn), min_y, max_y - 1);
769  switch(interpolation)
770  {
772  *out.ptr() = *(in.ptr() + xi + yi * stride);
773  break;
775  {
776  const auto xi_1 = utility::clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
777  const auto yi_1 = utility::clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
778 
779  const float dx = xn - std::floor(xn);
780  const float dy = yn - std::floor(yn);
781  const float dx1 = 1.0f - dx;
782  const float dy1 = 1.0f - dy;
783 
784  const float a00 = *(in.ptr() + xi + yi * stride);
785  const float a01 = *(in.ptr() + xi_1 + yi * stride);
786  const float a10 = *(in.ptr() + xi + yi_1 * stride);
787  const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
788 
789  *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
790  }
791  break;
792  default:
793  ARM_COMPUTE_ERROR("Interpolation not supported");
794  }
795  }
796 
797  x0 += M00;
798  y0 += M10;
799  z0 += M20;
800  },
801  in, out);
802 }
803 
BorderMode
Methods available to handle borders.
Definition: Types.h:265
unsigned int top
top of the border
Definition: Types.h:375
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
TensorShape shape
Shape of the valid region.
Definition: Types.h:261
Container for 2D border size.
Definition: Types.h:273
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
1 channel, 1 U8 per channel
INEWarpKernel()
Default constructor.
Output values are defined by bilinear interpolation between the pixels.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
unsigned int bottom
bottom of the border
Definition: Types.h:377
constexpr const Dimension & z() const
Alias to access the third dimension of the window.
Definition: Window.h:163
Output values are defined to match the source pixel whose center is nearest to the sample position...
Interface for Neon tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
Implementation of a static rectangular access pattern.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Coordinates of an item.
Definition: Coordinates.h:37
Implementation of a row access pattern.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
Template interface for the kernel to compute warp affine.
Definition: NEWarpKernel.h:101
BorderSize border_size() const override
The size of the border for that kernel.
unsigned int left
left of the border
Definition: Types.h:378
unsigned int right
right of the border
Definition: Types.h:376
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
Definition: CPPTypes.h:235
Borders are left undefined.
Pixels outside the image are assumed to have the same value as the closest image pixel.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:154
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
Container for valid region of a window.
Definition: Types.h:188
T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
Return the pixel at (x,y) using bilinear interpolation.
Definition: ScaleHelpers.h:207
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:94
Describe a multidimensional execution window.
Definition: Window.h:39
Coordinates anchor
Anchor for the start of the valid region.
Definition: Types.h:260
Template interface for the kernel to compute warp perspective.
Definition: NEWarpKernel.h:118
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
virtual void configure(const ITensor *input, ITensor *output, const std::array< float, 9 > &matrix, BorderMode border_mode, uint8_t constant_border_value)
Initialise the kernel's input, output and border mode.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145