Compute Library
 21.02
NECannyEdgeKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
30 #include "arm_compute/core/Types.h"
31 #include "arm_compute/core/Utils.h"
38 
39 #include <arm_neon.h>
40 #include <cstddef>
41 #include <cstdint>
42 #include <tuple>
43 
44 namespace arm_compute
45 {
46 namespace
47 {
48 constexpr int NO_EDGE = 0;
49 constexpr int EDGE = 255;
50 constexpr int MAYBE = 127;
51 
52 inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
53 {
54  // Constant used for evaluating score1 and score3
55  static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f);
56  static const float32x4_t zero = vdupq_n_f32(0.0f);
57  static const float32x4_t one = vdupq_n_f32(1.0f);
58  static const float32x4_t two = vdupq_n_f32(2.0f);
59  static const float32x4_t three = vdupq_n_f32(3.0f);
60 
61  // Score0: (1, 0)
62  const float32x4x2_t score0 =
63  {
64  {
65  vabsq_f32(gx.val[0]),
66  vabsq_f32(gx.val[1])
67  }
68  };
69 
70  // Score2: ( 0, 1 )
71  const float32x4x2_t score2 =
72  {
73  {
74  vabsq_f32(gy.val[0]),
75  vabsq_f32(gy.val[1])
76  }
77  };
78 
79  // Score1 and Score3: ( sqrt(2)/2, sqrt(2)/2 ) and ( -sqrt(2)/2, sqrt(2)/2 ) respectively
80  float32x4x2_t score1 =
81  {
82  {
83  vmulq_f32(gy.val[0], const45),
84  vmulq_f32(gy.val[1], const45)
85  }
86  };
87 
88  float32x4x2_t score3 = score1;
89 
90  score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45);
91  score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45);
92  score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45);
93  score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45);
94 
95  score1.val[0] = vabsq_f32(score1.val[0]);
96  score1.val[1] = vabsq_f32(score1.val[1]);
97  score3.val[0] = vabsq_f32(score3.val[0]);
98  score3.val[1] = vabsq_f32(score3.val[1]);
99 
100  float32x4x2_t phase =
101  {
102  {
103  zero,
104  zero
105  }
106  };
107 
108  float32x4x2_t old_score = score0;
109 
110  // score1 > old_score?
111  uint32x4x2_t mask =
112  {
113  {
114  vcgtq_f32(score1.val[0], old_score.val[0]),
115  vcgtq_f32(score1.val[1], old_score.val[1])
116  }
117  };
118 
119  phase.val[0] = vbslq_f32(mask.val[0], one, phase.val[0]);
120  phase.val[1] = vbslq_f32(mask.val[1], one, phase.val[1]);
121  old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]);
122  old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]);
123 
124  // score2 > old_score?
125  mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]);
126  mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]);
127 
128  phase.val[0] = vbslq_f32(mask.val[0], two, phase.val[0]);
129  phase.val[1] = vbslq_f32(mask.val[1], two, phase.val[1]);
130  old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]);
131  old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]);
132 
133  // score3 > old_score?
134  mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]);
135  mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]);
136 
137  phase.val[0] = vbslq_f32(mask.val[0], three, phase.val[0]);
138  phase.val[1] = vbslq_f32(mask.val[1], three, phase.val[1]);
139  old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]);
140  old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]);
141 
142  // Convert from float32x4_t to uint8x8_t
143  return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])),
144  vmovn_u32(vcvtq_u32_f32(phase.val[1]))));
145 }
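
// --- Illustrative sketch, not part of the original file ---------------------------------
// The vectorized phase_quantization() above picks, for each pixel, the direction prototype
// (0°, 45°, 90°, 135°) whose unit vector has the largest absolute projection of the
// gradient (gx, gy); const45 is cos(45°) = sin(45°) ≈ 0.70710678. A scalar equivalent, as a
// reference when reading the NEON code (the helper name below is ours, not the library's):
inline uint8_t phase_quantization_scalar(float gx, float gy)
{
    const auto abs_f = [](float v) { return v < 0.f ? -v : v; };
    constexpr float c45 = 0.70710678118655f;
    const float score[4] =
    {
        abs_f(gx),                  // score0: projection onto ( 1, 0 )     -> 0°
        abs_f(gx * c45 + gy * c45), // score1: projection onto ( c45, c45 ) -> 45°
        abs_f(gy),                  // score2: projection onto ( 0, 1 )     -> 90°
        abs_f(gy * c45 - gx * c45)  // score3: projection onto ( -c45, c45 )-> 135°
    };
    // Keep the first maximum, as the vector code only replaces on a strictly greater score
    uint8_t best = 0;
    for(uint8_t i = 1; i < 4; ++i)
    {
        if(score[i] > score[best])
        {
            best = i;
        }
    }
    return best; // 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
}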
146 
147 /* Computes the gradient phase if gradient_size = 3 or 5. The output is quantized.
148  * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
149  *
150  * @param[in] gx Gx component
151  * @param[in] gy Gy component
152  *
153  * @return quantized phase for 8 pixels
154  */
155 inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy)
156 {
157  // Convert to float
158  const float32x4x2_t gx_f32 =
159  {
160  {
161  vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
162  vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
163  }
164  };
165 
166  const float32x4x2_t gy_f32 =
167  {
168  {
169  vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
170  vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
171  }
172  };
173 
174  return phase_quantization(gx_f32, gy_f32);
175 }
176 
177 /* Computes the gradient phase if gradient_size = 7. The output is quantized.
178  * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
179  *
180  * @param[in] gx Gx component
181  * @param[in] gy Gy component
182  *
183  * @return quantized phase for 8 pixels
184  */
185 inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
186 {
187  // Convert to float
188  const float32x4x2_t gx_f32 =
189  {
190  {
191  vcvtq_f32_s32(gx.val[0]),
192  vcvtq_f32_s32(gx.val[1])
193  }
194  };
195 
196  const float32x4x2_t gy_f32 =
197  {
198  {
199  vcvtq_f32_s32(gy.val[0]),
200  vcvtq_f32_s32(gy.val[1])
201  }
202  };
203 
204  return phase_quantization(gx_f32, gy_f32);
205 }
206 
207 /* Computes the magnitude using the L1-norm type if gradient_size = 3 or 5
208  *
209  * @param[in] gx Gx component
210  * @param[in] gy Gy component
211  *
212  * @return magnitude for 8 pixels
213  */
214 inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy)
215 {
216  return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)),
217  vreinterpretq_u16_s16(vabsq_s16(gy)));
218 }
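
// Note (not part of the original file): the L1 magnitude is simply |gx| + |gy| per pixel,
// e.g. gx = -3, gy = 4 gives 3 + 4 = 7; the absolute values are reinterpreted as U16 so the
// sum is accumulated unsigned.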
219 
220 /* Computes the magnitude using the L1-norm type if gradient_size = 7
221  *
222  * @param[in] gx Gx component
223  * @param[in] gy Gy component
224  *
225  * @return magnitude for 8 pixels
226  */
227 inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
228 {
229  const uint32x4x2_t gx_abs =
230  {
231  {
232  vreinterpretq_u32_s32(vabsq_s32(gx.val[0])),
233  vreinterpretq_u32_s32(vabsq_s32(gx.val[1]))
234  }
235  };
236 
237  const uint32x4x2_t gy_abs =
238  {
239  {
240  vreinterpretq_u32_s32(vabsq_s32(gy.val[0])),
241  vreinterpretq_u32_s32(vabsq_s32(gy.val[1]))
242  }
243  };
244 
245  const uint32x4x2_t output =
246  {
247  {
248  vaddq_u32(gx_abs.val[0], gy_abs.val[0]),
249  vaddq_u32(gx_abs.val[1], gy_abs.val[1])
250  }
251  };
252 
253  return output;
254 }
255 
256 inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy)
257 {
258  // x^2 ...
259  float32x4x2_t magnitude =
260  {
261  {
262  vmulq_f32(gx.val[0], gx.val[0]),
263  vmulq_f32(gx.val[1], gx.val[1])
264  }
265  };
266 
267  // ... + y^2
268  magnitude.val[0] = vmlaq_f32(magnitude.val[0], gy.val[0], gy.val[0]);
269  magnitude.val[1] = vmlaq_f32(magnitude.val[1], gy.val[1], gy.val[1]);
270 
271  // sqrt(...)
272  magnitude.val[0] = vmulq_f32(vrsqrteq_f32(magnitude.val[0]), magnitude.val[0]);
273  magnitude.val[1] = vmulq_f32(vrsqrteq_f32(magnitude.val[1]), magnitude.val[1]);
274 
275  return magnitude;
276 }
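
// Note (not part of the original file): vrsqrteq_f32 returns a low-precision estimate of
// 1/sqrt(x) and no Newton-Raphson refinement step is applied here, so the two lines above
// compute sqrt(gx^2 + gy^2) approximately as (gx^2 + gy^2) * rsqrte(gx^2 + gy^2). An exact
// scalar reference would be:
//
//     const float sum       = gx * gx + gy * gy;
//     const float magnitude = sqrtf(sum); // <cmath>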
277 
278 /* Computes the magnitude using L2-norm if gradient_size = 3 or 5
279  *
280  * @param[in] gx Gx component
281  * @param[in] gy Gy component
282  *
283  * @return magnitude for 8 pixels
284  */
285 inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy)
286 {
287  // Compute magnitude using L2 normalization
288  const float32x4x2_t gx2 =
289  {
290  {
291  vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
292  vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
293  }
294  };
295 
296  const float32x4x2_t gy2 =
297  {
298  {
299  vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
300  vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
301  }
302  };
303 
304  const float32x4x2_t magnitude = mag_l2(gx2, gy2);
305 
306  // Store magnitude - Convert to uint16x8
307  return vcombine_u16(vmovn_u32(vcvtq_u32_f32(magnitude.val[0])),
308  vmovn_u32(vcvtq_u32_f32(magnitude.val[1])));
309 }
310 
311 /* Computes the magnitude using L2-norm if gradient_size = 7
312  *
313  * @param[in] gx Gx component
314  * @param[in] gy Gy component
315  *
316  * @return magnitude for 8 pixels
317  */
318 inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
319 {
320  // Compute magnitude using L2 normalization
321  float32x4x2_t gx2 =
322  {
323  {
324  vcvtq_f32_s32(gx.val[0]),
325  vcvtq_f32_s32(gx.val[1])
326  }
327  };
328 
329  float32x4x2_t gy2 =
330  {
331  {
332  vcvtq_f32_s32(gy.val[0]),
333  vcvtq_f32_s32(gy.val[1])
334  }
335  };
336 
337  const float32x4x2_t magnitude = mag_l2(gx2, gy2);
338  const uint32x4x2_t mag32 =
339  {
340  {
341  vcvtq_u32_f32(magnitude.val[0]),
342  vcvtq_u32_f32(magnitude.val[1])
343  }
344  };
345 
346  return mag32;
347 }
348 
349 /* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm
350  *
351  * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16
352  * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16
353  * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
354  * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
355  */
356 void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
357 {
358  const auto gx = static_cast<const int16_t *__restrict>(gx_ptr);
359  const auto gy = static_cast<const int16_t *__restrict>(gy_ptr);
360  const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
361  const auto phase = static_cast<uint8_t *__restrict>(phase_ptr);
362 
363  const int16x8x4_t gx_val =
364  {
365  {
366  vld1q_s16(gx),
367  vld1q_s16(gx + 8),
368  vld1q_s16(gx + 16),
369  vld1q_s16(gx + 24)
370  }
371  };
372 
373  const int16x8x4_t gy_val =
374  {
375  {
376  vld1q_s16(gy),
377  vld1q_s16(gy + 8),
378  vld1q_s16(gy + 16),
379  vld1q_s16(gy + 24)
380  }
381  };
382 
383  // Compute and store phase
384  vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
385  vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
386  vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
387  vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));
388 
389  // Compute and store magnitude using L1 normalization
390  vst1q_u16(magnitude + 0, mag_l1_S16_S16(gx_val.val[0], gy_val.val[0]));
391  vst1q_u16(magnitude + 8, mag_l1_S16_S16(gx_val.val[1], gy_val.val[1]));
392  vst1q_u16(magnitude + 16, mag_l1_S16_S16(gx_val.val[2], gy_val.val[2]));
393  vst1q_u16(magnitude + 24, mag_l1_S16_S16(gx_val.val[3], gy_val.val[3]));
394 }
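
// Note (not part of the original file): each call above consumes 4 x 8 = 32 horizontally
// adjacent S16 values per gradient component, matching the 32 elements processed per
// iteration configured in NEGradientKernel::configure below.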
395 
396 /* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm
397  *
398  * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16
399  * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16
400  * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
401  * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
402  */
403 void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
404 {
405  const auto gx = static_cast<const int16_t *__restrict>(gx_ptr);
406  const auto gy = static_cast<const int16_t *__restrict>(gy_ptr);
407  const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
408  const auto phase = static_cast<uint8_t *__restrict>(phase_ptr);
409 
410  const int16x8x4_t gx_val =
411  {
412  {
413  vld1q_s16(gx),
414  vld1q_s16(gx + 8),
415  vld1q_s16(gx + 16),
416  vld1q_s16(gx + 24)
417  }
418  };
419 
420  const int16x8x4_t gy_val =
421  {
422  {
423  vld1q_s16(gy),
424  vld1q_s16(gy + 8),
425  vld1q_s16(gy + 16),
426  vld1q_s16(gy + 24)
427  }
428  };
429 
430  // Compute and store phase
431  vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
432  vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
433  vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
434  vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));
435 
436  // Compute and store magnitude using L2 normalization
437  vst1q_u16(magnitude + 0, mag_l2_S16_S16(gx_val.val[0], gy_val.val[0]));
438  vst1q_u16(magnitude + 8, mag_l2_S16_S16(gx_val.val[1], gy_val.val[1]));
439  vst1q_u16(magnitude + 16, mag_l2_S16_S16(gx_val.val[2], gy_val.val[2]));
440  vst1q_u16(magnitude + 24, mag_l2_S16_S16(gx_val.val[3], gy_val.val[3]));
441 }
442 
443 /* Gradient function used when the gradient size = 7 and when the norm_type = L1-norm
444  *
445  * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32
446  * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32
447  * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
448  * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
449  */
450 void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
451 {
452  auto gx = static_cast<const int32_t *__restrict>(gx_ptr);
453  auto gy = static_cast<const int32_t *__restrict>(gy_ptr);
454  auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
455  auto phase = static_cast<uint8_t *__restrict>(phase_ptr);
456 
457  // Process low and high parts
458  for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
459  {
460  const int32x4x2_t gx0 =
461  {
462  {
463  vld1q_s32(gx + 0),
464  vld1q_s32(gx + 4)
465  }
466  };
467 
468  const int32x4x2_t gx1 =
469  {
470  {
471  vld1q_s32(gx + 8),
472  vld1q_s32(gx + 12)
473  }
474  };
475 
476  const int32x4x2_t gy0 =
477  {
478  {
479  vld1q_s32(gy + 0),
480  vld1q_s32(gy + 4)
481  }
482  };
483 
484  const int32x4x2_t gy1 =
485  {
486  {
487  vld1q_s32(gy + 8),
488  vld1q_s32(gy + 12)
489  }
490  };
491 
492  // Compute and store phase
493  vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
494  vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));
495 
496  // Compute magnitude using L1 normalization
497  const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0);
498  const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1);
499 
500  // Store magnitude
501  vst1q_u32(magnitude + 0, mag0.val[0]);
502  vst1q_u32(magnitude + 4, mag0.val[1]);
503  vst1q_u32(magnitude + 8, mag1.val[0]);
504  vst1q_u32(magnitude + 12, mag1.val[1]);
505  }
506 }
507 
508 /* Gradient function used when the gradient size = 7 and when the norm_type = L2-norm
509  *
510  * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32
511  * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32
512  * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
513  * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
514  */
515 void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
516 {
517  auto gx = static_cast<const int32_t *__restrict>(gx_ptr);
518  auto gy = static_cast<const int32_t *__restrict>(gy_ptr);
519  auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
520  auto phase = static_cast<uint8_t *__restrict>(phase_ptr);
521 
522  // Process low and high parts
523  for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
524  {
525  const int32x4x2_t gx0 =
526  {
527  {
528  vld1q_s32(gx + 0),
529  vld1q_s32(gx + 4)
530  }
531  };
532 
533  const int32x4x2_t gx1 =
534  {
535  {
536  vld1q_s32(gx + 8),
537  vld1q_s32(gx + 12)
538  }
539  };
540 
541  const int32x4x2_t gy0 =
542  {
543  {
544  vld1q_s32(gy + 0),
545  vld1q_s32(gy + 4)
546  }
547  };
548 
549  const int32x4x2_t gy1 =
550  {
551  {
552  vld1q_s32(gy + 8),
553  vld1q_s32(gy + 12)
554  }
555  };
556 
557  // Compute and store phase
558  vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
559  vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));
560 
561  // Compute magnitude using L2 normalization
562  const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0);
563  const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1);
564 
565  // Store magnitude
566  vst1q_u32(magnitude + 0, mag0.val[0]);
567  vst1q_u32(magnitude + 4, mag0.val[1]);
568  vst1q_u32(magnitude + 8, mag1.val[0]);
569  vst1q_u32(magnitude + 12, mag1.val[1]);
570  }
571 }
572 
573 /* Computes non-maxima suppression and hysteresis when the gradient size = 3 or 5
574  *
575  * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U16
576  * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8
577  * @param[out] output_ptr Pointer to output image. Data type supported U8
578  * @param[in] stride_mag Stride of magnitude image
579  * @param[in] lower_thr Lower threshold used for the hysteresis
580  * @param[in] upper_thr Upper threshold used for the hysteresis
581  */
582 void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
583  const int32_t upper_thr)
584 {
585  const auto magnitude = static_cast<const uint16_t *__restrict>(magnitude_ptr);
586  const auto phase = static_cast<const uint8_t *__restrict>(phase_ptr);
587  const auto output = static_cast<uint8_t *__restrict>(output_ptr);
588 
589  // Get magnitude and phase of the centre pixels
590  uint16x8_t mc = vld1q_u16(magnitude);
591 
592  // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
593  const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));
594 
595  // 0 degree
596  const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1);
597  const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1);
598  uint16x8_t mask0 = vceqq_u16(pc16, vdupq_n_u16(0));
599  mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_0));
600  mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_1));
601 
602  // 45 degree
603  const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1);
604  const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1);
605  uint16x8_t mask1 = vceqq_u16(pc16, vdupq_n_u16(1));
606  mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_0));
607  mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_1));
608 
609  // 90 degree
610  const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag);
611  const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag);
612  uint16x8_t mask2 = vceqq_u16(pc16, vdupq_n_u16(2));
613  mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_0));
614  mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_1));
615 
616  // 135 degree
617  const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1);
618  const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1);
619  uint16x8_t mask3 = vceqq_u16(pc16, vdupq_n_u16(3));
620  mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_0));
621  mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_1));
622 
623  // Merge masks
624  mask0 = vorrq_u16(mask0, mask1);
625  mask2 = vorrq_u16(mask2, mask3);
626  mask0 = vorrq_u16(mask0, mask2);
627 
628  mc = vbslq_u16(mask0, mc, vdupq_n_u16(0));
629 
630  // mc > upper_thr
631  mask0 = vcgtq_u16(mc, vdupq_n_u16(upper_thr));
632 
633  // mc <= lower_thr
634  mask1 = vcleq_u16(mc, vdupq_n_u16(lower_thr));
635 
636  // mc <= upper_thr && mc > lower_thr
637  mask2 = vcleq_u16(mc, vdupq_n_u16(upper_thr));
638  mask2 = vandq_u16(mask2, vcgtq_u16(mc, vdupq_n_u16(lower_thr)));
639 
640  mc = vbslq_u16(mask0, vdupq_n_u16(EDGE), mc);
641  mc = vbslq_u16(mask1, vdupq_n_u16(NO_EDGE), mc);
642  mc = vbslq_u16(mask2, vdupq_n_u16(MAYBE), mc);
643 
644  vst1_u8(output, vmovn_u16(mc));
645 }
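
// Illustrative per-pixel sketch (not part of the original file) of what the vector code
// above does: the centre magnitude survives only if it is strictly greater than both
// neighbours along its quantized gradient direction, and the surviving value is then
// classified with the two hysteresis thresholds (m is the centre magnitude pointer,
// stride the row stride in elements):
//
//     uint16_t kept = 0;
//     switch(phase)
//     {
//         case 0: if(mc > m[-1]          && mc > m[+1])          kept = mc; break; // 0°
//         case 1: if(mc > m[-stride - 1] && mc > m[+stride + 1]) kept = mc; break; // 45°
//         case 2: if(mc > m[-stride]     && mc > m[+stride])     kept = mc; break; // 90°
//         case 3: if(mc > m[-stride + 1] && mc > m[+stride - 1]) kept = mc; break; // 135°
//     }
//     output = (kept > upper_thr) ? EDGE : (kept <= lower_thr) ? NO_EDGE : MAYBE;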
646 
647 inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr)
648 {
649  // Phase for 4 pixels
650  const uint32x4_t pc32 = vmovl_u16(pc);
651 
652  // Get magnitude for 4 pixels
653  uint32x4_t mc = vld1q_u32(input);
654 
655  // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
656  // 0 degree
657  const uint32x4_t mk0_0 = vld1q_u32(input - 1);
658  const uint32x4_t mk0_1 = vld1q_u32(input + 1);
659  uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
660  mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
661  mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));
662 
663  // 45 degree
664  const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1);
665  const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1);
666  uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1));
667  mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
668  mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));
669 
670  // 90 degree
671  const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag);
672  const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag);
673  uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2));
674  mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
675  mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));
676 
677  // 135 degree
678  const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1);
679  const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1);
680  uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3));
681  mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
682  mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));
683 
684  // Merge masks
685  mask0 = vorrq_u32(mask0, mask1);
686  mask2 = vorrq_u32(mask2, mask3);
687  mask0 = vorrq_u32(mask0, mask2);
688 
689  mc = vbslq_u32(mask0, mc, vdupq_n_u32(0));
690 
691  // mc > upper_thr
692  mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr));
693 
694  // mc <= lower_thr
695  mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr));
696 
697  // mc <= upper_thr && mc > lower_thr
698  mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr));
699  mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr)));
700 
701  mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc);
702  mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc);
703  mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc);
704 
705  return vmovn_u32(mc);
706 }
707 
708 /* Computes non-maxima suppression and hysteresis when the gradient_size = 7
709  *
710  * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U32
711  * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8
712  * @param[out] output_ptr Pointer to destination image. Data type supported U8
713  * @param[in] stride_mag Stride of magnitude image
714  * @param[in] lower_thr Lower threshold used for the hysteresis
715  * @param[in] upper_thr Upper threshold used for the hysteresis
716  */
717 void non_max_suppression_U32_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
718  const int32_t upper_thr)
719 {
720  const auto magnitude = static_cast<const uint32_t *__restrict>(magnitude_ptr);
721  const auto phase = static_cast<const uint8_t *__restrict>(phase_ptr);
722  const auto output = static_cast<uint8_t *__restrict>(output_ptr);
723 
724  // Get phase for 8 pixels
725  const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));
726 
727  // Compute non-maxima suppression
728  const uint16x4x2_t res =
729  {
730  {
731  non_max_U32_helper(magnitude, vget_low_u16(pc16), stride_mag, lower_thr, upper_thr),
732  non_max_U32_helper(magnitude + 4, vget_high_u16(pc16), stride_mag, lower_thr, upper_thr)
733  }
734  };
735 
736  // Store result
737  vst1_u8(output, vmovn_u16(vcombine_u16(res.val[0], res.val[1])));
738 }
739 
740 /* Computes edge tracing recursively; called by edge_trace_U8_U8
741  *
742  * @param[in] input Pointer to source image. Data type supported U8
743  * @param[out] output Pointer to destination image. Data type supported U8
744  * @param[in] input_stride Stride of the input image
745  * @param[in] output_stride Stride of the output image
746  */
747 void edge_trace_recursive_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
748 {
749  // Look for MAYBE pixels in 8 directions
750  *output = EDGE;
751 
752  // (-1, 0)
753  uint8_t pixel = *(input - 1);
754 
755  if(pixel == MAYBE)
756  {
757  // Touched a MAYBE point. MAYBE becomes EDGE
758  *(input - 1) = EDGE;
759 
760  edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
761  }
762 
763  // (+1, 0)
764  pixel = *(input + 1);
765 
766  if(pixel == MAYBE)
767  {
768  // Touched a MAYBE point. MAYBE becomes EDGE
769  *(input + 1) = EDGE;
770 
771  edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
772  }
773 
774  input -= input_stride;
775  output -= output_stride;
776 
777  // (-1, -1)
778  pixel = *(input - 1);
779 
780  if(pixel == MAYBE)
781  {
782  // Touched a MAYBE point. MAYBE becomes EDGE
783  *(input - 1) = EDGE;
784 
785  edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
786  }
787 
788  // (0, -1)
789  pixel = *input;
790 
791  if(pixel == MAYBE)
792  {
793  // Touched a MAYBE point. MAYBE becomes EDGE
794  *input = EDGE;
795 
796  edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
797  }
798 
799  // (+1, -1)
800  pixel = *(input + 1);
801 
802  if(pixel == MAYBE)
803  {
804  // Touched a MAYBE point. MAYBE becomes EDGE
805  *(input + 1) = EDGE;
806 
807  edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
808  }
809 
810  input += input_stride * 2;
811  output += output_stride * 2;
812 
813  // (-1, +1)
814  pixel = *(input - 1);
815 
816  if(pixel == MAYBE)
817  {
818  // Touched a MAYBE point. MAYBE becomes EDGE
819  *(input - 1) = EDGE;
820 
821  edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
822  }
823 
824  // (0, +1)
825  pixel = *input;
826 
827  if(pixel == MAYBE)
828  {
829  // Touched a MAYBE point. MAYBE becomes EDGE
830  *input = EDGE;
831 
832  edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
833  }
834 
835  // (+1, +1)
836  pixel = *(input + 1);
837 
838  if(pixel == MAYBE)
839  {
840  // Touched a MAYBE point. MAYBE becomes EDGE
841  *(input + 1) = EDGE;
842 
843  edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
844  }
845 }
846 
847 /* Computes edge tracing
848  *
849  * @param[in] input Pointer to source image. Data type supported U8
850  * @param[out] output Pointer to destination image. Data type supported U8
851  * @param[in] input_stride Stride of the input image
852  * @param[in] output_stride Stride of the output image
853  */
854 void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
855 {
856  if(*input == NO_EDGE)
857  {
858  *output = NO_EDGE;
859  }
860  // Check if EDGE and not yet touched
861  else if((*input == EDGE) && (*output == NO_EDGE))
862  {
863  edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
864  }
865 }
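
// Note (not part of the original file): edge_trace_U8_U8 implements the hysteresis stage.
// A trace is started only from pixels already marked EDGE (strong edges) whose output has
// not been written yet; edge_trace_recursive_U8_U8 then walks the 8-connected neighbourhood
// depth-first, promoting every reachable MAYBE pixel to EDGE in both input and output.
// MAYBE pixels that are never reached from a strong edge are left unpromoted.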
866 } // namespace
867 
868 NEGradientKernel::~NEGradientKernel() = default;
869 
870 NEGradientKernel::NEGradientKernel()
871  : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
872 {
873 }
874 
875 void NEGradientKernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type)
876 {
877  ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase);
878 
879  set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape());
880  set_shape_if_empty(*phase->info(), gx->info()->tensor_shape());
881 
882  Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32;
883  set_format_if_unknown(*magnitude->info(), magnitude_format);
884  set_format_if_unknown(*phase->info(), Format::U8);
885 
886  ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase);
892  ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy");
893 
894  _gx = gx;
895  _gy = gy;
896  _magnitude = magnitude;
897  _phase = phase;
898 
899  if(_gx->info()->data_type() == DataType::S16)
900  {
901  if(norm_type == 1)
902  {
903  _func = &mag_phase_l1norm_S16_S16_U16_U8;
904  }
905  else
906  {
907  _func = &mag_phase_l2norm_S16_S16_U16_U8;
908  }
909  }
910  else
911  {
912  if(norm_type == 1)
913  {
914  _func = &mag_phase_l1norm_S32_S32_U32_U8;
915  }
916  else
917  {
918  _func = &mag_phase_l2norm_S32_S32_U32_U8;
919  }
920  }
921 
922  constexpr unsigned int num_elems_processed_per_iteration = 32;
923 
924  // Configure kernel window
925  Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));
926 
927  AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration);
928  AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration);
929  AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration);
930  AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
931 
932  ARM_COMPUTE_UNUSED(update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access));
933 
934  mag_access.set_valid_region(win, _gx->info()->valid_region());
935  phase_access.set_valid_region(win, _gx->info()->valid_region());
936 
937  INEKernel::configure(win);
938 }
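
// Note (not part of the original file): S16 gradient inputs (gradient size 3 or 5) are
// paired with a U16 magnitude output and S32 inputs (gradient size 7) with a U32 one, as
// selected above; norm_type == 1 picks the L1 functions, any other value the L2 functions.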
939 
940 void NEGradientKernel::run(const Window &window, const ThreadInfo &info)
941 {
942  ARM_COMPUTE_UNUSED(info);
943  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
944  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
945  ARM_COMPUTE_ERROR_ON(_func == nullptr);
946  Iterator gx(_gx, window);
947  Iterator gy(_gy, window);
948  Iterator magnitude(_magnitude, window);
949  Iterator phase(_phase, window);
950 
951  execute_window_loop(window, [&](const Coordinates &)
952  {
953  (*_func)(gx.ptr(), gy.ptr(), magnitude.ptr(), phase.ptr());
954  },
955  gx, gy, magnitude, phase);
956 }
957 
958 NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default;
959 NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel()
960  : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0)
961 {
962 }
963 
964 BorderSize NEEdgeNonMaxSuppressionKernel::border_size() const
965 {
966  return BorderSize(1);
967 }
968 
969 void NEEdgeNonMaxSuppressionKernel::configure(const ITensor *magnitude, const ITensor *phase, ITensor *output,
970  int32_t upper_thr, int32_t lower_thr, bool border_undefined)
971 {
972  ARM_COMPUTE_ERROR_ON_NULLPTR(magnitude, phase, output);
973 
974  set_shape_if_empty(*output->info(), magnitude->info()->tensor_shape());
975 
978 
979  ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(magnitude, phase, output);
984 
985  _magnitude = magnitude;
986  _phase = phase;
987  _output = output;
988 
989  switch(_magnitude->info()->data_type())
990  {
991  case DataType::U16:
992  _func = &non_max_suppression_U16_U8_U8;
993  break;
994  case DataType::U32:
995  _func = &non_max_suppression_U32_U8_U8;
996  break;
997  default:
998  ARM_COMPUTE_ERROR("Unsupported data type!");
999  }
1000 
1001  // Set thresholds
1002  _lower_thr = lower_thr;
1003  _upper_thr = upper_thr;
1004 
1005  constexpr unsigned int num_elems_processed_per_iteration = 8;
1006  constexpr unsigned int num_elems_read_per_iteration = 10;
1007  constexpr unsigned int num_rows_read_per_iteration = 3;
1008 
1009  // Configure kernel window
1010  Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
1011 
1012  AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
1013  AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
1014  AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);
1015 
1016  update_window_and_padding(win, mag_access, phase_access, output_access);
1017 
1018  output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());
1019 
1020  INEKernel::configure(win);
1021 }
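
// Note (not part of the original file): suppressing one block of 8 output pixels reads a
// 3-row x 10-element magnitude neighbourhood (one extra element and one extra row on each
// side for the directional comparisons), hence the rectangular access window above and the
// border of 1 reported by border_size().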
1022 
1023 void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo &info)
1024 {
1025  ARM_COMPUTE_UNUSED(info);
1026  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
1027  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
1028  ARM_COMPUTE_ERROR_ON(_func == nullptr);
1029  Iterator magnitude(_magnitude, window);
1030  Iterator phase(_phase, window);
1031  Iterator output(_output, window);
1032 
1033  const size_t input1_stride = _magnitude->info()->strides_in_bytes()[1];
1034  const size_t input1_stride_ushort = input1_stride / data_size_from_type(_magnitude->info()->data_type());
1035 
1036  execute_window_loop(window, [&](const Coordinates &)
1037  {
1038  (*_func)(magnitude.ptr(), phase.ptr(), output.ptr(), input1_stride_ushort, _lower_thr, _upper_thr);
1039  },
1040  magnitude, phase, output);
1041 }
1042 
1043 NEEdgeTraceKernel::~NEEdgeTraceKernel() = default;
1044 NEEdgeTraceKernel::NEEdgeTraceKernel()
1045  : _input(nullptr), _output(nullptr)
1046 {
1047 }
1048 
1049 BorderSize NEEdgeTraceKernel::border_size() const
1050 {
1051  return BorderSize(1);
1052 }
1053 
1054 bool NEEdgeTraceKernel::is_parallelisable() const
1055 {
1056  return false;
1057 }
1058 
1059 void NEEdgeTraceKernel::configure(ITensor *input, ITensor *output)
1060 {
1061  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
1062 
1063  set_shape_if_empty(*output->info(), input->info()->tensor_shape());
1064 
1065  set_format_if_unknown(*input->info(), Format::U8);
1066  set_format_if_unknown(*output->info(), Format::U8);
1067 
1072 
1073  _input = input;
1074  _output = output;
1075 
1076  constexpr unsigned int num_elems_processed_per_iteration = 1;
1077 
1078  // Configure kernel window
1079  Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
1080 
1081  const ValidRegion &input_valid_region = input->info()->valid_region();
1082  const ValidRegion &output_valid_region = output->info()->valid_region();
1083 
1084  // Reads can occur within the valid region of the input + border
1085  AccessWindowStatic input_access(input->info(),
1086  input_valid_region.anchor[0] - border_size().left,
1087  input_valid_region.anchor[1] - border_size().top,
1088  input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
1089  input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
1090 
1091  // Writes can occur within the valid region of the output + border
1092  AccessWindowStatic output_access(output->info(),
1093  output_valid_region.anchor[0] - border_size().left,
1094  output_valid_region.anchor[1] - border_size().top,
1095  output_valid_region.anchor[0] + output_valid_region.shape[0] + border_size().right,
1096  output_valid_region.anchor[1] + output_valid_region.shape[1] + border_size().bottom);
1097 
1098  update_window_and_padding(win, input_access, output_access);
1099 
1100  output_access.set_valid_region(win, _input->info()->valid_region());
1101 
1102  INEKernel::configure(win);
1103 }
1104 
1105 void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info)
1106 {
1107  ARM_COMPUTE_UNUSED(info);
1108  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
1109  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
1110  Iterator input(_input, window);
1111  Iterator output(_output, window);
1112 
1113  const size_t input_stride = _input->info()->strides_in_bytes()[1];
1114  const size_t output_stride = _output->info()->strides_in_bytes()[1];
1115 
1116  execute_window_loop(window, [&](const Coordinates &)
1117  {
1118  edge_trace_U8_U8(input.ptr(), output.ptr(), input_stride, output_stride);
1119  },
1120  input, output);
1121 }
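
// Illustrative usage sketch (not part of the original file): the three kernels defined in
// this file are typically chained into the Canny pipeline. Tensor allocation, border
// handling and scheduling are omitted, and the tensor/threshold names are placeholders.
//
//     NEGradientKernel              gradient;
//     NEEdgeNonMaxSuppressionKernel suppression;
//     NEEdgeTraceKernel             trace;
//
//     gradient.configure(&gx, &gy, &magnitude, &phase, 2); // any value != 1 selects the L2 norm
//     suppression.configure(&magnitude, &phase, &nonmax, upper_thr, lower_thr, false);
//     trace.configure(&nonmax, &output);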
1122 } // namespace arm_compute