// Labels used by the hysteresis stage: definite edge, candidate edge to be resolved
// by tracing, and non-edge (NO_EDGE restored here; its value 0 is implied by the
// suppression code below, which writes 0 for rejected pixels).
constexpr int NO_EDGE = 0;
constexpr int EDGE    = 255;
constexpr int MAYBE   = 127;
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // cos(45 deg) == sin(45 deg) == 1/sqrt(2)
    static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f);
    static const float32x4_t zero    = vdupq_n_f32(0.0f);
    static const float32x4_t one     = vdupq_n_f32(1.0f);
    static const float32x4_t two     = vdupq_n_f32(2.0f);
    static const float32x4_t three   = vdupq_n_f32(3.0f);
    // Scores for the four quantized directions:
    //   score0 = |gx|                 -> 0 degrees
    //   score1 = |(gx + gy)/sqrt(2)|  -> 45 degrees
    //   score2 = |gy|                 -> 90 degrees
    //   score3 = |(gy - gx)/sqrt(2)|  -> 135 degrees
    // (score0/score2 bodies restored from context)
    const float32x4x2_t score0 = { { vabsq_f32(gx.val[0]), vabsq_f32(gx.val[1]) } };
    const float32x4x2_t score2 = { { vabsq_f32(gy.val[0]), vabsq_f32(gy.val[1]) } };

    float32x4x2_t score1 = { { vmulq_f32(gy.val[0], const45), vmulq_f32(gy.val[1], const45) } };
    float32x4x2_t score3 = score1;

    score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45);
    score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45);
    score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45);
    score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45);

    score1.val[0] = vabsq_f32(score1.val[0]);
    score1.val[1] = vabsq_f32(score1.val[1]);
    score3.val[0] = vabsq_f32(score3.val[0]);
    score3.val[1] = vabsq_f32(score3.val[1]);
    // Start assuming direction 0 (phase = 0, best score = score0), then keep the
    // direction with the highest score (phase/mask initialisers restored from context).
    float32x4x2_t phase     = { { zero, zero } };
    float32x4x2_t old_score = score0;

    // Is direction 1 (45 degrees) better?
    uint32x4x2_t mask = { { vcgtq_f32(score1.val[0], old_score.val[0]), vcgtq_f32(score1.val[1], old_score.val[1]) } };

    phase.val[0]     = vbslq_f32(mask.val[0], one, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], one, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]);
    // Is direction 2 (90 degrees) better?
    mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], two, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], two, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]);

    // Is direction 3 (135 degrees) better?
    mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], three, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], three, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]);
    // Narrow the per-lane phase (0..3) down to 8-bit values
    return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])),
                                  vmovn_u32(vcvtq_u32_f32(phase.val[1]))));
}
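// Scalar sketch of the quantization above (illustrative helper, not library code):
// pick the bin whose direction best matches the gradient (gx, gy). Needs <cmath>.
inline uint8_t phase_quantization_scalar_sketch(float gx, float gy)
{
    const float c45    = 0.70710678118655f;          // 1/sqrt(2)
    const float score0 = std::fabs(gx);              // 0 degrees
    const float score1 = std::fabs(c45 * (gx + gy)); // 45 degrees
    const float score2 = std::fabs(gy);              // 90 degrees
    const float score3 = std::fabs(c45 * (gy - gx)); // 135 degrees

    uint8_t phase = 0;
    float   best  = score0;
    if(score1 > best) { best = score1; phase = 1; }
    if(score2 > best) { best = score2; phase = 2; }
    if(score3 > best) { best = score3; phase = 3; }
    return phase;
}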
// Gradient phase for S16 gradients (converted to F32 first)
inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy)
{
    const float32x4x2_t gx_f32 = { { vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))), vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx))) } };
    const float32x4x2_t gy_f32 = { { vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))), vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy))) } };

    return phase_quantization(gx_f32, gy_f32);
}
// Gradient phase for S32 gradients (converted to F32 first)
inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    const float32x4x2_t gx_f32 = { { vcvtq_f32_s32(gx.val[0]), vcvtq_f32_s32(gx.val[1]) } };
    const float32x4x2_t gy_f32 = { { vcvtq_f32_s32(gy.val[0]), vcvtq_f32_s32(gy.val[1]) } };

    return phase_quantization(gx_f32, gy_f32);
}
// L1 norm of S16 gradients: |gx| + |gy|
inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy)
{
    return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)),
                     vreinterpretq_u16_s16(vabsq_s16(gy)));
}
// L1 norm of S32 gradients: |gx| + |gy|
inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    const uint32x4x2_t gx_abs = { { vreinterpretq_u32_s32(vabsq_s32(gx.val[0])), vreinterpretq_u32_s32(vabsq_s32(gx.val[1])) } };
    const uint32x4x2_t gy_abs = { { vreinterpretq_u32_s32(vabsq_s32(gy.val[0])), vreinterpretq_u32_s32(vabsq_s32(gy.val[1])) } };

    const uint32x4x2_t output = { { vaddq_u32(gx_abs.val[0], gy_abs.val[0]), vaddq_u32(gx_abs.val[1], gy_abs.val[1]) } };

    return output; // return restored from context
}
// L2 norm of F32 gradients: sqrt(gx^2 + gy^2)
inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // gx^2 (declaration restored from context)
    float32x4x2_t magnitude = { { vmulq_f32(gx.val[0], gx.val[0]), vmulq_f32(gx.val[1], gx.val[1]) } };

    // + gy^2
    magnitude.val[0] = vmlaq_f32(magnitude.val[0], gy.val[0], gy.val[0]);
    magnitude.val[1] = vmlaq_f32(magnitude.val[1], gy.val[1], gy.val[1]);

    // sqrt(x) approximated as x * rsqrte(x)
    magnitude.val[0] = vmulq_f32(vrsqrteq_f32(magnitude.val[0]), magnitude.val[0]);
    magnitude.val[1] = vmulq_f32(vrsqrteq_f32(magnitude.val[1]), magnitude.val[1]);

    return magnitude; // return restored from context
}
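// Note (not part of the original code): vrsqrteq_f32 only gives a coarse estimate of
// 1/sqrt(x). If more precision were required, one Newton-Raphson refinement step with
// vrsqrtsq_f32 could be inserted before the multiply, for example:
//   float32x4_t est = vrsqrteq_f32(x);
//   est = vmulq_f32(est, vrsqrtsq_f32(vmulq_f32(x, est), est));
//   // sqrt(x) ~= x * est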
// L2 norm of S16 gradients
inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy)
{
    // Convert to float
    const float32x4x2_t gx2 = { { vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))), vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx))) } };
    const float32x4x2_t gy2 = { { vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))), vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy))) } };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);

    // Narrow the result to U16
    return vcombine_u16(vmovn_u32(vcvtq_u32_f32(magnitude.val[0])),
                        vmovn_u32(vcvtq_u32_f32(magnitude.val[1])));
}
// L2 norm of S32 gradients
inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    // Convert to float (declarations restored from context)
    const float32x4x2_t gx2 = { { vcvtq_f32_s32(gx.val[0]), vcvtq_f32_s32(gx.val[1]) } };
    const float32x4x2_t gy2 = { { vcvtq_f32_s32(gy.val[0]), vcvtq_f32_s32(gy.val[1]) } };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);

    const uint32x4x2_t mag32 = { { vcvtq_u32_f32(magnitude.val[0]), vcvtq_u32_f32(magnitude.val[1]) } };

    return mag32; // return restored from context
}
// Magnitude (L1 norm) and phase for a block of 32 pixels with S16 gradients
void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr,
                                      void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Load 32 S16 gradient values per input (load bodies restored from context)
    const int16x8x4_t gx_val = { { vld1q_s16(gx + 0), vld1q_s16(gx + 8), vld1q_s16(gx + 16), vld1q_s16(gx + 24) } };
    const int16x8x4_t gy_val = { { vld1q_s16(gy + 0), vld1q_s16(gy + 8), vld1q_s16(gy + 16), vld1q_s16(gy + 24) } };
    // Compute and store phase
    vst1_u8(phase + 0,  phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8,  phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using the L1 norm
    vst1q_u16(magnitude + 0,  mag_l1_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8,  mag_l1_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l1_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l1_S16_S16(gx_val.val[3], gy_val.val[3]));
}
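// Minimal usage sketch (buffer names are assumptions, not from the library): each
// call consumes 32 contiguous gradient values and produces 32 magnitudes and phases.
//   int16_t  gx[32], gy[32];   // e.g. Sobel responses
//   uint16_t mag[32];
//   uint8_t  phi[32];
//   mag_phase_l1norm_S16_S16_U16_U8(gx, gy, mag, phi);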
// Magnitude (L2 norm) and phase for a block of 32 pixels with S16 gradients
void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr,
                                      void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Load 32 S16 gradient values per input (load bodies restored from context)
    const int16x8x4_t gx_val = { { vld1q_s16(gx + 0), vld1q_s16(gx + 8), vld1q_s16(gx + 16), vld1q_s16(gx + 24) } };
    const int16x8x4_t gy_val = { { vld1q_s16(gy + 0), vld1q_s16(gy + 8), vld1q_s16(gy + 16), vld1q_s16(gy + 24) } };
    // Compute and store phase
    vst1_u8(phase + 0,  phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8,  phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using the L2 norm
    vst1q_u16(magnitude + 0,  mag_l2_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8,  mag_l2_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l2_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l2_S16_S16(gx_val.val[3], gy_val.val[3]));
}
// Magnitude (L1 norm) and phase for a block of 32 pixels with S32 gradients
void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr,
                                      void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Two iterations of 16 pixels each
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        // Load 16 S32 gradient values per input (load bodies restored from context)
        const int32x4x2_t gx0 = { { vld1q_s32(gx + 0), vld1q_s32(gx + 4) } };
        const int32x4x2_t gx1 = { { vld1q_s32(gx + 8), vld1q_s32(gx + 12) } };
        const int32x4x2_t gy0 = { { vld1q_s32(gy + 0), vld1q_s32(gy + 4) } };
        const int32x4x2_t gy1 = { { vld1q_s32(gy + 8), vld1q_s32(gy + 12) } };
        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using the L1 norm
        const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0,  mag0.val[0]);
        vst1q_u32(magnitude + 4,  mag0.val[1]);
        vst1q_u32(magnitude + 8,  mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}
// Magnitude (L2 norm) and phase for a block of 32 pixels with S32 gradients
void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr,
                                      void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Two iterations of 16 pixels each
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        // Load 16 S32 gradient values per input (load bodies restored from context)
        const int32x4x2_t gx0 = { { vld1q_s32(gx + 0), vld1q_s32(gx + 4) } };
        const int32x4x2_t gx1 = { { vld1q_s32(gx + 8), vld1q_s32(gx + 12) } };
        const int32x4x2_t gy0 = { { vld1q_s32(gy + 0), vld1q_s32(gy + 4) } };
        const int32x4x2_t gy1 = { { vld1q_s32(gy + 8), vld1q_s32(gy + 12) } };
        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using the L2 norm
        const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0,  mag0.val[0]);
        vst1q_u32(magnitude + 4,  mag0.val[1]);
        vst1q_u32(magnitude + 8,  mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}
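// Note (summary, not code from this file): the S16 variants above handle gradient
// operators whose responses fit in S16, while the S32 variants cover larger kernels
// (e.g. 7x7 Sobel) whose responses do not. NEGradientKernel::configure() further
// below selects the matching pair from the input data type.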
// Non-maxima suppression plus double thresholding for U16 magnitudes.
// Writes EDGE / MAYBE / NO_EDGE labels for 8 pixels.
void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr,
                                   void *__restrict output_ptr, const uint32_t stride_mag,
                                   const int32_t lower_thr, const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Magnitude of the 8 centre pixels
    uint16x8_t mc = vld1q_u16(magnitude);

    // Quantized phase: 0 = 0 deg, 1 = 45 deg, 2 = 90 deg, 3 = 135 deg
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));
    // 0 degrees: compare against the left and right neighbours
    const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1);
    const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1);
    uint16x8_t       mask0 = vceqq_u16(pc16, vdupq_n_u16(0));
    mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_0));
    mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_1));

    // 45 degrees: compare against the upper-left and lower-right neighbours
    const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1);
    const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1);
    uint16x8_t       mask1  = vceqq_u16(pc16, vdupq_n_u16(1));
    mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_0));
    mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_1));

    // 90 degrees: compare against the upper and lower neighbours
    const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag);
    const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag);
    uint16x8_t       mask2  = vceqq_u16(pc16, vdupq_n_u16(2));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_0));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_1));

    // 135 degrees: compare against the upper-right and lower-left neighbours
    const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1);
    const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1);
    uint16x8_t       mask3   = vceqq_u16(pc16, vdupq_n_u16(3));
    mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_0));
    mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_1));
    // Merge the direction masks: set where the pixel is a local maximum along its direction
    mask0 = vorrq_u16(mask0, mask1);
    mask2 = vorrq_u16(mask2, mask3);
    mask0 = vorrq_u16(mask0, mask2);

    // Suppress non-maxima, then apply the double threshold
    mc = vbslq_u16(mask0, mc, vdupq_n_u16(0));

    mask0 = vcgtq_u16(mc, vdupq_n_u16(upper_thr));                   // strong edge
    mask1 = vcleq_u16(mc, vdupq_n_u16(lower_thr));                   // no edge
    mask2 = vcleq_u16(mc, vdupq_n_u16(upper_thr));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, vdupq_n_u16(lower_thr))); // weak edge

    mc = vbslq_u16(mask0, vdupq_n_u16(EDGE), mc);
    mc = vbslq_u16(mask1, vdupq_n_u16(NO_EDGE), mc);
    mc = vbslq_u16(mask2, vdupq_n_u16(MAYBE), mc);

    vst1_u8(output, vmovn_u16(mc));
}
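// Scalar sketch (illustrative helper, not library code) of the per-pixel decision made
// by the suppression routines: neigh_a / neigh_b are the two neighbours picked by the
// quantized phase.
inline uint8_t canny_classify_sketch(uint32_t mag, uint32_t neigh_a, uint32_t neigh_b,
                                     uint32_t lower_thr, uint32_t upper_thr)
{
    if(!(mag > neigh_a && mag > neigh_b))
    {
        return NO_EDGE; // not a local maximum along the gradient direction
    }
    if(mag > upper_thr)
    {
        return EDGE; // strong edge
    }
    if(mag <= lower_thr)
    {
        return NO_EDGE; // rejected
    }
    return MAYBE; // weak edge, resolved later by edge tracing
}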
// Same non-maxima suppression and thresholding, for 4 U32 magnitudes
inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc, const uint32_t stride_mag,
                                     const int32_t lower_thr, const int32_t upper_thr)
{
    // Phase of the 4 pixels
    const uint32x4_t pc32 = vmovl_u16(pc);

    // Magnitude of the 4 centre pixels
    uint32x4_t mc = vld1q_u32(input);
    // 0 degrees
    const uint32x4_t mk0_0 = vld1q_u32(input - 1);
    const uint32x4_t mk0_1 = vld1q_u32(input + 1);
    uint32x4_t       mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
    mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
    mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));

    // 45 degrees
    const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1);
    const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1);
    uint32x4_t       mask1  = vceqq_u32(pc32, vdupq_n_u32(1));
    mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
    mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));

    // 90 degrees
    const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag);
    const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag);
    uint32x4_t       mask2  = vceqq_u32(pc32, vdupq_n_u32(2));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));

    // 135 degrees
    const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1);
    const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1);
    uint32x4_t       mask3   = vceqq_u32(pc32, vdupq_n_u32(3));
    mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
    mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));
    // Merge masks, suppress non-maxima, then apply the double threshold
    mask0 = vorrq_u32(mask0, mask1);
    mask2 = vorrq_u32(mask2, mask3);
    mask0 = vorrq_u32(mask0, mask2);

    mc = vbslq_u32(mask0, mc, vdupq_n_u32(0));

    mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr));                   // strong edge
    mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr));                   // no edge
    mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr))); // weak edge

    mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc);
    mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc);
    mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc);

    return vmovn_u32(mc);
}
// Non-maxima suppression plus double thresholding for U32 magnitudes (8 pixels)
void non_max_suppression_U32_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr,
                                   void *__restrict output_ptr, const uint32_t stride_mag,
                                   const int32_t lower_thr, const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint32_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Phase of the 8 pixels
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));

    // Process in two batches of 4 pixels
    const uint16x4x2_t res =
    {
        {
            non_max_U32_helper(magnitude, vget_low_u16(pc16), stride_mag, lower_thr, upper_thr),
            non_max_U32_helper(magnitude + 4, vget_high_u16(pc16), stride_mag, lower_thr, upper_thr)
        }
    };

    // Store the 8 labels
    vst1_u8(output, vmovn_u16(vcombine_u16(res.val[0], res.val[1])));
}
// Recursive hysteresis tracing: mark the current pixel as an edge, then follow each
// 8-connected neighbour labelled MAYBE. The "if(pixel == MAYBE)" guards and the
// promotion of each neighbour to EDGE are restored from context; promoting before
// recursing prevents revisiting pixels.
void edge_trace_recursive_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output,
                                const int32_t input_stride, const int32_t output_stride)
{
    *output = EDGE;

    // Left and right neighbours
    uint8_t pixel = *(input - 1);
    if(pixel == MAYBE) { *(input - 1) = EDGE; edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); }
    pixel = *(input + 1);
    if(pixel == MAYBE) { *(input + 1) = EDGE; edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); }

    // Row above
    input -= input_stride;
    output -= output_stride;
    pixel = *(input - 1);
    if(pixel == MAYBE) { *(input - 1) = EDGE; edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); }
    pixel = *input;
    if(pixel == MAYBE) { *input = EDGE; edge_trace_recursive_U8_U8(input, output, input_stride, output_stride); }
    pixel = *(input + 1);
    if(pixel == MAYBE) { *(input + 1) = EDGE; edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); }

    // Row below
    input += input_stride * 2;
    output += output_stride * 2;
    pixel = *(input - 1);
    if(pixel == MAYBE) { *(input - 1) = EDGE; edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); }
    pixel = *input;
    if(pixel == MAYBE) { *input = EDGE; edge_trace_recursive_U8_U8(input, output, input_stride, output_stride); }
    pixel = *(input + 1);
    if(pixel == MAYBE) { *(input + 1) = EDGE; edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); }
}
// Hysteresis entry point: non-edges are written out; untraced strong edges start a recursive trace.
void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output,
                      const int32_t input_stride, const int32_t output_stride)
{
    if(*input == NO_EDGE)
    {
        *output = NO_EDGE; // body restored from context
    }
    else if((*input == EDGE) && (*output == NO_EDGE))
    {
        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }
}
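// Driver sketch (illustrative only; the real NEEdgeTraceKernel::run below uses the
// Window/Iterator machinery): every pixel of the thresholded image is visited once,
// so any MAYBE pixel connected to a strong edge ends up as EDGE in the output.
//   for(int y = 1; y < height - 1; ++y)
//       for(int x = 1; x < width - 1; ++x)
//           edge_trace_U8_U8(in + y * in_stride + x, out + y * out_stride + x,
//                            in_stride, out_stride);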
// --- NEGradientKernel (fragments) ---
// Default constructor (class name restored from context):
NEGradientKernel::NEGradientKernel()
    : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
{
}
// configure() picks the worker from the gradient data type (S16/S32) and the norm
// type (L1/L2); only the four assignments survive in this listing:
    _func = &mag_phase_l1norm_S16_S16_U16_U8; // S16 gradients, L1 norm
    _func = &mag_phase_l2norm_S16_S16_U16_U8; // S16 gradients, L2 norm
    _func = &mag_phase_l1norm_S32_S32_U32_U8; // S32 gradients, L1 norm
    _func = &mag_phase_l2norm_S32_S32_U32_U8; // S32 gradients, L2 norm
    INEKernel::configure(win);                // registers the execution window
// In NEGradientKernel::run(), each window step invokes the selected worker:
    (*_func)(gx.ptr(), gy.ptr(), magnitude.ptr(), phase.ptr());
// --- NEEdgeNonMaxSuppressionKernel (fragments) ---
// Default constructor (class name restored from context):
NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel()
    : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0)
{
}
// configure(magnitude, phase, output, int32_t upper_thr, int32_t lower_thr, bool border_undefined)
// picks the worker from the magnitude data type and stores the thresholds:
    _func = &non_max_suppression_U16_U8_U8; // U16 magnitude
    _func = &non_max_suppression_U32_U8_U8; // U32 magnitude

    _lower_thr = lower_thr;
    _upper_thr = upper_thr;

    // Each iteration reads 10 elements from 3 rows (8 outputs plus a one-pixel border)
    constexpr unsigned int num_elems_read_per_iteration = 10;
    constexpr unsigned int num_rows_read_per_iteration  = 3;

    INEKernel::configure(win); // registers the execution window
// In NEEdgeNonMaxSuppressionKernel::run(), each window step invokes the selected worker:
    (*_func)(magnitude.ptr(), phase.ptr(), output.ptr(), input1_stride_ushort, _lower_thr, _upper_thr);
// --- NEEdgeTraceKernel (fragments) ---
// Default constructor (class name restored from context):
NEEdgeTraceKernel::NEEdgeTraceKernel()
    : _input(nullptr), _output(nullptr)
{
}
// In configure(), the execution window is grown to cover the valid region plus the
// border; only the end coordinates of that computation survive here:
                  input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
                  input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
    INEKernel::configure(win); // registers the execution window
// In NEEdgeTraceKernel::run(), every pixel of the window is traced:
    edge_trace_U8_U8(input.ptr(), output.ptr(), input_stride, output_stride);
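// How the pieces fit together (summary, not code from this file): a Canny pipeline
// first computes Sobel gradients, then NEGradientKernel produces magnitude and
// quantized phase, NEEdgeNonMaxSuppressionKernel thins edges and labels pixels
// EDGE / MAYBE / NO_EDGE, and NEEdgeTraceKernel resolves the MAYBE pixels by
// hysteresis tracing.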