25 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
// Copies float16 elements from `src` to the output iterator using SVE
// predicated loads/stores, picking the source location from a per-coordinate
// entry of the `offsets` tensor and a source row index computed from id.z().
// NOTE(review): several lines of this function (remaining parameters, the
// window setup, the loop header and the x increment) are not visible in this
// excerpt; the comments below describe only what the visible lines do.
45 void fp16_sve_scale_nearest(
const ITensor *
src,
47 const ITensor *offsets,
48 float sampling_offset,
// Source extent along dimension 0 plus left/right padding. It is used below as
// a multiplier for float16_t element offsets (presumably the channel stride,
// going by the name -- confirm against the tensor layout).
52 const size_t in_stride_c =
src->info()->dimension(0) +
src->info()->padding().left +
src->info()->padding().right;
// Source extent along dimension 1 plus top/bottom padding.
53 const size_t in_stride_w =
src->info()->dimension(1) +
src->info()->padding().top +
src->info()->padding().bottom;
// Product of the two padded extents; multiplied by the source row index below
// to form `offset_row`.
54 const size_t in_stride_wc = in_stride_w * in_stride_c;
// Source extent along dimension 2 (its use is not visible in this excerpt).
55 const size_t in_dim_h =
src->info()->dimension(2);
// Bounds of the window's x range, narrowed to int32_t so they can be passed to
// svwhilelt_b16 below.
59 const auto window_start_x =
static_cast<int32_t
>(window.x().start());
60 const auto window_end_x =
static_cast<int32_t
>(window.x().end());
// Output iterator over `dst` for window `win` (the construction of `win` is not
// visible here).
64 Iterator out(
dst, win);
// Byte address of the first element of the source buffer.
66 const uint8_t *in_ptr_start =
src->buffer() +
src->info()->offset_first_element_in_bytes();
// Byte stride of source dimension 3; multiplied by id[3] below to select the
// base pointer for the current coordinate.
67 const unsigned int in_stride_bytes_hwc =
src->info()->strides_in_bytes()[3];
// Callback invoked with the current coordinates `id` (the enclosing call is not
// visible in this excerpt).
71 [&](
const Coordinates &
id)
// Reads an int32_t from `offsets` at (id.y(), id.z()) and scales it by
// in_stride_c. NOTE(review): the declaration receiving this value is elided;
// it is presumably the `offset` used in the load address below.
74 *
reinterpret_cast<const int32_t *
>(offsets->ptr_to_element(Coordinates(
id.y(),
id.z()))) * in_stride_c;
// Source row index as an int. Only the fallback branch of the conditional is
// visible: floor((id.z() + sampling_offset) * hr). The condition and the other
// branch are elided.
75 const auto in_hi =
static_cast<int>(
77 : std::floor((id.z() + sampling_offset) * hr));
// Element offset of the selected source row.
78 const int offset_row = in_hi * in_stride_wc;
// Source base pointer for index id[3], viewed as float16_t.
79 const auto in_ptr =
reinterpret_cast<const float16_t *
>(in_ptr_start + in_stride_bytes_hwc *
id[3]);
// Destination pointer for the current iterator position, viewed as float16_t.
80 const auto out_ptr =
reinterpret_cast<float16_t *
>(out.ptr());
// Predicated loop over x: the predicate enables 16-bit lanes whose index,
// counted from x, is still below window_end_x, so no separate scalar tail loop
// is needed.
83 int x = window_start_x;
84 svbool_t pg = svwhilelt_b16(x, window_end_x);
// Load the active lanes from in_ptr + offset + offset_row + x and store them
// unchanged at out_ptr + x.
88 svst1_f16(pg, out_ptr + x, svld1_f16(pg, in_ptr +
offset + offset_row + x));
// Recompute the predicate for the next x. NOTE(review): the statement that
// advances x is elided from this excerpt.
91 pg = svwhilelt_b16(x, window_end_x);
92 }
// Keep iterating while at least one lane of the predicate is active.
while (svptest_any(svptrue_b16(), pg));
101 const ITensor *offsets,
106 PixelValue constant_border_value,
107 float sampling_offset,
109 const Window &window)
114 fp16_sve_scale_nearest(
src,
dst, offsets, sampling_offset, align_corners, window);