25 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
// Nearest-neighbour scaling of float16_t data from `src` into `dst`.
// NOTE(review): this excerpt omits several original lines (remaining parameters,
// the definitions of `hr` and `win`, the loop invocation and the body of the
// main x loop), so the comments below describe only what is visible.
46 void fp16_neon_scale_nearest(
const ITensor *
src,
48 const ITensor *offsets,
49 float sampling_offset,
// Element stride between consecutive dimension-1 positions: extent of
// dimension 0 plus its left/right padding.
53 const size_t in_stride_c =
src->info()->dimension(0) +
src->info()->padding().left +
src->info()->padding().right;
// Extent of dimension 1 plus top/bottom padding.
54 const size_t in_stride_w =
src->info()->dimension(1) +
src->info()->padding().top +
src->info()->padding().bottom;
// Element stride between consecutive dimension-2 positions (rows).
55 const size_t in_stride_wc = in_stride_w * in_stride_c;
// Extent of dimension 2 of the source; its use is not visible in this excerpt.
56 const size_t in_dim_h =
src->info()->dimension(2);
// X range of the execution window, traversed manually inside the lambda below.
60 const auto window_start_x =
static_cast<int32_t
>(window.x().start());
61 const auto window_end_x =
static_cast<int32_t
>(window.x().end());
// Number of elements advanced per iteration of the main x loop.
62 const int window_step_x = 8;
// Output iterator over `win`; the construction of `win` is not visible here.
66 Iterator out(
dst, win);
// Address of the first source element, as a byte pointer.
68 const uint8_t *in_ptr_start =
src->buffer() +
src->info()->offset_first_element_in_bytes();
// Byte stride of source dimension 3, used to select the slice for id[3].
69 const unsigned int in_stride_bytes_hwc =
src->info()->strides_in_bytes()[3];
// Per-coordinate callback; presumably handed to a window loop that is not
// visible in this excerpt.
73 [&](
const Coordinates &
id)
// Reads the precomputed int32 source index for output position
// (id.y(), id.z()) from `offsets` and scales it by in_stride_c to get an
// element offset. The variable receiving it is declared on an elided line;
// presumably it is the `offset` used in the copy below.
76 *
reinterpret_cast<const int32_t *
>(offsets->ptr_to_element(Coordinates(
id.y(),
id.z()))) * in_stride_c;
// Source row index. Only the floor branch of the conditional is visible; the
// condition and the other branch are on elided lines.
77 const auto in_hi =
static_cast<int>(
79 : std::floor((id.z() + sampling_offset) * hr));
// Element offset of the selected source row.
80 const int offset_row = in_hi * in_stride_wc;
81 int32_t x = window_start_x;
// Base of the source slice selected by id[3], viewed as float16_t.
82 const float16_t *in_ptr =
reinterpret_cast<const float16_t *
>(in_ptr_start + in_stride_bytes_hwc *
id[3]);
// Main loop stepping window_step_x elements at a time; body not visible here.
84 for (; x <= window_end_x - window_step_x; x += window_step_x)
// Tail loop: copies the remaining elements one at a time.
89 for (; x < window_end_x; ++x)
91 *(
reinterpret_cast<float16_t *
>(out.ptr()) + x) = *(in_ptr +
offset + offset_row + x);
// Bilinear scaling of float16_t data from `src` into `dst`.
// NOTE(review): this excerpt omits several original lines (remaining parameters,
// the definition of `hr`, the control flow selecting between the two lambdas
// and the final interpolation expressions), so the comments below describe
// only what is visible.
97 void fp16_neon_scale_bilinear(
const ITensor *
src,
99 const ITensor *offsets,
103 PixelValue constant_border_value,
104 float sampling_offset,
106 const Window &window)
// Output iterator over the execution window.
112 Iterator out(
dst, window);
// Element stride between consecutive dimension-1 positions: extent of
// dimension 0 plus its left/right padding.
113 const int in_stride_c =
src->info()->dimension(0) +
src->info()->padding().left +
src->info()->padding().right;
// Unpadded source extents of dimensions 1 and 2, used for bounds checks and
// clamping below.
114 const int in_dim_w =
src->info()->dimension(1);
115 const int in_dim_h =
src->info()->dimension(2);
// Element stride between consecutive dimension-2 positions (rows), including
// top/bottom padding.
116 const int in_stride_wc = in_stride_c * (in_dim_w +
src->info()->padding().top +
src->info()->padding().bottom);
// Copy of the window used for the input iterator; any adjustments made to it
// are on elided lines.
120 Window win_in(window);
123 Iterator in(
src, win_in);
// std::is_same<float16_t, float16_t> is always true, so ConstType is always
// `half`. NOTE(review): looks like a leftover from a templated version.
127 using ConstType =
typename std::conditional<std::is_same<float16_t, float16_t>::value,
half, float16_t>
::type;
// Border value converted to float16_t; used below when a tap is out of range.
129 const float16_t const_border_value =
static_cast<float16_t
>(constant_border_value.get<ConstType>());
// First per-coordinate callback: out-of-range taps are replaced with
// const_border_value (presumably the constant border mode — the selecting
// condition is not visible).
132 [&](
const Coordinates &
id)
// Reads the precomputed int32 source index for output position
// (id.y(), id.z()); the receiving declaration is on an elided line and is
// presumably the `offset` used below.
135 *
reinterpret_cast<const int32_t *
>(offsets->ptr_to_element(Coordinates(
id.y(),
id.z())));
// Float values read from `dx` and `dy` at the same position; presumably the
// interpolation weights, their use is on elided lines.
136 const auto dx_val = *
reinterpret_cast<const float *
>(dx->ptr_to_element(Coordinates(
id.y(),
id.z())));
137 const auto dy_val = *
reinterpret_cast<const float *
>(dy->ptr_to_element(Coordinates(
id.y(),
id.z())));
// Source row index of the upper taps; the float result of std::floor is
// implicitly converted to int32_t.
138 const int32_t in_hi = std::floor((
id.z() + sampling_offset) * hr - sampling_offset);
// Address of tap (offset, in_hi). It is computed unconditionally and only
// dereferenced under the range guards below.
139 const float16_t *in_ptr =
140 reinterpret_cast<const float16_t *
>(in.ptr()) +
offset * in_stride_c + in_hi * in_stride_wc;
// Tap at (offset, in_hi): loaded only if both indices are in range. The
// declaration receiving this value is on an elided line.
143 (0 <=
offset &&
offset < in_dim_w && 0 <= in_hi && in_hi < in_dim_h) ? *in_ptr : const_border_value;
// Tap at (offset + 1, in_hi): valid when offset + 1 lies in [0, in_dim_w).
144 const auto a01 = (-1 <=
offset &&
offset < in_dim_w - 1 && 0 <= in_hi && in_hi < in_dim_h)
145 ? *(in_ptr + in_stride_c)
146 : const_border_value;
// Tap at (offset, in_hi + 1): valid when in_hi + 1 lies in [0, in_dim_h).
147 const auto a10 = (0 <=
offset &&
offset < in_dim_w && -1 <= in_hi && in_hi < in_dim_h - 1)
148 ? *(in_ptr + in_stride_wc)
149 : const_border_value;
// Tap at (offset + 1, in_hi + 1): valid when both incremented indices are in
// range.
150 const auto a11 = (-1 <=
offset &&
offset < in_dim_w - 1 && -1 <= in_hi && in_hi < in_dim_h - 1)
151 ? *(in_ptr + in_stride_c + in_stride_wc)
152 : const_border_value;
// Stores the result to the current output element; the right-hand side is on
// elided lines.
154 *
reinterpret_cast<float16_t *
>(out.ptr()) =
// Second per-coordinate callback: tap coordinates are clamped to the valid
// range instead of being replaced (presumably the replicate border mode — the
// selecting condition is not visible).
163 [&](
const Coordinates &
id)
// Same reads as in the first callback: source index, then the dx/dy values.
166 *
reinterpret_cast<const int32_t *
>(offsets->ptr_to_element(Coordinates(
id.y(),
id.z())));
167 const auto dx_val = *
reinterpret_cast<const float *
>(dx->ptr_to_element(Coordinates(
id.y(),
id.z())));
168 const auto dy_val = *
reinterpret_cast<const float *
>(dy->ptr_to_element(Coordinates(
id.y(),
id.z())));
// Source row index of the upper taps, before clamping.
169 const int in_hi = std::floor((
id.z() + sampling_offset) * hr - sampling_offset);
// Clamp both tap columns and both tap rows into [0, dim - 1].
171 auto clamped_w = utility::clamp<int>(
offset, 0, in_dim_w - 1);
172 auto clamped_w1 = utility::clamp<int>(
offset + 1, 0, in_dim_w - 1);
173 auto clamped_h = utility::clamp<int>(in_hi, 0, in_dim_h - 1);
174 auto clamped_h1 = utility::clamp<int>(in_hi + 1, 0, in_dim_h - 1);
// Load the four taps from the clamped coordinates, so every load is in range.
176 const auto a00 = *(
reinterpret_cast<const float16_t *
>(in.ptr()) + clamped_w * in_stride_c +
177 clamped_h * in_stride_wc);
178 const auto a01 = *(
reinterpret_cast<const float16_t *
>(in.ptr()) + clamped_w1 * in_stride_c +
179 clamped_h * in_stride_wc);
180 const auto a10 = *(
reinterpret_cast<const float16_t *
>(in.ptr()) + clamped_w * in_stride_c +
181 clamped_h1 * in_stride_wc);
182 const auto a11 = *(
reinterpret_cast<const float16_t *
>(in.ptr()) + clamped_w1 * in_stride_c +
183 clamped_h1 * in_stride_wc);
// Stores the result to the current output element; the right-hand side is on
// elided lines.
185 *
reinterpret_cast<float16_t *
>(out.ptr()) =
198 #ifdef ENABLE_NCHW_KERNELS
// NOTE(review): the head of this signature (function name and leading
// parameters) is on lines not visible in this excerpt.
201 const ITensor *offsets,
206 PixelValue constant_border_value,
207 float sampling_offset,
209 const Window &window)
// Forwards all arguments to the float16_t instantiation of the shared
// scale_bilinear_nchw implementation.
212 arm_compute::cpu::scale_bilinear_nchw<float16_t>(
src,
dst, dx, dy, offsets, border_mode, constant_border_value,
213 sampling_offset, align_corners, window);
// NOTE(review): the head of this signature (function name and leading
// parameters) is on lines not visible in this excerpt.
218 const ITensor *offsets,
223 PixelValue constant_border_value,
224 float sampling_offset,
226 const Window &window)
// Forwards to the float16_t instantiation of the shared scale_nearest_nchw
// implementation. Unlike the bilinear call, no border_mode argument is
// passed here.
230 arm_compute::cpu::scale_nearest_nchw<float16_t>(
src,
dst, dx, dy, offsets, constant_border_value, sampling_offset,
231 align_corners, window);
233 #endif // ENABLE_NCHW_KERNELS
// Entry point that dispatches to the bilinear or the nearest-neighbour FP16
// implementation. NOTE(review): the condition choosing between the two calls
// and part of the parameter list are on lines not visible in this excerpt.
234 void fp16_neon_scale(
const ITensor *
src,
236 const ITensor *offsets,
241 PixelValue constant_border_value,
242 float sampling_offset,
244 const Window &window)
// Bilinear path: also passes dx, dy, the border mode and the border value.
248 fp16_neon_scale_bilinear(
src,
dst, offsets, dx, dy, border_mode, constant_border_value, sampling_offset,
249 align_corners, window);
// Nearest-neighbour path: needs only the offsets and sampling parameters.
253 fp16_neon_scale_nearest(
src,
dst, offsets, sampling_offset, align_corners, window);
// NOTE(review): the head of this signature (function name and leading
// parameters) is on lines not visible in this excerpt.
259 const ITensor *offsets,
264 PixelValue constant_border_value,
265 float sampling_offset,
267 const Window &window)
// Forwards all arguments, including the interpolation policy, to the
// float16_t instantiation of the shared common_neon_scale implementation.
269 arm_compute::cpu::common_neon_scale<float16_t>(
src,
dst, offsets, dx, dy, policy, border_mode,
270 constant_border_value, sampling_offset, align_corners, window);