24 #ifndef SRC_CORE_NEON_KERNELS_SCALE_LIST_H 25 #define SRC_CORE_NEON_KERNELS_SCALE_LIST_H 41 #define DECLARE_SCALE_KERNEL(func_name) \ 42 void func_name(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, \ 43 InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, \ 44 bool align_corners, const Window &window) 49 #undef DECLARE_SCALE_KERNEL 53 bool align_corners,
const Window &window)
57 const size_t in_stride_wc = in_stride_w * in_stride_c;
62 const auto window_start_x =
static_cast<int32_t
>(window.
x().
start());
63 const auto window_end_x =
static_cast<int32_t
>(window.
x().
end());
64 const int window_step_x = 16 /
sizeof(T);
77 const int offset_row = in_hi * in_stride_wc;
78 int32_t x = window_start_x;
79 const T *in_ptr =
reinterpret_cast<const T *
>(in_ptr_start + in_stride_bytes_hwc *
id[3]);
81 for(; x <= window_end_x - window_step_x; x += window_step_x)
86 for(; x < window_end_x; ++x)
88 *(
reinterpret_cast<T *
>(out.
ptr()) + x) = *(in_ptr + offset + offset_row + x);
97 bool align_corners,
const Window &window)
117 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 118 using ConstType =
typename std::conditional<std::is_same<T, float16_t>::value,
half, T>
::type;
122 const T const_border_value =
static_cast<T
>(constant_border_value.
get<ConstType>());
128 const int32_t in_hi = std::floor((
id.z() + sampling_offset) * hr - sampling_offset);
129 const T *in_ptr =
reinterpret_cast<const T *
>(in.
ptr()) +
offset * in_stride_c + in_hi * in_stride_wc;
131 const auto a00 = (0 <=
offset &&
offset < in_dim_w && 0 <= in_hi && in_hi < in_dim_h) ? *in_ptr : const_border_value;
132 const auto a01 = (-1 <=
offset &&
offset < in_dim_w - 1 && 0 <= in_hi && in_hi < in_dim_h) ? *(in_ptr + in_stride_c) : const_border_value;
133 const auto a10 = (0 <=
offset &&
offset < in_dim_w && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + in_stride_wc) : const_border_value;
134 const auto a11 = (-1 <=
offset &&
offset < in_dim_w - 1 && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + in_stride_c + in_stride_wc) : const_border_value;
147 const int in_hi = std::floor((
id.z() + sampling_offset) * hr - sampling_offset);
149 auto clamped_w = utility::clamp<int>(
offset, 0, in_dim_w - 1);
150 auto clamped_w1 = utility::clamp<int>(offset + 1, 0, in_dim_w - 1);
151 auto clamped_h = utility::clamp<int>(in_hi, 0, in_dim_h - 1);
152 auto clamped_h1 = utility::clamp<int>(in_hi + 1, 0, in_dim_h - 1);
154 const auto a00 = *(
reinterpret_cast<const T *
>(in.
ptr()) + clamped_w * in_stride_c + clamped_h * in_stride_wc);
155 const auto a01 = *(
reinterpret_cast<const T *
>(in.
ptr()) + clamped_w1 * in_stride_c + clamped_h * in_stride_wc);
156 const auto a10 = *(
reinterpret_cast<const T *
>(in.
ptr()) + clamped_w * in_stride_c + clamped_h1 * in_stride_wc);
157 const auto a11 = *(
reinterpret_cast<const T *
>(in.
ptr()) + clamped_w1 * in_stride_c + clamped_h1 * in_stride_wc);
169 template <
typename T>
172 bool align_corners,
const Window &window)
176 bilinear_neon_scale<T>(
src,
dst, offsets, dx, dy, border_mode, constant_border_value, sampling_offset, align_corners, window);
180 nearest_neon_scale<T>(
src,
dst, offsets, sampling_offset, align_corners, window);
BorderMode
Methods available to handle borders.
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
unsigned int top
top of the border
Class describing the value of a pixel for any image format.
InterpolationPolicy
Interpolation method.
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void get(uint8_t &v) const
Interpret the pixel value as a U8.
uint8x16_t vloadq(const uint8_t *ptr)
half_float::half half
16-bit floating point type
Output values are defined by bilinear interpolation between the pixels.
Describe one of the image's dimensions with a start, end and step.
unsigned int bottom
bottom of the border
void nearest_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, float sampling_offset, bool align_corners, const Window &window)
T round_half_away_from_zero(T value)
Round floating-point value with half value rounding away from zero.
Output values are defined to match the source pixel whose center is nearest to the sample position...
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory. ...
void qasymm8_signed_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
virtual PaddingSize padding() const =0
Padding of tensor.
unsigned int left
left of the border
unsigned int right
right of the border
void bilinear_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
virtual size_t offset_first_element_in_bytes() const =0
The offset from the beginning of the memory allocation to the first element of the tensor...
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Pixels outside the image are assumed to have the same value as the closest image pixel.
void vstore(uint8_t *ptr, uint8x8_t val)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
#define DECLARE_SCALE_KERNEL(func_name)
void common_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_corners=false)
Returns resize ratio between input and output with consideration of aligned corners.
Includes all wrapper headers at once.
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
constexpr int end() const
Return the end of the dimension.
Iterator updated by execute_window_loop for each window element.
float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val)
Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and t...
constexpr int start() const
Return the start of the dimension.
void qasymm8_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
Describe a multidimensional execution window.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.