24.02.1
|
Go to the documentation of this file.
45 #ifdef ENABLE_NCHW_KERNELS
46 void scale_area_nchw_u8(
const ITensor *
src,
48 const ITensor *offsets,
53 PixelValue constant_border_value,
54 float sampling_offset,
58 ARM_COMPUTE_UNUSED(dx, dy, offsets, policy, border_mode, constant_border_value, sampling_offset, align_corners);
59 using namespace scale_helpers;
65 Window win_in(window);
70 Iterator src_i(
src, win_in);
71 Iterator dst_i(
dst, window);
77 const auto w =
src->info()->dimension(0);
78 const auto h =
src->info()->dimension(1);
79 const size_t in_stride =
src->info()->strides_in_bytes()[1];
83 [&](
const Coordinates &
id)
85 const auto in_ptr =
reinterpret_cast<const uint8_t *
>(src_i.ptr());
87 uint8x8_t tmp0 = vdup_n_u8(0);
97 uint8x8_t tmp1 = vdup_n_u8(0);
100 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 10,
id.y()), tmp1, 2);
101 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 11,
id.y()), tmp1, 3);
102 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 12,
id.y()), tmp1, 4);
103 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 13,
id.y()), tmp1, 5);
104 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 14,
id.y()), tmp1, 6);
105 tmp1 = vset_lane_u8(
pixel_area_c1u8_clamp(in_ptr, in_stride,
w, h, wr, hr,
id.x() + 15,
id.y()), tmp1, 7);
107 vst1q_u8(dst_i.ptr(), vcombine_u8(tmp0, tmp1));
112 template <
typename T>
113 void scale_bilinear_qasymm_nchw(
const ITensor *
src,
115 const ITensor *offsets,
120 PixelValue constant_border_value,
121 float sampling_offset,
123 const Window &window)
139 Window win_in(window);
140 win_in.set(
idx_width, Window::Dimension(0, 0, 0));
141 win_in.set(
idx_height, Window::Dimension(0, 0, 0));
143 for (
size_t d =
Window::DimZ; d < offsets->info()->num_dimensions(); ++d)
145 win_off.set(d, Window::Dimension(0, 0, 0));
148 Iterator src_i(
src, win_in);
149 Iterator dst_i(
dst, window);
151 const int32_t in_dim_w =
src->info()->dimension(
idx_width);
153 const int32_t stride_w =
src->info()->strides_in_bytes()[
idx_width];
154 const int32_t stride_h =
src->info()->strides_in_bytes()[
idx_height];
156 const UniformQuantizationInfo iq_info =
src->info()->quantization_info().uniform();
157 const UniformQuantizationInfo oq_info =
dst->info()->quantization_info().uniform();
161 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
162 using ConstType =
typename std::conditional<std::is_same<T, float16_t>::value,
half, T>
::type;
166 const T const_border_value =
static_cast<T
>(constant_border_value.get<ConstType>());
169 [&](
const Coordinates &
id)
171 const int32_t index_h = std::floor((
id[
idx_height] + sampling_offset) * hr - sampling_offset);
172 const int32_t index_w = *(
reinterpret_cast<const int32_t *
>(
175 *(
reinterpret_cast<const float *
>(dx->ptr_to_element(Coordinates(
id[
idx_width],
id[
idx_height]))));
177 *(
reinterpret_cast<const float *
>(dy->ptr_to_element(Coordinates(
id[
idx_width],
id[
idx_height]))));
178 const auto pixel_row_ptr =
reinterpret_cast<const T *
>(src_i.ptr());
180 const auto a00 = (0 <= index_w && index_w < in_dim_w && 0 <= index_h && index_h < in_dim_h)
181 ? (*(pixel_row_ptr + index_w * stride_w + index_h * stride_h))
182 : const_border_value;
183 const auto a01 = (-1 <= index_w && index_w < in_dim_w - 1 && 0 <= index_h && index_h < in_dim_h)
184 ? (*(pixel_row_ptr + (index_w + 1) * stride_w + index_h * stride_h))
185 : const_border_value;
186 const auto a10 = (0 <= index_w && index_w < in_dim_w && -1 <= index_h && index_h < in_dim_h - 1)
187 ? (*(pixel_row_ptr + index_w * stride_w + (index_h + 1) * stride_h))
188 : const_border_value;
189 const auto a11 = (-1 <= index_w && index_w < in_dim_w - 1 && -1 <= index_h && index_h < in_dim_h - 1)
190 ? (*(pixel_row_ptr + (index_w + 1) * stride_w + (index_h + 1) * stride_h))
191 : const_border_value;
206 [&](
const Coordinates &
id)
208 const int index_h = std::floor((
id[
idx_height] + sampling_offset) * hr - sampling_offset);
209 const int32_t index_w = *(
reinterpret_cast<const int32_t *
>(
212 *(
reinterpret_cast<const float *
>(dx->ptr_to_element(Coordinates(
id[
idx_width],
id[
idx_height]))));
214 *(
reinterpret_cast<const float *
>(dy->ptr_to_element(Coordinates(
id[
idx_width],
id[
idx_height]))));
215 const auto pixel_row_ptr =
reinterpret_cast<const T *
>(src_i.ptr());
217 auto clamped_w = utility::clamp<int>(index_w, 0, in_dim_w - 1);
218 auto clamped_w1 = utility::clamp<int>(index_w + 1, 0, in_dim_w - 1);
219 auto clamped_h = utility::clamp<int>(index_h, 0, in_dim_h - 1);
220 auto clamped_h1 = utility::clamp<int>(index_h + 1, 0, in_dim_h - 1);
222 const auto a00 = *(pixel_row_ptr + clamped_w * stride_w + clamped_h * stride_h);
223 const auto a01 = *(pixel_row_ptr + clamped_w1 * stride_w + clamped_h * stride_h);
224 const auto a10 = *(pixel_row_ptr + clamped_w * stride_w + clamped_h1 * stride_h);
225 const auto a11 = *(pixel_row_ptr + clamped_w1 * stride_w + clamped_h1 * stride_h);
243 template <
typename T>
246 const ITensor *offsets,
251 PixelValue constant_border_value,
252 float sampling_offset,
254 const Window &window)
257 arm_compute::cpu::scale_bilinear_nchw<T>(
src,
dst, dx, dy, offsets, border_mode, constant_border_value,
258 sampling_offset, align_corners, window);
261 template <
typename T>
262 void scale_nearest_nchw(
const ITensor *
src,
264 const ITensor *offsets,
269 PixelValue constant_border_value,
270 float sampling_offset,
272 const Window &window)
275 arm_compute::cpu::scale_nearest_nchw<T>(
src,
dst, dx, dy, offsets, constant_border_value, sampling_offset,
276 align_corners, window);
279 #endif // ENABLE_NCHW_KERNELS
285 static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels = {
287 [](
const ScaleKernelDataTypeISASelectorData &data)
289 return data.dt ==
DataType::F16 && data.isa.sve && data.isa.fp16 &&
294 [](
const ScaleKernelDataTypeISASelectorData &data)
298 [](
const ScaleKernelDataTypeISASelectorData &data) {
304 [](
const ScaleKernelDataTypeISASelectorData &data)
311 [](
const ScaleKernelDataTypeISASelectorData &data)
315 [](
const ScaleKernelDataTypeISASelectorData &data)
319 [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::F16 && data.isa.fp16; },
321 {
"neon_fp32_scale", [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::F32; },
323 {
"neon_qu8_scale", [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::QASYMM8; },
328 {
"neon_u8_scale", [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::U8; },
330 {
"neon_s8_scale", [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::S8; },
332 {
"neon_s16_scale", [](
const ScaleKernelDataTypeISASelectorData &data) {
return data.dt ==
DataType::S16; },
337 const ITensorInfo *dx,
338 const ITensorInfo *dy,
339 const ITensorInfo *offsets,
341 const ScaleKernelInfo &
info)
359 const auto output_width =
dst->dimension(width_index);
360 const auto output_height =
dst->dimension(height_index);
377 if (dx !=
nullptr && dy !=
nullptr)
413 _run_method = uk->ukernel;
414 _name = std::string(
"CpuScaleKernel")
432 _sampling_offset = 0.5f;
451 #ifdef ENABLE_NCHW_KERNELS
455 std::string function_to_call(
"scale_");
460 const static std::map<std::string, ScaleKernelPtr> map_nchw_function = {
461 {
"scale_U8_NCHW_AREA_CONSTANT", &arm_compute::cpu::scale_area_nchw_u8},
462 {
"scale_U8_NCHW_AREA_CONSTANT", &arm_compute::cpu::scale_area_nchw_u8},
463 {
"scale_U8_NCHW_BILINEAR", &arm_compute::cpu::scale_bilinear_nchw<uint8_t>},
464 {
"scale_U8_NCHW_NEAREST_NEIGHBOUR", &arm_compute::cpu::scale_nearest_nchw<uint8_t>},
465 {
"scale_QASYMM8_NCHW_BILINEAR", &arm_compute::cpu::scale_bilinear_qasymm_nchw<uint8_t>},
466 {
"scale_QASYMM8_NCHW_NEAREST_NEIGHBOUR", &arm_compute::cpu::scale_nearest_nchw<uint8_t>},
467 {
"scale_QASYMM8_SIGNED_NCHW_BILINEAR", &arm_compute::cpu::scale_bilinear_qasymm_nchw<int8_t>},
468 {
"scale_QASYMM8_SIGNED_NCHW_NEAREST_NEIGHBOUR", &arm_compute::cpu::scale_nearest_nchw<int8_t>},
469 {
"scale_S16_NCHW_BILINEAR", &arm_compute::cpu::scale_bilinear_nchw<int16_t>},
470 {
"scale_S16_NCHW_NEAREST_NEIGHBOUR", &arm_compute::cpu::scale_nearest_nchw<int16_t>},
473 {
"scale_F32_NCHW_BILINEAR", &arm_compute::cpu::scale_bilinear_nchw<float>},
474 {
"scale_F32_NCHW_NEAREST_NEIGHBOUR", &arm_compute::cpu::scale_nearest_nchw<float>},
476 auto it = map_nchw_function.find(function_to_call);
477 if (it != map_nchw_function.end())
479 _nchw_func = it->second;
482 #endif // ENABLE_NCHW_KERNELS
486 ICpuKernel::configure(win);
516 _nchw_func(
src,
dst, offsets, dx, dy, _policy, _border_mode, _constant_border_value, _sampling_offset,
521 _run_method(
src,
dst, offsets, dx, dy, _policy, _border_mode, _constant_border_value, _sampling_offset,
528 return _name.c_str();
533 return available_kernels;
Class to describe a number of elements in each dimension.
@ NCHW
Num samples, channels, height, width.
@ CONSTANT
Pixels outside the image are assumed to have a constant value.
float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val)
Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and t...
SimpleTensor< float > src
decltype(strategy::transforms) typedef type
Class describing the value of a pixel for any image format.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
const char * name() const override
Name of the kernel.
DataLayout
[DataLayout enum definition]
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
@ NHWC
Num samples, height, width, channels.
@ UNDEFINED
Borders are left undefined.
@ TOP_LEFT
Samples are taken at pixel top left corner.
@ AREA
Output values are determined by averaging the source pixels whose areas fall under the area of the de...
@ QASYMM8
Quantized, asymmetric, fixed-point, unsigned 8-bit number.
void u8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
__kernel void scale_bilinear_nchw(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes)
Performs an affine transformation on an image interpolating with the BILINEAR method.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
InterpolationPolicy
Interpolation method.
static CPUInfo & get()
Access the CPUInfo singleton.
ITensor * get_tensor(int id)
Get the tensor with the given id from the pack.
#define REGISTER_FP16_NEON(func_name)
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
#define REGISTER_QASYMM8_NEON(func_name)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
bool use_padding
Indication of using padding.
constexpr auto data_layout
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
DataLayout data_layout
Data layout to use.
#define REGISTER_FP32_NEON(func_name)
const std::string & string_from_interpolation_policy(InterpolationPolicy policy)
Translates a given interpolation policy to a string.
#define REGISTER_QASYMM8_SVE(func_name)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
half_float::half half
16-bit floating point type
#define REGISTER_FP32_SVE(func_name)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
Quantize a value given an 8-bit asymmetric quantization scheme.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
void fp16_bilinear_neon_scale_nchw(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
#define ARM_COMPUTE_ERROR_THROW_ON(status)
BorderMode border_mode
Border mode policy.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
#define REGISTER_QASYMM8_SIGNED_SVE(func_name)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
@ U8
unsigned 8-bit number
@ S16
signed 16-bit number
void u8_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
void qasymm8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
@ QASYMM8_SIGNED
Quantized, asymmetric, fixed-point, signed 8-bit number.
bool align_corners
Align corners of input and output.
@ REPLICATE
Pixels outside the image are assumed to have the same value as the closest image pixel.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
void qasymm8_signed_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
#define REGISTER_INTEGER_NEON(func_name)
static float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
Dequantize a value given an 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
static const auto * get_implementation(const SelectorType &selector, KernelSelectionType selection_type=KernelSelectionType::Supported)
Micro-kernel selector.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
@ BILINEAR
Output values are defined by bilinear interpolation between the pixels.
uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
Return the pixel at (x,y) using area interpolation by clamping when out of borders.
const Window & window() const
The maximum window the kernel can be executed on.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
Information about executing thread and CPU.
void s16_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
#define REGISTER_INTEGER_SVE(func_name)
void fp32_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
void qasymm8_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
PixelValue constant_border_value
Constant value to use for constant border mode policy.
static Status validate(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
Static function to check if given info will lead to a valid configuration.
Describe a multidimensional execution window.
bool is_align_corners_allowed_sampling_policy(SamplingPolicy sampling_policy)
Returns if aligned corners are allowed for the given sampling policy.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Copyright (c) 2017-2024 Arm Limited.
#define REGISTER_FP16_SVE(func_name)
@ F16
16-bit floating-point number
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
@ S32
signed 32-bit number
float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_corners=false)
Returns resize ratio between input and output with consideration of aligned corners.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
@ NEAREST_NEIGHBOR
Output values are defined to match the source pixel whose center is nearest to the sample position.
void qasymm8_signed_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
@ CENTER
Samples are taken at pixel center.
BorderMode
Methods available to handle borders.
static const std::vector< ScaleKernel > & get_available_kernels()
void configure(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
Initialise the kernel's inputs, output and interpolation policy.
@ UNKNOWN
Unknown data layout.
Store the tensor's metadata.
@ F32
32-bit floating-point number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void fp16_nearest_neon_scale_nchw(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
void fp16_common_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
SamplingPolicy sampling_policy
Sampling policy used by the interpolation.
void s16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)
InterpolationPolicy interpolation_policy
Interpolation type to use.
void s8_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, bool align_corners, const Window &window)