36 #ifdef SAMPLING_POLICY_TOP_LEFT 37 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
38 const float4 new_x = in_x_coords * (float4)(scale.s0);
39 const float4 new_y = (float4)(coord.s1 * scale.s1);
40 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
41 #elif SAMPLING_POLICY_CENTER 42 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
43 const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0);
44 const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1);
45 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
47 #error("Unsupported sampling policy"); 60 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
61 #ifdef SAMPLING_POLICY_TOP_LEFT 62 const float4 new_x = in_x_coords * (float4)(scale.s0);
63 const float4 new_y = (float4)(coord.s1 * scale.s1);
64 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
65 #elif SAMPLING_POLICY_CENTER 66 const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0) - (float4)(0.5f);
67 const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1 - 0.5f);
68 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
70 #error("Unsupported sampling policy"); 95 const int x = get_global_id(0);
96 const int y = get_global_id(1);
100 transformed =
round(transformed);
101 #endif // ALIGN_CORNERS 104 cond[0].v =
CONVERT(((transformed.even < 0) || (transformed.even >= (
int)SRC_WIDTH)) || ((transformed.odd < 0) || (transformed.odd >= (
int)SRC_HEIGHT)),
SELECT_VEC_DATA_TYPE(DATA_TYPE, 4));
106 TILE(
int, 1, 4, in_x);
107 TILE(
int, 1, 4, in_y);
108 in_x[0].v = convert_int4(
clamp(transformed.even, 0.f, SRC_WIDTH - 1.f));
109 in_y[0].v = convert_int4(
clamp(transformed.odd, 0.f, SRC_HEIGHT - 1.f));
111 TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
114 out_vals[0].s[i] =
select(*((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] *
sizeof(DATA_TYPE) + in_y[0].s[i] * in_stride_y)), (DATA_TYPE)CONSTANT_VALUE, cond[0].s[i]);
117 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + x * out_step_x + y * out_stride_y;
119 if(x == get_global_size(0) - 1)
123 (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
124 #else // VEC_SIZE == 1 126 (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
127 #endif // VEC_SIZE == 1 133 (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
134 #else // VEC_SIZE == 1 136 (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
137 #endif // VEC_SIZE == 1 162 const int x = get_global_id(0);
163 const int y = get_global_id(1);
165 TILE(
float, 1, 8, trans_coords);
166 TILE(
float, 1, 8, floor_coords);
167 TILE(
int, 1, 16, in_x);
168 TILE(
int, 1, 16, in_y);
171 floor_coords[0].v = floor(trans_coords[0].v);
177 in_x[0].s[i * 4 + j] = floor_coords[0].s[i * 2 + 0] + (j % 2);
178 in_y[0].s[i * 4 + j] = floor_coords[0].s[i * 2 + 1] + (j > 1);
182 #if defined(BORDER_MODE_CONSTANT) 184 cond[0].v =
CONVERT(((in_x[0].v < 0) || (in_x[0].v >= (
int)SRC_WIDTH)) || ((in_y[0].v < 0) || (in_y[0].v >= (
int)SRC_HEIGHT)),
SELECT_VEC_DATA_TYPE(DATA_TYPE, 16));
185 #endif // defined(BORDER_MODE_CONSTANT) 187 in_x[0].v =
clamp(in_x[0].v, 0, (int16)((
int)SRC_WIDTH - 1));
188 in_y[0].v =
clamp(in_y[0].v, 0, (int16)((
int)SRC_HEIGHT - 1));
190 TILE(DATA_TYPE, 1, 16, in_vals);
193 #if defined(BORDER_MODE_CONSTANT) 196 in_vals[0].s[i] =
select(*((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] *
sizeof(DATA_TYPE) + in_y[0].s[i] * (
int)in_stride_y)), (DATA_TYPE)CONSTANT_VALUE, cond[0].s[i]);
201 in_vals[0].s[i] = *((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] *
sizeof(DATA_TYPE) + in_y[0].s[i] * (int)in_stride_y));
205 TILE(
float, 1, 8, a);
206 TILE(
float, 1, 8,
b);
208 a[0].v = trans_coords[0].v - floor_coords[0].v;
209 b[0].v = ((float8)(1.f)) - a[0].v;
211 #if defined(OFFSET) && defined(SCALE) 212 TILE(
float, 1, 16, in_vals_f32);
213 TILE(
float, 1, 4, out_vals_f32);
215 in_vals_f32[0].v = convert_float16(convert_int16(in_vals[0].v) - (int16)OFFSET) * (float16)SCALE;
223 out_vals_f32[0].s[i] = (in_vals_f32[0].s[i * 4 + 0] *
b[0].s[i * 2] *
b[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 1] * a[0].s[i * 2] *
b[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 2] *
b[0].s[i * 2] * a[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 3] * a[0].s[i * 2] * a[0].s[i * 2 + 1]);
226 TILE(DATA_TYPE, 1, 4, out_vals_4);
227 TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
229 out_vals_4[0].v =
CONVERT_SAT(convert_int4_sat_rtp(out_vals_f32[0].v / (
float)SCALE) + OFFSET,
VEC_DATA_TYPE(DATA_TYPE, 4));
233 out_vals[0].s[i] = out_vals_4[0].s[i];
237 TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
245 out_vals[0].s[i] = (in_vals[0].s[i * 4 + 0] *
b[0].s[i * 2] *
b[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 1] * a[0].s[i * 2] *
b[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 2] *
b[0].s[i * 2] * a[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 3] * a[0].s[i * 2] * a[0].s[i * 2 + 1]);
249 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + x * out_step_x + y * out_stride_y;
251 if(x == get_global_size(0) - 1)
255 (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
256 #else // VEC_SIZE == 1 258 (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
259 #endif // VEC_SIZE == 1 265 (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
266 #else // VEC_SIZE == 1 268 (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
269 #endif // VEC_SIZE == 1
#define VSTORE_PARTIAL(size, store_size)
#define LOOP_UNROLLING(type, idx, start, step, num, macro)
#define IMAGE_DECLARATION(name)
const float8 transform_bilinear(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
__kernel void scale_bilinear_nchw(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes)
Performs an affine transformation on an image interpolating with the BILINEAR method.
const float8 transform_nearest(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Performs clamping among a lower and upper value.
#define SELECT_DATA_TYPE(type)
#define CONVERT_SAT(x, type)
__kernel void scale_nearest_neighbour_nchw(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes)
Performs an affine transformation on an image interpolating with the NEAREST NEIGHBOUR method...
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
#define SELECT_VEC_DATA_TYPE(type, size)
#define TILE(DATA_TYPE, H, W, BASENAME)
Tile object. A tile object is a 2D memory block and can be accessed using the following syntax: a[m0]...
#define VEC_DATA_TYPE(type, size)