36 #ifdef SAMPLING_POLICY_TOP_LEFT 37 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
38 const float4 new_x = in_x_coords * (float4)(
scale.s0);
39 const float4 new_y = (float4)(coord.s1 *
scale.s1);
40 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
41 #elif SAMPLING_POLICY_CENTER 42 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
43 const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(
scale.s0);
44 const float4 new_y = (float4)((coord.s1 + 0.5f) *
scale.s1);
45 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
47 #error("Unsupported sampling policy"); 60 const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
61 #ifdef SAMPLING_POLICY_TOP_LEFT 62 const float4 new_x = in_x_coords * (float4)(
scale.s0);
63 const float4 new_y = (float4)(coord.s1 *
scale.s1);
64 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
65 #elif SAMPLING_POLICY_CENTER 66 const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(
scale.s0) - (float4)(0.5f);
67 const float4 new_y = (float4)((coord.s1 + 0.5f) *
scale.s1 - 0.5f);
68 return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
70 #error("Unsupported sampling policy"); 108 transformed =
round(transformed);
109 #endif // ALIGN_CORNERS 111 vstore4(
read_texels4(&in, convert_int8(tc)), 0, (__global DATA_TYPE *)out.
ptr);
150 #if defined(DEPTH_OUT) 177 __kernel
void scale_nearest_neighbour_nhwc(
188 #ifdef SAMPLING_POLICY_TOP_LEFT 189 float new_x = get_global_id(1) *
scale_x;
190 float new_y = (get_global_id(2) % DEPTH_OUT) *
scale_y;
191 #elif SAMPLING_POLICY_CENTER 192 float new_x = (get_global_id(1) + 0.5f) *
scale_x;
193 float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) *
scale_y;
195 #error("Unsupported sampling policy"); 198 new_x =
round(new_x);
199 new_y =
round(new_y);
204 *((__global DATA_TYPE *)out.
ptr) = *((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT)));
236 __kernel
void scale_bilinear_nhwc(
247 #ifdef SAMPLING_POLICY_TOP_LEFT 248 const float new_x = get_global_id(1) *
scale_x;
249 const float new_y = (get_global_id(2) % DEPTH_OUT) *
scale_y;
250 #elif SAMPLING_POLICY_CENTER 251 const float new_x = (get_global_id(1) + 0.5f) *
scale_x - 0.5f;
252 const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) *
scale_y - 0.5f;
254 #error("Unsupported sampling policy"); 257 const float new_xf = floor(new_x);
258 const float new_yf = floor(new_y);
264 #ifndef BORDER_MODE_REPLICATE 265 const bool check_x = (0.f <= new_xf && new_xf <
input_width);
266 const bool check_x1 = (-1.f <= new_xf && new_xf <
input_width - 1);
267 const bool check_y = (0.f <= new_yf && new_yf <
input_height);
268 const bool check_y1 = (-1.f <= new_yf && new_yf <
input_height - 1);
269 const float ins_0 =
select((
float)(CONSTANT_VALUE), (
float)(*((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y),
270 (get_global_id(2) / DEPTH_OUT)))),
272 const float ins_1 =
select((
float)(CONSTANT_VALUE), (
float)(*((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y),
273 (get_global_id(2) / DEPTH_OUT)))),
274 check_x1 && check_y);
275 const float ins_2 =
select((
float)(CONSTANT_VALUE), (
float)(*((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1),
276 (get_global_id(2) / DEPTH_OUT)))),
277 check_x && check_y1);
278 const float ins_3 =
select((
float)(CONSTANT_VALUE), (
float)(*((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1),
279 (get_global_id(2) / DEPTH_OUT)))),
280 check_x1 && check_y1);
281 float4 ins = (float4)(ins_0, ins_1, ins_2, ins_3);
283 float4 ins = (float4)(*((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
284 *((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
285 *((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
286 *((__global DATA_TYPE *)
tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
289 const float a = new_x - new_xf;
290 const float b = 1.f - a;
291 const float a1 = new_y - new_yf;
292 const float b1 = 1.f - a1;
293 const float fr = ((ins.s0 *
b * b1) + (ins.s1 * a * b1) + (ins.s2 *
b * a1) + (ins.s3 * a * a1));
295 *((__global DATA_TYPE *)out.
ptr) =
CONVERT(fr, DATA_TYPE);
__global uchar * ptr
Pointer to the starting position of the buffer.
__kernel void scale_bilinear_nchw(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes, const float input_width, const float input_height, const float scale_x, const float scale_y)
Performs an affine transformation on an image interpolating with the BILINEAR method.
__kernel void scale_nearest_neighbour_nchw(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes, const float input_width, const float input_height, const float scale_x, const float scale_y)
Performs an affine transformation on an image interpolating with the NEAREST NEIGHBOUR method.
#define CONVERT_TO_IMAGE_STRUCT(name)
const DATA_TYPE4 bilinear_interpolate_with_border(const Image *in, const float8 coords, const float width, const float height, const float border_size)
Computes the bilinear interpolation for each set of coordinates in the vector coords and returns the ...
#define IMAGE_DECLARATION(name)
Structure to hold 4D tensor information.
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Performs clamping among a lower and upper value.
#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size)
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name)
const float8 transform_bilinear(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
Get the pointer position of a Tensor4D.
#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size)
const float8 transform_nearest(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
const size_t input_height
Structure to hold Image information.
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
__global uchar * ptr
Pointer to the starting position of the buffer.
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
const DATA_TYPE4 read_texels4(const Image *in, const int8 coords)
Reads four texels from the input image.
#define TENSOR4D_DECLARATION(name)
const float8 clamp_to_border_with_size(float8 coords, const float width, const float height, const float border_size)
Clamps the given coordinates to the borders according to the border size.
const float2 get_current_coords()
Returns the current thread coordinates.