Compute Library
 22.08
scale.cl File Reference
#include "helpers.h"
#include "tile_helpers.h"

Go to the source code of this file.

Functions

const float8 transform_nearest (const float2 coord, const float2 scale)
 Transforms four 2D coordinates. More...
 
const float8 transform_bilinear (const float2 coord, const float2 scale)
 Transforms four 2D coordinates. More...
 
__kernel void scale_nearest_neighbour_nchw (__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes)
 Performs an affine transformation on an image interpolating with the NEAREAST NEIGHBOUR method. More...
 
__kernel void scale_bilinear_nchw (__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_offset_first_element_in_bytes)
 Performs an affine transformation on an image interpolating with the BILINEAR method. More...
 

Function Documentation

◆ scale_bilinear_nchw()

__kernel void scale_bilinear_nchw ( __global uchar *  in_ptr,
uint  in_stride_x,
uint  in_step_x,
uint  in_stride_y,
uint  in_step_y,
uint  in_offset_first_element_in_bytes,
__global uchar *  out_ptr,
uint  out_stride_x,
uint  out_step_x,
uint  out_stride_y,
uint  out_step_y,
uint  out_offset_first_element_in_bytes 
)

Performs an affine transformation on an image interpolating with the BILINEAR method.

Note
Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Parameters
[in]in_ptrPointer to the source image. Supported data types: U8, S16.
[in]in_stride_xStride of the source image in X dimension (in bytes)
[in]in_step_xsrc_stride_x * number of elements along X processed per workitem(in bytes)
[in]in_stride_yStride of the source image in Y dimension (in bytes)
[in]in_step_ysrc_stride_y * number of elements along Y processed per workitem(in bytes)
[in]in_offset_first_element_in_bytesThe offset of the first element in the source image
[out]out_ptrPointer to the destination image. Supported data types: U8, S16. (Must be the same as the input)
[in]out_stride_xStride of the destination image in X dimension (in bytes)
[in]out_step_xdst_stride_x * number of elements along X processed per workitem(in bytes)
[in]out_stride_yStride of the destination image in Y dimension (in bytes)
[in]out_step_ydst_stride_y * number of elements along Y processed per workitem(in bytes)
[in]out_offset_first_element_in_bytesThe offset of the first element in the destination image

Definition at line 158 of file scale.cl.

References arm_compute::test::validation::b, arm_compute::utility::clamp(), CONVERT, CONVERT_SAT, LOOP_UNROLLING, arm_compute::test::validation::reference::select(), SELECT_DATA_TYPE, SELECT_VEC_DATA_TYPE, TILE, transform_bilinear(), VEC_DATA_TYPE, VEC_SIZE, VSTORE, and VSTORE_PARTIAL.

Referenced by CpuScaleKernel::configure().

161 {
162  const int x = get_global_id(0);
163  const int y = get_global_id(1);
164 
165  TILE(float, 1, 8, trans_coords);
166  TILE(float, 1, 8, floor_coords);
167  TILE(int, 1, 16, in_x);
168  TILE(int, 1, 16, in_y);
169 
170  trans_coords[0].v = transform_bilinear((float2)(x * VEC_SIZE, y), (float2)(SCALE_X, SCALE_Y));
171  floor_coords[0].v = floor(trans_coords[0].v);
172 
173  LOOP_UNROLLING(int, i, 0, 1, 4,
174  {
175  LOOP_UNROLLING(int, j, 0, 1, 4,
176  {
177  in_x[0].s[i * 4 + j] = floor_coords[0].s[i * 2 + 0] + (j % 2);
178  in_y[0].s[i * 4 + j] = floor_coords[0].s[i * 2 + 1] + (j > 1);
179  })
180  })
181 
182 #if defined(BORDER_MODE_CONSTANT)
183  TILE(SELECT_DATA_TYPE(DATA_TYPE), 1, 16, cond);
184  cond[0].v = CONVERT(((in_x[0].v < 0) || (in_x[0].v >= (int)SRC_WIDTH)) || ((in_y[0].v < 0) || (in_y[0].v >= (int)SRC_HEIGHT)), SELECT_VEC_DATA_TYPE(DATA_TYPE, 16));
185 #endif // defined(BORDER_MODE_CONSTANT)
186 
187  in_x[0].v = clamp(in_x[0].v, 0, (int16)((int)SRC_WIDTH - 1));
188  in_y[0].v = clamp(in_y[0].v, 0, (int16)((int)SRC_HEIGHT - 1));
189 
190  TILE(DATA_TYPE, 1, 16, in_vals);
191 
192  // Loads the values from the input image
193 #if defined(BORDER_MODE_CONSTANT)
194  LOOP_UNROLLING(int, i, 0, 1, 16,
195  {
196  in_vals[0].s[i] = select(*((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] * sizeof(DATA_TYPE) + in_y[0].s[i] * (int)in_stride_y)), (DATA_TYPE)CONSTANT_VALUE, cond[0].s[i]);
197  })
198 #else // defined(BORDER_MODE_CONSTANT)
199  LOOP_UNROLLING(int, i, 0, 1, 16,
200  {
201  in_vals[0].s[i] = *((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] * sizeof(DATA_TYPE) + in_y[0].s[i] * (int)in_stride_y));
202  })
203 #endif // defined(BORDER_MODE_CONSTANT)
204 
205  TILE(float, 1, 8, a);
206  TILE(float, 1, 8, b);
207 
208  a[0].v = trans_coords[0].v - floor_coords[0].v;
209  b[0].v = ((float8)(1.f)) - a[0].v;
210 
211 #if defined(OFFSET) && defined(SCALE)
212  TILE(float, 1, 16, in_vals_f32);
213  TILE(float, 1, 4, out_vals_f32);
214 
215  in_vals_f32[0].v = convert_float16(convert_int16(in_vals[0].v) - (int16)OFFSET) * (float16)SCALE;
216 
217  // Bilinear interpolation: (in0 * b0 * b1) + (in1 * a0 * b1) + (in2 * b0 * a1) + (in3 * a0 * a1)
218  // (in4 * b2 * b3) + (in5 * a2 * b3) + (in6 * b2 * a3) + (in7 * a2 * a3)
219  // (in8 * b4 * b5) + (in9 * a4 * b5) + (in10 * b4 * a5) + (in11 * a4 * a5)
220  // (in12 * b6 * b7) + (in13 * a6 * b7) + (in14 * b6 * a7) + (in15 * a6 * a7)
221  LOOP_UNROLLING(int, i, 0, 1, 4,
222  {
223  out_vals_f32[0].s[i] = (in_vals_f32[0].s[i * 4 + 0] * b[0].s[i * 2] * b[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 1] * a[0].s[i * 2] * b[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 2] * b[0].s[i * 2] * a[0].s[i * 2 + 1]) + (in_vals_f32[0].s[i * 4 + 3] * a[0].s[i * 2] * a[0].s[i * 2 + 1]);
224  })
225 
226  TILE(DATA_TYPE, 1, 4, out_vals_4);
227  TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
228 
229  out_vals_4[0].v = CONVERT_SAT(convert_int4_sat_rtp(out_vals_f32[0].v / (float)SCALE) + OFFSET, VEC_DATA_TYPE(DATA_TYPE, 4));
230 
231  LOOP_UNROLLING(int, i, 0, 1, VEC_SIZE,
232  {
233  out_vals[0].s[i] = out_vals_4[0].s[i];
234  })
235 #else // defined(OFFSET) && defined(SCALE)
236 
237  TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
238 
239  // Bilinear interpolation: (in0 * b0 * b1) + (in1 * a0 * b1) + (in2 * b0 * a1) + (in3 * a0 * a1)
240  // (in4 * b2 * b3) + (in5 * a2 * b3) + (in6 * b2 * a3) + (in7 * a2 * a3)
241  // (in8 * b4 * b5) + (in9 * a4 * b5) + (in10 * b4 * a5) + (in11 * a4 * a5)
242  // (in12 * b6 * b7) + (in13 * a6 * b7) + (in14 * b6 * a7) + (in15 * a6 * a7)
243  LOOP_UNROLLING(int, i, 0, 1, VEC_SIZE,
244  {
245  out_vals[0].s[i] = (in_vals[0].s[i * 4 + 0] * b[0].s[i * 2] * b[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 1] * a[0].s[i * 2] * b[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 2] * b[0].s[i * 2] * a[0].s[i * 2 + 1]) + (in_vals[0].s[i * 4 + 3] * a[0].s[i * 2] * a[0].s[i * 2 + 1]);
246  })
247 #endif // defined(OFFSET) && defined(SCALE)
248 
249  __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + x * out_step_x + y * out_stride_y;
250 
251  if(x == get_global_size(0) - 1)
252  {
253 #if VEC_SIZE == 1
254  VSTORE_PARTIAL(VEC_SIZE, VEC_SIZE_LEFTOVER)
255  (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
256 #else // VEC_SIZE == 1
257  VSTORE_PARTIAL(VEC_SIZE, VEC_SIZE_LEFTOVER)
258  (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
259 #endif // VEC_SIZE == 1
260  }
261  else
262  {
263 #if VEC_SIZE == 1
264  VSTORE(VEC_SIZE)
265  (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
266 #else // VEC_SIZE == 1
267  VSTORE(VEC_SIZE)
268  (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
269 #endif // VEC_SIZE == 1
270  }
271 }
#define VEC_SIZE
#define VSTORE_PARTIAL(size, store_size)
Definition: helpers.h:491
#define CONVERT(x, type)
Definition: helpers.h:731
SimpleTensor< float > b
Definition: DFT.cpp:157
#define LOOP_UNROLLING(type, idx, start, step, num, macro)
Definition: tile_helpers.h:304
const float8 transform_bilinear(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
Definition: scale.cl:58
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Performs clamping among a lower and upper value.
Definition: Utility.h:101
#define SELECT_DATA_TYPE(type)
Definition: helpers.h:752
#define CONVERT_SAT(x, type)
Definition: helpers.h:734
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
Definition: Select.cpp:38
#define SELECT_VEC_DATA_TYPE(type, size)
Definition: helpers.h:751
#define TILE(DATA_TYPE, H, W, BASENAME)
Tile object A tile object is a 2D memory block and can be accessed using the following syntax:a[m0]...
Definition: tile_helpers.h:74
#define VSTORE(size)
Definition: helpers.h:458
#define VEC_DATA_TYPE(type, size)
Definition: helpers.h:728

◆ scale_nearest_neighbour_nchw()

__kernel void scale_nearest_neighbour_nchw ( __global uchar *  in_ptr,
uint  in_stride_x,
uint  in_step_x,
uint  in_stride_y,
uint  in_step_y,
uint  in_offset_first_element_in_bytes,
__global uchar *  out_ptr,
uint  out_stride_x,
uint  out_step_x,
uint  out_stride_y,
uint  out_step_y,
uint  out_offset_first_element_in_bytes 
)

Performs an affine transformation on an image interpolating with the NEAREAST NEIGHBOUR method.

Input and output are single channel U8 or S16.

Note
Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Parameters
[in]in_ptrPointer to the source image. Supported data types: U8, S16.
[in]in_stride_xStride of the source image in X dimension (in bytes)
[in]in_step_xsrc_stride_x * number of elements along X processed per workitem(in bytes)
[in]in_stride_yStride of the source image in Y dimension (in bytes)
[in]in_step_ysrc_stride_y * number of elements along Y processed per workitem(in bytes)
[in]in_offset_first_element_in_bytesThe offset of the first element in the source image
[out]out_ptrPointer to the destination image. Supported data types: U8, S16. (Must be the same as the input)
[in]out_stride_xStride of the destination image in X dimension (in bytes)
[in]out_step_xdst_stride_x * number of elements along X processed per workitem(in bytes)
[in]out_stride_yStride of the destination image in Y dimension (in bytes)
[in]out_step_ydst_stride_y * number of elements along Y processed per workitem(in bytes)
[in]out_offset_first_element_in_bytesThe offset of the first element in the destination image

Definition at line 91 of file scale.cl.

References arm_compute::utility::clamp(), CONVERT, LOOP_UNROLLING, arm_compute::round(), arm_compute::test::validation::reference::select(), SELECT_DATA_TYPE, SELECT_VEC_DATA_TYPE, TILE, transform_nearest(), VEC_SIZE, VSTORE, and VSTORE_PARTIAL.

94 {
95  const int x = get_global_id(0);
96  const int y = get_global_id(1);
97 
98  float8 transformed = transform_nearest((float2)(x * VEC_SIZE, y), (float2)(SCALE_X, SCALE_Y));
99 #ifdef ALIGN_CORNERS
100  transformed = round(transformed);
101 #endif // ALIGN_CORNERS
102 
103  TILE(SELECT_DATA_TYPE(DATA_TYPE), 1, 4, cond);
104  cond[0].v = CONVERT(((transformed.even < 0) || (transformed.even >= (int)SRC_WIDTH)) || ((transformed.odd < 0) || (transformed.odd >= (int)SRC_HEIGHT)), SELECT_VEC_DATA_TYPE(DATA_TYPE, 4));
105 
106  TILE(int, 1, 4, in_x);
107  TILE(int, 1, 4, in_y);
108  in_x[0].v = convert_int4(clamp(transformed.even, 0.f, SRC_WIDTH - 1.f));
109  in_y[0].v = convert_int4(clamp(transformed.odd, 0.f, SRC_HEIGHT - 1.f));
110 
111  TILE(DATA_TYPE, 1, VEC_SIZE, out_vals);
112  LOOP_UNROLLING(int, i, 0, 1, VEC_SIZE,
113  {
114  out_vals[0].s[i] = select(*((__global DATA_TYPE *)(in_ptr + in_offset_first_element_in_bytes + in_x[0].s[i] * sizeof(DATA_TYPE) + in_y[0].s[i] * in_stride_y)), (DATA_TYPE)CONSTANT_VALUE, cond[0].s[i]);
115  })
116 
117  __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes + x * out_step_x + y * out_stride_y;
118 
119  if(x == get_global_size(0) - 1)
120  {
121 #if VEC_SIZE == 1
122  VSTORE_PARTIAL(VEC_SIZE, VEC_SIZE_LEFTOVER)
123  (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
124 #else // VEC_SIZE == 1
125  VSTORE_PARTIAL(VEC_SIZE, VEC_SIZE_LEFTOVER)
126  (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
127 #endif // VEC_SIZE == 1
128  }
129  else
130  {
131 #if VEC_SIZE == 1
132  VSTORE(VEC_SIZE)
133  (out_vals[0].s[0], 0, (__global DATA_TYPE *)out_addr);
134 #else // VEC_SIZE == 1
135  VSTORE(VEC_SIZE)
136  (out_vals[0].v, 0, (__global DATA_TYPE *)out_addr);
137 #endif // VEC_SIZE == 1
138  }
139 }
#define VEC_SIZE
#define VSTORE_PARTIAL(size, store_size)
Definition: helpers.h:491
#define CONVERT(x, type)
Definition: helpers.h:731
#define LOOP_UNROLLING(type, idx, start, step, num, macro)
Definition: tile_helpers.h:304
const float8 transform_nearest(const float2 coord, const float2 scale)
Transforms four 2D coordinates.
Definition: scale.cl:34
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Performs clamping among a lower and upper value.
Definition: Utility.h:101
#define SELECT_DATA_TYPE(type)
Definition: helpers.h:752
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
Definition: Select.cpp:38
#define SELECT_VEC_DATA_TYPE(type, size)
Definition: helpers.h:751
#define TILE(DATA_TYPE, H, W, BASENAME)
Tile object A tile object is a 2D memory block and can be accessed using the following syntax:a[m0]...
Definition: tile_helpers.h:74
#define VSTORE(size)
Definition: helpers.h:458

◆ transform_bilinear()

const float8 transform_bilinear ( const float2  coord,
const float2  scale 
)
inline

Transforms four 2D coordinates.

This is used to map the output coordinates to the input coordinates.

Parameters
[in]coord2D coordinates to transform.
[in]scaleinput/output scale ratio
Returns
a float8 containing 4 2D transformed values in the input image.

Definition at line 58 of file scale.cl.

Referenced by scale_bilinear_nchw().

59 {
60  const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
61 #ifdef SAMPLING_POLICY_TOP_LEFT
62  const float4 new_x = in_x_coords * (float4)(scale.s0);
63  const float4 new_y = (float4)(coord.s1 * scale.s1);
64  return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
65 #elif SAMPLING_POLICY_CENTER
66  const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0) - (float4)(0.5f);
67  const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1 - 0.5f);
68  return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
69 #else /* SAMPLING_POLICY */
70 #error("Unsupported sampling policy");
71 #endif /* SAMPLING_POLICY */
72 }

◆ transform_nearest()

const float8 transform_nearest ( const float2  coord,
const float2  scale 
)
inline

Transforms four 2D coordinates.

This is used to map the output coordinates to the input coordinates.

Parameters
[in]coord2D coordinates to transform.
[in]scaleinput/output scale ratio
Returns
a float8 containing 4 2D transformed values in the input image.

Definition at line 34 of file scale.cl.

Referenced by scale_nearest_neighbour_nchw().

35 {
36 #ifdef SAMPLING_POLICY_TOP_LEFT
37  const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
38  const float4 new_x = in_x_coords * (float4)(scale.s0);
39  const float4 new_y = (float4)(coord.s1 * scale.s1);
40  return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
41 #elif SAMPLING_POLICY_CENTER
42  const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
43  const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0);
44  const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1);
45  return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
46 #else /* SAMPLING_POLICY */
47 #error("Unsupported sampling policy");
48 #endif /* SAMPLING_POLICY */
49 }