// NOTE(review): the leading integers on many lines below (27, 63, 71, ...) look like
// line numbers of the original file that were fused into the text by an extraction step,
// and the gaps between them suggest that lines are missing here (kernel parameter list,
// braces, the definitions of xo / yo / cout / bout / scale_x / scale_y). Confirm against
// the pristine source before relying on this listing.
//
// Nearest-neighbour scaling kernel (NHWC layout, judging by the kernel name).
// The whole kernel is guarded by SCALE_NEAREST_NEIGHBOUR.
27 #if defined(SCALE_NEAREST_NEIGHBOUR)
63 __kernel
void scale_nearest_neighbour_nhwc(
// BATCHED_EXECUTION selects between two code paths whose bodies are not visible here.
71 #if defined(BATCHED_EXECUTION)
74 #else // defined(BATCHED_EXECUTION)
77 #endif // defined(BATCHED_EXECUTION)
// Sampling policy selection. The TOP_LEFT branch body is not visible in this listing.
// NOTE(review): the #elif below tests the VALUE of SAMPLING_POLICY_CENTER rather than
// defined(SAMPLING_POLICY_CENTER); it only works if the macro expands to a non-zero
// integer (e.g. passed as -DSAMPLING_POLICY_CENTER) -- confirm with the build options.
79 #ifdef SAMPLING_POLICY_TOP_LEFT
82 #elif SAMPLING_POLICY_CENTER
// Centre sampling: shift the output coordinate by half a pixel before scaling.
83 float xi_f = ((xo + 0.5f) *
scale_x);
84 float yi_f = ((yo + 0.5f) *
scale_y);
85 #else // SAMPLING_POLICY
86 #error("Unsupported sampling policy");
87 #endif // SAMPLING_POLICY
// Closes an ALIGN_CORNERS conditional whose opening and body are not visible here.
92 #endif // ALIGN_CORNERS
// Truncate the source coordinates to int and clamp them to [0, src_w - 1] / [0, src_h - 1].
94 const int xi0 =
clamp((
int)xi_f, 0, (
int)src_w - 1);
95 const int yi0 =
clamp((
int)yi_f, 0, (
int)src_h - 1);
// Tile declared with SRC_DATA_TYPE and dimensions 1 x N0; receives the loaded source values.
97 TILE(SRC_DATA_TYPE, 1, N0, in00);
// Load from src at (bout, yi0, xi0, cout). The second-to-last argument is false here;
// presumably it is a boundary-check flag that can be disabled because the coordinates
// were clamped above -- TODO confirm against the macro definition.
99 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi0, xi0, cout, src_w, src_h, 1, 1,
false, in00);
// Single-element uint tile holding the destination index used by the indirect store.
101 TILE(uint, 1, 1, dst_indirect_y);
// Linearised destination index: xo + yo * dst_w + bout * (dst_w * dst_h).
// NOTE(review): dst_w * dst_h is multiplied before the cast to int; overflow behaviour
// depends on the (not visible) types of dst_w and dst_h.
104 dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (
int)(dst_w * dst_h);
// x_cond is true only when PARTIAL_N0 is non-zero and this work-item has global id 0 in
// dimension 0; it is passed to the store macro below together with PARTIAL_N0.
106 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
// Store in00 to dst through dst_indirect_y; the macro receives both N0 and PARTIAL_N0
// and presumably selects the width based on x_cond -- TODO confirm.
108 T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE,
dst, cout, dst_stride_y, x_cond, in00, dst_indirect_y);
// NOTE(review): the leading integers on many lines below (112, 155, 163, ...) look like
// line numbers of the original file that were fused into the text by an extraction step,
// and the gaps between them suggest that lines are missing here (kernel parameter list,
// braces, the definitions of xo / yo / cout / bout / scale_x / scale_y, and the
// non-floating-point interpolation). Confirm against the pristine source.
//
// Bilinear scaling kernel (NHWC layout, judging by the kernel name).
// The whole kernel is guarded by SCALE_BILINEAR.
112 #if defined(SCALE_BILINEAR)
155 __kernel
void scale_bilinear_nhwc(
// BATCHED_EXECUTION selects between two code paths whose bodies are not visible here.
163 #if defined(BATCHED_EXECUTION)
166 #else // defined(BATCHED_EXECUTION)
169 #endif // defined(BATCHED_EXECUTION)
// Sampling policy selection. The TOP_LEFT branch body is not visible in this listing.
// NOTE(review): the #elif below tests the VALUE of SAMPLING_POLICY_CENTER rather than
// defined(SAMPLING_POLICY_CENTER); it only works if the macro expands to a non-zero
// integer -- confirm with the build options.
171 #ifdef SAMPLING_POLICY_TOP_LEFT
174 #elif SAMPLING_POLICY_CENTER
// Centre sampling: map the output pixel centre into source space, then shift back by
// half a pixel so the result is expressed relative to source pixel origins.
175 float xi_f = ((xo + 0.5f) *
scale_x - 0.5f);
176 float yi_f = ((yo + 0.5f) *
scale_y - 0.5f);
177 #else // SAMPLING_POLICY
178 #error("Unsupported sampling policy");
179 #endif // SAMPLING_POLICY
// Integer coordinates of the top-left neighbour. floor() (rather than a plain cast) rounds
// towards negative infinity, so negative xi_f / yi_f yield negative indices.
181 const int xi = (int)floor(xi_f);
182 const int yi = (int)floor(yi_f);
// Four 1 x N0 tiles for the 2x2 neighbourhood:
// in00 = (yi, xi), in01 = (yi, xi + 1), in10 = (yi + 1, xi), in11 = (yi + 1, xi + 1).
184 TILE(SRC_DATA_TYPE, 1, N0, in00);
185 TILE(SRC_DATA_TYPE, 1, N0, in01);
186 TILE(SRC_DATA_TYPE, 1, N0, in10);
187 TILE(SRC_DATA_TYPE, 1, N0, in11);
// Pre-fill every tile with CONSTANT_VALUE before loading.
190 in00[0].v = CONSTANT_VALUE;
191 in01[0].v = CONSTANT_VALUE;
192 in10[0].v = CONSTANT_VALUE;
193 in11[0].v = CONSTANT_VALUE;
// Without BORDER_MODE_REPLICATE: load the four neighbours at their unclamped coordinates
// with the second-to-last macro argument set to true. Presumably this enables a boundary
// check so out-of-range taps keep the CONSTANT_VALUE written above -- TODO confirm
// against the macro definition.
195 #ifndef BORDER_MODE_REPLICATE
196 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi, xi, cout, src_w, src_h, 1, 1,
true, in00);
197 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi, xi + 1, cout, src_w, src_h, 1, 1,
true, in01);
198 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi + 1, xi, cout, src_w, src_h, 1, 1,
true, in10);
199 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi + 1, xi + 1, cout, src_w, src_h, 1, 1,
true, in11);
200 #else // BORDER_MODE_REPLICATE
// With BORDER_MODE_REPLICATE: clamp both neighbour coordinates on each axis to
// [0, src_w - 1] / [0, src_h - 1], then load with the flag set to false.
201 const int xi0 =
clamp(xi, 0, (
int)src_w - 1);
202 const int yi0 =
clamp(yi, 0, (
int)src_h - 1);
203 const int xi1 =
clamp(xi + 1, 0, (
int)src_w - 1);
204 const int yi1 =
clamp(yi + 1, 0, (
int)src_h - 1);
206 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi0, xi0, cout, src_w, src_h, 1, 1,
false, in00);
207 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi0, xi1, cout, src_w, src_h, 1, 1,
false, in01);
208 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi1, xi0, cout, src_w, src_h, 1, 1,
false, in10);
209 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE,
src, bout, yi1, xi1, cout, src_w, src_h, 1, 1,
false, in11);
210 #endif // BORDER_MODE_REPLICATE
// Output tile (1 x N0 of DST_DATA_TYPE) that receives the interpolated result.
212 TILE(DST_DATA_TYPE, 1, N0, out);
// Floating-point path: the interpolation weights are converted to SRC_DATA_TYPE.
//   a  = fractional x offset, b  = 1 - a
//   a1 = fractional y offset, b1 = 1 - a1
// The fractions are taken against the unclamped xi / yi computed with floor() above.
214 #if defined(IS_FLOATING_POINT)
215 const SRC_DATA_TYPE a = (SRC_DATA_TYPE)(xi_f - (
float)xi);
216 const SRC_DATA_TYPE
b = (SRC_DATA_TYPE)(1.f - a);
217 const SRC_DATA_TYPE a1 = (SRC_DATA_TYPE)(yi_f - (
float)yi);
218 const SRC_DATA_TYPE b1 = (SRC_DATA_TYPE)(1.f - a1);
// Weighted sum of the four neighbours:
// out = in00*b*b1 + in01*a*b1 + in10*b*a1 + in11*a*a1.
221 out[0].v = ((in00[0].v *
b * b1) + (in01[0].v * a * b1) + (in10[0].v *
b * a1) + (in11[0].v * a * a1));
222 #else // defined(IS_FLOATING_POINT)
// Non-floating-point path: the same four weights are kept in float.
// NOTE(review): the statements that use these weights to fill `out` are not visible in
// this listing (the embedded numbering jumps from 227 to 234) -- check the full source.
224 const float a = (xi_f - (float)xi);
225 const float b = (1.f - a);
226 const float a1 = (yi_f - (float)yi);
227 const float b1 = (1.f - a1);
234 #endif // defined(IS_FLOATING_POINT)
// Single-element uint tile holding the destination index used by the indirect store.
236 TILE(uint, 1, 1, dst_indirect_y);
// Linearised destination index: xo + yo * dst_w + bout * (dst_w * dst_h).
// NOTE(review): dst_w * dst_h is multiplied before the cast to int; overflow behaviour
// depends on the (not visible) types of dst_w and dst_h.
239 dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (
int)(dst_w * dst_h);
// x_cond is true only when PARTIAL_N0 is non-zero and this work-item has global id 0 in
// dimension 0; it is passed to the store macro below together with PARTIAL_N0.
241 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
// Store the interpolated tile to dst through dst_indirect_y.
243 T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE,
dst, cout, dst_stride_y, x_cond, out, dst_indirect_y);