26 #if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH) && defined(PAD_X_BEFORE_REMAINDER) && defined(VEC_SIZE_LEFTOVER_WRITE)
28 #define VEC_TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
29 #define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
30 #define VEC_SELECT SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
31 #define OFFSETS VEC_OFFS(SELECT_DATA_TYPE(DATA_TYPE), VEC_SIZE)
32 #define SCALAR_COND(x) CONVERT((VEC_SELECT)x == (VEC_SELECT)1, VEC_SELECT)
34 #if defined(CONST_VAL) && defined(VEC_SIZE_LEFTOVER_READ)
76 #
if defined(PAD_W_BEFORE)
84 int x = get_global_id(0);
85 int y = get_global_id(1);
86 int z = get_global_id(2);
90 #if defined(THREADS_TO_SKIP_BEFORE)
91 cond |= x < THREADS_TO_SKIP_BEFORE || x > THREADS_TO_SKIP_AFTER;
92 #endif // defined(THREADS_TO_SKIP_BEFORE)
93 #if defined(PAD_Y_BEFORE)
94 cond |= y < PAD_Y_BEFORE || y >= (SRC_HEIGHT + PAD_Y_BEFORE);
95 #endif // defined(PAD_Y_BEFORE)
96 #if defined(PAD_Z_BEFORE)
97 cond |= z < PAD_Z_BEFORE || z >= (SRC_DEPTH + PAD_Z_BEFORE);
98 #endif // defined(PAD_Z_BEFORE)
99 #if defined(PAD_W_BEFORE)
100 cond |= batch < PAD_W_BEFORE || batch >= (SRC_BATCH + PAD_W_BEFORE);
101 #endif // defined(PAD_W_BEFORE)
105 VEC_TYPE const_vals0 = (VEC_TYPE)CONST_VAL;
112 #if defined(THREADS_TO_SKIP_BEFORE)
113 x -= THREADS_TO_SKIP_BEFORE;
114 #endif // defined(THREADS_TO_SKIP_BEFORE)
115 #if defined(PAD_Y_BEFORE)
117 #endif // defined(PAD_Y_BEFORE)
118 #if defined(PAD_Z_BEFORE)
120 #endif // defined(PAD_Z_BEFORE)
121 #if defined(PAD_W_BEFORE)
122 w -= PAD_W_BEFORE * SRC_DEPTH;
123 #endif // defined(PAD_W_BEFORE)
125 x -= PAD_X_BEFORE_REMAINDER;
128 uint cond_left = x < 0;
129 uint cond_right = (x +
VEC_SIZE) > SRC_WIDTH;
133 __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x * src_stride_x + y * src_stride_y + z * src_stride_z +
w * (int)src_stride_z;
136 VEC_TYPE src_vals0 =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
137 src_vals0 =
select(src_vals0,
ROTATE(src_vals0,
VEC_SIZE, PAD_X_BEFORE_REMAINDER), SCALAR_COND(cond_left));
138 src_vals0 =
select(src_vals0,
ROTATE(src_vals0,
VEC_SIZE, VEC_SIZE_LEFTOVER_READ), SCALAR_COND(cond_right));
142 VEC_INT conds = xs_out < (VEC_INT)PAD_X_BEFORE || xs_out >= (VEC_INT)(SRC_WIDTH + PAD_X_BEFORE);
143 src_vals0 =
select(src_vals0, (VEC_TYPE)CONST_VAL,
CONVERT(conds, VEC_SELECT));
149 #endif // defined(CONST_VAL) && defined(VEC_SIZE_LEFTOVER_READ)
151 #if defined(IS_REFLECT) && defined(PAD_X_AFTER_REMAINDER) && defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && defined(AFTER_PAD_FACT_X)
153 #define ROTATE_REVERSE(x, n) ROTATE(REVERSE(x, VEC_SIZE), VEC_SIZE, n)
154 #define SYMM_REFL_LEFT(x, n0, n1) select(ROTATE_REVERSE(x, n1), ROTATE(x, VEC_SIZE, n0), OFFSETS >= (VEC_SELECT)n0)
155 #define SYMM_REFL_RIGHT(x, n0, n1) select(ROTATE(x, VEC_SIZE, n0), ROTATE_REVERSE(x, n1), OFFSETS >= (VEC_SELECT)n0)
200 const int x = get_global_id(0);
201 const int y = get_global_id(1);
202 const int z = get_global_id(2);
205 const int x_out_first = x *
VEC_SIZE;
206 const int x_out_last = x_out_first +
VEC_SIZE;
207 const int is_before_pad_left = (x_out_last <= PAD_X_BEFORE);
208 const int is_across_pad_left = (x_out_first < PAD_X_BEFORE) && (x_out_last > PAD_X_BEFORE);
209 const int is_inside_input = (x_out_first >= PAD_X_BEFORE) && (x_out_last <= (SRC_WIDTH + PAD_X_BEFORE));
210 const int is_across_pad_right = (x_out_first < (SRC_WIDTH + PAD_X_BEFORE)) && (x_out_last > (SRC_WIDTH + PAD_X_BEFORE));
211 const int is_after_pad_right = (x_out_first >= (SRC_WIDTH + PAD_X_BEFORE));
214 __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes;
219 x_offset =
select(x_offset, PAD_X_BEFORE - x_out_last + IS_REFLECT, is_before_pad_left);
220 x_offset =
select(x_offset, x_out_first - PAD_X_BEFORE, is_inside_input);
221 x_offset =
select(x_offset, SRC_WIDTH -
VEC_SIZE, is_across_pad_right);
222 x_offset =
select(x_offset, AFTER_PAD_FACT_X - x_out_last, is_after_pad_right);
224 #if defined(AFTER_PAD_REM)
225 int neg_offs = x_offset < 0;
226 x_offset = max(x_offset, 0);
227 #endif // defined(AFTER_PAD_REM)
232 #if defined(PAD_Y_BEFORE)
233 y_in =
select(y - PAD_Y_BEFORE, PAD_Y_BEFORE - y + IS_REFLECT - 1, y < PAD_Y_BEFORE);
234 y_in =
select(y_in, 2 * SRC_HEIGHT + PAD_Y_BEFORE - y - IS_REFLECT - 1, y >= (SRC_HEIGHT + PAD_Y_BEFORE));
235 #endif // defined(PAD_Y_BEFORE)
236 #if defined(PAD_Z_BEFORE)
237 z_in =
select(z - PAD_Z_BEFORE, PAD_Z_BEFORE - z + IS_REFLECT - 1, z < PAD_Z_BEFORE);
238 z_in =
select(z_in, 2 * SRC_DEPTH + PAD_Z_BEFORE - z - IS_REFLECT - 1, z >= (SRC_DEPTH + PAD_Z_BEFORE));
239 #endif // defined(PAD_Y_BEFORE)
241 src_addr += x_offset * src_stride_x + y_in * src_step_y + z_in * src_step_z;
245 ((VEC_TYPE)(*(__global DATA_TYPE *)src_addr), 0, (__global DATA_TYPE *)
dst.ptr);
246 #else // SRC_WIDTH == 1
248 VEC_TYPE src_vals0 =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);
251 src_vals0 =
select(src_vals0, SYMM_REFL_LEFT(src_vals0, PAD_X_BEFORE_REMAINDER, PAD_X_BEFORE_REMAINDER_REFL), SCALAR_COND(is_across_pad_left));
252 src_vals0 =
select(src_vals0, SYMM_REFL_RIGHT(src_vals0, PAD_X_AFTER_REMAINDER, PAD_X_AFTER_REMAINDER_REFL), SCALAR_COND(is_across_pad_right));
253 src_vals0 =
select(src_vals0,
REVERSE(src_vals0,
VEC_SIZE), SCALAR_COND((is_before_pad_left || is_after_pad_right)));
254 #if defined(AFTER_PAD_REM)
255 src_vals0 =
select(src_vals0,
ROTATE(src_vals0,
VEC_SIZE, AFTER_PAD_REM), SCALAR_COND(neg_offs));
256 #endif // defined(AFTER_PAD_REM)
260 #endif // SRC_WIDTH == 1
262 #endif // defined(IS_REFLECT) && defined(PAD_X_AFTER_REMAINDER) && defined(PAD_X_BEFORE_REMAINDER_REFL) && defined(PAD_X_AFTER_REMAINDER_REFL) && defined(AFTER_PAD_FACT_X)
263 #endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(PAD_X_BEFORE) && defined(SRC_WIDTH) && defined(PAD_X_BEFORE_REMAINDER) && defined(VEC_SIZE_LEFTOVER_WRITE)