// Compile-time guard: the code that follows is only compiled when DATA_TYPE,
// WIDTH_OUTPUT, ELEMENT_SIZE, WIDTH_INPUT and NUM_GROUPS are all defined.
26 #if defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT) && defined(NUM_GROUPS)
// Select COND_DATA_TYPE from ELEMENT_SIZE: 2 -> short, 4 -> int.
// NOTE(review): the opening conditional that selects char is not visible in this
// excerpt (presumably ELEMENT_SIZE == 1) - confirm against the full file.
29 #define COND_DATA_TYPE char
30 #elif ELEMENT_SIZE == 2
31 #define COND_DATA_TYPE short
32 #elif ELEMENT_SIZE == 4
33 #define COND_DATA_TYPE int
// Fallback for any other ELEMENT_SIZE: abort compilation.
// NOTE(review): the #else that should precede this #error is not visible here;
// the message presumably was meant to read "not supported" - confirm before changing.
35 #error "Element size not support"
36 #endif // ELEMENT_SIZE
// Split work-item id 1 into its remainder (xd) and quotient (yd) by WIDTH_OUTPUT;
// below they are scaled by the dst x and y strides respectively.
72 const uint xd = get_global_id(1) % WIDTH_OUTPUT;
73 const uint yd = get_global_id(1) / WIDTH_OUTPUT;
// Load 8 consecutive DATA_TYPE elements starting at src.ptr (vload8 with offset 0).
// NOTE(review): the declarations of data and src are not visible in this excerpt.
76 data = vload8(0, (__global DATA_TYPE *)
src.ptr);
// x is the first of the 8 positions handled by this work-item along dimension 0;
// x_clamped initially holds x, x+1, ..., x+7.
78 uint x = get_global_id(0) * 8;
79 uint8 x_clamped = x + (uint8)(0, 1, 2, 3, 4, 5, 6, 7);
// Per lane: keep x_clamped where the most significant bit of the converted cond0 lane
// is set, otherwise fall back to x (OpenCL vector select semantics).
// NOTE(review): cond0 is defined outside this excerpt; presumably an in-bounds mask - confirm.
85 x_clamped =
select((uint8)x, x_clamped, convert_int8(cond0));
// Offset added to dst.ptr for this work-item; the stride_w term uses
// get_global_id(2) / NUM_GROUPS.
// NOTE(review): this statement uses dst_stride_y / dst_stride_x but dst.stride_w,
// while the other idx computation below uses dst.stride_* throughout - presumably both
// spellings are in scope (kernel arguments vs. tensor struct); verify.
92 int idx = yd * dst_stride_y + xd * dst_stride_x + (get_global_id(2) / NUM_GROUPS) *
dst.stride_w;
// Group index derived from work-item id 2; every lane of x_clamped is shifted by
// group * WIDTH_INPUT.
94 const uint group = get_global_id(2) % NUM_GROUPS;
95 x_clamped += group * WIDTH_INPUT;
// Second definition of idx, using get_global_id(2) directly for the stride_w term.
// NOTE(review): presumably the alternative preprocessor branch to the idx above; the
// surrounding #if / #else lines are not visible in this excerpt - confirm.
98 int idx = yd *
dst.stride_y + xd *
dst.stride_x + get_global_id(2) *
dst.stride_w;
// Scatter the 8 loaded lanes: lane k of data is written to
// dst.ptr + idx + x_clamped.sk * dst.stride_z, viewed as a __global DATA_TYPE pointer.
102 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s0 *
dst.stride_z)) = data.s0;
103 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s1 *
dst.stride_z)) = data.s1;
104 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s2 *
dst.stride_z)) = data.s2;
105 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s3 *
dst.stride_z)) = data.s3;
106 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s4 *
dst.stride_z)) = data.s4;
107 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s5 *
dst.stride_z)) = data.s5;
108 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s6 *
dst.stride_z)) = data.s6;
109 *((__global DATA_TYPE *)(
dst.ptr + idx + x_clamped.s7 *
dst.stride_z)) = data.s7;
// Closes the feature guard that requires all five build-time macros to be defined.
111 #endif // defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT) && defined(NUM_GROUPS)