// NOTE(review): every statement below carries a leading number (27, 30, ... 103) and the
// numbers have gaps, so this text looks like a listing with lines dropped. The enclosing
// function signature, the declarations of batch_id / group_id / channel_id / u0 / u1 and the
// rest of DIV_MOD_UINT are not visible here - confirm against the original file before editing.
//
// Visible intent: compiled only when DATA_TYPE, VEC_SIZE, NUM_GROUPS, K and SRC_DIM_Z are all
// defined, and VEC_SIZE must be one of 1, 2, 3, 4, 8 or 16. Each work-item copies two
// consecutive rows of VEC_SIZE elements from the source to the destination, changing only the
// z index (curr_channel on the source side, channel_id * NUM_GROUPS + group_id on the
// destination side).
27 #if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(NUM_GROUPS) && defined(K) && defined(SRC_DIM_Z) 30 #if VEC_SIZE != 1 && VEC_SIZE != 2 && VEC_SIZE != 3 && VEC_SIZE != 4 && VEC_SIZE != 8 && VEC_SIZE != 16 31 #error "Only vector sizes 1, 2, 3, 4, 8 and 16 are supported" 32 #endif // VEC_SIZE != 1 && VEC_SIZE != 2 && VEC_SIZE != 3 && VEC_SIZE != 4 && VEC_SIZE != 8 && VEC_SIZE != 16 34 #define DIV_MOD_UINT(x, y, div_res, mod_res) \ 36 div_res = (uint)((x)/(y)); \ 37 uint r = div_res * (y); \ 73 uint curr_channel = 0;
// Split the dimension-2 global id by SRC_DIM_Z: batch_id receives the quotient (per the
// visible `div_res = (uint)((x)/(y))`). curr_channel presumably receives the remainder; the
// mod_res assignment is not visible in this listing - TODO confirm.
79 DIV_MOD_UINT(get_global_id(2), SRC_DIM_Z, batch_id, curr_channel);
// Same split of curr_channel by K: group_id is the quotient, channel_id presumably the remainder.
82 DIV_MOD_UINT(curr_channel,
K, group_id, channel_id);
// Element offset along x: each dimension-0 work-item starts VEC_SIZE elements after the previous one.
84 const uint x = get_global_id(0) *
VEC_SIZE;
// Row offset along y: each dimension-1 work-item covers two rows (row y and row y + 1 below).
85 const uint y = get_global_id(1) * 2;
// Destination z index built from the two parts of curr_channel, with NUM_GROUPS as the multiplier.
86 const uint z = channel_id * NUM_GROUPS + group_id;
// Source byte address: base + first-element offset + x elements + y rows
// + curr_channel steps of src_stride_z + batch_id steps of src_stride_w.
89 const __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * src_stride_y + curr_channel * src_stride_z + batch_id * src_stride_w;
// Read from row y (0 * src_stride_y). VLOAD(VEC_SIZE) is defined outside this listing;
// presumably a VEC_SIZE-wide vector load - verify.
91 u0 =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)(input_ptr + 0 * src_stride_y));
// Read from the next row, one src_stride_y further on.
93 u1 =
VLOAD(
VEC_SIZE)(0, (__global DATA_TYPE *)(input_ptr + 1 * src_stride_y));
// Destination byte address: same x, y and batch_id terms as the source, but z replaces
// curr_channel and the dst_* strides are used.
96 __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes + x *
sizeof(DATA_TYPE) + y * dst_stride_y + z * dst_stride_z + batch_id * dst_stride_w;
// NOTE(review): the callee name is missing from the next two lines in this listing.
// Presumably the store counterpart of VLOAD(VEC_SIZE), writing u0 and u1 to rows y and y + 1
// of the destination - confirm against the original file.
98 (u0, 0, (__global DATA_TYPE *)(output_ptr + 0 * dst_stride_y));
100 (u1, 0, (__global DATA_TYPE *)(output_ptr + 1 * dst_stride_y));
103 #endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(NUM_GROUPS) && defined(K) && defined(SRC_DIM_Z)
// NOTE(review): the three lines below follow the closing #endif and have no bodies: the first
// is a declaration with no terminating semicolon, and both macros are written with an empty
// replacement list. They look like residue from a generated listing rather than real
// definitions - confirm against the original file before relying on them.
SimpleTensor< float > src
#define TENSOR4D_DECLARATION(name)
#define VEC_DATA_TYPE(type, size)