26 #if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(VEC_SIZE_LEFTOVER)
// Kernel select_same_rank.
// NOTE(review): the parameter list and the definitions of `offset` and `res` are outside the visible lines — confirm against them.
66 __kernel
void select_same_rank(
// Byte address into c: base pointer + first-element byte offset + `offset` (added unscaled)
// + row/plane steps scaled by global ids 1 and 2.
// NOTE(review): `offset` is not multiplied by an element size here, unlike x/y/out below —
// presumably c elements are one byte wide; confirm.
74 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes +
offset + get_global_id(1) * c_step_y + get_global_id(2) * c_step_z;
// Byte address into x: same shape as c_addr, but `offset` is scaled by sizeof(DATA_TYPE).
75 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z;
// Byte address into y: identical computation to x_addr using the y_* values.
76 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z;
// Byte address into out: identical computation to x_addr using the out_* values.
77 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z;
// Store `res` through out_addr reinterpreted as a DATA_TYPE pointer. The final argument is true
// only when VEC_SIZE_LEFTOVER is non-zero and this work-item has global id 0 in dimension 0.
// NOTE(review): presumably that flag selects a partial store of VEC_SIZE_LEFTOVER elements —
// confirm against the STORE_VECTOR_SELECT macro definition.
92 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr,
VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
// Kernel select_different_rank_2.
// NOTE(review): the parameter list and the definitions of `offset`, `in_c` and `res` are outside the visible lines — confirm against them.
130 __kernel
void select_different_rank_2(
// Index of the condition element used by this work-item: its global id in dimension 1.
136 const int c_idx = get_global_id(1);
// Base byte address of c; no per-work-item term is added here — the element is picked via c_idx below.
140 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes;
// Byte address into x: base + first-element byte offset + `offset` scaled by sizeof(DATA_TYPE)
// + row/plane steps scaled by global ids 1 and 2.
141 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z;
// Byte address into y: identical computation to x_addr using the y_* values.
142 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z;
// Byte address into out: identical computation to x_addr using the out_* values.
143 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z;
// Read a single uchar from c at byte offset c_idx * c_stride_x and assign it to in_c.
147 in_c = *((__global uchar *)(c_addr + c_idx * c_stride_x));
// Store `res` through out_addr reinterpreted as a DATA_TYPE pointer. The final argument is true
// only when VEC_SIZE_LEFTOVER is non-zero and this work-item has global id 0 in dimension 0.
// NOTE(review): presumably that flag selects a partial store of VEC_SIZE_LEFTOVER elements —
// confirm against the STORE_VECTOR_SELECT macro definition.
158 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr,
VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
162 #if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(DEPTH_SIZE) && defined(VEC_SIZE_LEFTOVER)
// Kernel select_different_rank_n.
// NOTE(review): the parameter list and the definitions of `offset`, `in_c` and `res` are outside the visible lines — confirm against them.
198 __kernel
void select_different_rank_n(
// Index of the condition element used by this work-item: global id in dimension 2 divided by DEPTH_SIZE.
// NOTE(review): assuming DEPTH_SIZE is a positive integer constant, DEPTH_SIZE consecutive
// dimension-2 ids map to the same c_idx — confirm.
204 const int c_idx = get_global_id(2) / DEPTH_SIZE;
// Base byte address of c; no per-work-item term is added here — the element is picked via c_idx below.
208 __global uchar *c_addr = c_ptr + c_offset_first_element_in_bytes;
// Byte address into x: base + first-element byte offset + `offset` scaled by sizeof(DATA_TYPE)
// + row/plane steps scaled by global ids 1 and 2.
209 __global uchar *x_addr = x_ptr + x_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * x_step_y + get_global_id(2) * x_step_z;
// Byte address into y: identical computation to x_addr using the y_* values.
210 __global uchar *y_addr = y_ptr + y_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * y_step_y + get_global_id(2) * y_step_z;
// Byte address into out: identical computation to x_addr using the out_* values.
211 __global uchar *out_addr = out_ptr + out_offset_first_element_in_bytes +
offset *
sizeof(DATA_TYPE) + get_global_id(1) * out_step_y + get_global_id(2) * out_step_z;
// Read a single uchar from c at byte offset c_idx * c_stride_x and assign it to in_c.
215 in_c = *((__global uchar *)(c_addr + c_idx * c_stride_x));
// Store `res` through out_addr reinterpreted as a DATA_TYPE pointer. The final argument is true
// only when VEC_SIZE_LEFTOVER is non-zero and this work-item has global id 0 in dimension 0.
// NOTE(review): presumably that flag selects a partial store of VEC_SIZE_LEFTOVER elements —
// confirm against the STORE_VECTOR_SELECT macro definition.
226 STORE_VECTOR_SELECT(res, DATA_TYPE, (__global DATA_TYPE *)out_addr,
VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);