// reshape_to_columns: OpenCL kernel, compiled only when both DATA_TYPE and NUM_GROUPS are defined.
// NOTE(review): the listing numbers embedded in this text jump (26 -> 56 -> 62 -> 65, 80 -> 85,
// 101 -> 104, ...), so parts of the original kernel are not visible here -- presumably the
// src/dst/bias tensor parameter declarations, the braces, the branch that selects between the
// two loop nests below, and the store that copies a source element to the destination.
// Confirm against the complete source before relying on these notes.
26 #if defined(DATA_TYPE) && defined(NUM_GROUPS) 56 __kernel
void reshape_to_columns(
62 uint width, uint height, uint depth, uint total_filters, uint dst_stride_z)
// True only for the work-item whose global id equals (global size - 1) in all three dimensions.
65 bool is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1));
// Source cursor starts at src.ptr.
67 __global uchar *tmp_src_ptr = src.
ptr;
// Destination cursor: first-element offset plus
// (gid0 + gid1 * width + gid2 * width * height) * dst_stride_y bytes.
68 __global uchar *tmp_dst_ptr = dst_ptr + dst_offset_first_element_in_bytes + get_global_id(0) * dst_stride_y + get_global_id(1) * width * dst_stride_y + get_global_id(
69 2) * width * height * dst_stride_y;
// Bias cursor starts at the first bias element.
71 __global uchar *tmp_bias_ptr = bias_ptr + bias_offset_first_element_in_bytes;
// First loop nest (with bias). NOTE(review): presumably the is_last_thread path -- the
// selecting branch is not visible in this listing.
// Outer loop: one iteration per group; the destination cursor moves by dst_stride_z per group.
76 for(uint g = 0; g < NUM_GROUPS; ++g)
78 __global uchar *curr_group_dst = tmp_dst_ptr;
// Inner loop: total_filters / NUM_GROUPS iterations (unsigned integer division).
80 for(uint i = 0; i < total_filters / NUM_GROUPS; ++i)
// Store the current bias value dst_stride_y bytes past the current destination position,
// then advance the bias cursor by bias_stride_x.
85 *((__global
DATA_TYPE *)(curr_group_dst + dst_stride_y)) = *((__global
DATA_TYPE *)(tmp_bias_ptr));
86 tmp_bias_ptr += bias_stride_x;
// Per iteration: source advances by depth * src_stride_z, destination by dst_stride_x.
88 tmp_src_ptr += depth * src_stride_z;
89 curr_group_dst += dst_stride_x;
92 tmp_dst_ptr += dst_stride_z;
// Second loop nest: same traversal and pointer strides as above, without the bias store.
97 for(uint g = 0; g < NUM_GROUPS; ++g)
99 __global uchar *curr_group_dst = tmp_dst_ptr;
101 for(uint i = 0; i < total_filters / NUM_GROUPS; ++i)
104 tmp_src_ptr += depth * src_stride_z;
105 curr_group_dst += dst_stride_x;
108 tmp_dst_ptr += dst_stride_z;
112 #endif // defined(DATA_TYPE) && defined(NUM_GROUPS)
#define IMAGE_DECLARATION(name)
Structure to hold 3D tensor information.
SimpleTensor< float > src
#define VECTOR_DECLARATION(name)
#define CONVERT_TO_TENSOR3D_STRUCT(name)
__global uchar * ptr
Pointer to the starting position of the buffer.
#define TENSOR3D_DECLARATION(name)