26 #if defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT) && defined(NUM_GROUPS) 29 #define COND_DATA_TYPE char 30 #elif ELEMENT_SIZE == 2 31 #define COND_DATA_TYPE short 32 #elif ELEMENT_SIZE == 4 33 #define COND_DATA_TYPE int 35 #error "Element size not support" 36 #endif // ELEMENT_SIZE 72 const uint xd = get_global_id(1) % WIDTH_OUTPUT;
73 const uint yd = get_global_id(1) / WIDTH_OUTPUT;
76 data = vload8(0, (__global
DATA_TYPE *)src.ptr);
78 uint x = get_global_id(0) * 8;
79 uint8 x_clamped = x + (uint8)(0, 1, 2, 3, 4, 5, 6, 7);
85 x_clamped =
select((uint8)x, x_clamped, convert_int8(cond0));
92 int idx = yd * dst_stride_y + xd * dst_stride_x + (get_global_id(2) / NUM_GROUPS) * dst.
stride_w;
94 const uint group = get_global_id(2) % NUM_GROUPS;
95 x_clamped += group * WIDTH_INPUT;
102 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s0 * dst.
stride_z)) = data.s0;
103 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s1 * dst.
stride_z)) = data.s1;
104 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s2 * dst.
stride_z)) = data.s2;
105 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s3 * dst.
stride_z)) = data.s3;
106 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s4 * dst.
stride_z)) = data.s4;
107 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s5 * dst.
stride_z)) = data.s5;
108 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s6 * dst.
stride_z)) = data.s6;
109 *((__global DATA_TYPE *)(dst.
ptr + idx + x_clamped.s7 * dst.
stride_z)) = data.s7;
111 #endif // defined(DATA_TYPE) && defined(WIDTH_OUTPUT) && defined(ELEMENT_SIZE) && defined(WIDTH_INPUT) && defined(NUM_GROUPS)
__global uchar * ptr
Pointer to the starting position of the buffer.
Structure to hold 3D tensor information.
SimpleTensor< float > src
Structure to hold 4D tensor information.
#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size)
int stride_w
Stride of the image in W dimension (in bytes)
int stride_y
Stride of the image in Y dimension (in bytes)
#define CONVERT_TO_TENSOR3D_STRUCT(name)
SimpleTensor< T > select(const SimpleTensor< uint8_t > &c, const SimpleTensor< T > &x, const SimpleTensor< T > &y)
#define TENSOR4D_DECLARATION(name)
int stride_x
Stride of the image in X dimension (in bytes)
SimpleTensor< T > col2im(const SimpleTensor< T > &src, const TensorShape &dst_shape, unsigned int num_groups)
int stride_z
Stride of the image in Z dimension (in bytes)
#define TENSOR3D_DECLARATION(name)
#define VEC_DATA_TYPE(type, size)