110 #define _IWEI_WIDTH WEI_WIDTH
111 #define _IWEI_HEIGHT WEI_HEIGHT
112 #define _IWEI_DEPTH WEI_DEPTH
113 #define _ISRC_WIDTH SRC_WIDTH
114 #define _ISRC_HEIGHT SRC_HEIGHT
115 #define _ISRC_DEPTH SRC_DEPTH
116 #define _ISRC_CHANNELS SRC_CHANNELS
117 #define _IDST_WIDTH DST_WIDTH
118 #define _IDST_HEIGHT DST_HEIGHT
119 #define _IDST_DEPTH DST_DEPTH
120 #define _IDST_CHANNELS DST_CHANNELS
121 #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT * _IWEI_DEPTH)
124 #if defined(IS_QUANTIZED)
125 #define _IOUTPUT_TILE cq
126 #else // defined(IS_QUANTIZED)
127 #define _IOUTPUT_TILE c
128 #endif // defined(IS_QUANTIZED)
134 TILE(
int, M0, 1, xi);
135 TILE(
int, M0, 1, yi);
136 TILE(
int, M0, 1, zi);
147 zi[i].v -= PAD_FRONT;
151 TILE(ACC_DATA_TYPE, M0, N0, c);
155 c[i].v = (ACC_DATA_TYPE)0;
168 TILE(DATA_TYPE, M0, K0, a);
169 TILE(DATA_TYPE, N0, K0,
b);
177 T_LOAD_NDHWC_INDIRECT(DATA_TYPE, M0, K0, BUFFER,
src, bout, zk, yk, xk, ck,
_ISRC_WIDTH,
_ISRC_HEIGHT,
_ISRC_DEPTH, src_stride_y, xi, yi, zi, a);
187 b[i].s[j] = *(__global DATA_TYPE *)(wei_ptr + wei_offset_first_element_in_bytes + (cout + i) *
sizeof(DATA_TYPE) + j * wei_stride_y + b_offs * wei_stride_y);
193 T_MMUL(DATA_TYPE, DATA_TYPE, ACC_DATA_TYPE, M0, N0, K0, NT, T, a,
b, c);
202 #if((_ISRC_CHANNELS % K0) != 0)
206 TILE(DATA_TYPE, M0, 1, a);
207 TILE(DATA_TYPE, N0, 1,
b);
215 T_LOAD_NDHWC_INDIRECT(DATA_TYPE, M0, 1, BUFFER,
src, bout, zk, yk, xk, ck,
_ISRC_WIDTH,
_ISRC_HEIGHT,
_ISRC_DEPTH, src_stride_y, xi, yi, zi, a);
223 b[i].v = *(__global DATA_TYPE *)(wei_ptr + wei_offset_first_element_in_bytes + (cout + i) *
sizeof(DATA_TYPE) + b_offs * wei_stride_y);
228 T_MMUL(DATA_TYPE, DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a,
b, c);
236 #endif // ((_ISRC_CHANNELS % K0) != 0)
243 #if defined(HAS_BIAS)
244 TILE(BIA_DATA_TYPE, 1, N0, bias0);
248 bias0[0].v =
VLOAD(N0)(0, (__global BIA_DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout *
sizeof(BIA_DATA_TYPE)));
253 (bias0[0].v, 0, (__global BIA_DATA_TYPE *)(bia_ptr + bia_offset_first_element_in_bytes + cout *
sizeof(BIA_DATA_TYPE)));
261 TILE(uint, M0, 1, dst_indirect_y);
270 #
if defined(IS_QUANTIZED)
271 TILE(DATA_TYPE, M0, N0, cq);
275 #endif // defined(IS_QUANTIZED)
277 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
280 T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, M0, N0, PARTIAL_N0, BUFFER,
dst, cout, dst_stride_y, x_cond,
_IOUTPUT_TILE, dst_indirect_y);