109 #
if defined(HAS_BIAS)
117 #define _IWEI_WIDTH WEI_WIDTH
118 #define _IWEI_HEIGHT WEI_HEIGHT
119 #define _ISRC_WIDTH SRC_WIDTH
120 #define _ISRC_HEIGHT SRC_HEIGHT
121 #define _ISRC_CHANNELS SRC_CHANNELS
122 #define _IDST_WIDTH DST_WIDTH
123 #define _IDST_HEIGHT DST_HEIGHT
124 #define _IDST_CHANNELS DST_CHANNELS
125 #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
128 #if defined(IS_QUANTIZED)
129 #define _IOUTPUT_TILE cq
130 #else // defined(IS_QUANTIZED)
131 #define _IOUTPUT_TILE c
132 #endif // defined(IS_QUANTIZED)
140 TILE(
int, 1, M0, xi);
141 TILE(
int, 1, M0, yi);
146 xi[0].s[i] = ((mout + i) %
_IDST_WIDTH) * STRIDE_X;
147 yi[0].s[i] = ((mout + i) /
_IDST_WIDTH) * STRIDE_Y;
148 xi[0].s[i] -= PAD_LEFT;
149 yi[0].s[i] -= PAD_TOP;
153 TILE(ACC_DATA_TYPE, M0, N0, c);
165 TILE(
int, 1, M0, my);
169 int x_s = xi[0].s[i] + xk;
170 int y_s = yi[0].s[i] + yk;
173 my[0].s[i] =
select(-1, my[0].s[i], x_s >= 0);
175 my[0].s[i] =
select(-1, my[0].s[i], y_s >= 0);
182 TILE(SRC_DATA_TYPE, M0, K0, a);
183 TILE(WEI_DATA_TYPE, N0, K0,
b);
203 T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, K0, NT, T, a,
b, c);
211 #if defined(LEFTOVER_LOOP)
215 TILE(SRC_DATA_TYPE, M0, 1, a);
216 TILE(WEI_DATA_TYPE, N0, 1,
b);
237 T_MMUL(SRC_DATA_TYPE, WEI_DATA_TYPE, ACC_DATA_TYPE, M0, N0, 1, NT, T, a,
b, c);
243 #endif // defined(LEFTOVER_LOOP)
250 #if defined(HAS_BIAS)
251 TILE(BIA_DATA_TYPE, 1, N0, bias0);
253 T_LOAD(BIA_DATA_TYPE, 1, N0, BUFFER, bia, cout, 0, 1, 0, bias0);
260 #if defined(IS_QUANTIZED)
262 TILE(DST_DATA_TYPE, M0, N0, cq);
265 T_QUANTIZE8_ASYMMETRIC(ACC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, c, cq);
266 #endif // defined(IS_QUANTIZED)
271 TILE(uint, M0, 1, dst_indirect_y);
280 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
284 T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, M0, N0, PARTIAL_N0, DST_TENSOR_TYPE,
dst, cout, dst_stride_y, x_cond,
_IOUTPUT_TILE, dst_indirect_y);
290 #undef _ISRC_CHANNELS
293 #undef _IDST_CHANNELS
294 #undef _IY_MULTIPLIER