// NOTE(review): this region begins mid-definition -- the enclosing function
// signature (presumably ClMatMulNativeDefaultConfigValhall::configure) lies
// above this chunk; confirm against the full file.
// Member-function-pointer parameter list shared by all per-GPU, per-data-type
// configuration heuristics below: the matmul dimensions (m, n, k), the batch
// size b, whether RHS padding may be locked, and the MatMulInfo carrying the
// caller's transpose (adjoint) flags.
49 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info);
// Heuristic entries for the G710 target: one configuration routine per data
// type (F32, F16, U8). The container these are passed to is declared outside
// this view.
52 &ClMatMulNativeDefaultConfigValhall::configure_G710_f32,
53 &ClMatMulNativeDefaultConfigValhall::configure_G710_f16,
54 &ClMatMulNativeDefaultConfigValhall::configure_G710_u8);
// Heuristic entries for the G715 target, same per-type layout as G710.
57 &ClMatMulNativeDefaultConfigValhall::configure_G715_f32,
58 &ClMatMulNativeDefaultConfigValhall::configure_G715_f16,
59 &ClMatMulNativeDefaultConfigValhall::configure_G715_u8);
// Configuration routine selected for the current GPU target / data type.
// The selection logic that assigns it is outside this view.
61 ConfigurationFunctionExecutorPtr func =
nullptr;
// Transpose flags requested by the caller.
74 const bool adj_lhs =
info.adj_lhs();
75 const bool adj_rhs =
info.adj_rhs();
// NOTE(review): is_batched is computed outside this view -- presumably true
// when the operands carry a batch dimension; confirm against the full file.
82 if (is_batched ==
true)
// Derive the GEMM dimensions from the operand shapes, honouring the
// transpose flags: with a transposed LHS, m is read from x() rather than
// y() (and k swaps correspondingly); with a transposed RHS, n comes from
// y() rather than x().
87 const unsigned int m = adj_lhs ? lhs_shape.
x() : lhs_shape.
y();
88 const unsigned int n = adj_rhs ? rhs_shape.
y() : rhs_shape.
x();
89 const unsigned int k = adj_lhs ? lhs_shape.
y() : lhs_shape.
x();
// Batch size is taken from the LHS z dimension.
90 const unsigned int b = lhs_shape.
z();
// NOTE(review): the declarator line naming this function (presumably
// MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f32)
// sits just above this chunk -- confirm against the full file.
// Parameters m, n, k, b and rhs_lock_padding are not read by the body below;
// only the transpose flags from info are used.
97 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
// Fixed configuration for F32 on G715: forward the caller's transpose flags
// and use the constants 1, 4, 1 with a trailing false -- presumably the
// M0/N0/K0 tile sizes and an export-RHS-to-cl_image flag; confirm against
// the MatMulKernelInfo definition.
100 return {
info.adj_lhs(),
info.adj_rhs(), 1, 4, 1,
false};
103 MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f16(
104 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
106 return configure_G715_f32(m, n, k,
b, rhs_lock_padding,
info);
109 MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_u8(
110 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
113 return {
info.adj_lhs(),
info.adj_rhs(), 4, 16, 4,
false};
// Heuristic for F32 matmul on G710: pick best-known and fallback kernel
// configurations for the given problem shape from per-transpose-combination
// tuning tables, then select between them (the final selection/return
// statement lies below this view).
116 MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f32(
117 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
// NOTE(review): the declarations of the tuning tables below are not visible
// in this chunk. Each row is presumably {m, n, k, b, m0, n0, k0,
// export_rhs_to_cl_image} -- consistent with "best" rows ending in 1 and
// "fallback" rows in 0; confirm against the table type's definition.
// Best configurations: LHS not transposed, RHS not transposed (nt_nt).
120 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
121 {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 4, 16, 1},
122 {1568, 64, 40, 36, 2, 8, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
// Fallback configurations, nt_nt.
125 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
126 {24, 464, 412, 24, 6, 2, 8, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
127 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
// Best configurations: LHS not transposed, RHS transposed (nt_t).
130 {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 2, 2, 16, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
131 {24, 464, 412, 24, 6, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
132 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
// Fallback configurations, nt_t.
135 {3136, 64, 64, 36, 5, 4, 4, 0}, {4096, 48, 32, 36, 5, 4, 4, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
136 {24, 464, 412, 24, 6, 2, 4, 0}, {112, 184, 144, 28, 5, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
137 {1568, 64, 40, 36, 5, 4, 4, 0}, {2920, 64, 64, 24, 6, 2, 4, 0}};
// Best configurations: LHS transposed, RHS not transposed (t_nt).
140 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
141 {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 8, 8, 1},
142 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
// Fallback configurations, t_nt.
145 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
146 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
147 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
// Best configurations: LHS transposed, RHS transposed (t_t).
150 {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
151 {24, 464, 412, 24, 2, 2, 16, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
152 {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
// Fallback configurations, t_t.
155 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
156 {24, 464, 412, 24, 4, 2, 8, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
157 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
// Transpose flags requested by the caller.
159 const bool adj_lhs =
info.adj_lhs();
160 const bool adj_rhs =
info.adj_rhs();
// Choose the (best, fallback) table pair matching the transpose combination.
// NOTE(review): brace-only lines of this if/else chain are not visible in
// this chunk.
165 if ((adj_lhs ==
false) && (adj_rhs ==
false))
167 configs_best_to_use = &configs_mnkb_best_nt_nt;
168 configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
170 else if ((adj_lhs ==
false) && (adj_rhs ==
true))
172 configs_best_to_use = &configs_mnkb_best_nt_t;
173 configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
175 else if ((adj_lhs ==
true) && (adj_rhs ==
false))
177 configs_best_to_use = &configs_mnkb_best_t_nt;
178 configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
// Final else: both operands transposed.
182 configs_best_to_use = &configs_mnkb_best_t_t;
183 configs_fallback_to_use = &configs_mnkb_fallback_t_t;
// Look up the candidate configurations for this exact problem shape; the
// statement that selects between desc0 and desc1 (presumably using
// rhs_lock_padding) lies below this view -- confirm.
186 MatMulKernelInfo desc0 =
find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k,
b);
187 MatMulKernelInfo desc1 =
find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k,
b);
// Heuristic for F16 matmul on G710: structurally identical to the F32
// variant above but with F16-specific tuning tables. The final selection
// between the best and fallback candidates lies below this view.
192 MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f16(
193 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
// NOTE(review): the declarations of the tuning tables below are not visible
// in this chunk. Each row is presumably {m, n, k, b, m0, n0, k0,
// export_rhs_to_cl_image}; confirm against the table type's definition.
// Best configurations: LHS not transposed, RHS not transposed (nt_nt).
196 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 16, 1},
197 {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 4, 4, 8, 1},
198 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
// Fallback configurations, nt_nt.
201 {3136, 64, 64, 36, 6, 4, 8, 0}, {4096, 48, 32, 36, 6, 4, 8, 0}, {688, 92, 68, 32, 6, 4, 8, 0},
202 {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 6, 4, 8, 0}, {5776, 64, 32, 36, 6, 4, 8, 0},
203 {1568, 64, 40, 36, 6, 4, 8, 0}, {2920, 64, 64, 24, 6, 4, 8, 0}};
// Best configurations: LHS not transposed, RHS transposed (nt_t).
206 {3136, 64, 64, 36, 6, 4, 8, 1}, {4096, 48, 32, 36, 6, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
207 {24, 464, 412, 24, 6, 2, 4, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 6, 4, 8, 1},
208 {1568, 64, 40, 36, 6, 4, 8, 1}, {2920, 64, 64, 24, 6, 4, 8, 1}};
// Fallback configurations, nt_t.
211 {3136, 64, 64, 36, 6, 2, 16, 0}, {4096, 48, 32, 36, 5, 4, 8, 0}, {688, 92, 68, 32, 6, 2, 16, 0},
212 {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 6, 2, 16, 0}, {5776, 64, 32, 36, 5, 4, 8, 0},
213 {1568, 64, 40, 36, 5, 4, 8, 0}, {2920, 64, 64, 24, 6, 2, 16, 0}};
// Best configurations: LHS transposed, RHS not transposed (t_nt).
216 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
217 {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
218 {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
// Fallback configurations, t_nt.
221 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
222 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
223 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
// Best configurations: LHS transposed, RHS transposed (t_t).
226 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
227 {24, 464, 412, 24, 4, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 16, 1},
228 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
// Fallback configurations, t_t.
231 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
232 {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
233 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
// Transpose flags requested by the caller.
235 const bool adj_lhs =
info.adj_lhs();
236 const bool adj_rhs =
info.adj_rhs();
// Choose the (best, fallback) table pair matching the transpose combination.
// NOTE(review): brace-only lines of this if/else chain are not visible in
// this chunk.
241 if ((adj_lhs ==
false) && (adj_rhs ==
false))
243 configs_best_to_use = &configs_mnkb_best_nt_nt;
244 configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
246 else if ((adj_lhs ==
false) && (adj_rhs ==
true))
248 configs_best_to_use = &configs_mnkb_best_nt_t;
249 configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
251 else if ((adj_lhs ==
true) && (adj_rhs ==
false))
253 configs_best_to_use = &configs_mnkb_best_t_nt;
254 configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
// Final else: both operands transposed.
258 configs_best_to_use = &configs_mnkb_best_t_t;
259 configs_fallback_to_use = &configs_mnkb_fallback_t_t;
// Look up the candidate configurations for this exact problem shape; the
// statement that selects between desc0 and desc1 (presumably using
// rhs_lock_padding) lies below this view -- confirm.
262 MatMulKernelInfo desc0 =
find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k,
b);
263 MatMulKernelInfo desc1 =
find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k,
b);
// Heuristic for quantized (U8) matmul on G710. Unlike the F32/F16 variants
// above, there is a single table per transpose combination (no fallback
// tables): the matching table is looked up directly and its result returned.
268 MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_u8(
269 unsigned int m,
unsigned int n,
unsigned int k,
unsigned int b,
bool rhs_lock_padding,
const MatMulInfo &
info)
// NOTE(review): the declarations of the tuning tables below are not visible
// in this chunk. Each row is presumably {m, n, k, b, m0, n0, k0,
// export_rhs_to_cl_image} (the last flag is 0 in every U8 row); confirm
// against the table type's definition.
// Best configurations: LHS not transposed, RHS not transposed (nt_nt).
274 {3136, 64, 64, 36, 6, 4, 4, 0}, {4096, 48, 32, 36, 6, 4, 4, 0}, {688, 92, 68, 32, 2, 8, 4, 0},
275 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 6, 4, 4, 0},
276 {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 5, 4, 4, 0}};
// Best configurations: LHS not transposed, RHS transposed (nt_t).
279 {3136, 64, 64, 36, 4, 4, 16, 0}, {4096, 48, 32, 36, 4, 4, 16, 0}, {688, 92, 68, 32, 4, 4, 16, 0},
280 {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 4, 4, 16, 0}, {5776, 64, 32, 36, 4, 4, 16, 0},
281 {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 16, 0}};
// Best configurations: LHS transposed, RHS not transposed (t_nt).
284 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
285 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
286 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
// Best configurations: LHS transposed, RHS transposed (t_t).
289 {3136, 64, 64, 36, 4, 2, 16, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
290 {24, 464, 412, 24, 4, 2, 16, 0}, {112, 184, 144, 28, 4, 2, 16, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
291 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 2, 16, 0}};
// Transpose flags requested by the caller.
293 const bool adj_lhs =
info.adj_lhs();
294 const bool adj_rhs =
info.adj_rhs();
// Dispatch on the transpose combination and return the table lookup for
// this problem shape directly.
// NOTE(review): brace-only lines of this if/else chain are not visible in
// this chunk.
296 if ((adj_lhs ==
false) && (adj_rhs ==
false))
298 return find_info(configs_mnkb_best_nt_nt, adj_lhs, adj_rhs, m, n, k,
b);
300 else if ((adj_lhs ==
false) && (adj_rhs ==
true))
302 return find_info(configs_mnkb_best_nt_t, adj_lhs, adj_rhs, m, n, k,
b);
304 else if ((adj_lhs ==
true) && (adj_rhs ==
false))
306 return find_info(configs_mnkb_best_t_nt, adj_lhs, adj_rhs, m, n, k,
b);
// Final case (both operands transposed) -- the `else` line itself is
// presumably just above this return but not visible in this chunk.
310 return find_info(configs_mnkb_best_t_t, adj_lhs, adj_rhs, m, n, k,
b);