28 #include "../performance_parameters.hpp"
29 #include "../std_transforms_fixed.hpp"
// Declaration of the a64_gemm_u8_8x12 kernel entry point; no definition is
// visible in this excerpt. It takes two read-only uint8_t pointers, one
// writable uint32_t pointer and three ints, and returns nothing. The
// parameters are unnamed in this declaration.
// NOTE(review): presumably the two inputs are the A and B operand panels and
// the uint32_t pointer is the output -- confirm against the kernel definition.
34 void a64_gemm_u8_8x12(
const uint8_t *,
const uint8_t *, uint32_t *,
int,
int,
int);
// Declaration of the a64_gemm_u8_8x12_a55r1 kernel entry point; no definition
// is visible in this excerpt. Its parameter list (two const uint8_t pointers,
// a uint32_t pointer, three ints) matches the kern_type function-pointer
// typedef in cls_a64_gemm_u8_8x12, whose constructor selects this function
// when the CPU model compares equal to CPUModel::A55r1.
35 void a64_gemm_u8_8x12_a55r1(
const uint8_t *,
const uint8_t *, uint32_t *,
int,
int,
int);
// Declaration of the a64_gemm_u8_8x12_x1 kernel entry point; no definition is
// visible in this excerpt. Its parameter list (two const uint8_t pointers, a
// uint32_t pointer, three ints) matches the kern_type function-pointer typedef
// in cls_a64_gemm_u8_8x12, whose constructor selects this function when the
// CPU model compares equal to CPUModel::X1.
36 void a64_gemm_u8_8x12_x1(
const uint8_t *,
const uint8_t *, uint32_t *,
int,
int,
int);
// Descriptor class for the a64 u8 8x12 GEMM kernel family. It bundles the
// operand/result types, the A/B interleave constants, the transform helper
// objects, a performance-parameter lookup and the kernel function pointer
// that the constructor picks per CPU model.
// NOTE(review): several lines of this class are not visible in this excerpt
// (function bodies, closing braces, access specifiers); the comments below
// describe only what the visible lines show.
38 class cls_a64_gemm_u8_8x12 {
// Element types: uint8_t operands, uint32_t results.
40 typedef uint8_t operand_type;
41 typedef uint32_t result_type;
// Function-pointer type with the same parameter list as the three
// a64_gemm_u8_8x12* kernel declarations.
43 typedef void (*kern_type)(
const uint8_t *,
const uint8_t *, uint32_t *, int, int, int);
// Compile-time constants for the A operand: interleave 8, block 4, no transpose.
// NOTE(review): presumably consumed by the interleave/transform code -- confirm.
46 static const int A_interleave = 8;
47 static const int A_block = 4;
48 static const bool A_transpose =
false;
// Compile-time constants for the B operand: interleave 12, block 4, transposed.
51 static const int B_interleave = 12;
52 static const int B_block = 4;
53 static const bool B_transpose =
true;
// Static accessors returning unsigned int; their bodies are not visible here.
// NOTE(review): presumably they return 12, 8 and 4 to match the
// StdTransformsFixed arguments below -- confirm against the full file.
56 static unsigned int out_width() {
60 static unsigned int out_height() {
64 static unsigned int k_unroll() {
// Transform helpers instantiated with the constants 8, 12, 4. The second
// instance passes an extra `true` template argument and is named for the
// quantized path.
69 StdTransformsFixed<operand_type, result_type, 8, 12, 4> transforms = {};
70 StdTransformsFixed<operand_type, result_type, 8, 12, 4, true> transforms_quantized = {};
// Returns a brace-initialized PerformanceParameters triple. The visible code
// branches first on whether T is uint8_t, then on whether T is uint32_t, with
// four candidate triples in each branch.
// NOTE(review): the declaration of T and the conditions that choose among
// the four triples are not visible here; presumably a switch on the CPU model
// obtained from `ci` -- confirm.
73 static PerformanceParameters get_performance_parameters(
const CPUInfo *
ci) {
74 if (std::is_same<T, uint8_t>::value) {
77 return { 19.73, 3.38, 0.27 };
80 return { 15.361, 0.9341, 0.1636 };
83 return { 51.14, 7.38, 0.65 };
86 return { 29.0698, 3.9793, 0.4003 };
90 if (std::is_same<T, uint32_t>::value) {
93 return { 19.73, 3.38, 3.70 };
96 return { 14.286, 1.171, 1.209 };
99 return { 61.58, 4.78, 10.83 };
102 return { 31.82, 3.51, 8.03 };
// Kernel to run; defaults to the generic a64_gemm_u8_8x12 implementation.
109 kern_type kernel = a64_gemm_u8_8x12;
// Constructor: replaces the default kernel with the A55r1 or X1 variant when
// `mod` matches that CPU model; any other model keeps the default.
// NOTE(review): the line defining `mod` is not visible here; presumably it is
// read from `ci` -- confirm.
111 cls_a64_gemm_u8_8x12(
const CPUInfo *
ci) {
114 if (mod == CPUModel::A55r1) {
115 kernel = a64_gemm_u8_8x12_a55r1;
116 }
else if (mod == CPUModel::X1) {
117 kernel = a64_gemm_u8_8x12_x1;
124 #endif // __aarch64__