28 #include "../performance_parameters.hpp"
29 #include "../std_transforms_fixed.hpp"
// Declaration of the kernel routine; the definition is not part of this
// excerpt.
//
// Parameters, as far as the signature shows:
//   Apanel, Bpanel   - read-only pointers to uint8_t input data.
//   Cpanel           - writable pointer to uint32_t output data.
//   ablocks, bblocks - int counts; presumably the number of A and B blocks
//                      to process (confirm against the kernel definition).
//   K                - int; presumably the shared inner dimension of the
//                      multiplication (confirm against the kernel definition).
34 void a64_gemm_u8_4x4(
const uint8_t *Apanel,
const uint8_t *Bpanel, uint32_t *Cpanel,
int ablocks,
int bblocks,
int K);
// Descriptor class for the a64_gemm_u8_4x4 kernel. It groups together the
// operand/result types, the A_* and B_* interleave/block/transpose constants,
// two StdTransformsFixed helper objects, a performance-parameter lookup and a
// pointer to the kernel routine.
// NOTE(review): this excerpt does not show the access specifiers, the bodies
// of the size accessors, or the end of the class - check the complete header
// before relying on the notes below.
36 class cls_a64_gemm_u8_4x4 {
// Element type of the inputs (uint8_t) and of the output (uint32_t).
38 typedef uint8_t operand_type;
39 typedef uint32_t result_type;
// Function-pointer type with the same parameter list as a64_gemm_u8_4x4:
// two const uint8_t inputs, one uint32_t output and three int arguments.
41 typedef void (*kern_type)(
const uint8_t *,
const uint8_t *, uint32_t *, int, int, int);
// Constants describing the A operand: interleave 4, block 16, no transpose.
// NOTE(review): presumably consumed by the code that rearranges A before
// the kernel is called - the consumer is not visible here.
44 static const int A_interleave = 4;
45 static const int A_block = 16;
46 static const bool A_transpose =
false;
// Constants describing the B operand: same interleave and block values as A,
// but with the transpose flag set to true.
49 static const int B_interleave = 4;
50 static const int B_block = 16;
51 static const bool B_transpose =
true;
// Static accessors returning unsigned int. Their bodies are not visible in
// this excerpt, so the values they return cannot be stated from here.
54 static unsigned int out_width() {
58 static unsigned int out_height() {
62 static unsigned int k_unroll() {
// Transform helpers instantiated for operand_type/result_type with the
// numeric template arguments 4, 4, 16. The second object passes one extra
// template argument, `true`.
// NOTE(review): the meaning of each template argument is defined by
// StdTransformsFixed (presumably in ../std_transforms_fixed.hpp) - confirm
// there.
67 StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {};
68 StdTransformsFixed<operand_type, result_type, 4, 4, 16, true> transforms_quantized = {};
// Returns a PerformanceParameters value built from three numeric literals.
// The triple is chosen first by comparing T against uint32_t and uint8_t via
// std::is_same, then by conditions that are not visible in this excerpt
// (presumably depending on `ci` - confirm in the full header).
// NOTE(review): T is not declared in the visible lines; presumably it is a
// template parameter of this function - confirm.
// The meaning of the three numbers is defined by PerformanceParameters, which
// is not shown here.
71 static PerformanceParameters get_performance_parameters(
const CPUInfo *
ci) {
// Candidate triples when T is uint32_t.
72 if (std::is_same<T, uint32_t>::value) {
76 return { 2.25, 2.92, 1.84 };
78 return { 2.64, 2.72, 2.64 };
80 return { 7.95, 3.76, 7.27 };
// Candidate triples when T is uint8_t. The first value of each triple matches
// the corresponding uint32_t triple above; the second and third differ.
84 if (std::is_same<T, uint8_t>::value) {
88 return { 2.25, 2.18, 0.09 };
90 return { 2.64, 1.79, 0.10 };
92 return { 7.95, 4.09, 0.33 };
// Kernel pointer member, initialised by default to a64_gemm_u8_4x4.
99 kern_type kernel = a64_gemm_u8_4x4;
// Constructor accepts a CPUInfo pointer but leaves it unnamed and has an
// empty body, so the argument is not used here.
101 cls_a64_gemm_u8_4x4(
const CPUInfo *) { }
106 #endif // __aarch64__