28 #include "../std_transforms_fixed_trB.hpp"
29 #include "../performance_parameters.hpp"
31 #include "../bfloat.hpp"
// Generic 8x12 single-precision GEMM kernel entry point.
//
// Declaration only: the definition is not part of this header.
// Takes two read-only float buffers, one writable float buffer and three
// integer arguments, in that order.
// NOTE(review): presumably (A panel, B panel, C panel, A block count,
// B block count, K depth) -- confirm against the kernel implementation.
void a64_sgemm_asimd_8x12(const float *, const float *, float *, int, int, int);
// 8x12 single-precision GEMM kernel variant with the `_a53` suffix.
//
// Declaration only: the definition is not part of this header.
// Shares the exact signature of the generic a64_sgemm_asimd_8x12 kernel:
// two read-only float buffers, one writable float buffer, three ints.
// NOTE(review): presumably tuned for Cortex-A53 -- confirm against the
// kernel implementation.
void a64_sgemm_asimd_8x12_a53(const float *, const float *, float *, int, int, int);
// 8x12 single-precision GEMM kernel variant with the `_a55` suffix.
//
// Declaration only: the definition is not part of this header.
// Shares the exact signature of the generic a64_sgemm_asimd_8x12 kernel:
// two read-only float buffers, one writable float buffer, three ints.
// NOTE(review): presumably tuned for Cortex-A55 (r0) -- confirm against the
// kernel implementation.
void a64_sgemm_asimd_8x12_a55(const float *, const float *, float *, int, int, int);
// 8x12 single-precision GEMM kernel variant with the `_a55r1` suffix.
//
// Declaration only: the definition is not part of this header.
// Shares the exact signature of the generic a64_sgemm_asimd_8x12 kernel:
// two read-only float buffers, one writable float buffer, three ints.
// NOTE(review): presumably tuned for Cortex-A55 r1 -- confirm against the
// kernel implementation.
void a64_sgemm_asimd_8x12_a55r1(const float *, const float *, float *, int, int, int);
// 8x12 single-precision GEMM kernel variant with the `_x1` suffix.
//
// Declaration only: the definition is not part of this header.
// Shares the exact signature of the generic a64_sgemm_asimd_8x12 kernel:
// two read-only float buffers, one writable float buffer, three ints.
// NOTE(review): presumably tuned for Cortex-X1 -- confirm against the
// kernel implementation.
void a64_sgemm_asimd_8x12_x1(const float *, const float *, float *, int, int, int);
// Descriptor class for the a64_sgemm_asimd_8x12 family of kernels: it exposes
// the element types, the kernel function-pointer type, tile/unroll accessors,
// the operand transforms, performance parameters and the selected kernel.
// NOTE(review): this listing is missing lines (method bodies, closing braces,
// the template header that introduces T, and the CPU-model switch scaffolding)
// -- confirm every detail against the complete file.
50 class cls_a64_sgemm_8x12 {
// Both operands and results are plain float.
52 typedef float operand_type;
53 typedef float result_type;
// Pointer-to-kernel type; identical to the signature of the
// a64_sgemm_asimd_8x12* functions declared earlier in this file.
55 typedef void (*kern_type)(
const float *,
const float *,
float *, int, int, int);
// Static accessors for the output tile dimensions and K unroll factor.
// Their bodies (and hence the returned values) are not visible here.
58 static unsigned int out_width() {
62 static unsigned int out_height() {
66 static unsigned int k_unroll() {
// Operand/result transforms, value-initialised.
// NOTE(review): the template arguments 8 and 12 presumably mirror the 8x12
// tile shape in the class name -- confirm against StdTransformsFixedTRB.
71 StdTransformsFixedTRB<operand_type, result_type, 8, 12> transforms = {};
// Returns a brace-initialised PerformanceParameters triple. The visible
// branches distinguish T == float from T == bfloat16; the logic that picks
// one triple within each branch (presumably keyed on the CPU model obtained
// from `ci`) is not visible here.
74 static PerformanceParameters get_performance_parameters(
const CPUInfo *
ci) {
75 if (std::is_same<T, float>::value) {
78 return { 3.954, 1.252, 1.141 };
81 return { 2.777, 0.987, 0.898 };
84 return { 2.885, 1.429, 1.163 };
87 return { 14.95, 9.95, 5.28 };
90 return { 7.2307, 3.876, 2.932 };
94 if (std::is_same<T, bfloat16>::value) {
97 return { 4.98, 2.27, 3.05 };
100 return { 7.99, 5.06, 7.32 };
// Kernel to invoke. Defaults to the generic implementation and is reassigned
// below to one of the `_a53`, `_a55`, `_a55r1` or `_x1` variants. Only the
// A55r0 and A55r1 case labels are visible; the enclosing function/switch and
// the remaining labels are missing from this listing.
105 kern_type kernel=a64_sgemm_asimd_8x12;
111 kernel = a64_sgemm_asimd_8x12_a53;
114 case CPUModel::A55r0:
115 kernel = a64_sgemm_asimd_8x12_a55;
118 case CPUModel::A55r1:
119 kernel = a64_sgemm_asimd_8x12_a55r1;
123 kernel = a64_sgemm_asimd_8x12_x1;
135 #endif // __aarch64__