27 #include "../std_transforms_fixed.hpp"
28 #include "../bfloat.hpp"
29 #include "../performance_parameters.hpp"
32 const bfloat16 *, const bfloat16 *, \
33 float *, int, int, int
// NOTE(review): the two lines above look like the continuation of the ARGLIST
// macro (two const bfloat16 pointers, a float pointer and three ints); the
// #define line itself is not visible in this excerpt — TODO confirm.
//
// Kernel entry points. Both share the ARGLIST parameter list and return void.
// The class below uses the first as its default kernel and can switch to the
// _a510 variant in its constructor.
38 void a64_interleaved_bf16fp32_mmla_8x12( ARGLIST );
39 void a64_interleaved_bf16fp32_mmla_8x12_a510( ARGLIST );
// Bundles the types, size queries, transform helpers and kernel pointer
// associated with the a64_interleaved_bf16fp32_mmla_8x12 kernel.
// NOTE(review): only part of this class is visible in this excerpt (braces,
// access specifiers and function bodies are not shown), so the notes below
// describe the visible declarations only.
41 class cls_a64_interleaved_bf16fp32_mmla_8x12
// Type of the results produced by the kernel.
45 typedef float result_type;
// Pointer to a kernel function: returns void and takes the ARGLIST parameters.
47 typedef void (*kern_type)( ARGLIST );
// Declared constexpr; body not visible here. Presumably matches the 8 passed
// to StdTransformsFixed below — TODO confirm.
50 static constexpr
unsigned int out_height()
// NOTE(review): unlike out_height() and k_unroll(), this one is not declared
// constexpr — confirm whether that is intentional. Body not visible here;
// presumably matches the 12 passed to StdTransformsFixed below — TODO confirm.
55 static unsigned int out_width()
// Declared constexpr; body not visible here. Presumably matches the 4 passed
// to StdTransformsFixed below — TODO confirm.
60 static constexpr
unsigned int k_unroll()
// Transform helpers instantiated with operand_type, result_type and the
// constants 8, 12, 4. The second instance passes an additional `true`
// template argument. operand_type is not defined in the visible lines.
66 StdTransformsFixed<operand_type, result_type, 8, 12, 4> transforms = {};
67 StdTransformsFixed<operand_type, result_type, 8, 12, 4, true> transforms_quantized = {};
// Returns a PerformanceParameters built from one of the three-value constant
// sets below. The visible branches compare T against bfloat16 and float; the
// declaration of T and the logic that chooses among the three sets inside each
// branch are not visible in this excerpt — presumably driven by the CPUInfo
// argument, TODO confirm.
69 static inline PerformanceParameters get_performance_parameters(
const CPUInfo *
ci)
// Parameter sets used when T is bfloat16.
72 if (std::is_same<T, bfloat16>::value) {
75 return { 31.54, 4.30, 7.33 };
77 return { 59.94, 5.08, 9.83 };
79 return { 7.82, 4.05, 3.07 };
// Parameter sets used when T is float.
84 if (std::is_same<T, float>::value) {
87 return { 31.15, 2.51, 5.25 };
89 return { 41.44, 5.01, 5.64 };
91 return { 7.83, 2.53, 2.71 };
// Kernel function pointer held by this object; initialised to
// a64_interleaved_bf16fp32_mmla_8x12.
99 kern_type kernel=a64_interleaved_bf16fp32_mmla_8x12;
// Constructor taking a CPUInfo pointer. It can replace the default kernel with
// the _a510 variant; the condition that triggers the switch is not visible in
// this excerpt.
100 cls_a64_interleaved_bf16fp32_mmla_8x12(
const CPUInfo *
ci)
106 kernel=a64_interleaved_bf16fp32_mmla_8x12_a510;
115 #endif // __aarch64__