28 #include "../std_transforms_fixed.hpp"
29 #include "../performance_parameters.hpp"
// Declaration of the a64_gemm_s8_4x4 kernel routine (declaration only; no body
// is given here). It takes two read-only int8_t buffers, one writable int32_t
// buffer and three int arguments, and returns nothing.
// NOTE(review): the parameters are unnamed. Presumably they are the two input
// panels, the output panel, and block-count / depth values - confirm against
// the kernel's definition before relying on the argument order.
34 void a64_gemm_s8_4x4(
const int8_t *,
const int8_t *, int32_t *,
int,
int,
int);
38 class cls_a64_gemm_s8_4x4 {
// operand_type is int8_t, matching the two const input pointers of the kernel.
40 typedef int8_t operand_type;
// result_type is int32_t, matching the kernel's writable output pointer.
41 typedef int32_t result_type;
// Pointer-to-function type with the same parameter list and return type as the
// a64_gemm_s8_4x4 declaration, so that function can be stored in a kern_type.
43 typedef void (*kern_type)(
const int8_t *,
const int8_t *, int32_t *, int, int, int);
46 static unsigned int out_width() {
50 static unsigned int out_height() {
54 static unsigned int k_unroll() {
// Transform helper instantiated for this class's operand_type / result_type
// with the constants 4, 4 and 16; value-initialised per instance.
// NOTE(review): the three constants presumably correspond to the values
// returned by out_height() / out_width() / k_unroll() - confirm that they stay
// in sync if any of them is changed.
59 StdTransformsFixed<operand_type, result_type, 4, 4, 16> transforms = {};
// Same instantiation with one extra template argument set to true.
// NOTE(review): judging by the member name this is the variant intended for
// the quantized path - confirm the meaning of the flag in StdTransformsFixed.
60 StdTransformsFixed<operand_type, result_type, 4, 4, 16, true> transforms_quantized = {};
63 static PerformanceParameters get_performance_parameters(
const CPUInfo *
ci) {
64 if (std::is_same<T, int32_t>::value) {
68 return { 3.12, 2.93, 1.84 };
70 return { 3.32, 2.56, 2.63 };
72 return { 7.97, 3.72, 7.31 };
76 if (std::is_same<T, int8_t>::value) {
80 return { 3.12, 2.18, 0.09 };
82 return { 3.33, 2.89, 0.09 };
84 return { 7.97, 3.74, 0.34 };
// Function pointer to the kernel routine; every instance starts out pointing at
// a64_gemm_s8_4x4 through this default member initialiser.
89 kern_type kernel=a64_gemm_s8_4x4;
// Constructor. The CPUInfo pointer argument is unnamed and never read, and the
// body is empty, so all data members keep their default member initialisers.
91 cls_a64_gemm_s8_4x4(
const CPUInfo *) { }