26 #ifdef ARM_COMPUTE_ENABLE_SME2
28 #include "../bfloat.hpp"
29 #include "../std_transforms_sme.hpp"
// Forward declaration of the kernel entry point. Only the prototype appears
// here; the definition is not part of this header excerpt.
//
// What the signature itself establishes:
//   A, B               - pointers to read-only bfloat16 data
//   C                  - pointer to mutable float data
//   ldc                - int accompanying C
//                        (presumably the leading dimension of C - TODO confirm)
//   M, N, K            - int sizes
//                        (presumably the GEMM problem dimensions - TODO confirm)
//   bias               - pointer to read-only float data
//   act                - Activation object, passed by value
//   accumulate         - bool flag
//                        (presumably "add to existing results" - TODO confirm)
//   accumulator_buffer - pointer to mutable float data
// The pointer parameters and M/N/K/act are top-level const, which only
// constrains the definition, not callers.
35 void sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL(
const bfloat16 *
const A,
const bfloat16 *
const B,
float *
const C,
int ldc,
const int M,
const int N,
const int K,
const float *
const bias,
const Activation act,
bool accumulate,
float *
const accumulator_buffer);
// Descriptor class for the sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL
// kernel: it bundles type aliases, output tile size queries, capability
// queries, a pointer to the kernel function and a transforms object.
// NOTE(review): in this excerpt the brace-only lines and several member
// bodies are missing; the comments below describe only what is visible.
37 class cls_sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL
// Element type of the results.
// NOTE(review): operand_type is used further down but its typedef is not
// visible in this excerpt.
41 typedef float result_type;
// Function-pointer type whose parameter list mirrors the free-function
// declaration of sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.
43 typedef void (*kern_type)(
const bfloat16 *
const A,
const bfloat16 *
const B,
float *
const C,
int ldc,
const int M,
const int N,
const int K,
const float *
const bias,
const Activation act,
bool accumulate,
float *
const accumulator_buffer);
// Output tile height: one float vector length, queried at run time via
// sme::get_vector_length<float>() (hence not constexpr).
46 static unsigned int out_height()
48 return sme::get_vector_length<float>() * 1;
// Output tile width: four float vector lengths, queried the same way.
51 static unsigned int out_width()
53 return sme::get_vector_length<float>() * 4;
// Compile-time query; the returned value is not visible in this excerpt.
// (The last StdTransformsSME template argument below is 2 - presumably the
// same unroll factor; TODO confirm.)
56 static constexpr
unsigned int k_unroll()
// Compile-time capability flags. Their bodies are not visible in this
// excerpt, so the returned values cannot be stated here.
61 static constexpr
bool supports_accumulate()
66 static constexpr
bool supports_bias()
71 static constexpr
bool supports_activation()
76 static constexpr
bool is_sme()
// Kernel function pointer, default-initialised to the free function declared
// before this class.
82 kern_type kernel = sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL;
// Transforms helper, value-initialised. The numeric template arguments
// (1, 4, 2) line up with the 1VL x 4VL tile factors used by
// out_height()/out_width(); see the k_unroll() note for the trailing 2.
84 StdTransformsSME<operand_type, result_type, 1, 4, 2> transforms = {};
// Constructor taking an unnamed const CPUInfo pointer; the parameter has no
// name, so it cannot be referenced inside the (not visible) body.
86 cls_sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL(
const CPUInfo *)
93 #endif // ARM_COMPUTE_ENABLE_SME2