42 template<
typename TOperand,
typename TResult,
unsigned int height,
unsigned int w
idth_vectors,
unsigned int block=1,
unsigned int mmla=1,
bool integrate_sums=false>
46 template<
typename TIn>
47 void PrepareA(TOperand *out,
const TIn *in,
const int stride,
const int y0,
48 const int ymax,
const int k0,
const int kmax, int32_t row_sum_multiplier) {
49 Interleave<height, block, VLType::None>(out, in, stride, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
52 template<
typename TIn>
53 void PrepareA_indirect(TOperand *out,
const TIn *
const *
const *ptr,
size_t stringlen,
size_t rounded_stringlen,
const int y0,
54 const int ymax,
const int k0,
const int kmax, int32_t row_sum_multiplier) {
55 IndirectInterleave<height, block, VLType::None>(out, ptr, stringlen, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
// NOTE(review): the line that carries this function's return type, name and
// leading parameters appears to be missing between the template header and
// `const int y0`: the call below uses out, ptr, stride, conv and
// rounded_stringlen, none of which are declared in the visible text. The
// leading numbers (58/60/61) are not valid C++ tokens and the body has no
// closing brace. Restore this definition from the original header before use.
58 template<
typename TIn>
60 const int y0,
const int ymax,
const int k0,
const int kmax, int32_t row_sum_multiplier) {
// Forwards its arguments, plus the enclosing template's integrate_sums flag,
// to ConvolutionInterleave<height, block, VLType::None>.
61 ConvolutionInterleave<height, block, VLType::None>(out, ptr, stride, conv, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
64 template<
typename TIn>
65 void PrepareB(TOperand *out,
const TIn *in,
const int stride,
const int x0,
66 const int xmax,
const int k0,
const int kmax) {
67 Transform<width_vectors, block, true, VLType::SVE>(out, in, stride, x0, xmax, k0, kmax);
70 template<
typename TOut>
71 void Merge(TOut *out,
const TResult *in,
int stride,
int y0,
int ymax,
int x0,
int xmax,
const TOut *bias,
const Activation act,
bool append) {
72 MergeResults<width_vectors / mmla, height,
true>(out, in, stride, y0, ymax, x0, xmax, bias, act, append);
// NOTE(review): this line has no template header, no body and no terminating
// ';', and it uses Tout/Tin, which are not declared in the visible text. It
// looks like a reference copy of the signature of the MergeResults routine
// that Merge calls - confirm, then either complete it or remove it.
void MergeResults(Tout *out, const Tin *in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append)