Compute Library
 22.08
std_transforms_sve.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #pragma once
25 
26 #include "convolver.hpp"
27 #include "mergeresults.hpp"
28 #include "transform.hpp"
29 
30 namespace arm_gemm {
31 
32 /*
33  * Define "standard" transforms for the blocked GEMMs for SVE.
34  *
35  * This assumes that A is interleaved 'height' ways, B is interleaved
36  * 'width'xVL ways and transposed, and that the merge needs to work in
37  * 'height' x 'width'xVL blocks.
38  *
39  * The optional 'block' parameter is for kernels using dot-product type
40  * instructions like UDOT and SDOT.
41  */
42 template<typename TOperand, typename TResult, unsigned int height, unsigned int width_vectors, unsigned int block=1, unsigned int mmla=1, bool integrate_sums=false>
44 {
45 public:
46  template<typename TIn>
47  void PrepareA(TOperand *out, const TIn *in, const int stride, const int y0,
48  const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
49  Interleave<height, block, VLType::None>(out, in, stride, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
50  }
51 
52  template<typename TIn>
53  void PrepareA_indirect(TOperand *out, const TIn * const * const *ptr, size_t stringlen, size_t rounded_stringlen, const int y0,
54  const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
55  IndirectInterleave<height, block, VLType::None>(out, ptr, stringlen, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
56  }
57 
58  template<typename TIn>
59  void PrepareA_convolution(TOperand *out, const TIn *ptr, size_t stride, const convolver<TIn> &conv, size_t rounded_stringlen,
60  const int y0, const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
61  ConvolutionInterleave<height, block, VLType::None>(out, ptr, stride, conv, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier);
62  }
63 
64  template<typename TIn>
65  void PrepareB(TOperand *out, const TIn *in, const int stride, const int x0,
66  const int xmax, const int k0, const int kmax) {
67  Transform<width_vectors, block, true, VLType::SVE>(out, in, stride, x0, xmax, k0, kmax);
68  }
69 
70  template<typename TOut>
71  void Merge(TOut *out, const TResult *in, int stride, int y0, int ymax, int x0, int xmax, const TOut *bias, const Activation act, bool append) {
72  MergeResults<width_vectors / mmla, height, true>(out, in, stride, y0, ymax, x0, xmax, bias, act, append);
73  }
74 };
75 
76 } // namespace arm_gemm
void PrepareA_indirect(TOperand *out, const TIn *const *const *ptr, size_t stringlen, size_t rounded_stringlen, const int y0, const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier)
void Merge(TOut *out, const TResult *in, int stride, int y0, int ymax, int x0, int xmax, const TOut *bias, const Activation act, bool append)
void MergeResults(Tout *out, const Tin *in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append)
void PrepareA_convolution(TOperand *out, const TIn *ptr, size_t stride, const convolver< TIn > &conv, size_t rounded_stringlen, const int y0, const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier)
void PrepareB(TOperand *out, const TIn *in, const int stride, const int x0, const int xmax, const int k0, const int kmax)
void PrepareA(TOperand *out, const TIn *in, const int stride, const int y0, const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier)
const int32_t * bias