Compute Library
 22.05
arm_gemm.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #pragma once
25 
26 #include <cstring>
27 #include <memory>
28 #include <vector>
29 
30 #include "arm_gemm_local.hpp"
31 #include "gemm_common.hpp"
32 
33 namespace arm_gemm
34 {
// Enumeration of GEMM methods.
// Only the DEFAULT enumerator is visible here: this listing skips source
// lines 38-47, so further enumerators may exist - confirm against the
// original header before relying on the set of values.
35 enum class GemmMethod
36 {
37  DEFAULT,
48 };
49 
51 {
// Body of KernelDescription (the name is taken from the constructors below;
// the declaration line, source line 50, is elided in this listing, as is
// line 52, which presumably declares the `method` member initialised by the
// first constructor - confirm against the original header).
//
// Visible members and their in-class defaults:
//   name           - empty string
//   is_default     - false
//   cycle_estimate - 0
53  std::string name = "";
54  bool is_default = false;
55  uint64_t cycle_estimate = 0;
56 
// Stores m, n, d and c into method, name, is_default and cycle_estimate
// respectively. `d` and `c` may be omitted and then default to false and 0.
// Note that `n` is taken by value and then copied into `name`.
57  KernelDescription(GemmMethod m, std::string n, bool d = false, uint64_t c = 0)
58  : method(m), name(n), is_default(d), cycle_estimate(c)
59  {
60  }
// Default constructor: empty body, so every member keeps its in-class
// default initialiser.
61  KernelDescription() noexcept
62  {
63  }
64 };
65 
// Configuration record with a filter string and two block-size fields.
// A pointer to one of these is an optional argument of the GemmArgs
// constructor later in this file (defaulting to nullptr).
//
// NOTE: this listing elides source lines 68, 73 and 77. Line 68 presumably
// declares the `method` member initialised below; line 73 is reported by the
// page's symbol index as `GemmConfig(GemmMethod method)`; line 77 is
// presumably the signature of a second constructor with an empty body -
// confirm both against the original header.
66 struct GemmConfig
67 {
// In-class defaults: empty filter, both block sizes 0.
69  std::string filter = "";
70  unsigned int inner_block_size = 0;
71  unsigned int outer_block_size = 0;
72 
// Initialiser list and body of the constructor whose signature (line 73) is
// elided: it copies the `method` argument into the `method` member.
74  : method(method)
75  {
76  }
// Empty body of the constructor whose signature (line 77) is elided.
78  {
79  }
80 };
81 
// Describes an activation: a kind (Type) plus two float parameters.
// The `type` member initialised by the constructor is declared on source
// line 91, which this listing elides.
82 struct Activation
83 {
// Kinds of activation that can be described.
84  enum class Type
85  {
86  None,
87  ReLU,
88  BoundedReLU
89  };
90 
// Parameters whose meaning is not shown in this file - presumably they
// depend on the selected Type (e.g. a bound for BoundedReLU); confirm
// against the kernels that consume them.
92  float param1;
93  float param2;
94 
// Stores the three arguments verbatim. With no arguments this yields
// Type::None with both parameters 0.0f. The constructor is not `explicit`,
// so a bare Type value converts implicitly to an Activation.
95  Activation(Type type = Type::None, float p1 = 0.0f, float p2 = 0.0f)
96  : type(type), param1(p1), param2(p2)
97  {
98  }
99 };
100 
// Argument bundle passed by const reference to the API function templates
// declared later in this file (get_gemm_method, gemm, get_compatible_kernels,
// has_opt_gemm).
//
// NOTE: this listing elides source lines 111-114. The constructor also
// initialises _indirect_input, _act, _maxthreads and _fast_mode, so those
// members are presumably declared there (the page's symbol index lists
// `Activation _act` at line 112).
101 struct GemmArgs
102 {
103 public:
// Non-owning pointers (_ci, _cfg) are stored as given; nothing in this
// struct manages their lifetime.
104  const CPUInfo *_ci;
105  unsigned int _Msize;
106  unsigned int _Nsize;
107  unsigned int _Ksize;
108  unsigned int _Ksections;
109  unsigned int _nbatches;
110  unsigned int _nmulti;
115  const GemmConfig *_cfg;
116 
// Copies every argument into the member of the corresponding name.
// `fast_mode` defaults to false and `cfg` to nullptr; all other arguments
// are required. No validation is performed here.
117  GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N,
118  unsigned int K, unsigned int Ksections, unsigned int nbatches,
119  unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads,
120  bool fast_mode = false, const GemmConfig *cfg = nullptr)
121  : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _Ksections(Ksections), _nbatches(nbatches), _nmulti(nmulti), _indirect_input(indirect_input), _act(act), _maxthreads(maxthreads), _fast_mode(fast_mode),
122  _cfg(cfg)
123  {
124  }
125 };
126 
128 {
// Body of Requantize32 (the name is taken from the constructors below; the
// declaration line, source line 127, is elided in this listing).
//
// Holds requantization parameters in two alternative forms, selected by
// `per_channel_requant`: a single per-layer shift/multiplier, or pointers to
// per-channel arrays. All pointer members are non-owning and default to
// nullptr; all integer members default to 0.
129 public:
130  const int32_t *bias = nullptr;
131  size_t bias_multi_stride = 0;
132  int32_t a_offset = 0;
133  int32_t b_offset = 0;
134  int32_t c_offset = 0;
135  bool per_channel_requant = false;
136  int32_t per_layer_left_shift = 0;
137  int32_t per_layer_right_shift = 0;
138  int32_t per_layer_mul = 0;
139  const int32_t *per_channel_left_shifts = nullptr;
140  const int32_t *per_channel_right_shifts = nullptr;
141  const int32_t *per_channel_muls = nullptr;
142  int32_t minval = 0;
143  int32_t maxval = 0;
144 
145  Requantize32() = default;
146 
147  // Constructor for per-tensor quantization
// Sets per_channel_requant to false and leaves the per_channel_* pointers at
// their nullptr defaults. The single signed `requant_shift` is split in two:
//   per_layer_left_shift  = max(requant_shift, 0)  (non-negative part)
//   per_layer_right_shift = min(requant_shift, 0)  (non-positive part)
// so at most one of the two is non-zero.
// NOTE(review): std::max/std::min are used here but <algorithm> is not among
// the includes visible in this file - presumably it arrives transitively.
148  Requantize32(const int32_t *bias, size_t bias_multi_stride,
149  int32_t a_offset, int32_t b_offset, int32_t c_offset,
150  int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
151  : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max<int32_t>(requant_shift, 0)),
152  per_layer_right_shift(std::min<int32_t>(requant_shift, 0)), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
153  {
154  }
155 
156  // Constructor for per-channel quantization
// Sets per_channel_requant to true and stores the three array pointers as
// given (no copy is made, and their required lengths are not stated here).
// The per_layer_* members keep their default value of 0.
157  Requantize32(const int32_t *bias, size_t bias_multi_stride,
158  int32_t a_offset, int32_t b_offset, int32_t c_offset,
159  const int32_t *requant_left_shifts,
160  const int32_t *requant_right_shifts,
161  const int32_t *requant_muls,
162  int32_t minv, int32_t maxv)
163  : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(true), per_channel_left_shifts(requant_left_shifts),
164  per_channel_right_shifts(requant_right_shifts), per_channel_muls(requant_muls), minval(minv), maxval(maxv)
165  {
166  }
167 };
168 
// Empty tag type. It is the default `OutputStage` template argument of the
// API function templates declared below, i.e. what is used when the caller
// supplies no output-stage object.
169 struct Nothing
170 {
171 };
172 
// Shorthand for an owning std::unique_ptr to GemmCommon<Top, Tret>; this is
// the return type of gemm() below. GemmCommon itself is not defined in this
// file (presumably it comes from gemm_common.hpp).
173 template <typename Top, typename Tret>
174 using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret>>;
175 
176 /* Low level API calls.
177  * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */
178 
179 /* get_gemm_method(): Given the templated types and provided parameters,
180  * which is the preferred method to implement this GEMM? */
// Declaration only; the definition is not in this file. Returns the choice
// as a KernelDescription. The second parameter is unnamed and defaults to a
// value-initialised OutputStage (Nothing unless specified).
181 template <typename Top, typename Tret, class OutputStage = Nothing>
182 KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage & = {});
183 
// Declaration only; the definition is not in this file. Returns an owning
// pointer to a GemmCommon<Top, Tret> for the given arguments - presumably
// the selected GEMM implementation; whether the result can be null is not
// stated here, so callers should confirm against the definition. The second
// parameter defaults to a value-initialised OutputStage.
184 template <typename Top, typename Tret, class OutputStage = Nothing>
185 UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage & = {});
186 
// Declaration only; the definition is not in this file. Returns a vector of
// KernelDescription entries for the given arguments - presumably one per
// kernel able to handle them (ordering not stated here). The second
// parameter defaults to a value-initialised OutputStage.
187 template <typename Top, typename Tret, class OutputStage = Nothing>
188 std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage & = {});
189 
// Declaration only; the definition is not in this file. Returns a bool for
// the given arguments - judging by the name, whether an optimised GEMM is
// available for them (confirm against the definition). The second parameter
// defaults to a value-initialised OutputStage.
190 template <typename Top, typename Tret, class OutputStage = Nothing>
191 bool has_opt_gemm(const GemmArgs &args, const OutputStage & = {});
192 
193 } // namespace arm_gemm
const CPUInfo * _ci
Definition: arm_gemm.hpp:104
GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N, unsigned int K, unsigned int Ksections, unsigned int nbatches, unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads, bool fast_mode=false, const GemmConfig *cfg=nullptr)
Definition: arm_gemm.hpp:117
std::vector< KernelDescription > get_compatible_kernels(const GemmArgs &args, const OutputStage &os)
unsigned int _nmulti
Definition: arm_gemm.hpp:110
unsigned int _Nsize
Definition: arm_gemm.hpp:106
Activation _act
Definition: arm_gemm.hpp:112
unsigned int M
const GemmConfig * _cfg
Definition: arm_gemm.hpp:115
const CPUInfo & ci
GemmConfig(GemmMethod method)
Definition: arm_gemm.hpp:73
Activation(Type type=Type::None, float p1=0.0f, float p2=0.0f)
Definition: arm_gemm.hpp:95
unsigned int N
const char * name
Requantize32(const int32_t *bias, size_t bias_multi_stride, int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
Definition: arm_gemm.hpp:148
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage &={})
unsigned int _Msize
Definition: arm_gemm.hpp:105
std::unique_ptr< GemmCommon< Top, Tret > > UniqueGemmCommon
Definition: arm_gemm.hpp:174
bool has_opt_gemm(const GemmArgs &args, const OutputStage &os)
KernelDescription(GemmMethod m, std::string n, bool d=false, uint64_t c=0)
Definition: arm_gemm.hpp:57
const int32_t * requant_muls
unsigned int _Ksections
Definition: arm_gemm.hpp:108
unsigned int _Ksize
Definition: arm_gemm.hpp:107
unsigned int _nbatches
Definition: arm_gemm.hpp:109
Requantize32(const int32_t *bias, size_t bias_multi_stride, int32_t a_offset, int32_t b_offset, int32_t c_offset, const int32_t *requant_left_shifts, const int32_t *requant_right_shifts, const int32_t *requant_muls, int32_t minv, int32_t maxv)
Definition: arm_gemm.hpp:157
const int32_t * bias
unsigned int K