Compute Library
 21.11
utils.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include "arm_gemm.hpp"
28 
29 #include <cstddef>
30 #include <limits>
31 #include <tuple>
32 
33 // Macro for unreachable code (e.g. impossible default cases on switch).
// The 'why' argument does not appear in the expansion: it only documents the reason at the call site, and the
// macro always expands to a bare __builtin_unreachable() call.
34 #define UNREACHABLE(why) __builtin_unreachable()
35 
36 // Paranoid option for the above with assert
37 // #define UNREACHABLE(why) assert(0 && why)
38 
39 namespace arm_gemm {
40 
// Returns a printable name for T, derived from the compiler-generated signature string of this function.
//
// The name is whatever follows the first "cls_" marker in __PRETTY_FUNCTION__, up to (not including) the next ';'
// or ']'.  If the marker or the terminator is missing the result is "(unknown)"; if __GNUC__ is not defined the
// result is "(unsupported)".
template<typename T>
std::string get_type_name() {
#ifdef __GNUC__
    const std::string signature = __PRETTY_FUNCTION__;

    const auto marker_pos = signature.find("cls_");

    if (marker_pos != std::string::npos) {
        // Skip over the 4-character "cls_" marker itself.
        const size_t name_begin = marker_pos + 4;
        const size_t name_end   = signature.find_first_of(";]", name_begin);

        if (name_end != std::string::npos) {
            return signature.substr(name_begin, name_end - name_begin);
        }
    }

    return "(unknown)";
#else
    return "(unsupported)";
#endif
}
63 
// Integer division of a by b, rounded up, computed as (a + b - 1) / b.
// The intermediate sum is not guarded against overflow.
template<typename T>
inline T iceildiv(const T a, const T b) {
    // 'auto' keeps the (possibly promoted) type of the sum so the division happens at the same width as before.
    const auto biased_numerator = a + b - 1;

    return biased_numerator / b;
}
68 
// Rounds a up to the next multiple of b; a is returned unchanged when it is already a multiple of b.
template <typename T>
inline T roundup(const T a, const T b) {
    const T remainder = a % b;

    // Already aligned: nothing to add.
    if (!remainder) {
        return a;
    }

    return a + b - remainder;
}
79 
// Selects how a vector length is determined by utils::get_vector_length(VLType).
enum class VLType {
    None = 0, // Not SVE: get_vector_length(VLType) uses a fixed 16 bytes per vector.
    SVE  = 1, // get_vector_length(VLType) queries the SVE vector length at runtime.
};
84 
// Holds one of two output addressing descriptions together with a flag saying which one is in use.
//
// The constructors fill in either 'direct' or 'indirect' and set 'is_indirect' accordingly; the member that was
// not selected keeps its zero-initialized default.
// NOTE(review): the units/meaning of 'stride' and 'offset' are defined by the code consuming this struct, which is
// not visible here - confirm before relying on them.
template<typename T>
struct IndirectOutputArg {
    // Direct form: a base pointer plus a stride.
    struct {
        T      *base;
        size_t  stride;
    } direct = {};
    // Indirect form: a pointer to (const) pointer(s) to T, plus an offset.
    struct {
        T * const *ptr;
        size_t     offset;
    } indirect = {};
    // True when the object was built with the indirect constructor, false otherwise.
    bool is_indirect;

    // Direct
    IndirectOutputArg(T *base, size_t stride) : is_indirect(false) {
        direct.base   = base;
        direct.stride = stride;
    }

    // Indirect
    IndirectOutputArg(T * const * ptr, size_t offset) : is_indirect(true) {
        indirect.ptr    = ptr;
        indirect.offset = offset;
    }

    // Default: a direct argument with a null base pointer and zero stride.
    IndirectOutputArg() : is_indirect(false) {
        direct.base   = nullptr;
        direct.stride = 0;
    }
};
114 
115 // Check that the provided Requantize32 doesn't have a left shift.
116 inline bool quant_no_left_shift(const Requantize32 &qp) {
117  if (qp.per_channel_requant) {
118  return (qp.per_channel_left_shifts == nullptr);
119  } else {
120  return (qp.per_layer_left_shift == 0);
121  }
122 }
123 
124 // Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels. These don't include row
125 // sums, so the 'b_offset' has to be zero.
126 inline bool quant_hybrid_symmetric(const Requantize32 &qp) {
127  return quant_no_left_shift(qp) && qp.b_offset == 0;
128 }
129 
130 // Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels. These don't support per
131 // channel quantization. Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0...
132 inline bool quant_hybrid_asymmetric(const Requantize32 &qp) {
133  return quant_no_left_shift(qp) /* && qp.b_offset != 0 */ && qp.per_channel_requant==false;
134 }
135 
// Holds one of two input addressing descriptions together with a flag saying which one is in use.
//
// The constructors fill in either 'direct' or 'indirect' and set 'is_indirect' accordingly; the member that was
// not selected keeps its zero-initialized default.
// NOTE(review): the units/meaning of 'stride', 'start_row' and 'start_col' are defined by the code consuming this
// struct, which is not visible here - confirm before relying on them.
template<typename T>
struct IndirectInputArg {
    // Direct form: a base pointer to const data plus a stride.
    struct {
        const T *base;
        size_t   stride;
    } direct = {};
    // Indirect form: a three-level pointer to const data plus a starting row and column.
    struct {
        const T * const * const * ptr;
        unsigned int              start_row;
        unsigned int              start_col;
    } indirect = {};
    // True when the object was built with the indirect constructor, false otherwise.
    bool is_indirect;

    // Direct
    IndirectInputArg(const T *base, size_t stride) : is_indirect(false) {
        direct.base   = base;
        direct.stride = stride;
    }

    // Indirect
    IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col) : is_indirect(true) {
        indirect.ptr       = ptr;
        indirect.start_row = start_row;
        indirect.start_col = start_col;
    }

    // Default: a direct argument with a null base pointer and zero stride.
    IndirectInputArg() : is_indirect(false) {
        direct.base   = nullptr;
        direct.stride = 0;
    }
};
167 
168 namespace utils {
169 
// get_vector_length(): Returns the SVE vector length, expressed as a number of elements of type "T".
//
// This must be compilable by a compiler in non-SVE mode, but it must not be executed at runtime unless SVE is
// enabled.  Typical callers are switchyard/driver code built in normal mode, which then calls SVE kernels (compiled
// accordingly) iff SVE is detected at runtime.
//
// On non-AArch64 builds no instruction is issued and a fixed 16-byte vector is assumed.
template <typename T>
inline unsigned long get_vector_length() {
#if !defined(__aarch64__)
    return 16 / sizeof(T);
#else // defined(__aarch64__)
    uint64_t vector_bytes;

    // The instruction is emitted as a raw encoding so that no SVE assembler support is needed.  The encoding
    // writes X0, hence the explicit move into the output operand and the "x0" clobber.
    __asm __volatile (
        ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
        "mov %0, X0\n"
        : "=r" (vector_bytes)
        :
        : "x0"
    );

    return vector_bytes / sizeof(T);
#endif // !defined(__aarch64__)
}
193 
194 // get_vector_length(VLType): Returns vector length for type "T".
195 //
196 // This has the same requirements and constraints as the SVE-only form above, so we call into that code for SVE.
197 
198 template <typename T>
199 inline unsigned long get_vector_length(VLType vl_type) {
200  switch (vl_type) {
201  case VLType::SVE:
202  return get_vector_length<T>();
203  default:
204  return 16 / sizeof(T);
205  }
206 }
207 
// get_default_activation_values(): Returns the default (min, max) activation pair for integer activation, i.e.
// std::numeric_limits<T>::min() and std::numeric_limits<T>::max().
template <typename T>
inline std::tuple<T, T> get_default_activation_values()
{
    using limits = std::numeric_limits<T>;

    return std::tuple<T, T>(limits::min(), limits::max());
}
217 
218 // get_default_activation_values(): Returns the default values for activation min and max for float activation.
219 template <>
220 inline std::tuple<float, float> get_default_activation_values()
221 {
222  const float min = static_cast<float>(-std::numeric_limits<float>::infinity());
223  const float max = static_cast<float>(std::numeric_limits<float>::infinity());
224 
225  return std::make_tuple(min, max);
226 }
227 
#if defined(__ARM_FP16_ARGS)
// get_default_activation_values(): Returns the default (min, max) activation pair for __fp16 activation, which is
// float (-infinity, +infinity) converted to __fp16.
template <>
inline std::tuple<__fp16, __fp16> get_default_activation_values()
{
    const float inf = std::numeric_limits<float>::infinity();

    return std::tuple<__fp16, __fp16>(static_cast<__fp16>(-inf), static_cast<__fp16>(inf));
}
#endif // defined(__ARM_FP16_ARGS)
239 } // utils namespace
240 } // arm_gemm namespace
241 
// Header-scope using-directive: any file that includes this header can name the arm_gemm::utils helpers
// (get_vector_length, get_default_activation_values) without qualification.
242 using namespace arm_gemm::utils;
T roundup(const T a, const T b)
Definition: utils.hpp:70
IndirectOutputArg(T *const *ptr, size_t offset)
Definition: utils.hpp:104
const T *const *const * ptr
Definition: utils.hpp:143
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:1069
const int32_t * per_channel_left_shifts
Definition: arm_gemm.hpp:139
SimpleTensor< float > b
Definition: DFT.cpp:157
bool quant_hybrid_symmetric(const Requantize32 &qp)
Definition: utils.hpp:126
T iceildiv(const T a, const T b)
Definition: utils.hpp:65
int32_t per_layer_left_shift
Definition: arm_gemm.hpp:136
unsigned int start_col
Definition: utils.hpp:145
std::tuple< float, float > get_default_activation_values()
Definition: utils.hpp:220
bool quant_no_left_shift(const Requantize32 &qp)
Definition: utils.hpp:116
std::string get_type_name()
Definition: utils.hpp:42
IndirectInputArg(const T *base, size_t stride)
Definition: utils.hpp:150
IndirectInputArg(const T *const *const *ptr, unsigned int start_row, unsigned int start_col)
Definition: utils.hpp:156
unsigned long get_vector_length(VLType vl_type)
Definition: utils.hpp:199
unsigned int start_row
Definition: utils.hpp:144
IndirectOutputArg(T *base, size_t stride)
Definition: utils.hpp:98
bool quant_hybrid_asymmetric(const Requantize32 &qp)
Definition: utils.hpp:132