Compute Library
 22.02
AsymmHelpers.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
27 
28 #include <cmath>
29 #include <limits>
30 #include <numeric>
31 
32 namespace arm_compute
33 {
34 namespace quantization
35 {
/** Value 2^31, i.e. 1.0 expressed with 31 fractional bits; kept in 64 bits so it does not overflow. */
constexpr int64_t fixed_point_one_Q0 = static_cast<int64_t>(1) << 31;
/** Tolerance used by the multiplier range checks when the caller does not ask to ignore it. */
constexpr float epsilon = 0.00001f;
38 
39 Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon)
40 {
41  if(multiplier >= 1.f)
42  {
43  Status status = calculate_quantized_multiplier_greater_than_one(multiplier, quant_multiplier, shift);
44  *shift *= -1;
45  return status;
46  }
47  else
48  {
49  return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift, ignore_epsilon);
50  }
51 }
52 
54  int32_t *quant_multiplier,
55  int32_t *right_shift,
56  bool ignore_epsilon)
57 {
58  const float internal_epsilon = ignore_epsilon ? 0.0f : epsilon;
59 
60  ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
61  ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
62  ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -internal_epsilon);
63  ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + internal_epsilon);
64 
65  int shift_exp = 0;
66  const double q = std::frexp(multiplier, &shift_exp);
67  *right_shift = -1 * shift_exp;
68  auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0));
69  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
70  if(q_fixed == fixed_point_one_Q0)
71  {
72  q_fixed /= 2;
73  --*right_shift;
74  }
75 
76  if(ignore_epsilon && *right_shift > 31)
77  {
78  *right_shift = 0;
79  q_fixed = 0;
80  }
81 
82  ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0);
83  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
84  *quant_multiplier = static_cast<int32_t>(q_fixed);
85 
86  return Status{};
87 }
88 
90  int32_t *quantized_multiplier,
91  int32_t *left_shift)
92 {
93  ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr);
94  ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr);
95  ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f);
96 
97  int shift_exp = 0;
98  const double q = std::frexp(multiplier, &shift_exp);
99  *left_shift = shift_exp;
100  auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0));
101  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
102  if(q_fixed == fixed_point_one_Q0)
103  {
104  q_fixed /= 2;
105  ++*left_shift;
106  }
107  ARM_COMPUTE_RETURN_ERROR_ON(*left_shift < 0);
108  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
109  *quantized_multiplier = static_cast<int32_t>(q_fixed);
110 
111  return Status{};
112 }
113 
115  const QuantizationInfo &wq_info,
116  const QuantizationInfo &oq_info,
117  GEMMLowpOutputStageInfo &stage_info)
118 {
119  ARM_COMPUTE_RETURN_ERROR_ON(iq_info.scale().empty());
120  ARM_COMPUTE_RETURN_ERROR_ON(wq_info.scale().empty());
121  ARM_COMPUTE_RETURN_ERROR_ON(oq_info.scale().empty());
122 
123  const unsigned int size = wq_info.scale().size();
124 
125  auto &quant_multipliers = stage_info.gemmlowp_multipliers;
126  auto &quant_shifts = stage_info.gemmlowp_shifts;
127  quant_multipliers.resize(size);
128  quant_shifts.resize(size);
129 
130  const auto &w_scales = wq_info.scale();
131  const float i_scale = iq_info.scale().at(0);
132  const float o_scale = oq_info.scale().at(0);
133 
134  for(unsigned int i = 0; i < size; ++i)
135  {
136  const float multiplier = i_scale * w_scales[i] / o_scale;
137  int32_t quant_multiplier = 0;
138  int32_t quant_shift = 0;
139  ARM_COMPUTE_RETURN_ON_ERROR(calculate_quantized_multiplier(multiplier, &quant_multiplier, &quant_shift));
140  quant_multipliers[i] = quant_multiplier;
141  quant_shifts[i] = quant_shift;
142  }
143 
144  // Legacy part
145  stage_info.gemmlowp_shift = quant_shifts[0];
146  stage_info.gemmlowp_multiplier = quant_multipliers[0];
147 
148  return Status{};
149 }
150 
152 {
153  int min_quant_val = 0;
154  int max_quant_val = 0;
155  switch(data_type)
156  {
157  case DataType::QASYMM8:
158  min_quant_val = std::numeric_limits<uint8_t>::min();
159  max_quant_val = std::numeric_limits<uint8_t>::max();
160  break;
161  case DataType::QSYMM8:
163  min_quant_val = std::numeric_limits<int8_t>::min();
164  max_quant_val = std::numeric_limits<int8_t>::max();
165  break;
166  case DataType::QASYMM16:
167  min_quant_val = std::numeric_limits<uint16_t>::min();
168  max_quant_val = std::numeric_limits<uint16_t>::max();
169  break;
170  case DataType::QSYMM16:
171  min_quant_val = std::numeric_limits<int16_t>::min();
172  max_quant_val = std::numeric_limits<int16_t>::max();
173  break;
174  default:
175  ARM_COMPUTE_ERROR("Unsupported data type");
176  }
177  return std::make_pair(min_quant_val, max_quant_val);
178 }
180  const ITensorInfo *weights,
181  const ITensorInfo *output,
182  int32_t *output_multipliers_ptr,
183  int32_t *output_shifts_ptr)
184 {
185  const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
186  const QuantizationInfo wq_info = weights->quantization_info();
187  const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
188 
189  const unsigned int num_filters = wq_info.scale().size();
190 
191  for(unsigned int i = 0; i < num_filters; ++i)
192  {
193  int32_t output_multiplier = 0;
194  int32_t output_shift = 0;
195  const float multiplier = iq_info.scale * wq_info.scale()[i] / oq_info.scale;
196  calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
197 
198  output_multipliers_ptr[i] = output_multiplier;
199  output_shifts_ptr[i] = output_shift;
200  }
201 }
202 
/** Compute the high 32 bits of 2 * a * b with rounding and saturation.
 *
 * The 64-bit product is nudged by +2^30 (non-negative product) or 1 - 2^30 (negative product)
 * and then divided by 2^31 with truncation toward zero.
 *
 * @param[in] a First operand.
 * @param[in] b Second operand.
 *
 * @return the rounded result; INT32_MAX when both operands are INT32_MIN, the only combination
 *         whose result does not fit in 32 bits
 */
int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b)
{
    constexpr int32_t lowest = std::numeric_limits<int32_t>::min();
    if(a == lowest && b == lowest)
    {
        return std::numeric_limits<int32_t>::max();
    }

    const int64_t ab_64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    // The sign of the exact product selects the rounding nudge; testing it directly avoids
    // routing integers through the floating-point std::signbit overloads.
    const int64_t nudge = (ab_64 >= 0) ? (1 << 30) : (1 - (1 << 30));
    return static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
}
214 
/** Divide by a power of two, rounding to nearest with ties away from zero.
 *
 * @param[in] x        Value to divide.
 * @param[in] exponent Power-of-two exponent; must be in the range [0, 31].
 *
 * @return x / 2^exponent rounded to the nearest integer
 */
inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
    // Build the mask in unsigned arithmetic so that exponent == 31 does not overflow a signed shift.
    const int32_t mask      = static_cast<int32_t>((static_cast<uint32_t>(1) << exponent) - 1U);
    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
}
221 
222 int32_t multiply_by_quantized_multiplier(int32_t input, int32_t qmul, int32_t shift)
223 {
224  const auto left_shift = shift > 0 ? shift : 0;
225  const auto right_shift = shift > 0 ? 0 : -shift;
226  return rounding_divide_by_pow2(saturating_rounding_doubling_highmul(input * (1 << left_shift), qmul), right_shift);
227 }
228 
229 int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v)
230 {
231  if(exponent == 0)
232  {
233  return v;
234  }
235  else if(exponent < 0)
236  {
237  return rounding_divide_by_pow2(v, -exponent);
238  }
239  else
240  {
241  constexpr auto min = std::numeric_limits<int32_t>::min();
242  constexpr auto max = std::numeric_limits<int32_t>::max();
243  const auto width = sizeof(int32_t) * 8;
244 
245  const int32_t threshold = ((1 << (width - 1 - exponent)) - 1);
246  bool pos_mask = v > threshold;
247  bool neg_mask = v < -threshold;
248  int32_t result = v << exponent;
249  result = pos_mask ? max : result;
250  result = neg_mask ? min : result;
251  return result;
252  }
253 }
254 
255 void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift)
256 {
257  ARM_COMPUTE_ERROR_ON(input < 0);
258 
259  if(input <= 1)
260  {
261  // dealing the inputs (0 and 1) separately to avoid overflow
262  output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
263  output_shift = 0;
264  return;
265  }
266 
267  // prepare input for fixed point operation and compute shift value
268  output_shift = 11;
269  while(input >= (1 << 29))
270  {
271  input /= 4;
272  ++output_shift;
273  }
274 
275  const uint32_t max_left_shift_bits = __builtin_clz(static_cast<uint32_t>(input)) - 1;
276  const uint32_t max_left_shift_bits_pairs = max_left_shift_bits / 2;
277  const uint32_t left_shift_bit_pairs = max_left_shift_bits_pairs - 1;
278  output_shift -= left_shift_bit_pairs;
279  input <<= 2 * left_shift_bit_pairs;
280 
281  // Calculation in fixed point domain with 3 integer bits.
282  using FixedPointRawType = int32_t;
283  constexpr uint32_t fixedpoint_position = 3;
284  constexpr uint32_t fixedpoint_int_position = sizeof(FixedPointRawType) * 8 - 1 - fixedpoint_position;
285  using FixedPoint3 = FixedPointRawType;
286  using FixedPoint0 = FixedPointRawType;
287 
288  // fixed point representation of input divided by 2 and 1.5 for Newton-Raphson iteration
289  const FixedPoint3 fixedpoint_input = (input >> 1);
290  const FixedPoint3 fixedpoint_half_input = rounding_divide_by_pow2(fixedpoint_input, 1);
291  const FixedPoint3 fixedpoint_half_three = (0x1 << fixedpoint_int_position) + (0x1 << (fixedpoint_int_position - 1));
292 
293  // initial guess (1) in fixed point representation
294  FixedPoint3 x = 0x1 << fixedpoint_int_position;
295 
296  // multiplication of two fixed point numbers, defined for readability
297  auto fixed_point_mul = [](FixedPointRawType a, FixedPointRawType b) -> FixedPointRawType
298  {
300  };
301 
302  // rescaling of fixed point to have dst_bit integer bits, defined for readability
303  auto fixed_point_rescale = [](FixedPointRawType a, uint32_t src_bit, uint32_t dst_bit) -> FixedPointRawType
304  {
305  const uint32_t exponent = src_bit - dst_bit;
306  return saturating_rounding_multiply_by_pow2(exponent, a);
307  };
308 
309  // 5 iterations of Newton-Raphson method for inverse square root - 1.5 * x_n = input/2 * (x_n)^3
310  constexpr int32_t num_iteration = 5;
311  for(int32_t i = 0; i < num_iteration; ++i)
312  {
313  const auto x3 = fixed_point_rescale(fixed_point_mul(fixed_point_mul(x, x), x), 9, fixedpoint_position);
314  x = fixed_point_rescale(fixed_point_mul(fixedpoint_half_three, x) - fixed_point_mul(fixedpoint_half_input, x3), 6, fixedpoint_position);
315  }
316 
317  // fixed point representation of sqrt(1/2)
318  const FixedPoint0 fixedpoint_half_sqrt_2 = 1518500250;
319  x = fixed_point_mul(fixedpoint_half_sqrt_2, x);
320  output_inv_sqrt = x;
321  if(output_shift < 0)
322  {
323  output_inv_sqrt <<= -output_shift;
324  output_shift = 0;
325  }
326  // convert right shift to left shift
327  output_shift *= reverse_shift;
328 }
329 } // quantization
330 } // arm_compute
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Definition: Types.h:1926
quantized, symmetric fixed-point 16-bit number
SimpleTensor< float > b
Definition: DFT.cpp:157
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Quantization info when assuming per layer quantization.
quantized, asymmetric fixed-point 16-bit number
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Status class.
Definition: Error.h:52
Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift)
Calculate quantized representation of multiplier having value greater than one.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Copyright (c) 2017-2021 Arm Limited.
std::vector< int32_t > gemmlowp_shifts
GEMMLowp output stage shifts used for quantizing to QASYMM8.
Definition: Types.h:1931
const DataType data_type
Definition: Im2Col.cpp:150
Quantization information.
std::pair< int, int > get_min_max_values_from_quantized_data_type(DataType data_type)
Get minimum and maximum values for the input quantized data type.
void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift)
Compute quantized multiplier and shift for the inverse square root of input.
int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b)
Compute multiplication of two integers.
Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, const QuantizationInfo &wq_info, const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info)
Calculate quantized representation of per-channel multipliers.
int32_t multiply_by_quantized_multiplier(int32_t input, int32_t qmul, int32_t shift)
Compute the value multiplied by given quantized multiplier and shift.
quantized, asymmetric fixed-point 8-bit number unsigned
std::vector< int32_t > gemmlowp_multipliers
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Definition: Types.h:1930
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
Definition: Types.h:1922
const std::vector< float > & scale() const
Scale vector accessor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
quantized, symmetric fixed-point 8-bit number
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
Round to the nearest division by a power-of-two using exponent, copied from NEMath.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
Definition: Types.h:1927
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
Compute quantized per-channel multipliers and shifts.
constexpr int64_t fixed_point_one_Q0
T round(T value)
Round floating-point value with half value rounding away from zero.
int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v)
Compute the value multiplied by the power-of-two.
quantized, asymmetric fixed-point 8-bit number signed
Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier with value less than one.
DataType
Available data types.
Definition: Types.h:79