Compute Library
 22.08
QuantizationInfo.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
25 #define ARM_COMPUTE_QUANTIZATION_INFO_H
26 
#include "arm_compute/core/Error.h"
#include "utils/misc/Utility.h"

#include <cstddef>
#include <type_traits>
#include <utility>
#include <vector>
35 
36 namespace arm_compute
37 {
38 using qasymm8_signed_t = int8_t; /**< 8 bit signed quantized asymmetric scalar value */
39 using qasymm8_t = uint8_t; /**< 8 bit quantized asymmetric scalar value */
40 using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */
41 using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value */
42 
43 /** Quantization info when assuming per layer quantization */
45 {
46  /** Default constructor */
48  : scale(0.f), offset(0)
49  {
50  }
51  /** Constructor
52  *
53  * @param[in] scale Quantization scale
54  * @param[in] offset Quantization offset
55  */
57  : scale(scale), offset(offset)
58  {
59  }
60  /** Checks if the scale and offset are both zero */
61  bool empty() const
62  {
63  return (scale == 0) && (offset == 0);
64  }
65 
66  float scale;
67  int32_t offset;
68 };
69 
70 /** Quantization information */
72 {
73 public:
74  /** Default constructor */
75  QuantizationInfo() noexcept
76  : _scale(),
77  _offset()
78  {
79  }
80  /** Construct quantization info.
81  *
82  * @note Used for symmetric quantization
83  *
84  * @param[in] scale Scale.
85  */
87  : _scale(1, scale), _offset()
88  {
89  }
90  /** Construct quantization info.
91  *
92  * @note Used for asymmetric quantization
93  *
94  * @param[in] scale Scale.
95  * @param[in] offset Offset.
96  */
98  : _scale(1, scale), _offset(1, offset)
99  {
100  }
101  /** Construct quantization info.
102  *
103  * @note Used for symmetric per channel quantization
104  *
105  * @param[in] scale Scale.
106  */
107  QuantizationInfo(std::vector<float> scale)
108  : _scale(scale), _offset()
109  {
110  }
111  /** Construct quantization info.
112  *
113  * @note Used for asymmetric per channel quantization
114  *
115  * @param[in] scale Scale.
116  * @param[in] offset Offset.
117  */
118  QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset)
119  : _scale(scale), _offset(offset)
120  {
121  }
122  /** Scale vector accessor
123  *
124  * @return A reference to quantization scale metadata
125  */
126  const std::vector<float> &scale() const
127  {
128  return _scale;
129  }
130  /** Offset vector accessor
131  *
132  * @return A reference to quantization offset metadata
133  */
134  const std::vector<int32_t> &offset() const
135  {
136  return _offset;
137  }
138  /** Indicates whether this QuantizationInfo has valid settings or not
139  *
140  * @return True if the this has invalid settings.
141  */
142  bool empty() const
143  {
144  return _scale.empty() && _offset.empty();
145  }
146  /** Return per layer quantization info
147  *
148  * @return Uniform quantization information; if the scale or offset vector is empty, zero is returned in the respective field
149  */
151  {
153  uqinfo.scale = _scale.empty() ? 0 : _scale[0];
154  uqinfo.offset = _offset.empty() ? 0 : _offset[0];
155 
156  return uqinfo;
157  }
158 
159 private:
160  std::vector<float> _scale; /**< Vector containing scaling factors */
161  std::vector<int32_t> _offset; /**< Vector containing zero offsets */
162 };
163 
164 /** Check whether two quantization info are equal.
165  *
166  * @param[in] lhs RHS quantization info.
167  * @param[in] rhs LHS quantization info.
168  *
169  * @return True if the given quantization info is the same.
170  */
171 inline bool operator==(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
172 {
173  return (lhs.scale() == rhs.scale()) && (lhs.offset() == rhs.offset());
174 }
175 
176 /** Check whether two quantization info are not equal.
177  *
178  * @param[in] lhs RHS quantization info.
179  * @param[in] rhs LHS quantization info.
180  *
181  * @return True if the given quantization info is the same.
182  */
183 inline bool operator!=(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
184 {
185  return !(operator==(lhs, rhs));
186 }
187 
188 /** Check whether two quantization info are equal.
189  *
190  * @param[in] lhs RHS quantization info.
191  * @param[in] rhs LHS quantization info.
192  *
193  * @return True if the given quantization info is the same.
194  */
195 inline bool operator==(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
196 {
197  return (lhs.scale == rhs.scale) && (lhs.offset == rhs.offset);
198 }
199 
200 /** Check whether two quantization info are not equal.
201  *
202  * @param[in] lhs RHS quantization info.
203  * @param[in] rhs LHS quantization info.
204  *
205  * @return True if the given quantization info is the same.
206  */
207 inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
208 {
209  return !(operator==(lhs, rhs));
210 }
211 template <typename QUANTIZED_TYPE = uint8_t>
213 {
214  static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value
215  || std::is_same<QUANTIZED_TYPE, int8_t>::value,
216  "quantized type should be either uint8_t or int8_t.");
217 
218  /** Quantize a value given a 8-bit asymmetric quantization scheme
219  *
220  * @param[in] value Value to quantize
221  * @param[in] qinfo Quantization information to use for quantizing
222  *
223  * @return Quantized value
224  */
225  static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
226  {
227  ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
228  const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset;
229  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
230  }
231 
232  /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy
233  *
234  * @param[in] value Value to quantize
235  * @param[in] qinfo Quantization information to use for quantizing
236  * @param[in] rounding_policy Rounding policy to use
237  *
238  * @return Quantized value
239  */
240  static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
241  {
242  if(rounding_policy == RoundingPolicy::TO_NEAREST_UP)
243  {
244  return quantize(value, qinfo);
245  }
246 
247  ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
248  const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
249  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
250  }
251 
252  /** Quantize a value given a 8-bit asymmetric quantization scheme
253  *
254  * @param[in] value Value to quantize
255  * @param[in] qinfo Quantization information to use for quantizing
256  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
257  *
258  * @return Quantized value
259  */
260  static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
261  {
262  const UniformQuantizationInfo uqinfo = qinfo.uniform();
263  ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
264  const int quantized = arm_compute::round(value / uqinfo.scale, rounding_policy) + uqinfo.offset;
265  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
266  }
267 
268  /** Dequantize a value given a 8-bit asymmetric quantization scheme
269  *
270  * @param[in] value Value to dequantize
271  * @param[in] qinfo Quantization information to use for dequantizing
272  *
273  * @return Dequantized value
274  */
275  static inline float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
276  {
277  return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
278  }
279 
280  /** Dequantize a value given a 8-bit asymmetric quantization scheme
281  *
282  * @param[in] value Value to dequantize
283  * @param[in] qinfo Quantization information to use for dequantizing
284  *
285  * @return Dequantized value
286  */
287  static inline float dequantize(QUANTIZED_TYPE value, const QuantizationInfo &qinfo)
288  {
289  const UniformQuantizationInfo uqinfo = qinfo.uniform();
290  return (static_cast<int>(value) - uqinfo.offset) * uqinfo.scale;
291  }
292 };
293 
294 /** Quantize a value given an unsigned 8-bit asymmetric quantization scheme
295  *
296  * @param[in] value Value to quantize
297  * @param[in] qinfo Quantization information to use for quantizing
298  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
299  *
300  * @return Quantized value
301  */
302 template <typename INFO_TYPE>
303 inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
304 {
305  return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy);
306 }
307 
308 /** Quantize a value given a signed 8-bit asymmetric quantization scheme
309  *
310  * @param[in] value Value to quantize
311  * @param[in] qinfo Quantization information to use for quantizing
312  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
313  *
314  * @return Quantized value
315  */
316 template <typename INFO_TYPE>
317 inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
318 {
319  return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy);
320 }
321 
322 /** Quantize a value given a 8-bit symmetric quantization scheme
323  *
324  * @param[in] value Value to quantize
325  * @param[in] qinfo Quantization information to use for quantizing
326  *
327  * @return Quantized value
328  */
329 inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
330 {
331  int quantized = arm_compute::round(value / qinfo.uniform().scale, RoundingPolicy::TO_NEAREST_UP);
332  quantized = std::max(-128, std::min(quantized, 127));
333  return quantized;
334 }
335 
336 /** Quantize a value given a 8-bit symmetric per channel quantization scheme
337  *
338  * @param[in] value Value to quantize
339  * @param[in] qinfo Quantization information to use for quantizing
340  * @param[in] channel_id channel index into the scale vector of quantization info
341  *
342  * @return Quantized value
343  */
344 inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
345 {
346  int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
347  quantized = std::max(-128, std::min(quantized, 127));
348  return quantized;
349 }
350 
351 /** Dequantize a value given an unsigned 8-bit asymmetric quantization scheme
352  *
353  * @param[in] value Value to dequantize
354  * @param[in] qinfo Quantization information to use for dequantizing
355  *
356  * @return Dequantized value
357  */
358 template <typename INFO_TYPE>
359 inline float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
360 {
362 }
363 
364 /** Dequantize a value given a signed 8-bit asymmetric quantization scheme
365  *
366  * @param[in] value Value to dequantize
367  * @param[in] qinfo Quantization information to use for dequantizing
368  *
369  * @return Dequantized value
370  */
371 template <typename INFO_TYPE>
372 inline float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
373 {
375 }
376 
/** Dequantize a value given an 8-bit asymmetric quantization scheme
 *
 * Computes (value - offset) * scale, with the subtraction carried out on integers.
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value
 */
inline float dequantize(uint8_t value, float scale, int32_t offset)
{
    const auto centred = static_cast<int>(value) - offset;
    return centred * scale;
}
389 
390 /** Dequantize a value given a 8-bit symmetric quantization scheme
391  *
392  * @param[in] value Value to dequantize
393  * @param[in] qinfo Quantization information to use for dequantizing
394  *
395  * @return Dequantized value
396  */
397 inline float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinfo)
398 {
399  return value * qinfo.scale;
400 }
401 
403  const UniformQuantizationInfo &qi_in,
404  const UniformQuantizationInfo &qi_out)
405 {
406  float tmp_f = dequantize_qasymm8(in, qi_in);
407  tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
408  const qasymm8_t tmp = quantize_qasymm8(tmp_f, qi_out);
409  return tmp;
410 }
411 
413  const UniformQuantizationInfo &qi_in,
414  const UniformQuantizationInfo &qi_out,
415  float alpha)
416 {
417  float tmp_f = dequantize_qasymm8(in, qi_in);
418  tmp_f = tmp_f > 0 ? tmp_f : tmp_f * alpha;
419  const qasymm8_t tmp = quantize_qasymm8(tmp_f, qi_out);
420  return tmp;
421 }
422 
/** Dequantize a value given a 8-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value multiplied by scale
 */
inline float dequantize(int8_t value, float scale)
{
    const float as_float = static_cast<float>(value);
    return as_float * scale;
}
434 
/** Dequantize a value given a 16-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value multiplied by scale
 */
inline float dequantize(int16_t value, float scale)
{
    const float as_float = static_cast<float>(value);
    return as_float * scale;
}
446 
/** Dequantize a value given a 16-bit asymmetric quantization scheme
 *
 * Computes (value - offset) * scale, with the subtraction carried out on integers.
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value
 */
inline float dequantize(uint16_t value, float scale, int32_t offset)
{
    const auto centred = static_cast<int>(value) - offset;
    return centred * scale;
}
459 
460 /** Quantize a value given a 16-bit symmetric quantization scheme
461  *
462  * @param[in] value Value to quantize
463  * @param[in] qinfo Quantization information to use for quantizing
464  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
465  *
466  * @return Quantized value
467  */
468 inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
469 {
470  int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
471  quantized = arm_compute::utility::clamp<int, int16_t>(quantized);
472  return quantized;
473 }
474 
475 /** Dequantize a value given a 16-bit symmetric quantization scheme
476  *
477  * @param[in] value Value to dequantize
478  * @param[in] qinfo Quantization information to use for dequantizing
479  *
480  * @return Dequantized value
481  */
482 inline float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
483 {
484  return value * qinfo.scale;
485 }
486 
487 /** Quantize a value given a 16-bit symmetric quantization scheme
488  *
489  * @param[in] value Value to quantize
490  * @param[in] qinfo Quantization information to use for quantizing
491  *
492  * @return Quantized value
493  */
494 inline int16_t quantize_qsymm16(float value, const QuantizationInfo &qinfo)
495 {
496  return quantize_qsymm16(value, qinfo.uniform());
497 }
498 
499 /** Dequantize a value given a 16-bit symmetric quantization scheme
500  *
501  * @param[in] value Value to dequantize
502  * @param[in] qinfo Quantization information to use for dequantizing
503  *
504  * @return Dequantized value
505  */
506 inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
507 {
508  return dequantize_qsymm16(value, qinfo.uniform());
509 }
510 
511 /** Quantize a value given a 16-bit asymmetric quantization scheme
512  *
513  * @param[in] value Value to quantize
514  * @param[in] qinfo Quantization information to use for quantizing
515  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
516  *
517  * @return Quantized value
518  */
519 inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
520 {
521  int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
522  quantized = arm_compute::utility::clamp<int, uint16_t>(quantized);
523  return quantized;
524 }
525 
526 /** Dequantize a value given a 16-bit asymmetric quantization scheme
527  *
528  * @param[in] value Value to dequantize
529  * @param[in] qinfo Quantization information to use for dequantizing
530  *
531  * @return Dequantized value
532  */
533 inline float dequantize_qasymm16(uint16_t value, const UniformQuantizationInfo &qinfo)
534 {
535  return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
536 }
537 
538 /** Quantize a value given a 16-bit asymmetric quantization scheme
539  *
540  * @param[in] value Value to quantize
541  * @param[in] qinfo Quantization information to use for quantizing
542  *
543  * @return Quantized value
544  */
545 inline uint16_t quantize_qasymm16(float value, const QuantizationInfo &qinfo)
546 {
547  return quantize_qasymm16(value, qinfo.uniform());
548 }
549 
550 /** Dequantize a value given a 16-bit asymmetric quantization scheme
551  *
552  * @param[in] value Value to dequantize
553  * @param[in] qinfo Quantization information to use for dequantizing
554  *
555  * @return Dequantized value
556  */
557 inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
558 {
559  return dequantize_qasymm16(value, qinfo.uniform());
560 }
561 
562 /*
563  * When requantizing a quantized input tensor to an output tensor with a different quantization,
564  * instead of applying a dequantization followed by a quantization function, we just compute a new scale and
565  * offset.
566  *
567  * Assuming:
568  * - q_i as input quantized value
569  * - q_o as output quantized value
570  * - z_i as input quantization offset value
571  * - z_o as output quantization offset value
572  * - s_i as input quantization scale value
573  * - s_o as output quantization scale value
574  * - z_n as new quantization offset value
575  * - s_n as new quantization scale value
576  *
577  * q_o = ( q_i - z_i ) * s_i / s_o + z_o
578  *
579  * We can rewrite the formula as:
580  *
581  * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o
582  *
583  * q_o = q_i / s_n + z_n
584  *
585  * Where:
586  *
587  * s_n = s_o / s_i
588  *
589  * z_n = - z_i * s_i / s_o + z_o
590  *
591  */
593 {
594  float scale_to_apply = uqinfo_out.scale;
595  int32_t offset_to_apply = uqinfo_out.offset;
596 
597  scale_to_apply /= uqinfo_in.scale;
598  // In order to minimize flooring we convert the offset to a float,
599  // then compute the new offset in the float domain,
600  // finally we convert it back as int32_t
601  offset_to_apply -= static_cast<int32_t>(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale);
602  return UniformQuantizationInfo(scale_to_apply, offset_to_apply);
603 }
604 
605 } // namespace arm_compute
606 #endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
QuantizationInfo() noexcept
Default constructor.
const std::vector< int32_t > & offset() const
Offset vector accessor.
int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
Quantize a value given a 8-bit symmetric quantization scheme.
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Rounds to nearest value; half rounds away from zero.
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
bool operator!=(const Dimensions< T > &lhs, const Dimensions< T > &rhs)
Check that given dimensions are not equal.
Definition: Dimensions.h:288
UniformQuantizationInfo(float scale, int32_t offset)
Constructor.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
QuantizationInfo(float scale, int offset)
Construct quantization info.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Quantization info when assuming per layer quantization.
QuantizationInfo(std::vector< float > scale)
Construct quantization info.
bool operator==(const Dimensions< T > &lhs, const Dimensions< T > &rhs)
Check that given dimensions are equal.
Definition: Dimensions.h:276
Copyright (c) 2017-2022 Arm Limited.
static QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
Quantize a value given a 8-bit asymmetric quantization scheme.
float dequantize_qasymm16(uint16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit asymmetric quantization scheme.
Quantization information.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit symmetric quantization scheme.
static QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 8-bit asymmetric quantization scheme.
QuantizationInfo(std::vector< float > scale, std::vector< int32_t > offset)
Construct quantization info.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
RoundingPolicy
Rounding method.
Definition: Rounding.h:30
const std::vector< float > & scale() const
Scale vector accessor.
UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
static QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy...
qasymm8_t qasymm8_leaky_relu(qasymm8_t in, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out, float alpha)
qasymm8_t qasymm8_hard_swish(qasymm8_t in, const UniformQuantizationInfo &qi_in, const UniformQuantizationInfo &qi_out)
uint16_t qasymm16_t
16 bit quantized asymmetric scalar value
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
static float dequantize(QUANTIZED_TYPE value, const QuantizationInfo &qinfo)
Dequantize a value given a 8-bit asymmetric quantization scheme.
UniformQuantizationInfo()
Default constructor.
bool empty() const
Indicates whether this QuantizationInfo has valid settings or not.
int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id=0)
Quantize a value given a 8-bit symmetric per channel quantization scheme.
const QuantizationInfo qinfo
Definition: Im2Col.cpp:155
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
int8_t qasymm8_signed_t
8 bit signed quantized asymmetric scalar value
QuantizationInfo(float scale)
Construct quantization info.
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
static float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 8-bit asymmetric quantization scheme.
float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 8-bit symmetric quantization scheme.
long lround(T value)
Round floating-point value with half value rounding away from zero and cast to long.
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
bool empty() const
Checks if the scale and offset are both zero.
int16_t qsymm16_t
16 bit quantized symmetric scalar value
uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit asymmetric quantization scheme.