Compute Library
 21.08
QuantizationInfo.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
25 #define ARM_COMPUTE_QUANTIZATION_INFO_H
26 
#include "arm_compute/core/Error.h"
#include "utils/misc/Utility.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
#include <vector>
35 
36 namespace arm_compute
37 {
38 using qasymm8_signed_t = int8_t; /**< 8 bit signed quantized asymmetric scalar value */
39 using qasymm8_t = uint8_t; /**< 8 bit unsigned quantized asymmetric scalar value */
40 using qsymm16_t = int16_t; /**< 16 bit signed quantized symmetric scalar value */
41 using qasymm16_t = uint16_t; /**< 16 bit unsigned quantized asymmetric scalar value */
42 
/** Quantization info when assuming per layer quantization */
struct UniformQuantizationInfo
{
    /** Default constructor
     *
     * Sets both the scale and the offset to zero.
     */
    UniformQuantizationInfo()
        : scale(0.f), offset(0)
    {
    }
    /** Constructor
     *
     * @param[in] scale  Quantization scale
     * @param[in] offset Quantization offset
     */
    UniformQuantizationInfo(float scale, int32_t offset)
        : scale(scale), offset(offset)
    {
    }
    /** Checks if the scale and offset are both zero
     *
     * @return True if both the scale and the offset are zero.
     */
    bool empty() const
    {
        return (scale == 0) && (offset == 0);
    }

    float   scale;  /**< Quantization scale */
    int32_t offset; /**< Quantization offset */
};
69 
70 /** Quantization information */
72 {
73 public:
74  /** Default constructor */
75  QuantizationInfo() noexcept
76  : _scale(),
77  _offset()
78  {
79  }
80  /** Construct quantization info.
81  *
82  * @note Used for symmetric quantization
83  *
84  * @param[in] scale Scale.
85  */
87  : _scale(1, scale), _offset()
88  {
89  }
90  /** Construct quantization info.
91  *
92  * @note Used for asymmetric quantization
93  *
94  * @param[in] scale Scale.
95  * @param[in] offset Offset.
96  */
98  : _scale(1, scale), _offset(1, offset)
99  {
100  }
101  /** Construct quantization info.
102  *
103  * @note Used for symmetric per channel quantization
104  *
105  * @param[in] scale Scale.
106  */
107  QuantizationInfo(std::vector<float> scale)
108  : _scale(scale), _offset()
109  {
110  }
111  /** Construct quantization info.
112  *
113  * @note Used for asymmetric per channel quantization
114  *
115  * @param[in] scale Scale.
116  * @param[in] offset Offset.
117  */
118  QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset)
119  : _scale(scale), _offset(offset)
120  {
121  }
122  /** Scale vector accessor
123  *
124  * @return A reference to quantization scale metadata
125  */
126  const std::vector<float> &scale() const
127  {
128  return _scale;
129  }
130  /** Offset vector accessor
131  *
132  * @return A reference to quantization offset metadata
133  */
134  const std::vector<int32_t> &offset() const
135  {
136  return _offset;
137  }
138  /** Indicates whether this QuantizationInfo has valid settings or not
139  *
140  * @return True if the this has invalid settings.
141  */
142  bool empty() const
143  {
144  return _scale.empty() && _offset.empty();
145  }
146  /** Return per layer quantization info
147  *
148  * @return Uniform quantization information in case of empty information zero is returned in the respective fields
149  */
151  {
153  uqinfo.scale = _scale.empty() ? 0 : _scale[0];
154  uqinfo.offset = _offset.empty() ? 0 : _offset[0];
155 
156  return uqinfo;
157  }
158 
159 private:
160  std::vector<float> _scale; /**< Vector containing scaling factors */
161  std::vector<int32_t> _offset; /**< Vector containing zero offsets */
162 };
163 
164 /** Check whether two quantization info are equal.
165  *
166  * @param[in] lhs RHS quantization info.
167  * @param[in] rhs LHS quantization info.
168  *
169  * @return True if the given quantization info is the same.
170  */
171 inline bool operator==(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
172 {
173  return (lhs.scale() == rhs.scale()) && (lhs.offset() == rhs.offset());
174 }
175 
176 /** Check whether two quantization info are not equal.
177  *
178  * @param[in] lhs RHS quantization info.
179  * @param[in] rhs LHS quantization info.
180  *
181  * @return True if the given quantization info is the same.
182  */
183 inline bool operator!=(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
184 {
185  return !(operator==(lhs, rhs));
186 }
187 
188 /** Check whether two quantization info are equal.
189  *
190  * @param[in] lhs RHS quantization info.
191  * @param[in] rhs LHS quantization info.
192  *
193  * @return True if the given quantization info is the same.
194  */
195 inline bool operator==(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
196 {
197  return (lhs.scale == rhs.scale) && (lhs.offset == rhs.offset);
198 }
199 
200 /** Check whether two quantization info are not equal.
201  *
202  * @param[in] lhs RHS quantization info.
203  * @param[in] rhs LHS quantization info.
204  *
205  * @return True if the given quantization info is the same.
206  */
207 inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
208 {
209  return !(operator==(lhs, rhs));
210 }
211 template <typename QUANTIZED_TYPE = uint8_t>
213 {
214  static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value
215  || std::is_same<QUANTIZED_TYPE, int8_t>::value,
216  "quantized type should be either uint8_t or int8_t.");
217 
218  /** Quantize a value given a 8-bit asymmetric quantization scheme
219  *
220  * @param[in] value Value to quantize
221  * @param[in] qinfo Quantization information to use for quantizing
222  *
223  * @return Quantized value
224  */
225  static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
226  {
227  ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
228  const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset;
229  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
230  }
231 
232  /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy
233  *
234  * @param[in] value Value to quantize
235  * @param[in] qinfo Quantization information to use for quantizing
236  * @param[in] rounding_policy Rounding policy to use
237  *
238  * @return Quantized value
239  */
240  static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
241  {
242  if(rounding_policy == RoundingPolicy::TO_NEAREST_UP)
243  {
244  return quantize(value, qinfo);
245  }
246 
247  ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
248  const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
249  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
250  }
251 
252  /** Quantize a value given a 8-bit asymmetric quantization scheme
253  *
254  * @param[in] value Value to quantize
255  * @param[in] qinfo Quantization information to use for quantizing
256  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
257  *
258  * @return Quantized value
259  */
260  static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
261  {
262  const UniformQuantizationInfo uqinfo = qinfo.uniform();
263  ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
264  const int quantized = arm_compute::round(value / uqinfo.scale, rounding_policy) + uqinfo.offset;
265  return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
266  }
267 
268  /** Dequantize a value given a 8-bit asymmetric quantization scheme
269  *
270  * @param[in] value Value to dequantize
271  * @param[in] qinfo Quantization information to use for dequantizing
272  *
273  * @return Dequantized value
274  */
275  static inline float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
276  {
277  return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
278  }
279 
280  /** Dequantize a value given a 8-bit asymmetric quantization scheme
281  *
282  * @param[in] value Value to dequantize
283  * @param[in] qinfo Quantization information to use for dequantizing
284  *
285  * @return Dequantized value
286  */
287  static inline float dequantize(QUANTIZED_TYPE value, const QuantizationInfo &qinfo)
288  {
289  const UniformQuantizationInfo uqinfo = qinfo.uniform();
290  return (static_cast<int>(value) - uqinfo.offset) * uqinfo.scale;
291  }
292 };
293 
294 /** Quantize a value given an unsigned 8-bit asymmetric quantization scheme
295  *
296  * @param[in] value Value to quantize
297  * @param[in] qinfo Quantization information to use for quantizing
298  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
299  *
300  * @return Quantized value
301  */
302 template <typename INFO_TYPE>
303 inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
304 {
305  return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy);
306 }
307 
308 /** Quantize a value given a signed 8-bit asymmetric quantization scheme
309  *
310  * @param[in] value Value to quantize
311  * @param[in] qinfo Quantization information to use for quantizing
312  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
313  *
314  * @return Quantized value
315  */
316 template <typename INFO_TYPE>
317 inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
318 {
319  return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy);
320 }
321 
322 /** Quantize a value given a 8-bit symmetric quantization scheme
323  *
324  * @param[in] value Value to quantize
325  * @param[in] qinfo Quantization information to use for quantizing
326  *
327  * @return Quantized value
328  */
329 inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
330 {
331  int quantized = arm_compute::round(value / qinfo.uniform().scale, RoundingPolicy::TO_NEAREST_UP);
332  quantized = std::max(-128, std::min(quantized, 127));
333  return quantized;
334 }
335 
336 /** Quantize a value given a 8-bit symmetric per channel quantization scheme
337  *
338  * @param[in] value Value to quantize
339  * @param[in] qinfo Quantization information to use for quantizing
340  * @param[in] channel_id channel index into the scale vector of quantization info
341  *
342  * @return Quantized value
343  */
344 inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
345 {
346  int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
347  quantized = std::max(-128, std::min(quantized, 127));
348  return quantized;
349 }
350 
351 /** Dequantize a value given an unsigned 8-bit asymmetric quantization scheme
352  *
353  * @param[in] value Value to dequantize
354  * @param[in] qinfo Quantization information to use for dequantizing
355  *
356  * @return Dequantized value
357  */
358 template <typename INFO_TYPE>
359 inline float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
360 {
362 }
363 
364 /** Dequantize a value given a signed 8-bit asymmetric quantization scheme
365  *
366  * @param[in] value Value to dequantize
367  * @param[in] qinfo Quantization information to use for dequantizing
368  *
369  * @return Dequantized value
370  */
371 template <typename INFO_TYPE>
372 inline float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
373 {
375 }
376 
/** Dequantize a value given an 8-bit asymmetric quantization scheme
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value, i.e. (value - offset) * scale
 */
inline float dequantize(uint8_t value, float scale, int32_t offset)
{
    // Remove the zero-offset in the integer domain, then apply the scale
    const auto centered = static_cast<int>(value) - offset;
    return centered * scale;
}
389 
390 /** Dequantize a value given a 8-bit symmetric quantization scheme
391  *
392  * @param[in] value Value to dequantize
393  * @param[in] qinfo Quantization information to use for dequantizing
394  *
395  * @return Dequantized value
396  */
397 inline float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinfo)
398 {
399  return value * qinfo.scale;
400 }
401 
/** Dequantize a value given a 8-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value * scale
 */
inline float dequantize(int8_t value, float scale)
{
    const float widened = static_cast<float>(value);
    return widened * scale;
}
413 
/** Dequantize a value given a 16-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value * scale
 */
inline float dequantize(int16_t value, float scale)
{
    const float widened = static_cast<float>(value);
    return widened * scale;
}
425 
/** Dequantize a value given a 16-bit asymmetric quantization scheme
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value, i.e. (value - offset) * scale
 */
inline float dequantize(uint16_t value, float scale, int32_t offset)
{
    // Remove the zero-offset in the integer domain, then apply the scale
    const auto centered = static_cast<int>(value) - offset;
    return centered * scale;
}
438 
439 /** Quantize a value given a 16-bit symmetric quantization scheme
440  *
441  * @param[in] value Value to quantize
442  * @param[in] qinfo Quantization information to use for quantizing
443  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
444  *
445  * @return Quantized value
446  */
447 inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
448 {
449  int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
450  quantized = arm_compute::utility::clamp<int, int16_t>(quantized);
451  return quantized;
452 }
453 
454 /** Dequantize a value given a 16-bit symmetric quantization scheme
455  *
456  * @param[in] value Value to dequantize
457  * @param[in] qinfo Quantization information to use for dequantizing
458  *
459  * @return Dequantized value
460  */
461 inline float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
462 {
463  return value * qinfo.scale;
464 }
465 
466 /** Quantize a value given a 16-bit symmetric quantization scheme
467  *
468  * @param[in] value Value to quantize
469  * @param[in] qinfo Quantization information to use for quantizing
470  *
471  * @return Quantized value
472  */
473 inline int16_t quantize_qsymm16(float value, const QuantizationInfo &qinfo)
474 {
475  return quantize_qsymm16(value, qinfo.uniform());
476 }
477 
478 /** Dequantize a value given a 16-bit symmetric quantization scheme
479  *
480  * @param[in] value Value to dequantize
481  * @param[in] qinfo Quantization information to use for dequantizing
482  *
483  * @return Dequantized value
484  */
485 inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
486 {
487  return dequantize_qsymm16(value, qinfo.uniform());
488 }
489 
490 /** Quantize a value given a 16-bit asymmetric quantization scheme
491  *
492  * @param[in] value Value to quantize
493  * @param[in] qinfo Quantization information to use for quantizing
494  * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
495  *
496  * @return Quantized value
497  */
498 inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
499 {
500  int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
501  quantized = arm_compute::utility::clamp<int, uint16_t>(quantized);
502  return quantized;
503 }
504 
505 /** Dequantize a value given a 16-bit asymmetric quantization scheme
506  *
507  * @param[in] value Value to dequantize
508  * @param[in] qinfo Quantization information to use for dequantizing
509  *
510  * @return Dequantized value
511  */
512 inline float dequantize_qasymm16(uint16_t value, const UniformQuantizationInfo &qinfo)
513 {
514  return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
515 }
516 
517 /** Quantize a value given a 16-bit asymmetric quantization scheme
518  *
519  * @param[in] value Value to quantize
520  * @param[in] qinfo Quantization information to use for quantizing
521  *
522  * @return Quantized value
523  */
524 inline uint16_t quantize_qasymm16(float value, const QuantizationInfo &qinfo)
525 {
526  return quantize_qasymm16(value, qinfo.uniform());
527 }
528 
529 /** Dequantize a value given a 16-bit asymmetric quantization scheme
530  *
531  * @param[in] value Value to dequantize
532  * @param[in] qinfo Quantization information to use for dequantizing
533  *
534  * @return Dequantized value
535  */
536 inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
537 {
538  return dequantize_qasymm16(value, qinfo.uniform());
539 }
540 
541 /*
542  * In case of requantization of a quantized input tensor to an output tensor with another quantization
543  * instead of applying dequantization and then a quantization functions, we just compute new scale and
544  * offset.
545  *
546  * Assuming:
547  * - q_i as input quantized value
548  * - q_o as output quantized value
549  * - z_i as input quantization offset value
550  * - z_o as output quantization offset value
551  * - s_i as input quantization scale value
552  * - s_o as output quantization scale value
553  * - z_n as new quantization offset value
554  * - s_n as new quantization scale value
555  *
556  * q_o = ( q_i - z_i ) * s_i / s_o + z_o
557  *
558  * We can rewrite the formula as:
559  *
560  * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o
561  *
562  * q_o = q_i / s_n + z_n
563  *
564  * Where:
565  *
566  * s_n = s_o / s_i
567  *
568  * z_n = - z_i * s_i / s_o + z_o
569  *
570  */
572 {
573  float scale_to_apply = uqinfo_out.scale;
574  int32_t offset_to_apply = uqinfo_out.offset;
575 
576  scale_to_apply /= uqinfo_in.scale;
577  // In order to minimize flooring we convert the offset to a float,
578  // then compute the new offset in the float domain,
579  // finally we convert it back as int32_t
580  offset_to_apply -= static_cast<int32_t>(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale);
581  return UniformQuantizationInfo(scale_to_apply, offset_to_apply);
582 }
583 
584 } // namespace arm_compute
585 #endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
QuantizationInfo() noexcept
Default constructor.
const std::vector< int32_t > & offset() const
Offset vector accessor.
int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
Quantize a value given a 8-bit symmetric quantization scheme.
int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit symmetric quantization scheme.
Rounds to nearest value; half rounds away from zero.
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
bool operator!=(const Dimensions< T > &lhs, const Dimensions< T > &rhs)
Check that given dimensions are not equal.
Definition: Dimensions.h:288
UniformQuantizationInfo(float scale, int32_t offset)
Constructor.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
QuantizationInfo(float scale, int offset)
Construct quantization info.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Quantization info when assuming per layer quantization.
QuantizationInfo(std::vector< float > scale)
Construct quantization info.
bool operator==(const Dimensions< T > &lhs, const Dimensions< T > &rhs)
Check that given dimensions are equal.
Definition: Dimensions.h:276
Copyright (c) 2017-2021 Arm Limited.
static QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
Quantize a value given a 8-bit asymmetric quantization scheme.
float dequantize_qasymm16(uint16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit asymmetric quantization scheme.
Quantization information.
std::enable_if< std::is_same< T, int8_t >::value, int8_t >::type quantize(float val, const UniformQuantizationInfo &info)
Definition: quantized.h:41
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit symmetric quantization scheme.
static QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 8-bit asymmetric quantization scheme.
QuantizationInfo(std::vector< float > scale, std::vector< int32_t > offset)
Construct quantization info.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
RoundingPolicy
Rounding method.
Definition: Rounding.h:30
const std::vector< float > & scale() const
Scale vector accessor.
UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
static QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy...
uint16_t qasymm16_t
16 bit quantized asymmetric scalar value
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
static float dequantize(QUANTIZED_TYPE value, const QuantizationInfo &qinfo)
Dequantize a value given a 8-bit asymmetric quantization scheme.
UniformQuantizationInfo()
Default constructor.
bool empty() const
Indicates whether this QuantizationInfo has valid settings or not.
int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id=0)
Quantize a value given a 8-bit symmetric per channel quantization scheme.
const QuantizationInfo qinfo
Definition: Im2Col.cpp:155
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
int8_t qasymm8_signed_t
8 bit signed quantized asymmetric scalar value
QuantizationInfo(float scale)
Construct quantization info.
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
static float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 8-bit asymmetric quantization scheme.
float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 8-bit symmetric quantization scheme.
long lround(T value)
Round floating-point value with half value rounding away from zero and cast to long.
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
bool empty() const
Checks if the scale and offset are both zero.
int16_t qsymm16_t
16 bit quantized symmetric scalar value
uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit asymmetric quantization scheme.