ArmNN
 24.02
TypesUtils.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include <armnn/TypesUtils.hpp>
8 
namespace
{
/// Workaround for std::isnan() not being implemented correctly for integral
/// types in MSVC. The C++ spec defines integral arguments to be handled as if
/// they were cast to double; MSVC historically got this wrong, so the integral
/// overload performs the cast explicitly.
/// https://stackoverflow.com/a/56356405
/// @{
template <typename T, typename std::enable_if<std::is_integral<T>::value, T>::type* = nullptr>
inline int IsNan(T x)
{
    // The spec defines integral types to be handled as if they were casted to doubles.
    return std::isnan(static_cast<double>(x));
}

template <typename T, typename std::enable_if<!std::is_integral<T>::value, T>::type * = nullptr>
inline int IsNan(T x)
{
    return std::isnan(x);
}
/// @}
} // anonymous namespace
28 
29 template<typename QuantizedType>
30 QuantizedType armnn::Quantize(float value, float scale, int32_t offset)
31 {
32  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
33  constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34  constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
35  if (scale == 0.f)
36  {
37  throw armnn::InvalidArgumentException("Quantize: Scale cannot be 0.f");
38  }
39  if (std::isnan(value))
40  {
41  throw armnn::InvalidArgumentException("Quantize: Value is NaN");
42  }
43 
44  float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
45  static_cast<float>(min)), static_cast<float>(max));
46  auto quantizedBits = static_cast<QuantizedType>(clampedValue);
47 
48  return quantizedBits;
49 }
50 
51 template <typename QuantizedType>
52 float armnn::Dequantize(QuantizedType value, float scale, int32_t offset)
53 {
54  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
55  if (scale == 0.f)
56  {
57  throw armnn::InvalidArgumentException("Dequantize: Scale cannot be 0.f");
58  }
59  if (std::isnan(value))
60  {
61  throw armnn::InvalidArgumentException("Dequantize: Value is NaN");
62  }
63  return (armnn::numeric_cast<float>(value - offset)) * scale;
64 }
65 
66 /// Explicit specialization of Quantize for int8_t
67 template
68 int8_t armnn::Quantize<int8_t>(float value, float scale, int32_t offset);
69 
70 /// Explicit specialization of Quantize for uint8_t
71 template
72 uint8_t armnn::Quantize<uint8_t>(float value, float scale, int32_t offset);
73 
74 /// Explicit specialization of Quantize for int16_t
75 template
76 int16_t armnn::Quantize<int16_t>(float value, float scale, int32_t offset);
77 
78 /// Explicit specialization of Quantize for int32_t
79 template
80 int32_t armnn::Quantize<int32_t>(float value, float scale, int32_t offset);
81 
82 /// Explicit specialization of Dequantize for int8_t
83 template
84 float armnn::Dequantize<int8_t>(int8_t value, float scale, int32_t offset);
85 
86 /// Explicit specialization of Dequantize for uint8_t
87 template
88 float armnn::Dequantize<uint8_t>(uint8_t value, float scale, int32_t offset);
89 
90 /// Explicit specialization of Dequantize for int16_t
91 template
92 float armnn::Dequantize<int16_t>(int16_t value, float scale, int32_t offset);
93 
94 /// Explicit specialization of Dequantize for int32_t
95 template
96 float armnn::Dequantize<int32_t>(int32_t value, float scale, int32_t offset);
97 
98 /// Explicit specialization of Dequantize for int64_t
99 template
100 float armnn::Dequantize<int64_t>(int64_t value, float scale, int32_t offset);
TypesUtils.hpp
NumericCast.hpp
Assert.hpp
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::Dequantize
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an integer data type (8, 16, 32 or 64 bit) into a floating point data type.
Definition: TypesUtils.cpp:52
armnn::Quantize
QuantizedType Quantize(float value, float scale, int32_t offset)
Quantize a floating point data type into an integer data type (8, 16 or 32 bit).
Definition: TypesUtils.cpp:30