#include <BFloat16.hpp>
Definition at line 15 of file BFloat16.hpp.
◆ BFloat16() [1/4]
◆ BFloat16() [2/4]
◆ BFloat16() [3/4]
◆ BFloat16() [4/4]
◆ Float32ToBFloat16()
| static BFloat16 Float32ToBFloat16 |
( |
const float | v | ) |
|
|
inline static |
Definition at line 51 of file BFloat16.hpp.
52 {
53 if (std::isnan(v))
54 {
55 return Nan();
56 }
57 else
58 {
59
60
61
62
63
64
65
66
67
68
69
70
71 const uint32_t* u32 = reinterpret_cast<const uint32_t*>(&v);
72 uint16_t u16 = static_cast<uint16_t>(*u32 >> 16u);
73
74 const uint16_t lsb = u16 & 0x0001;
75
76 const uint16_t error = static_cast<uint16_t>((*u32 & 0x0000FFFF));
77 if ((error > 0x8000 || (error == 0x8000 && lsb == 1)))
78 {
79 u16++;
80 }
81 BFloat16 b(u16);
82 return b;
83 }
84 }
References armnn::error, and BFloat16::Nan().
Referenced by BFloat16::BFloat16(), and BFloat16::operator=().
◆ Inf()
◆ Max()
◆ Nan()
◆ operator float()
◆ operator=() [1/2]
◆ operator=() [2/2]
◆ operator==()
| bool operator== |
( |
const BFloat16 & | r | ) |
const |
|
inline |
◆ ToFloat32()
| float ToFloat32 |
( |
| ) |
const |
|
inline |
Definition at line 86 of file BFloat16.hpp.
87 {
88 const uint32_t u32 = static_cast<uint32_t>(m_Value << 16u);
89 float f32;
90 static_assert(sizeof u32 == sizeof f32, "");
91 std::memcpy(&f32, &u32, sizeof u32);
92 return f32;
93 }
Referenced by BFloat16::operator float(), and armnn::operator<<().
◆ Val()
The documentation for this class was generated from the following file: