ArmNN
 25.11
Loading...
Searching...
No Matches
BFloat16 Class Reference

#include <BFloat16.hpp>

Public Member Functions

 BFloat16 ()
 BFloat16 (const BFloat16 &v)=default
 BFloat16 (uint16_t v)
 BFloat16 (float v)
 operator float () const
BFloat16 & operator= (const BFloat16 &other)=default
BFloat16 & operator= (float v)
bool operator== (const BFloat16 &r) const
float ToFloat32 () const
uint16_t Val () const

Static Public Member Functions

static BFloat16 Float32ToBFloat16 (const float v)
static BFloat16 Max ()
static BFloat16 Nan ()
static BFloat16 Inf ()

Detailed Description

Definition at line 15 of file BFloat16.hpp.

Constructor & Destructor Documentation

◆ BFloat16() [1/4]

BFloat16 ( )
inline

Definition at line 18 of file BFloat16.hpp.

19 : m_Value(0)
20 {}

Referenced by BFloat16(), Float32ToBFloat16(), Inf(), Max(), Nan(), operator=(), operator=(), and operator==().

◆ BFloat16() [2/4]

BFloat16 ( const BFloat16 & v)
default

References BFloat16().

◆ BFloat16() [3/4]

BFloat16 ( uint16_t v)
inline explicit

Definition at line 24 of file BFloat16.hpp.

25 : m_Value(v)
26 {}

◆ BFloat16() [4/4]

BFloat16 ( float v)
inline explicit

Definition at line 28 of file BFloat16.hpp.

29 {
30 m_Value = Float32ToBFloat16(v).Val();
31 }

References Float32ToBFloat16(), and Val().

Member Function Documentation

◆ Float32ToBFloat16()

BFloat16 Float32ToBFloat16 ( const float v)
inline static

Definition at line 51 of file BFloat16.hpp.

52 {
53 if (std::isnan(v))
54 {
55 return Nan();
56 }
57 else
58 {
59 // Round value to the nearest even
60 // Float32
61 // S EEEEEEEE MMMMMMLRMMMMMMMMMMMMMMM
62 // BFloat16
63 // S EEEEEEEE MMMMMML
64 // LSB (L): Least significant bit of BFloat16 (last bit of the Mantissa of BFloat16)
65 // R: Rounding bit
66 // LSB = 0, R = 0 -> round down
67 // LSB = 1, R = 0 -> round down
68 // LSB = 0, R = 1, all the rest = 0 -> round down
69 // LSB = 1, R = 1 -> round up
70 // LSB = 0, R = 1, any of the rest != 0 -> round up
71 const uint32_t* u32 = reinterpret_cast<const uint32_t*>(&v);
72 uint16_t u16 = static_cast<uint16_t>(*u32 >> 16u);
73 // Mark the LSB
74 const uint16_t lsb = u16 & 0x0001;
75 // Mark the error to be truncated (the low 16 bits of the FP32 value)
76 const uint16_t error = static_cast<uint16_t>((*u32 & 0x0000FFFF));
77 if ((error > 0x8000 || (error == 0x8000 && lsb == 1)))
78 {
79 u16++;
80 }
81 BFloat16 b(u16);
82 return b;
83 }
84 }

References BFloat16(), armnn::error, and Nan().

Referenced by BFloat16(), and operator=().

◆ Inf()

BFloat16 Inf ( )
inline static

Definition at line 112 of file BFloat16.hpp.

113 {
114 uint16_t infVal = 0x7F80;
115 return BFloat16(infVal);
116 }

References BFloat16().

◆ Max()

BFloat16 Max ( )
inline static

Definition at line 100 of file BFloat16.hpp.

101 {
102 uint16_t max = 0x7F7F;
103 return BFloat16(max);
104 }

References BFloat16().

◆ Nan()

BFloat16 Nan ( )
inline static

Definition at line 106 of file BFloat16.hpp.

107 {
108 uint16_t nan = 0x7FC0;
109 return BFloat16(nan);
110 }

References BFloat16().

Referenced by Float32ToBFloat16().

◆ operator float()

operator float ( ) const
inline

Definition at line 33 of file BFloat16.hpp.

34 {
35 return ToFloat32();
36 }

References ToFloat32().

◆ operator=() [1/2]

BFloat16 & operator= ( const BFloat16 & other)
default

References BFloat16().

◆ operator=() [2/2]

BFloat16 & operator= ( float v)
inline

Definition at line 40 of file BFloat16.hpp.

41 {
42 m_Value = Float32ToBFloat16(v).Val();
43 return *this;
44 }

References BFloat16(), Float32ToBFloat16(), and Val().

◆ operator==()

bool operator== ( const BFloat16 & r) const
inline

Definition at line 46 of file BFloat16.hpp.

47 {
48 return m_Value == r.Val();
49 }

References BFloat16(), and Val().

◆ ToFloat32()

float ToFloat32 ( ) const
inline

Definition at line 86 of file BFloat16.hpp.

87 {
88 const uint32_t u32 = static_cast<uint32_t>(m_Value << 16u);
89 float f32;
90 static_assert(sizeof u32 == sizeof f32, "");
91 std::memcpy(&f32, &u32, sizeof u32);
92 return f32;
93 }

Referenced by operator float(), and armnn::operator<<().

◆ Val()

uint16_t Val ( ) const
inline

Definition at line 95 of file BFloat16.hpp.

96 {
97 return m_Value;
98 }

Referenced by BFloat16(), armnn::operator<<(), operator=(), and operator==().


The documentation for this class was generated from the following file: