#include <BFloat16.hpp>

Public Member Functions
	BFloat16 ()

	BFloat16 (const BFloat16 &v)=default

	BFloat16 (uint16_t v)

	BFloat16 (float v)

	operator float () const

BFloat16 &	operator= (const BFloat16 &other)=default

BFloat16 &	operator= (float v)

bool	operator== (const BFloat16 &r) const

float	ToFloat32 () const

uint16_t	Val () const

Static Public Member Functions
static BFloat16	Float32ToBFloat16 (const float v)

static BFloat16	Max ()

static BFloat16	Nan ()

static BFloat16	Inf ()

Detailed Description

Definition at line 15 of file BFloat16.hpp.

Constructor & Destructor Documentation

◆ BFloat16() [1/4]

BFloat16 ( )

inline

Definition at line 18 of file BFloat16.hpp.

    : m_Value(0)
    {}

Referenced by BFloat16::Inf(), BFloat16::Max(), and BFloat16::Nan().

◆ BFloat16() [2/4]

BFloat16 ( const BFloat16 & v )

default

◆ BFloat16() [3/4]

BFloat16 ( uint16_t v )

inline explicit

Definition at line 24 of file BFloat16.hpp.

    : m_Value(v)
    {}

◆ BFloat16() [4/4]

BFloat16 ( float v )

inline explicit

Definition at line 28 of file BFloat16.hpp.

    {
        m_Value = Float32ToBFloat16(v).Val();
    }

References BFloat16::Float32ToBFloat16(), and BFloat16::Val().

Member Function Documentation

◆ Float32ToBFloat16()

static BFloat16 Float32ToBFloat16 ( const float v )

inline static

Definition at line 51 of file BFloat16.hpp.

    {
        if (std::isnan(v))
        {
            return Nan();
        }
        else
        {
            // Round value to the nearest even
            // Float32
            // S EEEEEEEE MMMMMMLRMMMMMMMMMMMMMMM
            // BFloat16
            // S EEEEEEEE MMMMMML
            // LSB (L): Least significant bit of BFloat16 (last bit of the Mantissa of BFloat16)
            // R: Rounding bit
            // LSB = 0, R = 0 -> round down
            // LSB = 1, R = 0 -> round down
            // LSB = 0, R = 1, all the rest = 0 -> round down (exact tie, LSB already even)
            // LSB = 1, R = 1 -> round up
            // LSB = 0, R = 1, any of the rest != 0 -> round up
            const uint32_t* u32 = reinterpret_cast<const uint32_t*>(&v);
            uint16_t u16 = static_cast<uint16_t>(*u32 >> 16u);
            // Mark the LSB
            const uint16_t lsb = u16 & 0x0001;
            // Mark the error to be truncated (the lower 16 bits of the FP32 value)
            const uint16_t error = static_cast<uint16_t>((*u32 & 0x0000FFFF));
            if ((error > 0x8000 || (error == 0x8000 && lsb == 1)))
            {
                u16++;
            }
            BFloat16 b(u16);
            return b;
        }
    }

References armnn::error, and BFloat16::Nan().

Referenced by BFloat16::BFloat16(), and BFloat16::operator=().

◆ Inf()

static BFloat16 Inf ( )

inline static

Definition at line 112 of file BFloat16.hpp.

    {
        uint16_t infVal = 0x7F80;
        return BFloat16(infVal);
    }

References BFloat16::BFloat16().

◆ Max()

static BFloat16 Max ( )

inline static

Definition at line 100 of file BFloat16.hpp.

    {
        uint16_t max = 0x7F7F;
        return BFloat16(max);
    }

References BFloat16::BFloat16().

◆ Nan()

static BFloat16 Nan ( )

inline static

Definition at line 106 of file BFloat16.hpp.

    {
        uint16_t nan = 0x7FC0;
        return BFloat16(nan);
    }

References BFloat16::BFloat16().

Referenced by BFloat16::Float32ToBFloat16().

◆ operator float()

operator float ( ) const

inline

Definition at line 33 of file BFloat16.hpp.

    {
        return ToFloat32();
    }

References BFloat16::ToFloat32().

◆ operator=() [1/2]

BFloat16 & operator= ( const BFloat16 & other )

default

◆ operator=() [2/2]

BFloat16 & operator= ( float v )

inline

Definition at line 40 of file BFloat16.hpp.

    {
        m_Value = Float32ToBFloat16(v).Val();
        return *this;
    }

References BFloat16::Float32ToBFloat16(), and BFloat16::Val().

◆ operator==()

bool operator== ( const BFloat16 & r ) const

inline

Definition at line 46 of file BFloat16.hpp.

    {
        return m_Value == r.Val();
    }

References BFloat16::Val().

◆ ToFloat32()

float ToFloat32 ( ) const

inline

Definition at line 86 of file BFloat16.hpp.

    {
        const uint32_t u32 = static_cast<uint32_t>(m_Value << 16u);
        float f32;
        static_assert(sizeof u32 == sizeof f32, "");
        std::memcpy(&f32, &u32, sizeof u32);
        return f32;
    }

Referenced by BFloat16::operator float(), and armnn::operator<<().

◆ Val()

uint16_t Val ( ) const

inline

Definition at line 95 of file BFloat16.hpp.

    {
        return m_Value;
    }

Referenced by BFloat16::BFloat16(), armnn::operator<<(), BFloat16::operator=(), and BFloat16::operator==().

The documentation for this class was generated from the following file:

src/armnnUtils/BFloat16.hpp

Public Member Functions

Static Public Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ BFloat16() [1/4]

◆ BFloat16() [2/4]

◆ BFloat16() [3/4]

◆ BFloat16() [4/4]

Member Function Documentation

◆ Float32ToBFloat16()

◆ Inf()

◆ Max()

◆ Nan()

◆ operator float()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ operator==()

◆ ToFloat32()

◆ Val()