ComputeLibrary/latest/_bfloat16_8h_source.xhtml

/*

 * Copyright (c) 2020-2022 Arm Limited.

 *

 * SPDX-License-Identifier: MIT

 *

 * Permission is hereby granted, free of charge, to any person obtaining a copy

 * of this software and associated documentation files (the "Software"), to

 * deal in the Software without restriction, including without limitation the

 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

 * sell copies of the Software, and to permit persons to whom the Software is

 * furnished to do so, subject to the following conditions:

 *

 * The above copyright notice and this permission notice shall be included in all

 * copies or substantial portions of the Software.

 *

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

 * SOFTWARE.

 */

#ifndef ARM_COMPUTE_BFLOAT16_H

#define ARM_COMPUTE_BFLOAT16_H


#include <cstdint>

#include <cstring>


namespace arm_compute

{

namespace

{

/** Convert float to bfloat16

 *

 * @param[in] v Floating-point value to convert to bfloat

 *

 * @return Converted value

 */

inline uint16_t float_to_bf16(const float v)

{

    const uint32_t *fromptr = reinterpret_cast<const uint32_t *>(&v);

#if defined(ARM_COMPUTE_ENABLE_BF16)

    uint16_t res;


    __asm __volatile("ldr    s0, [%[fromptr]]\n"

                     ".inst    0x1e634000\n" // BFCVT h0, s0

                     "str    h0, [%[toptr]]\n"

                     :

                     : [fromptr] "r"(fromptr), [toptr] "r"(&res)

                     : "v0", "memory");

#else  /* defined(ARM_COMPUTE_ENABLE_BF16) */

    uint16_t       res   = (*fromptr >> 16);

    const uint16_t error = (*fromptr & 0x0000ffff);

    uint16_t       bf_l  = res & 0x0001;

    if ((error > 0x8000) || ((error == 0x8000) && (bf_l != 0)))

    {

        res += 1;

    }

#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */

    return res;

}


/** Convert bfloat16 to float

 *

 * @param[in] v Bfloat16 value to convert to float

 *

 * @return Converted value

 */

inline float bf16_to_float(const uint16_t &v)

{

    const uint32_t lv = (v << 16);

    float          fp;

    memcpy(&fp, &lv, sizeof(lv));

    return fp;

}

} // namespace


/** Brain floating point representation class */

class bfloat16 final

{

public:

    /** Default Constructor */

    bfloat16() : value(0)

    {

    }

    /** Constructor

     *

     * @param[in] v Floating-point value

     */

    bfloat16(float v) : value(float_to_bf16(v))

    {

    }

    /** Assignment operator

     *

     * @param[in] v Floating point value to assign

     *

     * @return The updated object

     */

    bfloat16 &operator=(float v)

    {

        value = float_to_bf16(v);

        return *this;

    }

    /** Floating point conversion operator

     *

     * @return Floating point representation of the value

     */

    operator float() const

    {

        return bf16_to_float(value);

    }

    /** Lowest representative value

     *

     * @return Returns the lowest finite value representable by bfloat16

     */

    static bfloat16 lowest()

    {

        bfloat16 val;

        val.value = 0xFF7F;

        return val;

    }

    /** Largest representative value

     *

     * @return Returns the largest finite value representable by bfloat16

     */

    static bfloat16 max()

    {

        bfloat16 val;

        val.value = 0x7F7F;

        return val;

    }


private:

    uint16_t value;

};

} // namespace arm_compute

#endif /* ARM_COMPUTE_BFLOAT16_H */