Compute Library
 21.08
Bfloat16.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_BFLOAT16_H
25 #define ARM_COMPUTE_BFLOAT16_H
26 
27 #include <cstdint>
28 #include <cstring>
29 
30 namespace arm_compute
31 {
32 namespace
33 {
/** Convert float to bfloat16
 *
 * When built with BF16 support (__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) or with
 * ARM_COMPUTE_FORCE_BF16 defined, the conversion is performed by the BFCVT instruction.
 *
 * Otherwise the upper 16 bits of the 32-bit float encoding are kept and the discarded
 * lower 16 bits are rounded to nearest, ties to even. NaN inputs are returned as a quiet
 * NaN with the same sign; they are never rounded, because truncating or carrying into a
 * NaN encoding could otherwise turn it into an infinity or a zero.
 *
 * @param[in] v Floating-point value to convert to bfloat
 *
 * @return Converted value
 */
inline uint16_t float_to_bf16(const float v)
{
    static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
    uint16_t res;

    // Load the float into s0, convert it in place and store the 16-bit result through toptr.
    __asm __volatile(
        "ldr s0, [%[fromptr]]\n"
        ".inst 0x1e634000\n" // BFCVT h0, s0
        "str h0, [%[toptr]]\n"
        :
        : [fromptr] "r"(&v), [toptr] "r"(&res)
        : "v0", "memory");
    return res;
#else  /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
    // Read the raw encoding with memcpy: dereferencing a uint32_t pointer to a float
    // object violates the strict aliasing rule.
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));

    uint16_t res = static_cast<uint16_t>(bits >> 16);

    // NaN (exponent all ones, non-zero mantissa): keep sign and upper payload bits and
    // force the top mantissa bit so the result is still a NaN after truncation.
    if((bits & 0x7fffffffU) > 0x7f800000U)
    {
        return static_cast<uint16_t>(res | 0x0040U);
    }

    // Round to nearest, ties to even, based on the 16 bits that are dropped.
    const uint32_t error  = bits & 0x0000ffffU;
    const bool     is_odd = (res & 0x0001U) != 0;
    if((error > 0x8000U) || ((error == 0x8000U) && is_odd))
    {
        res = static_cast<uint16_t>(res + 1U);
    }
    return res;
#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
}
64 
/** Convert bfloat16 to float
 *
 * The 16-bit value becomes the upper half of the 32-bit float encoding and the lower
 * 16 bits are zero-filled.
 *
 * @param[in] v Bfloat16 value to convert to float
 *
 * @return Converted value
 */
inline float bf16_to_float(const uint16_t &v)
{
    // Widen to uint32_t before shifting: a uint16_t operand is promoted to signed int,
    // and shifting a value with bit 15 set would then move a one into the sign bit.
    const uint32_t lv = static_cast<uint32_t>(v) << 16;
    float          fp;
    std::memcpy(&fp, &lv, sizeof(lv));
    return fp;
}
78 }
79 
80 /** Brain floating point representation class */
81 class bfloat16 final
82 {
83 public:
84  /** Default Constructor */
86  : value(0)
87  {
88  }
89  /** Constructor
90  *
91  * @param[in] v Floating-point value
92  */
93  explicit bfloat16(float v)
94  : value(float_to_bf16(v))
95  {
96  }
97  /** Assignment operator
98  *
99  * @param[in] v Floating point value to assign
100  *
101  * @return The updated object
102  */
103  bfloat16 &operator=(float v)
104  {
105  value = float_to_bf16(v);
106  return *this;
107  }
108  /** Floating point conversion operator
109  *
110  * @return Floating point representation of the value
111  */
112  operator float() const
113  {
114  return bf16_to_float(value);
115  }
116  /** Lowest representative value
117  *
118  * @return Returns the lowest finite value representable by bfloat16
119  */
120  static bfloat16 lowest()
121  {
122  bfloat16 val;
123  val.value = 0xFF7F;
124  return val;
125  }
126  /** Largest representative value
127  *
128  * @return Returns the largest finite value representable by bfloat16
129  */
130  static bfloat16 max()
131  {
132  bfloat16 val;
133  val.value = 0x7F7F;
134  return val;
135  }
136 
137 private:
138  uint16_t value;
139 };
140 } // namespace arm_compute
141 #endif /* ARM_COMPUTE_BFLOAT16_H */
bfloat16(float v)
Constructor.
Definition: Bfloat16.h:93
Brain floating point representation class.
Definition: Bfloat16.h:81
bfloat16 & operator=(float v)
Assignment operator.
Definition: Bfloat16.h:103
Copyright (c) 2017-2021 Arm Limited.
static bfloat16 lowest()
Lowest representative value.
Definition: Bfloat16.h:120
bfloat16()
Default Constructor.
Definition: Bfloat16.h:85
static bfloat16 max()
Largest representative value.
Definition: Bfloat16.h:130