ArmNN
 24.08
FloatingPointConverter.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
7 
8 #include "BFloat16.hpp"
9 #include "Half.hpp"
10 
11 #include <armnn/Exceptions.hpp>
12 #include <armnn/utility/Assert.hpp>
13 
14 namespace armnnUtils
15 {
16 
17 void FloatingPointConverter::ConvertFloat32To16(const float* srcFloat32Buffer,
18  size_t numElements,
19  void* dstFloat16Buffer)
20 {
21  if (srcFloat32Buffer == nullptr)
22  {
23  throw armnn::InvalidArgumentException("ConvertFloat32To16: source float32 buffer pointer is null");
24  }
25  if (dstFloat16Buffer == nullptr)
26  {
27  throw armnn::InvalidArgumentException("ConvertFloat32To16: destination float16 buffer pointer is null");
28  }
29 
30  armnn::Half* pHalf = static_cast<armnn::Half*>(dstFloat16Buffer);
31 
32  for (size_t i = 0; i < numElements; i++)
33  {
34  pHalf[i] = armnn::Half(srcFloat32Buffer[i]);
35  if (isinf(pHalf[i]))
36  {
37  // If the value of converted Fp16 is infinity, round to the closest finite Fp16 value.
38  pHalf[i] = copysign(std::numeric_limits<armnn::Half>::max(), pHalf[i]);
39  }
40  }
41 }
42 
43 void FloatingPointConverter::ConvertFloat16To32(const void* srcFloat16Buffer,
44  size_t numElements,
45  float* dstFloat32Buffer)
46 {
47  if (srcFloat16Buffer == nullptr)
48  {
49  throw armnn::InvalidArgumentException("ConvertFloat16To32: source float16 buffer pointer is null");
50  }
51  if (dstFloat32Buffer == nullptr)
52  {
53  throw armnn::InvalidArgumentException("ConvertFloat16To32: destination float32 buffer pointer is null");
54  }
55 
56  const armnn::Half* pHalf = static_cast<const armnn::Half*>(srcFloat16Buffer);
57 
58  for (size_t i = 0; i < numElements; i++)
59  {
60  dstFloat32Buffer[i] = pHalf[i];
61  }
62 }
63 
64 } //namespace armnnUtils
armnn::Half
half_float::half Half
Definition: Half.hpp:22
Assert.hpp
armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition: FloatingPointConverter.cpp:43
armnnUtils
Definition: CompatibleTypes.hpp:10
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
Half.hpp
armnnUtils::FloatingPointConverter::ConvertFloat32To16
static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer)
Converts a buffer of FP32 values to FP16, and stores in the given dstFloat16Buffer.
Definition: FloatingPointConverter.cpp:17
FloatingPointConverter.hpp
Exceptions.hpp
BFloat16.hpp