24.08
FloatingPointConverter.cpp
Go to the documentation of this file.
1
//
2
// Copyright © 2017 Arm Ltd. All rights reserved.
3
// SPDX-License-Identifier: MIT
4
//
5
6
#include <
armnnUtils/FloatingPointConverter.hpp
>
7
8
#include "
BFloat16.hpp
"
9
#include "
Half.hpp
"
10
11
#include <
armnn/Exceptions.hpp
>
12
#include <
armnn/utility/Assert.hpp
>
13
14
namespace
armnnUtils
15
{
16
17
void
FloatingPointConverter::ConvertFloat32To16
(
const
float
* srcFloat32Buffer,
18
size_t
numElements,
19
void
* dstFloat16Buffer)
20
{
21
if
(srcFloat32Buffer ==
nullptr
)
22
{
23
throw
armnn::InvalidArgumentException
(
"ConvertFloat32To16: source float32 buffer pointer is null"
);
24
}
25
if
(dstFloat16Buffer ==
nullptr
)
26
{
27
throw
armnn::InvalidArgumentException
(
"ConvertFloat32To16: destination float16 buffer pointer is null"
);
28
}
29
30
armnn::Half
* pHalf =
static_cast<
armnn::Half
*
>
(dstFloat16Buffer);
31
32
for
(
size_t
i = 0; i < numElements; i++)
33
{
34
pHalf[i] =
armnn::Half
(srcFloat32Buffer[i]);
35
if
(isinf(pHalf[i]))
36
{
37
// If the value of converted Fp16 is infinity, round to the closest finite Fp16 value.
38
pHalf[i] = copysign(std::numeric_limits<armnn::Half>::max(), pHalf[i]);
39
}
40
}
41
}
42
43
void
FloatingPointConverter::ConvertFloat16To32
(
const
void
* srcFloat16Buffer,
44
size_t
numElements,
45
float
* dstFloat32Buffer)
46
{
47
if
(srcFloat16Buffer ==
nullptr
)
48
{
49
throw
armnn::InvalidArgumentException
(
"ConvertFloat16To32: source float16 buffer pointer is null"
);
50
}
51
if
(dstFloat32Buffer ==
nullptr
)
52
{
53
throw
armnn::InvalidArgumentException
(
"ConvertFloat16To32: destination float32 buffer pointer is null"
);
54
}
55
56
const
armnn::Half
* pHalf =
static_cast<
const
armnn::Half
*
>
(srcFloat16Buffer);
57
58
for
(
size_t
i = 0; i < numElements; i++)
59
{
60
dstFloat32Buffer[i] = pHalf[i];
61
}
62
}
63
64
}
//namespace armnnUtils
armnn::Half
half_float::half Half
Definition:
Half.hpp:22
Assert.hpp
armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition:
FloatingPointConverter.cpp:43
armnnUtils
Definition:
CompatibleTypes.hpp:10
armnn::InvalidArgumentException
Definition:
Exceptions.hpp:80
Half.hpp
armnnUtils::FloatingPointConverter::ConvertFloat32To16
static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer)
Converts a buffer of FP32 values to FP16, and stores in the given dstFloat16Buffer.
Definition:
FloatingPointConverter.cpp:17
FloatingPointConverter.hpp
Exceptions.hpp
BFloat16.hpp
src
armnnUtils
FloatingPointConverter.cpp
Generated on Wed Aug 28 2024 14:31:49 for Arm NN by
1.8.17