ArmNN
 24.08
FloatingPointConverter.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2019 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include <cstddef>
9 
10 namespace armnnUtils
11 {
12 
14 {
15 public:
16  /// Converts a buffer of numElements FP32 values to FP16, and stores the result in the given dstFloat16Buffer.
17  /// dstFloat16Buffer should be (numElements * 2) bytes in size, i.e. two bytes per FP16 element.
18  static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer);
19  /// Presumably the inverse of ConvertFloat32To16: reads numElements FP16 values from srcFloat16Buffer and writes them as FP32 to dstFloat32Buffer (confirm against FloatingPointConverter.cpp).
20  static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer);
21 
22  /// Converts a buffer of numElements FP32 values to BFloat16, and stores the result in the given dstBFloat16Buffer.
23  static void ConvertFloat32ToBFloat16(const float* srcFloat32Buffer, size_t numElements, void* dstBFloat16Buffer);
24 
25  /// Converts a buffer of numElements BFloat16 values to FP32, and stores the result in the given dstFloat32Buffer.
26  static void ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer, size_t numElements, float* dstFloat32Buffer);
27 };
28 
29 } // namespace armnnUtils
armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16
static void ConvertFloat32ToBFloat16(const float *srcFloat32Buffer, size_t numElements, void *dstBFloat16Buffer)
armnnUtils::FloatingPointConverter::ConvertFloat16To32
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
Definition: FloatingPointConverter.cpp:43
armnnUtils
Definition: CompatibleTypes.hpp:10
armnnUtils::FloatingPointConverter
Definition: FloatingPointConverter.hpp:13
armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32
static void ConvertBFloat16ToFloat32(const void *srcBFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
armnnUtils::FloatingPointConverter::ConvertFloat32To16
static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer)
Converts a buffer of FP32 values to FP16, and stores the result in the given dstFloat16Buffer.
Definition: FloatingPointConverter.cpp:17