ArmNN 25.11
NeonConvertFp32ToFp16Workload.cpp
Go to the documentation of this file.
//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonConvertFp32ToFp16Workload.hpp"

#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <Half.hpp>
#include <Profiling.hpp>

#include <armnnUtils/FloatingPointConverter.hpp>

#include <backendsCommon/WorkloadUtils.hpp>

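// SATURATE clamps values that would overflow the destination type instead of wrapping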
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;

namespace armnn
{

arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output)
{
    // Fall back to the portable software implementation if Compute Library's NECast
    // won't work, so this method always returns success

    armnn::IgnoreUnused(input);
    armnn::IgnoreUnused(output);
    return arm_compute::Status();
}
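
// Usage note (an illustrative sketch, not part of the original file): backends call
// this Validate function before creating the workload. Because the Half.hpp software
// fallback is always available, the returned Status is always OK:
//
//     auto status = NeonConvertFp32ToFp16WorkloadValidate(inputInfo, outputInfo);
//     // bool(status) == true, so the Neon backend never rejects this conversion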
NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info)
    : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
{
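    // This workload expects exactly one input and one output tensor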
    this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1);

    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

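    // NECast::validate returns an arm_compute::Status whose boolean conversion is true
    // when the cast can run on this target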
    if (arm_compute::NECast::validate(input.info(), output.info(), g_AclConvertPolicy))
    {
        // Use NECast if supported (needs hardware support for FP16)
        m_Cast.reset(new arm_compute::NECast);
        m_Cast->configure(&input, &output, g_AclConvertPolicy);
    }
    else
    {
        // Else use software implementation from Half.hpp
        GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
    }
}

void NeonConvertFp32ToFp16Workload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonConvertFp32ToFp16Workload_Execute");

    if (m_Cast)
    {
        // Use NECast if supported and initialised
        m_Cast->run();
    }
    else
    {
        // Else use the software implementation from Half.hpp
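        // CopyTensorContentsGeneric passes the destination (FP16) chunk size in bytes,
        // so size/2 below gives the number of elements to convert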
        auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
            {
                auto input = reinterpret_cast<const float*>(src);
                auto output = reinterpret_cast<Half*>(dst);
                size_t numElements = size/2; // 2 bytes per fp16
                armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
            };

        for (const auto& pair : m_TensorHandlePairs)
        {
            CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
        }
    }
}

void NeonConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
    this->m_Data.m_Inputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch(armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, revert the slot back and throw the exception.
        this->m_Data.m_Inputs[slot] = backupHandle;
        throw e;
    }
}

// Replace output tensor handle with the given TensorHandle
void NeonConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Outputs[slot];
    this->m_Data.m_Outputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch(armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, revert the slot back and throw the exception.
        this->m_Data.m_Outputs[slot] = backupHandle;
        throw e;
    }
}

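// Reconfigure always throws, so the Replace*TensorHandle overrides above always
// revert the slot and rethrow: runtime tensor handle replacement is effectively
// unsupported for this workload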
void NeonConvertFp32ToFp16Workload::Reconfigure()
{
    throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
}

} //namespace armnn