NeonConvertFp16ToFp32Workload.cpp
//
// Copyright © 2017-2019,2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonConvertFp16ToFp32Workload.hpp"

#include <armnnUtils/FloatingPointConverter.hpp>

#include <Half.hpp>

#include <backendsCommon/WorkloadUtils.hpp>

static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;

namespace armnn
{

arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output)
{
    // We fall back to the portable software implementation whenever Compute Library's
    // NECast cannot handle the conversion, so this method always returns success.

    armnn::IgnoreUnused(input);
    armnn::IgnoreUnused(output);
    return arm_compute::Status();
}

NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info)
    : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
{
    this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1);

    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    if (arm_compute::NECast::validate(input.info(), output.info(), g_AclConvertPolicy))
    {
        // Use NECast if supported (requires hardware support for FP16)
        m_Cast.reset(new arm_compute::NECast());
        m_Cast->configure(&input, &output, g_AclConvertPolicy);
    }
    else
    {
        // Otherwise fall back to the software implementation based on Half.hpp
        GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
    }
}
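
// For reference, the validate/configure/run sequence above is the standard
// Compute Library usage pattern. A minimal standalone sketch (illustrative
// only; the tensor setup below is hypothetical and not part of this file):
//
//     arm_compute::Tensor src, dst;
//     src.allocator()->init(arm_compute::TensorInfo(shape, 1, arm_compute::DataType::F16));
//     dst.allocator()->init(arm_compute::TensorInfo(shape, 1, arm_compute::DataType::F32));
//
//     arm_compute::NECast cast;
//     if (arm_compute::NECast::validate(src.info(), dst.info(), g_AclConvertPolicy))
//     {
//         cast.configure(&src, &dst, g_AclConvertPolicy);
//         src.allocator()->allocate();
//         dst.allocator()->allocate();
//         cast.run();
//     }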

void NeonConvertFp16ToFp32Workload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonConvertFp16ToFp32Workload_Execute");

    if (m_Cast)
    {
        // Use NECast if it was supported and initialised in the constructor
        m_Cast->run();
    }
    else
    {
        // Else use the software implementation based on Half.hpp
        auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
            {
                auto input = reinterpret_cast<const Half*>(src);
                auto output = reinterpret_cast<float*>(dst);
                size_t numElements = size/2; // 2 bytes per fp16
                armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
            };

        for (const auto& pair : m_TensorHandlePairs)
        {
            CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
        }
    }
}
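
// Note: CopyTensorContentsGeneric hands the copy functor raw byte spans, which
// is why convertFunc receives a size in bytes and divides by two to recover
// the number of FP16 elements.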

// Replace input tensor handle with the given TensorHandle
void NeonConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
    this->m_Data.m_Inputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch (armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, so revert the slot and rethrow the exception.
        this->m_Data.m_Inputs[slot] = backupHandle;
        throw e;
    }
}

// Replace output tensor handle with the given TensorHandle
void NeonConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Outputs[slot];
    this->m_Data.m_Outputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch (armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, so revert the slot and rethrow the exception.
        this->m_Data.m_Outputs[slot] = backupHandle;
        throw e;
    }
}

void NeonConvertFp16ToFp32Workload::Reconfigure()
{
    throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
}

} //namespace armnn
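
The software fallback above reduces to a simple element-wise widening. The following self-contained sketch mirrors what armnnUtils::FloatingPointConverter::ConvertFloat16To32 is expected to do, assuming only the half_float library that Half.hpp wraps; ConvertFloat16To32Sketch is a hypothetical stand-in, not the library function itself.

// Minimal sketch of the software FP16 -> FP32 widening used by the fallback
// path, assuming the half_float library. Illustrative only.

#include <cstddef>
#include <half.hpp> // half_float::half; the include path may vary per installation

void ConvertFloat16To32Sketch(const void* srcFloat16Buffer, size_t numElements, float* dstFloat32Buffer)
{
    const auto* src = reinterpret_cast<const half_float::half*>(srcFloat16Buffer);
    for (size_t i = 0; i < numElements; ++i)
    {
        // Every FP16 value is exactly representable as FP32,
        // so the widening conversion is lossless.
        dstFloat32Buffer[i] = static_cast<float>(src[i]);
    }
}

Because the widening is lossless, the SATURATE convert policy only matters for the narrowing (FP32 to FP16) direction; here it has no effect on results.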