ArmNN 24.08
NeonConvertFp32ToFp16Workload.cpp
//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonConvertFp32ToFp16Workload.hpp"

#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <Half.hpp>
#include <Profiling.hpp>

#include <armnnUtils/FloatingPointConverter.hpp>

#include <backendsCommon/WorkloadUtils.hpp>

static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;

namespace armnn
{

arm_compute::Status NeonConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output)
{
    // Falls back to the portable software implementation if Compute Library's NECast won't work,
    // so this method always returns success.

    armnn::IgnoreUnused(input);
    armnn::IgnoreUnused(output);
    return arm_compute::Status();
}

NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info)
    : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
{
    this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1);

    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    if (arm_compute::NECast::validate(input.info(), output.info(), g_AclConvertPolicy))
    {
        // Use NECast if supported (needs hardware support for FP16)
        m_Cast.reset(new arm_compute::NECast);
        m_Cast->configure(&input, &output, g_AclConvertPolicy);
    }
    else
    {
        // Else use the software implementation from Half.hpp
        GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
    }
}

void NeonConvertFp32ToFp16Workload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonConvertFp32ToFp16Workload_Execute");

    if (m_Cast)
    {
        // Use NECast if supported and initialised
        m_Cast->run();
    }
    else
    {
        // Else use the software implementation from Half.hpp
        auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
        {
            auto input = reinterpret_cast<const float*>(src);
            auto output = reinterpret_cast<Half*>(dst);
            size_t numElements = size / 2; // 2 bytes per fp16
            armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
        };

        for (const auto& pair : m_TensorHandlePairs)
        {
            CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
        }
    }
}

void NeonConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
    this->m_Data.m_Inputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch(armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, revert the slot back and throw the exception.
        this->m_Data.m_Inputs[slot] = backupHandle;
        throw e;
    }
}

// Replace output tensor handle with the given TensorHandle
void NeonConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
{
    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
    this->m_Data.m_Inputs[slot] = tensorHandle;
    try
    {
        Reconfigure();
    }
    catch(armnn::UnimplementedException& e)
    {
        // Cannot reconfigure, revert the slot back and throw the exception.
        this->m_Data.m_Inputs[slot] = backupHandle;
        throw e;
    }
}

void NeonConvertFp32ToFp16Workload::Reconfigure()
{
    throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
}

} //namespace armnn
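
The workload above follows the Compute Library's usual validate-then-configure pattern: NECast::validate() is queried first, and only if it reports success is the function object configured and kept for Execute(). The following is a minimal standalone sketch of that same pattern driven directly against Compute Library tensors. It is illustrative only, not part of ArmNN; the tensor shape and element count are arbitrary assumptions.

#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <arm_compute/runtime/Tensor.h>

int main()
{
    using namespace arm_compute;

    // Describe a 1-D tensor of 16 FP32 elements and its FP16 destination.
    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(TensorShape(16u), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(16u), 1, DataType::F16));

    // Ask the Compute Library whether NECast can handle this conversion.
    // Where it cannot (e.g. no hardware FP16 support), validate() reports
    // failure and the workload above takes the software fallback instead.
    if (NECast::validate(src.info(), dst.info(), ConvertPolicy::SATURATE))
    {
        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src with FP32 data, then:
        cast.run();
    }
    return 0;
}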
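
On the software path, ConvertFloat32To16() narrows each 32-bit float to the 16-bit half type from Half.hpp. The SATURATE convert policy used on the NECast path clamps out-of-range values to the largest finite FP16 magnitude rather than producing infinities. The sketch below illustrates such a saturating narrowing on a single value using plain bit manipulation; it is a simplified illustration with a hypothetical helper (truncating rounding, no subnormal or NaN handling), not the conversion ArmNN or the Compute Library actually performs.

#include <cstdint>
#include <cstring>
#include <cstdio>

// Hypothetical helper: convert one IEEE-754 FP32 value to FP16 bits,
// saturating out-of-range magnitudes to the largest finite FP16 value
// (65504), in the spirit of ConvertPolicy::SATURATE.
static uint16_t Fp32ToFp16Saturate(float value)
{
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));

    const uint16_t sign     = static_cast<uint16_t>((bits >> 16) & 0x8000u);
    const int32_t  exponent = static_cast<int32_t>((bits >> 23) & 0xFFu) - 127 + 15;
    const uint32_t mantissa = bits & 0x7FFFFFu;

    if (exponent >= 31) // magnitude too large for FP16: saturate
    {
        return static_cast<uint16_t>(sign | 0x7BFFu); // +/-65504.0, the FP16 maximum
    }
    if (exponent <= 0)  // underflow: flush to signed zero (subnormals omitted)
    {
        return sign;
    }
    return static_cast<uint16_t>(sign | (exponent << 10) | (mantissa >> 13));
}

int main()
{
    std::printf("1.0f  -> 0x%04X\n", Fp32ToFp16Saturate(1.0f));  // 0x3C00
    std::printf("-2.5f -> 0x%04X\n", Fp32ToFp16Saturate(-2.5f)); // 0xC100
    std::printf("1e9f  -> 0x%04X\n", Fp32ToFp16Saturate(1e9f));  // 0x7BFF (saturated)
    return 0;
}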