ArmNN
 25.11
Loading...
Searching...
No Matches
HardSwishOperator.cpp
Go to the documentation of this file.
1//
2// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5//
6// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
7// SPDX-License-Identifier: Apache-2.0
8//
9
10#include "HardSwishOperator.hpp"
11
12#include <gemmlowp/fixedpoint.h>
13
14// This function is paraphrased from:
15// tensorflow/lite/kernels/internal/quantization_util.cc QuantizeMultiplier()
16static void quantizeMultiplier(double doubleMultiplier, int32_t* quantizedMultiplier, int* shift)
17{
18 if (doubleMultiplier == 0.)
19 {
20 *quantizedMultiplier = 0;
21 *shift = 0;
22 return;
23 }
24
25 const double q = std::frexp(doubleMultiplier, shift);
26 auto qFixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
27
28 ARMNN_THROW_INVALIDARG_IF_FALSE(qFixed <= (1LL << 31));
29
30 if (qFixed == (1LL << 31))
31 {
32 qFixed /= 2;
33 ++*shift;
34 }
35
36 ARMNN_THROW_INVALIDARG_IF_FALSE(qFixed <= std::numeric_limits<int32_t>::max());
37
38 if (*shift < -31)
39 {
40 *shift = 0;
41 qFixed = 0;
42 }
43
44 *quantizedMultiplier = static_cast<int32_t>(qFixed);
45}
46
// This function is paraphrased from:
// tensorflow/lite/kernels/internal/reference/hard_swish.h SaturatingDoublingHighMul()
//
// Returns the high 16 bits of the doubled product 2*a*b, saturating the one
// overflow case (both operands equal to INT16_MIN) to INT16_MAX.
static int16_t saturatingDoublingHighMul(int16_t a, int16_t b)
{
    // 2 * (-32768) * (-32768) exceeds the representable result range.
    if (a == std::numeric_limits<int16_t>::min() && b == a)
    {
        return std::numeric_limits<int16_t>::max();
    }

    // Widen to 32 bits, multiply, then keep the high half of the doubled
    // product. Integer division by 2^15 truncates toward zero, exactly as the
    // TFLite reference does.
    const int32_t product = static_cast<int32_t>(a) * static_cast<int32_t>(b);
    return static_cast<int16_t>(product / (1 << 15));
}
64
65// This function is paraphrased from:
66// tensorflow/lite/kernels/internal/common.h DownScaleInt32ToInt16Multiplier()
67static void downScaleInt32ToInt16Multiplier(int32_t multiplierInt32, int16_t* multiplierInt16)
68{
69 ARMNN_THROW_INVALIDARG_IF_FALSE(multiplierInt32 >= 0);
70
71 static constexpr int32_t kRoundingOffset = 1 << 15;
72 if (multiplierInt32 >= std::numeric_limits<int32_t>::max() - kRoundingOffset)
73 {
74 *multiplierInt16 = std::numeric_limits<int16_t>::max();
75 return;
76 }
77
78 const int32_t result = (multiplierInt32 + kRoundingOffset) >> 16;
79
80 ARMNN_THROW_INVALIDARG_IF_FALSE(result << 16 <= multiplierInt32 + kRoundingOffset);
81 ARMNN_THROW_INVALIDARG_IF_FALSE(result << 16 > multiplierInt32 - kRoundingOffset);
82
83 *multiplierInt16 = static_cast<int16_t>(result);
84
85 ARMNN_THROW_INVALIDARG_IF_FALSE(*multiplierInt16 == result);
86}
87
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc getTosaConstHardSwish8bitTable()
//
// Builds the 256-entry lookup table used to lower a quantized 8-bit HardSwish
// to a single TOSA TABLE op. For every possible quantized input value
// i in [-128, 127] the table holds the quantized HardSwish output, computed
// entirely in fixed-point so the result matches the TFLite reference kernel.
std::vector<int16_t> getTosaConstHardSwish8bitTable(float inputScale,
                                                    int32_t inputZp,
                                                    float outputScale,
                                                    int32_t outputZp)
{
    // "Hires" scale: the input is pre-shifted left by 7 bits inside the loop,
    // so those values carry an effective scale of inputScale / 128.
    const float hiresInputScale = (1.0f / 128.0f) * inputScale;

    // Fixed-point multiplier that maps the hires input scale to the output scale.
    const float outputMultiplier = hiresInputScale / outputScale;
    int outputMultiplierExponent;
    int16_t outputMultiplierFixedpointInt16;
    int32_t outputMultiplierFixedpointInt32;

    quantizeMultiplier(outputMultiplier, &outputMultiplierFixedpointInt32, &outputMultiplierExponent);
    downScaleInt32ToInt16Multiplier(outputMultiplierFixedpointInt32, &outputMultiplierFixedpointInt16);

    // The output rescale is applied below as RoundingDivideByPOT(-exponent),
    // which only handles a right shift — hence the exponent must be <= 0.
    ARMNN_THROW_INVALIDARG_IF_FALSE(outputMultiplierExponent <= 0);

    // Scale for the "relu-ish" (clamping) part of HardSwish; 3/32768 maps
    // +/-3 onto the int16 extremes (see the TFLite reference cited above).
    const float reluishScale = 3.0f / 32768.0f;
    const float reluishMultiplier = hiresInputScale / reluishScale;
    int reluishMultiplierExponent;
    int16_t reluishMultiplierFixedpointInt16;
    int32_t reluishMultiplierFixedpointInt32;

    quantizeMultiplier(reluishMultiplier, &reluishMultiplierFixedpointInt32, &reluishMultiplierExponent);
    downScaleInt32ToInt16Multiplier(reluishMultiplierFixedpointInt32, &reluishMultiplierFixedpointInt16);

    std::vector<int16_t> table;
    table.reserve(256);
    for (int32_t i = -128; i < 128; i++)
    {
        // Centre on the input zero point, then shift up into the hires scale.
        const int16_t inputValue = static_cast<int16_t>(i - inputZp);
        const int16_t inputValueHiresInputScale = static_cast<int16_t>(inputValue * (1 << 7));

        // Rescale onto the relu-ish scale. A positive exponent is applied in
        // two halves around the doubling multiply (exponent-1 before, 1 after)
        // to avoid intermediate overflow; a negative exponent becomes a
        // rounding right shift afterwards.
        int16_t reluishValue = inputValueHiresInputScale;
        if (reluishMultiplierExponent > 0)
        {
            reluishValue = gemmlowp::ShiftLeft(reluishValue, reluishMultiplierExponent - 1);
        }

        reluishValue = gemmlowp::SaturatingRoundingDoublingHighMul(reluishValue, reluishMultiplierFixedpointInt16);

        if (reluishMultiplierExponent > 0)
        {
            reluishValue = gemmlowp::ShiftLeft(reluishValue, 1);
        }
        else if (reluishMultiplierExponent < 0)
        {
            reluishValue = gemmlowp::RoundingDivideByPOT(reluishValue, -reluishMultiplierExponent);
        }

        // Remap from signed [-1, 1) to unsigned [0, 1) fixed point, i.e.
        // (x + 1) / 2. The addition promotes to int, so (1 << 15) is safe.
        reluishValue = static_cast<int16_t>((reluishValue + (1 << 15)) >> 1);

        // Rescale the input itself onto the (pre-shift) output scale.
        const int16_t inputValPreshiftOutputScale =
            gemmlowp::SaturatingRoundingDoublingHighMul(inputValueHiresInputScale, outputMultiplierFixedpointInt16);

        // HardSwish core: fixed-point product of x and its clamped factor.
        const int16_t preshiftOutputValue = saturatingDoublingHighMul(reluishValue, inputValPreshiftOutputScale);

        // Apply the remaining (non-positive) output exponent as a rounding shift.
        int16_t outputValue = gemmlowp::RoundingDivideByPOT(preshiftOutputValue, -outputMultiplierExponent);

        // Re-centre on the output zero point and clamp into the int8 range.
        outputValue = static_cast<int16_t>(outputValue + outputZp);
        outputValue = std::min<int16_t>(outputValue, std::numeric_limits<int8_t>::max());
        outputValue = std::max<int16_t>(outputValue, std::numeric_limits<int8_t>::min());

        table.push_back(outputValue);
    }

    return table;
}
157
158// This function is paraphrased from:
159// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc ConvertTFLHardSwishOp()
160TosaSerializationBasicBlock* ConvertHardSwishToTosaOperator(const Layer* layer,
161 const std::vector<const TensorInfo*>& inputs,
162 const std::vector<const TensorInfo*>& outputs,
163 const ActivationDescriptor* desc)
164{
165 if (inputs.size() != 1)
166 {
167 throw armnn::Exception("ConvertHardSwishToTosaOperator: 1 input tensors required.");
168 }
169
170 if (outputs.size() != 1)
171 {
172 throw armnn::Exception("ConvertHardSwishToTosaOperator: 1 output tensor required.");
173 }
174
175 if (desc->m_Function != ActivationFunction::HardSwish)
176 {
177 throw armnn::Exception("ConvertHardSwishToTosaOperator ActivationDescriptor only supports function HardSwish.");
178 }
179
180 std::string inputName = std::string("input_");
181 std::string outputName = std::string("output0_");
182 std::string blockName = std::string("Op_HARDSWISH_block_") + GetUniqueTosaMappingID();
183
184 // If a layer is present then the block will be used for execution, so input and output names need to be determined
185 // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
186 if (layer != nullptr)
187 {
188 inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
189 outputName = GenerateUniqueOutputName(*layer);
190 }
191
192 std::vector<TosaSerializationTensor*> tensors;
193 std::vector<TosaSerializationOperator*> operators;
194
195 DataType inputDType = inputs[0]->GetDataType();
196
197 bool isInt8 = (inputDType == DataType::QAsymmS8 || inputDType == DataType::QSymmS8);
198 if (isInt8)
199 {
200 float inputScale = inputs[0]->GetQuantizationScale();
201 float outputScale = outputs[0]->GetQuantizationScale();
202 int32_t inputZp = inputs[0]->GetQuantizationOffset();
203 int32_t outputZp = outputs[0]->GetQuantizationOffset();
204
205 TosaTableAttribute attribute(
206 getTosaConstHardSwish8bitTable(inputScale, inputZp, outputScale, outputZp));
207 operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE,
208 Attribute_TableAttribute,
209 &attribute,
210 {inputName},
211 {outputName}));
212 }
213 else
214 {
215 throw Exception("ConvertHardSwishToTosaOperator() type currently unimplemented.");
216 }
217
218 // Only add input tensors if connected layer is an input layer.
219 // As intermediate or constant tensors will be created separately.
220 // There also can't be duplicate tensor.
221 std::vector<int32_t> inputShape0;
222 DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
223 if(inputName.find("input_") != std::string::npos)
224 {
225 inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
226 tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
227 }
228
229 std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
230 DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
231 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
232
233 // operatorInputNames/operatorOutputNames ends up being the same as
234 // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings
235 return new TosaSerializationBasicBlock(blockName, // name
236 mainName, // region name
237 operators, // operators
238 tensors, // tensors
239 {inputName}, // inputs
240 {outputName}); // outputs
241}
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
TosaSerializationBasicBlock * ConvertHardSwishToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs, const ActivationDescriptor *desc)
std::vector< int16_t > getTosaConstHardSwish8bitTable(float inputScale, int32_t inputZp, float outputScale, int32_t outputZp)
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
const std::string mainName
DType ArmNNToDType(const DataType &type)
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
std::string GetUniqueTosaMappingID()
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
Base class for all ArmNN exceptions so that users can filter to just those.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
DataType
Definition Types.hpp:49
An ActivationDescriptor for the ActivationLayer.
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...