ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HardSwishOperator.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 //
6 // Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
7 // SPDX-License-Identifier: Apache-2.0
8 //
9 
10 #include "HardSwishOperator.hpp"
11 
12 #include <gemmlowp/fixedpoint.h>
13 
14 // This function is paraphrased from:
15 // tensorflow/lite/kernels/internal/quantization_util.cc QuantizeMultiplier()
16 static void quantizeMultiplier(double doubleMultiplier, int32_t* quantizedMultiplier, int* shift)
17 {
18  if (doubleMultiplier == 0.)
19  {
20  *quantizedMultiplier = 0;
21  *shift = 0;
22  return;
23  }
24 
25  const double q = std::frexp(doubleMultiplier, shift);
26  auto qFixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
27 
28  ARMNN_THROW_INVALIDARG_IF_FALSE(qFixed <= (1LL << 31));
29 
30  if (qFixed == (1LL << 31))
31  {
32  qFixed /= 2;
33  ++*shift;
34  }
35 
36  ARMNN_THROW_INVALIDARG_IF_FALSE(qFixed <= std::numeric_limits<int32_t>::max());
37 
38  if (*shift < -31)
39  {
40  *shift = 0;
41  qFixed = 0;
42  }
43 
44  *quantizedMultiplier = static_cast<int32_t>(qFixed);
45 }
46 
// Paraphrased from:
// tensorflow/lite/kernels/internal/reference/hard_swish.h SaturatingDoublingHighMul()
//
// Multiplies two int16_t values in 32-bit arithmetic and returns the product divided by
// 2^15 (integer division, so the quotient truncates toward zero), narrowed to int16_t.
// The single combination whose quotient cannot be represented - both operands equal to
// the int16_t minimum - returns the int16_t maximum instead.
static int16_t saturatingDoublingHighMul(int16_t a, int16_t b)
{
    constexpr int16_t kLowest = std::numeric_limits<int16_t>::min();

    // min * min / 2^15 == 2^15, which does not fit in int16_t: saturate.
    if (a == kLowest && b == kLowest)
    {
        return std::numeric_limits<int16_t>::max();
    }

    const int32_t wideProduct = static_cast<int32_t>(a) * static_cast<int32_t>(b);
    return static_cast<int16_t>(wideProduct / (1 << 15));
}
64 
65 // This function is paraphrased from:
66 // tensorflow/lite/kernels/internal/common.h DownScaleInt32ToInt16Multiplier()
67 static void downScaleInt32ToInt16Multiplier(int32_t multiplierInt32, int16_t* multiplierInt16)
68 {
69  ARMNN_THROW_INVALIDARG_IF_FALSE(multiplierInt32 >= 0);
70 
71  static constexpr int32_t kRoundingOffset = 1 << 15;
72  if (multiplierInt32 >= std::numeric_limits<int32_t>::max() - kRoundingOffset)
73  {
74  *multiplierInt16 = std::numeric_limits<int16_t>::max();
75  return;
76  }
77 
78  const int32_t result = (multiplierInt32 + kRoundingOffset) >> 16;
79 
80  ARMNN_THROW_INVALIDARG_IF_FALSE(result << 16 <= multiplierInt32 + kRoundingOffset);
81  ARMNN_THROW_INVALIDARG_IF_FALSE(result << 16 > multiplierInt32 - kRoundingOffset);
82 
83  *multiplierInt16 = static_cast<int16_t>(result);
84 
85  ARMNN_THROW_INVALIDARG_IF_FALSE(*multiplierInt16 == result);
86 }
87 
88 // This function is paraphrased from:
89 // tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc getTosaConstHardSwish8bitTable()
90 std::vector<int16_t> getTosaConstHardSwish8bitTable(float inputScale,
91  int32_t inputZp,
92  float outputScale,
93  int32_t outputZp)
94 {
95  const float hiresInputScale = (1.0f / 128.0f) * inputScale;
96  const float outputMultiplier = hiresInputScale / outputScale;
97  int outputMultiplierExponent;
98  int16_t outputMultiplierFixedpointInt16;
99  int32_t outputMultiplierFixedpointInt32;
100 
101  quantizeMultiplier(outputMultiplier, &outputMultiplierFixedpointInt32, &outputMultiplierExponent);
102  downScaleInt32ToInt16Multiplier(outputMultiplierFixedpointInt32, &outputMultiplierFixedpointInt16);
103 
104  ARMNN_THROW_INVALIDARG_IF_FALSE(outputMultiplierExponent <= 0);
105 
106  const float reluishScale = 3.0f / 32768.0f;
107  const float reluishMultiplier = hiresInputScale / reluishScale;
108  int reluishMultiplierExponent;
109  int16_t reluishMultiplierFixedpointInt16;
110  int32_t reluishMultiplierFixedpointInt32;
111 
112  quantizeMultiplier(reluishMultiplier, &reluishMultiplierFixedpointInt32, &reluishMultiplierExponent);
113  downScaleInt32ToInt16Multiplier(reluishMultiplierFixedpointInt32, &reluishMultiplierFixedpointInt16);
114 
115  std::vector<int16_t> table;
116  table.reserve(256);
117  for (int32_t i = -128; i < 128; i++)
118  {
119  const int16_t inputValue = static_cast<int16_t>(i - inputZp);
120  const int16_t inputValueHiresInputScale = static_cast<int16_t>(inputValue * (1 << 7));
121 
122  int16_t reluishValue = inputValueHiresInputScale;
123  if (reluishMultiplierExponent > 0)
124  {
125  reluishValue = gemmlowp::ShiftLeft(reluishValue, reluishMultiplierExponent - 1);
126  }
127 
128  reluishValue = gemmlowp::SaturatingRoundingDoublingHighMul(reluishValue, reluishMultiplierFixedpointInt16);
129 
130  if (reluishMultiplierExponent > 0)
131  {
132  reluishValue = gemmlowp::ShiftLeft(reluishValue, 1);
133  }
134  else if (reluishMultiplierExponent < 0)
135  {
136  reluishValue = gemmlowp::RoundingDivideByPOT(reluishValue, -reluishMultiplierExponent);
137  }
138 
139  reluishValue = static_cast<int16_t>((reluishValue + (1 << 15)) >> 1);
140 
141  const int16_t inputValPreshiftOutputScale =
142  gemmlowp::SaturatingRoundingDoublingHighMul(inputValueHiresInputScale, outputMultiplierFixedpointInt16);
143 
144  const int16_t preshiftOutputValue = saturatingDoublingHighMul(reluishValue, inputValPreshiftOutputScale);
145 
146  int16_t outputValue = gemmlowp::RoundingDivideByPOT(preshiftOutputValue, -outputMultiplierExponent);
147 
148  outputValue = static_cast<int16_t>(outputValue + outputZp);
149  outputValue = std::min<int16_t>(outputValue, std::numeric_limits<int8_t>::max());
150  outputValue = std::max<int16_t>(outputValue, std::numeric_limits<int8_t>::min());
151 
152  table.push_back(outputValue);
153  }
154 
155  return table;
156 }
157 
158 // This function is paraphrased from:
159 // tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc ConvertTFLHardSwishOp()
160 TosaSerializationBasicBlock* ConvertHardSwishToTosaOperator(const Layer* layer,
161  const std::vector<const TensorInfo*>& inputs,
162  const std::vector<const TensorInfo*>& outputs,
163  const ActivationDescriptor* desc)
164 {
165  if (inputs.size() != 1)
166  {
167  throw armnn::Exception("ConvertHardSwishToTosaOperator: 1 input tensors required.");
168  }
169 
170  if (outputs.size() != 1)
171  {
172  throw armnn::Exception("ConvertHardSwishToTosaOperator: 1 output tensor required.");
173  }
174 
175  if (desc->m_Function != ActivationFunction::HardSwish)
176  {
177  throw armnn::Exception("ConvertHardSwishToTosaOperator ActivationDescriptor only supports function HardSwish.");
178  }
179 
180  std::string inputName = std::string("input_");
181  std::string outputName = std::string("output0_");
182  std::string blockName = std::string("Op_HARDSWISH_block_") + GetUniqueTosaMappingID();
183 
184  // If a layer is present then the block will be used for execution, so input and output names need to be determined
185  // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
186  if (layer != nullptr)
187  {
188  inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
189  outputName = GenerateUniqueOutputName(*layer);
190  }
191 
192  std::vector<TosaSerializationTensor*> tensors;
193  std::vector<TosaSerializationOperator*> operators;
194 
195  DataType inputDType = inputs[0]->GetDataType();
196 
197  bool isInt8 = (inputDType == DataType::QAsymmS8 || inputDType == DataType::QSymmS8);
198  if (isInt8)
199  {
200  float inputScale = inputs[0]->GetQuantizationScale();
201  float outputScale = outputs[0]->GetQuantizationScale();
202  int32_t inputZp = inputs[0]->GetQuantizationOffset();
203  int32_t outputZp = outputs[0]->GetQuantizationOffset();
204 
205  TosaTableAttribute attribute(
206  getTosaConstHardSwish8bitTable(inputScale, inputZp, outputScale, outputZp));
207  operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE,
208  Attribute_TableAttribute,
209  &attribute,
210  {inputName},
211  {outputName}));
212  }
213  else
214  {
215  throw Exception("ConvertHardSwishToTosaOperator() type currently unimplemented.");
216  }
217 
218  // Only add input tensors if connected layer is an input layer.
219  // As intermediate or constant tensors will be created separately.
220  // There also can't be duplicate tensor.
221  std::vector<int32_t> inputShape0;
222  DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
223  if(inputName.find("input_") != std::string::npos)
224  {
225  inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
226  tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
227  }
228 
229  std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
230  DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
231  tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
232 
233  // operatorInputNames/operatorOutputNames ends up being the same as
234  // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings
235  return new TosaSerializationBasicBlock(blockName, // name
236  mainName, // region name
237  operators, // operators
238  tensors, // tensors
239  {inputName}, // inputs
240  {outputName}); // outputs
241 }
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
Definition: Exceptions.hpp:212
std::vector< int16_t > getTosaConstHardSwish8bitTable(float inputScale, int32_t inputZp, float outputScale, int32_t outputZp)
TosaSerializationBasicBlock * ConvertHardSwishToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs, const ActivationDescriptor *desc)
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
const std::string mainName
DType ArmNNToDType(const DataType &type)
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
std::string GetUniqueTosaMappingID()
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:47
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
DataType
Definition: Types.hpp:49
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:37
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...
Definition: Descriptors.hpp:59