ArmNN
 24.08
QuantizeOperator.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 // Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 //
8 
9 #include "QuantizeOperator.hpp"
10 
12 
13 // This function is paraphrased from:
14 // tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
15 TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
16  const std::vector<const TensorInfo*>& inputs,
17  const std::vector<const TensorInfo*>& outputs)
18 {
19  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
20  "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
21  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
22  "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
23 
24  std::string inputName = std::string("input_");
25  std::string outputName = std::string("output0_");
26  std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
27 
28  // If a layer is present then the block will be used for execution, so input and output names need to be determined
29  // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
30  if(layer != nullptr)
31  {
32  inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
33  outputName = GenerateUniqueOutputName(*layer);
34  }
35 
36  const TensorInfo inputInfo = *inputs[0];
37  const TensorInfo outputInfo = *outputs[0];
38 
39  // Extract quantization detail from Tensor
40  float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
41  // No per axis support in Tensorflow TOSA code
42  float scale = outputInfo.GetQuantizationScale();
43 
44  // As per the Tensorflow quantization specification
45  // Tensorflow TOSA code calculates quantization using multiplication by scale
46  // Armnn code calculates quantization using division by scale
47  // Invert scale factor passed from Armnn for tf TOSA code
48  scale = (scale != 0) ? (1 / scale) : scale;
49 
50  std::vector<TosaSerializationTensor*> tensors;
51 
52  std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
53  DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
54  bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
55 
56  // Only add input tensors if connected layer is an input layer.
57  // As intermediate or constant tensors will be created separately.
58  // There also can't be duplicate tensor.
59  if(inputName.find("input_") != std::string::npos)
60  {
61  tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
62  }
63 
64  std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
65  DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
66 
67  if (isFloatInput)
68  {
69  // quantize:
70  // const_zeroPoint = constant(zeroPoint)
71  // const_scale = constant(scale)
72  // out_mul = mul(input, const_scale)
73  // out_add = add(out_mul, const_zeroPoint)
74  // output = cast<output_type>(out_add)
75 
76  std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
77  std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
78  std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
79  std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
80 
81  // const_zeroPoint
82  TosaSerializationOperator* zeroPointOp = nullptr;
83  TosaSerializationTensor* zeroPointTensor = nullptr;
84  CreateConstTosaOperator<float>(outputNameZeroPoint,
85  zeroPoint,
86  inputDType0,
87  inputShape0,
88  zeroPointOp,
89  zeroPointTensor);
90  tensors.push_back(zeroPointTensor);
91 
92  // const_scale
93  TosaSerializationOperator* scaleOp = nullptr;
94  TosaSerializationTensor* scaleTensor = nullptr;
95  CreateConstTosaOperator<float>(outputNameScale,
96  scale,
97  inputDType0,
98  inputShape0,
99  scaleOp,
100  scaleTensor);
101  tensors.push_back(scaleTensor);
102 
103  // mul
104  int32_t shift = 0;
105  TosaMulAttribute mulAttribute(shift);
106  TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
107  Attribute_MulAttribute,
108  &mulAttribute,
109  {inputName, outputNameScale},
110  {outputNameMul});
111  tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
112 
113  // add
114  TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
115  Attribute_NONE,
116  nullptr,
117  {outputNameMul, outputNameZeroPoint},
118  {outputNameAdd});
119  tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
120 
121  // cast
122  TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
123  Attribute_NONE,
124  nullptr,
125  {outputNameAdd},
126  {outputName});
127 
128  tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
129 
130  // operatorInputNames/operatorOutputNames ends up being the same as
131  // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
132  return new TosaSerializationBasicBlock(blockName, // name
133  mainName, // region name
134  {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
135  tensors, // tensors
136  {inputName}, // inputs
137  {outputName}); // outputs
138  }
139  else
140  {
141  double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
142  int32_t input_zp = inputs[0]->GetQuantizationOffset();
143  int32_t output_zp = outputs[0]->GetQuantizationOffset();
144 
145  TosaSerializationOperator* rescaleOp = nullptr;
146  CreateRescaleTosaOperator(inputName,
147  outputName,
148  scale_alpha,
149  input_zp,
150  output_zp,
151  true,
152  true,
153  &rescaleOp);
154  tensors.push_back(new TosaSerializationTensor(outputName,
155  inputShape0,
156  outputDType0, {}));
157 
158  // operatorInputNames/operatorOutputNames ends up being the same as
159  // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
160  return new TosaSerializationBasicBlock(blockName, // name
161  mainName, // region name
162  {rescaleOp}, // operators
163  tensors, // tensors
164  {inputName}, // inputs
165  {outputName}); // outputs
166  }
167 }
QuantizeOperator.hpp
armnn::TensorInfo::GetQuantizationScale
float GetQuantizationScale() const
Definition: Tensor.cpp:461
armnn::TensorInfo
Definition: Tensor.hpp:152
TosaRescaleOperatorUtils.hpp
GenerateUniqueOutputName
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
Definition: TosaOperatorUtils.hpp:120
armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
armnn::Layer
Definition: Layer.hpp:230
mainName
const std::string mainName
Definition: TosaOperatorUtils.hpp:19
ArmNNToDType
DType ArmNNToDType(const DataType &type)
Definition: TosaOperatorUtils.hpp:22
armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition: Tensor.hpp:200
ConvertQuantizeToTosaOperator
TosaSerializationBasicBlock * ConvertQuantizeToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs)
Definition: QuantizeOperator.cpp:15
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
GetTosaTensorShape
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
Definition: TosaOperatorUtils.hpp:79
CreateRescaleTosaOperator
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, const std::vector< int32_t > &multipliers, const std::vector< int32_t > &shifts, int32_t input_zp, int32_t output_zp, bool double_round, bool scale32, bool per_channel, TosaSerializationOperator **op)
Definition: TosaRescaleOperatorUtils.hpp:10
armnn::TensorInfo::GetQuantizationOffset
int32_t GetQuantizationOffset() const
Definition: Tensor.cpp:482
GenerateUniqueInputName
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
Definition: TosaOperatorUtils.hpp:109
GetUniqueTosaMappingID
std::string GetUniqueTosaMappingID()
Definition: TosaOperatorUtils.hpp:138
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
Definition: Exceptions.hpp:210