ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QuantizeOperator.cpp File Reference
#include "QuantizeOperator.hpp"
#include "TosaRescaleOperatorUtils.hpp"
#include <fmt/format.h>
Include dependency graph for QuantizeOperator.cpp:

Go to the source code of this file.

Functions

TosaSerializationBasicBlock * ConvertQuantizeToTosaOperator (const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs)
 

Function Documentation

◆ ConvertQuantizeToTosaOperator()

TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator ( const Layer * layer,
const std::vector< const TensorInfo * > &  inputs,
const std::vector< const TensorInfo * > &  outputs 
)

Definition at line 17 of file QuantizeOperator.cpp.

20 {
21  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
22  "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
23  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
24  "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
25 
26  std::string inputName = std::string("input_");
27  std::string outputName = std::string("output0_");
28  std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
29 
30  // If a layer is present then the block will be used for execution, so input and output names need to be determined
31  // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
32  if(layer != nullptr)
33  {
34  inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
35  outputName = GenerateUniqueOutputName(*layer);
36  }
37 
38  const TensorInfo inputInfo = *inputs[0];
39  const TensorInfo outputInfo = *outputs[0];
40 
41  // Extract quantization detail from Tensor
42  float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
43  // No per axis support in Tensorflow TOSA code
44  float scale = outputInfo.GetQuantizationScale();
45 
46  // As per the Tensorflow quantization specification
47  // Tensorflow TOSA code calculates quantization using multiplication by scale
48  // Armnn code calculates quantization using division by scale
49  // Invert scale factor passed from Armnn for tf TOSA code
50  scale = (scale != 0) ? (1 / scale) : scale;
51 
52  std::vector<TosaSerializationTensor*> tensors;
53 
54  std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
55  DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
56  bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
57 
58  // Only add input tensors if connected layer is an input layer.
59  // As intermediate or constant tensors will be created separately.
60  // There also can't be duplicate tensor.
61  if(inputName.find("input_") != std::string::npos)
62  {
63  DType tmp = inputDType0;
64 
65  if (IsUnsignedDataType(tmp))
66  {
67  //TOSA rescale only supports signed types. Need to override type
68  //when using unsigned attribute
69  FlipSignage(tmp);
70  }
71  tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, tmp, {}));
72  }
73  else
74  {
75  if (IsUnsignedDataType(inputDType0))
76  {
77  // Can't modify the type of a previously created TosaSerializationTensor
78  throw armnn::Exception(fmt::format("ConvertQuantizeToTosaOperator: {} intermediate input"
79  " layer not supported.",EnumNamesDType()[inputDType0]));
80  }
81  }
82 
83  std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
84  DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
85 
86  if (isFloatInput)
87  {
88  // quantize:
89  // const_zeroPoint = constant(zeroPoint)
90  // const_scale = constant(scale)
91  // out_mul = mul(input, const_scale)
92  // out_add = add(out_mul, const_zeroPoint)
93  // output = cast<output_type>(out_add)
94 
95  std::string outputNameScale = std::string("constant0") + GetUniqueTosaMappingID();
96  std::string outputNameZeroPoint = std::string("constant1") + GetUniqueTosaMappingID();
97  std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
98  std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
99 
100  // const_zeroPoint
101  TosaSerializationOperator* zeroPointOp = nullptr;
102  TosaSerializationTensor* zeroPointTensor = nullptr;
103  CreateConstTosaOperator<float>(outputNameZeroPoint,
104  zeroPoint,
105  inputDType0,
106  inputShape0,
107  zeroPointOp,
108  zeroPointTensor);
109  tensors.push_back(zeroPointTensor);
110 
111  // const_scale
112  TosaSerializationOperator* scaleOp = nullptr;
113  TosaSerializationTensor* scaleTensor = nullptr;
114  CreateConstTosaOperator<float>(outputNameScale,
115  scale,
116  inputDType0,
117  inputShape0,
118  scaleOp,
119  scaleTensor);
120  tensors.push_back(scaleTensor);
121 
122  // mul
123  int32_t shift = 0;
124  TosaMulAttribute mulAttribute(shift);
125  TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
126  Attribute_MulAttribute,
127  &mulAttribute,
128  {inputName, outputNameScale},
129  {outputNameMul});
130  tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
131 
132  // add
133  TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
134  Attribute_NONE,
135  nullptr,
136  {outputNameMul, outputNameZeroPoint},
137  {outputNameAdd});
138  tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
139 
140  // cast
141  TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
142  Attribute_NONE,
143  nullptr,
144  {outputNameAdd},
145  {outputName});
146 
147  tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
148 
149  // operatorInputNames/operatorOutputNames ends up being the same as
150  // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
151  return new TosaSerializationBasicBlock(blockName, // name
152  mainName, // region name
153  {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
154  tensors, // tensors
155  {inputName}, // inputs
156  {outputName}); // outputs
157  }
158  else
159  {
160  double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
161  int32_t input_zp = inputs[0]->GetQuantizationOffset();
162  int32_t output_zp = outputs[0]->GetQuantizationOffset();
163 
164  TosaSerializationOperator* rescaleOp = nullptr;
165  CreateRescaleTosaOperator(inputName,
166  outputName,
167  scale_alpha,
168  input_zp,
169  output_zp,
170  IsUnsignedDataType(inputDType0),
171  IsUnsignedDataType(outputDType0),
172  true,
173  true,
174  &rescaleOp);
175 
176  if (IsUnsignedDataType(outputDType0))
177  {
178  // TOSA rescale only supports signed types. Need to override type
179  // when using unsigned attribute
180  FlipSignage(outputDType0);
181  }
182 
183  tensors.push_back(new TosaSerializationTensor(outputName,
184  inputShape0,
185  outputDType0, {}));
186 
187  // operatorInputNames/operatorOutputNames ends up being the same as
188  // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
189  return new TosaSerializationBasicBlock(blockName, // name
190  mainName, // region name
191  {rescaleOp}, // operators
192  tensors, // tensors
193  {inputName}, // inputs
194  {outputName}); // outputs
195  }
196 }
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
Definition: Exceptions.hpp:210
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
const std::string mainName
bool IsUnsignedDataType(DType type)
DType ArmNNToDType(const DataType &type)
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
std::string GetUniqueTosaMappingID()
void FlipSignage(DType &type)
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:47
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
float GetQuantizationScale() const
Definition: Tensor.cpp:461
int32_t GetQuantizationOffset() const
Definition: Tensor.cpp:482
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
DataType GetDataType() const
Definition: Tensor.hpp:200

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, ArmNNToDType(), CreateRescaleTosaOperator(), FlipSignage(), GenerateUniqueInputName(), GenerateUniqueOutputName(), TensorInfo::GetDataType(), Layer::GetInputSlot(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::GetShape(), GetTosaTensorShape(), GetUniqueTosaMappingID(), IsUnsignedDataType(), and mainName.

Referenced by GetTosaMapping().