ArmNN 25.11
QuantizeOperator.cpp File Reference
#include "QuantizeOperator.hpp"
#include "TosaRescaleOperatorUtils.hpp"
#include <fmt/format.h>
Include dependency graph for QuantizeOperator.cpp:

Go to the source code of this file.

Functions

TosaSerializationBasicBlock * ConvertQuantizeToTosaOperator (const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs)

Function Documentation

◆ ConvertQuantizeToTosaOperator()

TosaSerializationBasicBlock * ConvertQuantizeToTosaOperator ( const Layer * layer,
const std::vector< const TensorInfo * > & inputs,
const std::vector< const TensorInfo * > & outputs )

Definition at line 17 of file QuantizeOperator.cpp.

20{
21 bool tosaRefBackend {false};
22
23 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
24 "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
25 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
26 "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
27
28 std::string inputName = std::string("input_");
29 std::string outputName = std::string("output0_");
30 std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
31
32 // If a layer is present then the block will be used for execution, so input and output names need to be determined
33 // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
34 if(layer != nullptr)
35 {
36 inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
37 outputName = GenerateUniqueOutputName(*layer);
38
39 tosaRefBackend = (layer->GetBackendId().Get().find("TosaRef") != std::string::npos);
40 }
41
42 const TensorInfo inputInfo = *inputs[0];
43 const TensorInfo outputInfo = *outputs[0];
44
45 // Extract quantization detail from Tensor
46 float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
47 // No per axis support in Tensorflow TOSA code
48 float scale = outputInfo.GetQuantizationScale();
49
50 // As per the Tensorflow quantization specification
51 // Tensorflow TOSA code calculates quantization using multiplication by scale
52 // Armnn code calculates quantization using division by scale
53 // Invert scale factor passed from Armnn for tf TOSA code
54 scale = (scale != 0) ? (1 / scale) : scale;
55
56 std::vector<TosaSerializationTensor*> tensors;
57
58 std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
59 DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
60 bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
61
32 // Only add an input tensor if the connected layer is an input layer,
33 // as intermediate and constant tensors will be created separately.
34 // There also can't be duplicate tensors.
65 if(inputName.find("input_") != std::string::npos)
66 {
67 DType tmp = inputDType0;
68
69 if (IsUnsignedDataType(tmp) && !tosaRefBackend)
70 {
71 //TOSA rescale only supports signed types. Need to override type
72 //when using unsigned attribute
73 FlipSignage(tmp);
74 }
75 tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, tmp, {}));
76 }
77 else
78 {
79 if (IsUnsignedDataType(inputDType0))
80 {
81 // Can't modify the type of a previously created TosaSerializationTensor
82 throw armnn::Exception(fmt::format("ConvertQuantizeToTosaOperator: {} intermediate input"
83 " layer not supported.",EnumNamesDType()[inputDType0]));
84 }
85 }
86
87 std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
88 DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
89
90 if (isFloatInput)
91 {
92 // quantize:
93 // const_zeroPoint = constant(zeroPoint)
94 // const_scale = constant(scale)
95 // out_mul = mul(input, const_scale)
96 // out_add = add(out_mul, const_zeroPoint)
97 // output = cast<output_type>(out_add)
98
99 std::string outputNameScale = std::string("constant0") + GetUniqueTosaMappingID();
100 std::string outputNameZeroPoint = std::string("constant1") + GetUniqueTosaMappingID();
101 std::string outputNameMul = std::string("layer_intermediate0_") + GetUniqueTosaMappingID();
102 std::string outputNameAdd = std::string("layer_intermediate1_") + GetUniqueTosaMappingID();
103
104 // const_zeroPoint
105 TosaSerializationOperator* zeroPointOp = nullptr;
106 TosaSerializationTensor* zeroPointTensor = nullptr;
107 CreateConstTosaOperator<float>(outputNameZeroPoint,
108 zeroPoint,
109 inputDType0,
110 inputShape0,
111 zeroPointOp,
112 zeroPointTensor);
113 tensors.push_back(zeroPointTensor);
114
115 // const_scale
116 TosaSerializationOperator* scaleOp = nullptr;
117 TosaSerializationTensor* scaleTensor = nullptr;
118 CreateConstTosaOperator<float>(outputNameScale,
119 scale,
120 inputDType0,
121 inputShape0,
122 scaleOp,
123 scaleTensor);
124 tensors.push_back(scaleTensor);
125
126 // mul
127 int32_t shift = 0;
128 TosaMulAttribute mulAttribute(shift);
129 TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
130 Attribute_MulAttribute,
131 &mulAttribute,
132 {inputName, outputNameScale},
133 {outputNameMul});
134 tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
135
136 // add
137 TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
138 Attribute_NONE,
139 nullptr,
140 {outputNameMul, outputNameZeroPoint},
141 {outputNameAdd});
142 tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
143
144 // cast
145 TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
146 Attribute_NONE,
147 nullptr,
148 {outputNameAdd},
149 {outputName});
150
151 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
152
153 // operatorInputNames/operatorOutputNames ends up being the same as
154 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
155 return new TosaSerializationBasicBlock(blockName, // name
156 mainName, // region name
157 {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
158 tensors, // tensors
159 {inputName}, // inputs
160 {outputName}); // outputs
161 }
162 else
163 {
164 double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
165 int32_t input_zp = inputs[0]->GetQuantizationOffset();
166 int32_t output_zp = outputs[0]->GetQuantizationOffset();
167
168 TosaSerializationOperator* rescaleOp = nullptr;
169 CreateRescaleTosaOperator(inputName,
170 outputName,
171 scale_alpha,
172 input_zp,
173 output_zp,
174 IsUnsignedDataType(inputDType0),
175 IsUnsignedDataType(outputDType0),
176 true,
177 true,
178 &rescaleOp);
179
180 if (IsUnsignedDataType(outputDType0) && !tosaRefBackend)
181 {
182 // TOSA rescale only supports signed types. Need to override type
183 // when using unsigned attribute
184 FlipSignage(outputDType0);
185 }
186
187 tensors.push_back(new TosaSerializationTensor(outputName,
188 inputShape0,
189 outputDType0, {}));
190
191 // operatorInputNames/operatorOutputNames ends up being the same as
192 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
193 return new TosaSerializationBasicBlock(blockName, // name
194 mainName, // region name
195 {rescaleOp}, // operators
196 tensors, // tensors
197 {inputName}, // inputs
198 {outputName}); // outputs
199 }
200}
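The comment at lines 50-54 of the listing is the key to the float path: ArmNN quantizes by dividing by the scale, while the generated TOSA MUL/ADD/CAST chain multiplies by a constant, so the scale is inverted before being baked into the const_scale tensor. A minimal, self-contained sketch (not part of QuantizeOperator.cpp; the values are made up) showing that the two formulations agree:

// Illustrative sketch only (not part of QuantizeOperator.cpp); example values are made up.
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    const float scale     = 0.5f;   // ArmNN quantization scale
    const float zeroPoint = 10.0f;  // ArmNN quantization offset
    const float x         = 3.2f;   // float value to quantize

    // ArmNN formulation: divide by scale, then add the zero point.
    const int32_t armnnQ = static_cast<int32_t>(std::lround(x / scale))
                         + static_cast<int32_t>(zeroPoint);

    // TOSA float path (MUL -> ADD -> CAST): multiply by the inverted scale,
    // add the zero point, then let the final cast do the rounding.
    const float invScale = (scale != 0.0f) ? (1.0f / scale) : scale;
    const int32_t tosaQ  = static_cast<int32_t>(std::lround(x * invScale + zeroPoint));

    std::cout << armnnQ << " " << tosaQ << "\n";   // both print 16 for these values
    return 0;
}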
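For inputs that are already quantized, the function instead emits a single RESCALE whose scale is the ratio of input to output quantization scales (scale_alpha at line 164). A small illustrative sketch (not ArmNN or TOSA reference code; the parameters are hypothetical) of the requantization this amounts to for one value:

// Illustrative sketch only (not ArmNN or TOSA reference code); parameters are hypothetical.
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    // Hypothetical input and output quantization parameters.
    const float   inScale  = 0.1f;   const int32_t inZp  = 0;
    const float   outScale = 0.05f;  const int32_t outZp = 5;

    // scale_alpha as computed in ConvertQuantizeToTosaOperator (input scale / output scale).
    const double scaleAlpha = static_cast<double>(inScale) / outScale;   // 2.0

    // What the emitted RESCALE amounts to for a single value:
    //   q_out = round((q_in - input_zp) * scale_alpha) + output_zp
    const int32_t qIn  = 20;   // represents the real value 20 * 0.1 = 2.0
    const int32_t qOut = static_cast<int32_t>(std::lround((qIn - inZp) * scaleAlpha)) + outZp;

    std::cout << qOut << "\n";   // 45, which represents (45 - 5) * 0.05 = 2.0
    return 0;
}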
Declarations referenced in the listing above:

#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
const std::string mainName
bool IsUnsignedDataType(DType type)
DType ArmNNToDType(const DataType &type)
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
std::string GetUniqueTosaMappingID()
void FlipSignage(DType &type)
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
void CreateConstTosaOperator(const std::string &outputName, const T value, DType dtype, const std::vector< int32_t > &shape, TosaSerializationOperator *&op, TosaSerializationTensor *&tensor)
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
    Creates a Tosa rescale operator.
armnn::Exception
    Base class for all ArmNN exceptions so that users can filter to just those.
const InputSlot & Layer::GetInputSlot(unsigned int index) const override
    Get a const input slot handle by slot index. Definition: Layer.hpp:337
const BackendId & Layer::GetBackendId() const
    Definition: Layer.hpp:290
float TensorInfo::GetQuantizationScale() const
    Definition: Tensor.cpp:461
const TensorShape & TensorInfo::GetShape() const
    Definition: Tensor.hpp:193
int32_t TensorInfo::GetQuantizationOffset() const
    Definition: Tensor.cpp:482
DataType TensorInfo::GetDataType() const
    Definition: Tensor.hpp:200

References ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, ArmNNToDType(), CreateConstTosaOperator(), CreateRescaleTosaOperator(), FlipSignage(), GenerateUniqueInputName(), GenerateUniqueOutputName(), Layer::GetBackendId(), TensorInfo::GetDataType(), Layer::GetInputSlot(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::GetShape(), GetTosaTensorShape(), GetUniqueTosaMappingID(), IsUnsignedDataType(), and mainName.

Referenced by GetTosaMapping().
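
A minimal sketch of how the converter might be invoked in validation mode (layer == nullptr). This is not taken from the ArmNN sources: the include paths, the chosen shape, and the quantization parameters are assumptions for illustration only, and error handling and block ownership are elided.

// A minimal validation-mode sketch, not taken from the ArmNN sources.
// Assumptions: the include paths below, the chosen shape, and the example
// quantization parameters; error handling and block ownership are elided.
#include "QuantizeOperator.hpp"

#include <armnn/Tensor.hpp>
#include <vector>

TosaSerializationBasicBlock* BuildQuantizeBlockForValidation()
{
    using namespace armnn;

    // FP32 input quantized to QAsymmS8 with example scale/offset.
    const TensorInfo inputInfo (TensorShape({1, 2, 2, 1}), DataType::Float32);
    const TensorInfo outputInfo(TensorShape({1, 2, 2, 1}), DataType::QAsymmS8,
                                /*quantizationScale*/ 0.5f,
                                /*quantizationOffset*/ 10);

    const std::vector<const TensorInfo*> inputs  { &inputInfo  };
    const std::vector<const TensorInfo*> outputs { &outputInfo };

    // layer == nullptr: the block is built for validation only, so the
    // placeholder "input_" / "output0_" tensor names are used.
    return ConvertQuantizeToTosaOperator(nullptr, inputs, outputs);
}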