ArmNN
 25.11
Loading...
Searching...
No Matches
TosaRescaleOperatorUtils.hpp
Go to the documentation of this file.
1//
2// Copyright © 2024-2025 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
7
8#pragma once
9
10
11//////////////////////////////////////////////////////////////////////////////////////////////////////////////
12/// @brief Creates a raw rescale TOSA operator.
13///
14/// This inline function creates a raw rescale operator for TOSA that adjusts the quantization
15/// parameters for an input tensor. It validates the multipliers and shifts vectors, ensuring they meet
16/// specific criteria for per-channel or global quantization. If any validation fails, an exception is thrown.
17///
18/// @param inputName : The name of the input tensor.
19/// @param outputName : The name of the output tensor.
20/// @param multipliers : A vector of multiplier values for scaling.
21/// @param shifts : A vector of shift values corresponding to the multipliers.
22/// @param input_zp : The zero point for the input tensor.
23/// @param output_zp : The zero point for the output tensor.
24/// @param input_unsigned : Indicates if the input tensor is unsigned.
25/// @param output_unsigned : Indicates if the output tensor is unsigned.
26/// @param double_round : If true, applies double rounding during quantization.
27/// @param scale32 : If true, performs 32-bit scaling; otherwise, 16-bit scaling is used.
28/// @param per_channel : Determines whether per-channel quantization is applied.
29/// @param op : Pointer to store the created TosaSerializationOperator.
30///////////////////////////////////////////////////////////////////////////////////////////////////////////////
31
32inline void CreateRawRescaleTosaOperator(const std::string& inputName,
33 const std::string& outputName,
34 const std::vector<int32_t>& multipliers,
35 const std::vector<int32_t>& shifts,
36 int32_t input_zp,
37 int32_t output_zp,
38 bool input_unsigned,
39 bool output_unsigned,
40 bool double_round,
41 bool scale32,
42 bool per_channel,
43 TosaSerializationOperator** op)
44{
45 if (!op)
46 {
47 throw armnn::Exception("CreateRawRescaleTosaOperator: nullptr op.");
48 }
49
50 if (multipliers.empty())
51 {
52 throw armnn::Exception("CreateRawRescaleTosaOperator: multipliers is empty.");
53 }
54
55 if (multipliers.size() != shifts.size())
56 {
57 throw armnn::Exception("CreateRawRescaleTosaOperator: multipliers and shift not same size.");
58 }
59
60 if (multipliers.size() == 1 && per_channel)
61 {
62 throw armnn::Exception("CreateRawRescaleTosaOperator: \
63 multipliers must be greater than 1 if per_channel is true.");
64 }
65
66 if (multipliers.size() > 1 && !per_channel)
67 {
68 throw armnn::Exception("CreateRawRescaleTosaOperator: \
69 multipliers size must be 1 if per_channel is false.");
70 }
71
72 TosaRescaleAttribute attribute(input_zp,
73 output_zp,
74 multipliers,
75 shifts,
76 scale32,
77 double_round,
78 per_channel,
79 input_unsigned,
80 output_unsigned);
81
82 // op
83 *op = new TosaSerializationOperator(Op_RESCALE, Attribute_RescaleAttribute, &attribute, {inputName}, {outputName});
84 if (!(*op))
85 {
86 throw armnn::Exception("CreateRescaleTosaOperator: failed to created operator");
87 }
88}
89
90/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
91/// From a scale value, generates multiplier and shift values where
92/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
93/// multiplier = mantissa*2^shift for 32-bit scaling.
94inline void ComputeMultiplierAndShiftTosaScale32(double scale,
95 int32_t &multiplier,
96 int32_t &shift)
97{
98 const double mantissa = std::frexp(scale, &shift);
99 auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
100
101 // Can't be greater than 1.0.
102 if (!(shiftedM <= (int64_t(1) << 31)))
103 {
104 throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
105 }
106
107 if (shiftedM == (int64_t(1) << 31))
108 {
109 shiftedM /= 2;
110 shift++;
111 }
112
113 // TOSA expects right shift to be positive, and embed (1 << 31) into right
114 // shift bits.
115 shift = (-shift) + 31;
116
117 if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
118 {
119 throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
120 }
121
122 multiplier = static_cast<int32_t>(shiftedM);
123
124 // Shifting tops out at 47 bits. Right shift to make 47 bits the max.
125 int32_t maxShiftValue = 47;
126 if (shift > maxShiftValue)
127 {
128 multiplier = multiplier >> std::min<int32_t>(31, shift - maxShiftValue);
129 shift = maxShiftValue;
130 }
131}
132
133/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
134/// From a scale value, generates multiplier and shift values where
135/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
136/// multiplier = mantissa*2^shift for 16-bit scaling.
138 int32_t &multiplier,
139 int32_t &shift)
140{
141 const double mantissa = std::frexp(scale, &shift);
142 auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
143
144 // Can't be greater than 1.0.
145 if (!(shiftedM <= (int64_t(1) << 15)))
146 {
147 throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
148 }
149
150 if (shiftedM == (int64_t(1) << 15))
151 {
152 shiftedM /= 2;
153 shift++;
154 }
155
156 // TOSA expects right shift to be positive and embed (1 << 15) into right
157 // shift bits.
158 shift = (-shift) + 15;
159
160 if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
161 {
162 throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
163 }
164
165 multiplier = static_cast<int32_t>(shiftedM);
166
167 // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
168 // The limit of 62 on shift allows the shift to be decomposed as
169 // two right shifts of 31.
170 if (shift > 62)
171 {
172 // Shifting the multiplier by more than 31-bits is unnecessary.
173 multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
174 shift = 62;
175 }
176}
177
178//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
179/// @brief Creates a Tosa rescale operator.
180///
181/// This inline function computes the multiplier and shift values based on the given scale
182/// using either 32-bit or 16-bit scaling. It then creates a raw rescale operator that adjusts
183/// the quantization parameters for the input tensor.
184///
185/// @param inputName : The name of the input tensor.
186/// @param outputName : The name of the output tensor.
187/// @param scale : The scale factor used to compute the multiplier and shift.
188/// @param input_zp : The zero point for the input tensor.
189/// @param output_zp : The zero point for the output tensor.
190/// @param input_unsigned : Indicates if the input tensor is unsigned.
191/// @param output_unsigned: Indicates if the output tensor is unsigned.
192/// @param double_round : If true, uses double rounding for quantization.
193/// @param scale32 : If true, performs 32-bit scaling; otherwise, 16-bit scaling is used.
194/// @param op : Pointer to a variable that will store the created TosaSerializationOperator.
195//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
196
197inline void CreateRescaleTosaOperator(const std::string& inputName,
198 const std::string& outputName,
199 double scale,
200 int32_t input_zp,
201 int32_t output_zp,
202 bool input_unsigned,
203 bool output_unsigned,
204 bool double_round,
205 bool scale32,
206 TosaSerializationOperator** op)
207{
208 int32_t multiplier;
209 int32_t shift;
210
211 if (scale32)
212 {
213 ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
214 }
215 else
216 {
217 ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
218 }
219
220 const std::vector<int32_t> multipliers{multiplier};
221 const std::vector<int32_t> shifts{shift};
222
224 outputName,
225 multipliers,
226 shifts,
227 input_zp,
228 output_zp,
229 input_unsigned,
230 output_unsigned,
231 double_round,
232 scale32,
233 false,
234 op);
235}
236
237//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
238/// @brief Creates a TOSA rescale operator for weight tensors.
239///
240/// This function computes multipliers and shift values for each weight scale by combining the input scale,
241/// weight scale, and output scale. It determines the quantization parameters using either 32-bit or 16-bit
242/// calculations based on the scale32 flag. The per_channel flag is set true if the provided weight scales are more
243/// than one. An exception is thrown if any computation fails.
244///
245/// @param inputName : The name of the input tensor.
246/// @param outputName : The name of the output tensor.
247/// @param input_zp : The zero point for the input tensor.
248/// @param output_zp : The zero point for the output tensor.
249/// @param input_unsigned : Indicates if the input tensor is unsigned.
250/// @param output_unsigned : Indicates if the output tensor is unsigned.
251/// @param double_round : If true, uses double rounding for quantization.
252/// @param scale32 : If true, uses 32-bit scaling; otherwise, uses 16-bit scaling.
253/// @param input_scale : The scaling factor for the input tensor.
254/// @param output_scale : The scaling factor for the output tensor.
255/// @param weight_scales : Vector of weight scales for per-channel quantization.
256/// @param op : Pointer to store the created TosaSerializationOperator.
257//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
258inline void CreateRescaleTosaOperatorForWeights(const std::string& inputName,
259 const std::string& outputName,
260 int32_t input_zp,
261 int32_t output_zp,
262 bool input_unsigned,
263 bool output_unsigned,
264 bool double_round,
265 bool scale32,
266 double input_scale,
267 double output_scale,
268 const std::vector<float>& weight_scales,
269 TosaSerializationOperator** op)
270{
271 std::vector<int32_t> op_tensor_multipliers;
272 std::vector<int32_t> op_tensor_shifts;
273 op_tensor_multipliers.reserve(weight_scales.size());
274 op_tensor_shifts.reserve(weight_scales.size());
275
276 for (const float& weight_scale : weight_scales)
277 {
278 double op_tensor_scale = (input_scale * weight_scale) / output_scale;
279 int32_t multiplier;
280 int32_t shift;
281
282 if (scale32)
283 {
284 ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
285 }
286 else
287 {
288 ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
289 }
290
291 op_tensor_multipliers.push_back(multiplier);
292 op_tensor_shifts.push_back(shift);
293 }
294
295 bool per_channel = weight_scales.size() == 1 ? false : true;
297 outputName,
298 op_tensor_multipliers,
299 op_tensor_shifts,
300 input_zp,
301 output_zp,
302 input_unsigned,
303 output_unsigned,
304 double_round,
305 scale32,
306 per_channel,
307 op);
308}
void ComputeMultiplierAndShiftTosaScale16(double scale, int32_t &multiplier, int32_t &shift)
The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project. From a scale value, generates multiplier and shift values for 16-bit scaling.
void CreateRawRescaleTosaOperator(const std::string &inputName, const std::string &outputName, const std::vector< int32_t > &multipliers, const std::vector< int32_t > &shifts, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, bool per_channel, TosaSerializationOperator **op)
Creates a raw rescale TOSA operator.
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
Creates a Tosa rescale operator.
void ComputeMultiplierAndShiftTosaScale32(double scale, int32_t &multiplier, int32_t &shift)
The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project. From a scale value, generates multiplier and shift values for 32-bit scaling.
void CreateRescaleTosaOperatorForWeights(const std::string &inputName, const std::string &outputName, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, double input_scale, double output_scale, const std::vector< float > &weight_scales, TosaSerializationOperator **op)
Creates a TOSA rescale operator for weight tensors.
Base class for all ArmNN exceptions so that users can filter to just those.