11 const std::string& outputName,
12 const std::vector<int32_t>& multipliers,
13 const std::vector<int32_t>& shifts,
21 TosaSerializationOperator** op)
28 if (multipliers.empty())
30 throw armnn::Exception(
"CreateRawRescaleTosaOperator: multipliers is empty.");
33 if (multipliers.size() != shifts.size())
35 throw armnn::Exception(
"CreateRawRescaleTosaOperator: multipliers and shift not same size.");
38 if (multipliers.size() == 1 && per_channel)
41 multipliers must be greater than 1 if per_channel is true.");
44 if (multipliers.size() == 1 && per_channel)
47 multipliers size must be greater than 1 if per_channel is true.");
50 if (multipliers.size() > 1 && !per_channel)
53 multipliers size must be 1 if per_channel is false.");
56 TosaRescaleAttribute attribute(input_zp,
67 *op =
new TosaSerializationOperator(Op_RESCALE, Attribute_RescaleAttribute, &attribute, {inputName}, {outputName});
70 throw armnn::Exception(
"CreateRescaleTosaOperator: failed to created operator");
82 const double mantissa = std::frexp(scale, &shift);
83 auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
86 if (!(shiftedM <= (int64_t(1) << 31)))
91 if (shiftedM == (int64_t(1) << 31))
99 shift = (-shift) + 31;
101 if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
103 throw armnn::Exception(
"Shifted mantissa exceeds 32-bit signed output type");
106 multiplier =
static_cast<int32_t
>(shiftedM);
114 multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
127 const double mantissa = std::frexp(scale, &shift);
128 auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
131 if (!(shiftedM <= (int64_t(1) << 15)))
136 if (shiftedM == (int64_t(1) << 15))
144 shift = (-shift) + 15;
146 if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
148 throw armnn::Exception(
"Shifted mantissa exceeds 32-bit signed output type");
151 multiplier =
static_cast<int32_t
>(shiftedM);
159 multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
165 const std::string& outputName,
170 bool output_unsigned,
173 TosaSerializationOperator** op)
187 const std::vector<int32_t> multipliers{multiplier};
188 const std::vector<int32_t> shifts{shift};
190 input_zp, output_zp, input_unsigned, output_unsigned,
191 double_round, scale32,
false, op);
195 const std::string& outputName,
199 bool output_unsigned,
204 const std::vector<float>& weight_scales,
205 TosaSerializationOperator** op)
207 std::vector<int32_t> op_tensor_multipliers;
208 std::vector<int32_t> op_tensor_shifts;
209 op_tensor_multipliers.reserve(weight_scales.size());
210 op_tensor_shifts.reserve(weight_scales.size());
212 for (
const float& weight_scale : weight_scales)
214 double op_tensor_scale = (input_scale * weight_scale) / output_scale;
227 op_tensor_multipliers.push_back(multiplier);
228 op_tensor_shifts.push_back(shift);
231 bool per_channel = weight_scales.size() == 1 ? false :
true;
233 input_zp, output_zp, input_unsigned, output_unsigned, double_round,
234 scale32, per_channel, op);
void ComputeMultiplierAndShiftTosaScale16(double scale, int32_t &multiplier, int32_t &shift)
The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project From a sca...
void CreateRawRescaleTosaOperator(const std::string &inputName, const std::string &outputName, const std::vector< int32_t > &multipliers, const std::vector< int32_t > &shifts, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, bool per_channel, TosaSerializationOperator **op)
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
void ComputeMultiplierAndShiftTosaScale32(double scale, int32_t &multiplier, int32_t &shift)
The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project From a sca...
void CreateRescaleTosaOperatorForWeights(const std::string &inputName, const std::string &outputName, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, double input_scale, double output_scale, const std::vector< float > &weight_scales, TosaSerializationOperator **op)
Base class for all ArmNN exceptions so that users can filter to just those.