// Reject invalid arguments before any TOSA operators are created.
if (!inputs[0])   // guard assumed: a valid input TensorInfo must be provided
{
    throw armnn::Exception("ConvertReduceOperator: Must provide a valid input tensor.");
}

if (inputs[0]->IsQuantized() ^ outputs[0]->IsQuantized())
{
    throw armnn::Exception(
        "Both input and output tensors must be either quantised or non-quantised data types.");
}

if (reduceDescriptor->m_vAxis.empty())
{
    throw armnn::Exception("ConvertReduceOperator: Reduce Operation with empty axis not implemented.");
}
std::string inputName = "input_";

std::size_t intermediateCounter = 0;

std::string outputName = "output0_";
std::vector<TosaSerializationTensor*> tensors;
std::vector<std::string> inputNames{inputName};

DType inputType = ArmNNToDType(inputs[0]->GetDataType());

// Assumed declarations: shapes converted with GetTosaTensorShape (declared below).
std::vector<int32_t> inputShape  = GetTosaTensorShape(inputs[0]->GetShape());
std::vector<int32_t> outputShape = GetTosaTensorShape(outputs[0]->GetShape());

// Only graph inputs get a tensor here; intermediate tensors are created by the producing layer.
if (inputName.substr(0, 6) == "input_")
{
    tensors.emplace_back(new TosaSerializationTensor(inputName,
                                                     inputShape,
                                                     inputType,
                                                     {}));   // trailing constructor arguments assumed
}
// Quantisation parameters, left at their neutral defaults for non-quantised data.
int64_t input_zp  = 0;
int64_t output_zp = 0;

double input_scale  = 1.0;
double output_scale = 1.0;

int32_t input_multiplier  = 1;
int32_t output_multiplier = 1;

int32_t input_shift  = 0;
int32_t output_shift = 0;

int64_t numElemsOnReducedAxis = 1;
std::vector<int32_t> axes(reduceDescriptor->m_vAxis.begin(), reduceDescriptor->m_vAxis.end());

// Product of the reduced dimensions, i.e. how many elements each output value accumulates.
for (int64_t axis : axes)
{
    numElemsOnReducedAxis *= inputShape[static_cast<uint64_t>(axis)];
}
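// Illustrative example (values assumed, not taken from the code above): reducing axes {1, 2}
// of an input shaped [1, 8, 8, 3] gives numElemsOnReducedAxis = 8 * 8 = 64; a quantised Mean
// later folds the division by 64 into the output rescale's multiplier and shift.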
std::vector<TosaSerializationOperator*> operators;

bool inputQuantised = inputs[0]->IsQuantized();

// Quantised path: insert a RESCALE in front of the reduction so the accumulation
// runs on de-quantised 32-bit integers.
if (inputQuantised)   // enclosing condition assumed from the quantised-only statements it wraps
{
    input_zp  = inputs[0]->GetQuantizationOffset();
    output_zp = outputs[0]->GetQuantizationOffset();

    std::string outputNameRescale =
        "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();   // naming assumed

    TosaSerializationOperator* rescaleOp1 = nullptr;
    switch (reduceDescriptor->m_ReduceOperation)
    {
        case ReduceOperation::Sum:
            // Scale the input up before summing and undo that factor in the output scale,
            // so no fractional precision is lost during the integer accumulation.
            input_scale  = static_cast<double>(1 << input_shift) * inputs[0]->GetQuantizationScale();
            output_scale = 1.0 / (outputs[0]->GetQuantizationScale() * static_cast<double>(1 << input_shift));

            // Rescale helper call assumed; it consumes the values above and the input zero point.
            CreateRescaleTosaOperator(inputName, outputNameRescale, input_scale,
                                      static_cast<int32_t>(input_zp), /* ... */ &rescaleOp1);
            break;
        case ReduceOperation::Mean:
        {
            // Derive the output multiplier/shift from the ratio of the quantisation scales
            // (helper declared below).
            ComputeMultiplierAndShiftTosaScale32(
                static_cast<double>(inputs[0]->GetQuantizationScale()) /
                static_cast<double>(outputs[0]->GetQuantizationScale()),
                output_multiplier,
                output_shift);

            // Fold the division by the element count into the output rescale.
            int shift = 63 - __builtin_clzl(static_cast<uint64_t>(numElemsOnReducedAxis));   // floor(log2(N))
            shift = std::min(shift, 32);
            shift = std::min(shift, 62 - output_shift);

            output_multiplier = static_cast<int32_t>(
                (static_cast<int64_t>(output_multiplier) << shift) / numElemsOnReducedAxis);

            output_shift += shift;
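            // Worked example with assumed values: for numElemsOnReducedAxis = 64,
            // shift = min(63 - clzl(64), 32, 62 - output_shift) = 6, so the multiplier is left
            // unchanged ((output_multiplier << 6) / 64) while output_shift grows by 6; the
            // effective rescale output_multiplier / 2^output_shift is therefore divided by 64,
            // turning the following REDUCE_SUM into a mean.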
            // Rescale helper call assumed; the input zero point is removed here and the
            // accumulated value stays in 32 bits until the output-side rescale.
            CreateRawRescaleTosaOperator(inputName, outputNameRescale,
                                         { input_multiplier }, { input_shift },
                                         static_cast<int32_t>(input_zp), /* ... */ &rescaleOp1);
            break;
        }
        default:
            throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
    }
    operators.emplace_back(rescaleOp1);

    // Intermediate tensor that feeds the reduction with the rescaled input.
    tensors.emplace_back(new TosaSerializationTensor(outputNameRescale,
                                                     inputShape,
                                                     DType_INT32,   // dtype and data arguments assumed
                                                     {}));
}
// One TOSA REDUCE operator is emitted per reduced axis.
for (const auto axis : axes)
{
    auto rank = static_cast<int64_t>(inputs[0]->GetNumDimensions());

    if (axis < 0 || axis >= rank)
    {
        throw armnn::Exception("ConvertReduceOperator: Axis value out of range.");   // message assumed
    }

    TosaAxisAttribute reduceAttribute(axis);
    std::string outputNameReduce =
        "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();   // naming assumed

    // Both Sum and Mean lower to REDUCE_SUM; Mean applies its 1/N factor elsewhere.
    switch (reduceDescriptor->m_ReduceOperation)
    {
        case ReduceOperation::Sum:
        case ReduceOperation::Mean:
            operators.emplace_back(new TosaSerializationOperator(Op_REDUCE_SUM,
                                                                 Attribute_AxisAttribute,
                                                                 &reduceAttribute,
                                                                 { tensors.back()->GetName() },
                                                                 { outputNameReduce }));
            break;
        default:
            throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
    }
    // TOSA reductions keep the reduced dimension, so it is set to size 1 here and only
    // removed by the final RESHAPE.
    std::vector<int32_t> outputShapeReduce = tensors.back()->GetShape();
    outputShapeReduce[static_cast<std::size_t>(axis)] = 1;

    tensors.emplace_back(new TosaSerializationTensor(outputNameReduce,
                                                     outputShapeReduce,
                                                     tensors.back()->GetDtype(),
                                                     {}));   // shape and data arguments assumed
}
// Quantised path: rescale the 32-bit reduction result back to the output's quantised type.
if (inputQuantised)   // enclosing condition assumed, mirroring the input-side rescale
{
    std::string outputNameRescale =
        "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();   // naming assumed

    TosaSerializationOperator* rescaleOp2 = nullptr;
    switch (reduceDescriptor->m_ReduceOperation)
    {
        case ReduceOperation::Sum:
            // Rescale helper call assumed; applies output_scale and the output zero point.
            CreateRescaleTosaOperator(tensors.back()->GetName(), outputNameRescale, output_scale,
                                      0 /* input_zp assumed */,
                                      static_cast<int32_t>(output_zp), /* ... */ &rescaleOp2);
            break;
        case ReduceOperation::Mean:
            // Rescale helper call assumed; applies output_multiplier/output_shift (which already
            // include the division by numElemsOnReducedAxis) and the output zero point.
            CreateRawRescaleTosaOperator(tensors.back()->GetName(), outputNameRescale,
                                         { output_multiplier }, { output_shift },
                                         0 /* input_zp assumed */,
                                         static_cast<int32_t>(output_zp), /* ... */ &rescaleOp2);
            break;
        default:
            throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
    }
    operators.push_back(rescaleOp2);

    tensors.emplace_back(new TosaSerializationTensor(outputNameRescale,
                                                     tensors.back()->GetShape(),
                                                     ArmNNToDType(outputs[0]->GetDataType()),   // dtype and data assumed
                                                     {}));
}
// Collapse the kept size-1 dimensions down to the ArmNN output shape. For a non-quantised
// Mean the RESHAPE writes to an intermediate, because the 1/N multiply still follows.
std::string outputNameReshape = !inputQuantised && reduceDescriptor->m_ReduceOperation == ReduceOperation::Mean
    ? "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID()   // naming assumed
    : outputName;

TosaReshapeAttribute reshapeAttribute(outputShape);   // attribute assumed: target is the output shape

operators.emplace_back(new TosaSerializationOperator(Op_RESHAPE,
                                                     Attribute_ReshapeAttribute,
                                                     &reshapeAttribute,
                                                     { tensors.back()->GetName() },
                                                     { outputNameReshape }));

tensors.emplace_back(new TosaSerializationTensor(outputNameReshape,
                                                 outputShape,
                                                 ArmNNToDType(outputs[0]->GetDataType()),   // dtype and data assumed
                                                 {}));
// Non-quantised Mean: divide the summed result by the element count with an explicit
// CONST * MUL pair, since no rescale is available for floating-point data.
if (!inputQuantised && reduceDescriptor->m_ReduceOperation == ReduceOperation::Mean)
{
    std::string constNameDivScale = "constant_" + GetUniqueTosaMappingID();   // naming assumed

    inputNames.emplace_back(constNameDivScale);

    operators.push_back(new TosaSerializationOperator(Op_CONST,
                                                      Attribute_NONE,   // attribute arguments assumed
                                                      nullptr,
                                                      {},
                                                      { constNameDivScale }));

    float divScale = 1.0f / static_cast<float>(numElemsOnReducedAxis);

    // Serialise the 1/N constant in the same floating-point format as the input (switch assumed).
    std::vector<uint8_t> uint8DivScale;
    switch (inputType)
    {
        case DType_FP32:
            TosaSerializationHandler::ConvertF32toU8({ divScale }, uint8DivScale);
            break;
        case DType_FP16:
            TosaSerializationHandler::ConvertF16toU8({ divScale }, uint8DivScale);
            break;
        default:
            break;
    }
    // The constant is broadcast against the reshaped sum, so its shape is all ones.
    std::vector<int32_t> divConstantShape(outputShape.size(), 1);

    tensors.push_back(new TosaSerializationTensor(constNameDivScale,
                                                  divConstantShape,
                                                  inputType,       // dtype and data arguments assumed
                                                  uint8DivScale));

    TosaMulAttribute mulAttribute(0);   // shift of 0: plain floating-point multiply

    operators.emplace_back(new TosaSerializationOperator(Op_MUL,
                                                         Attribute_MulAttribute,
                                                         &mulAttribute,
                                                         { constNameDivScale, outputNameReshape },
                                                         { outputName }));   // output name assumed
    tensors.push_back(new TosaSerializationTensor(outputName,
                                                  outputShape,
                                                  ArmNNToDType(outputs[0]->GetDataType()),   // dtype and data assumed
                                                  {}));
}

// Wrap everything into a single basic block for the TOSA serialisation handler.
return new TosaSerializationBasicBlock(blockName,
                                       mainName,
                                       operators,
                                       tensors,
                                       inputNames,
                                       { outputName });   // remaining constructor arguments assumed
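A minimal usage sketch, assuming the converter above is driven from a ReduceDescriptor like the one below; the helper name and call site are illustrative and not part of this code.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

// Descriptor asking for the mean over the spatial axes of an NHWC tensor; converting a Reduce
// layer configured this way would emit the RESCALE / REDUCE_SUM / RESHAPE sequence built above.
armnn::ReduceDescriptor MakeSpatialMeanDescriptor()
{
    armnn::ReduceDescriptor descriptor;
    descriptor.m_vAxis           = { 1, 2 };                      // reduce H and W
    descriptor.m_ReduceOperation = armnn::ReduceOperation::Mean;
    return descriptor;
}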
Referenced declarations:

std::string GenerateUniqueOutputName(const Layer& layer, uint32_t layerSlot = 0)
const std::string mainName
DType ArmNNToDType(const DataType& type)
std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape)
std::string GenerateUniqueInputName(const armnn::InputSlot& slot)
std::string GetUniqueTosaMappingID()
void CreateRawRescaleTosaOperator(const std::string& inputName, const std::string& outputName, const std::vector<int32_t>& multipliers, const std::vector<int32_t>& shifts, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, bool per_channel, TosaSerializationOperator** op)
void CreateRescaleTosaOperator(const std::string& inputName, const std::string& outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator** op)
void ComputeMultiplierAndShiftTosaScale32(double scale, int32_t& multiplier, int32_t& shift)
    Taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project. From a sca... (a simplified sketch follows after this list)
armnn::Exception
    Base class for all ArmNN exceptions so that users can filter to just those.
const InputSlot& GetInputSlot(unsigned int index) const override
    Get a const input slot handle by slot index.
constexpr char const* GetReduceOperationAsCString(ReduceOperation reduce_operation)
std::vector<uint32_t> m_vAxis
    The indices of the dimensions to reduce.
ReduceOperation m_ReduceOperation
    Specifies the reduction operation to execute.
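The scale decomposition used by ComputeMultiplierAndShiftTosaScale32 can be pictured with a small self-contained sketch. This is a simplified stand-in under assumed behaviour (normalised Q31 mantissa, no clamping of extreme scales), not the ArmNN or LLVM implementation.

#include <cmath>
#include <cstdint>

// Split a positive double scale into a Q31 multiplier and a right shift so that
// scale is approximately multiplier * 2^-shift, with the multiplier in [2^30, 2^31).
void ComputeMultiplierAndShiftSketch(double scale, int32_t& multiplier, int32_t& shift)
{
    int exponent = 0;
    const double mantissa = std::frexp(scale, &exponent);                  // scale = mantissa * 2^exponent

    auto q31 = static_cast<int64_t>(std::round(mantissa * (1ll << 31)));   // mantissa is in [0.5, 1.0)

    if (q31 == (1ll << 31))                                                // rounding pushed the mantissa to 1.0
    {
        q31 /= 2;
        ++exponent;
    }

    multiplier = static_cast<int32_t>(q31);
    shift      = 31 - exponent;     // value * multiplier >> shift approximates value * scale
}

For instance, a scale of roughly 0.0015 decomposes to shift = 40 and a multiplier of about 1.65e9, which the quantised paths above would apply inside the RESCALE as value * multiplier >> shift.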