{
    if (!inputs[0])
    {
        throw armnn::Exception("ConvertReduceOperator: Must provide a valid input tensor.");
    }

    if (inputs[0]->IsQuantized() ^ outputs[0]->IsQuantized())
    {
        throw armnn::Exception(
            "Both input and output tensors must be either quantised or non-quantised data types.");
    }

    if (reduceDescriptor->m_vAxis.empty())
    {
        throw armnn::Exception("ConvertReduceOperator: Reduce Operation with empty axis not implemented.");
    }

    std::string inputName = "input_";

    std::size_t intermediateCounter = 0;

    std::string outputName = "output0_";

    std::vector<int32_t> inputShape  = GetTosaTensorShape(inputs[0]->GetShape());
    std::vector<int32_t> outputShape = GetTosaTensorShape(outputs[0]->GetShape());

    std::string blockName = "Op_REDUCE_block_" + GetUniqueTosaMappingID();

    if (layer)
    {
        inputName  = GenerateUniqueInputName(layer->GetInputSlot(0));
        outputName = GenerateUniqueOutputName(*layer);
    }
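
    // The input tensor is only declared in this block when its name still has the default "input_" prefix,
    // i.e. when it is assumed to be a graph input rather than the output of a previously converted layer.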
    std::vector<TosaSerializationTensor*> tensors;
    std::vector<std::string> inputNames{inputName};

    DType inputType = ArmNNToDType(inputs[0]->GetDataType());

    if (inputName.substr(0, 6) == "input_")
    {
        tensors.emplace_back(new TosaSerializationTensor(inputName,
                                                         inputShape,
                                                         inputType,
                                                         {}));
    }
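
    // Quantisation parameters (zero points, scales, fixed-point multipliers/shifts); the defaults below are
    // used on the non-quantised path and overwritten when the input is quantised.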
    int64_t input_zp  = 0;
    int64_t output_zp = 0;

    double input_scale  = 1.0;
    double output_scale = 1.0;

    int32_t input_multiplier  = 1;
    int32_t output_multiplier = 1;

    int32_t input_shift  = 0;
    int32_t output_shift = 0;

    int64_t numElemsOnReducedAxis = 1;

    std::vector<int32_t> axes(reduceDescriptor->m_vAxis.begin(), reduceDescriptor->m_vAxis.end());

    for (int64_t axis : axes)
    {
        numElemsOnReducedAxis *= inputShape[static_cast<uint64_t>(axis)];
    }
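
    // numElemsOnReducedAxis is the product of the reduced dimensions; it is used as the divisor for Mean.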
    std::vector<TosaSerializationOperator*> operators;

    bool inputQuantised = inputs[0]->IsQuantized();
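
    // Quantised path: rescale the input into an INT32 intermediate before reducing, and rescale back to
    // the quantised output type after the reduction (see the second RESCALE further down).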
    if (inputQuantised)
    {
        input_zp  = inputs[0]->GetQuantizationOffset();
        output_zp = outputs[0]->GetQuantizationOffset();

        std::string outputNameRescale =
            "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();

        TosaSerializationOperator* rescaleOp1 = nullptr;

        switch (reduceDescriptor->m_ReduceOperation)
        {
            case ReduceOperation::Sum:
                input_shift = 20;

                input_scale  = static_cast<double>(1 << input_shift) * inputs[0]->GetQuantizationScale();
                output_scale = 1.0 / (outputs[0]->GetQuantizationScale() * static_cast<double>(1 << input_shift));

                CreateRescaleTosaOperator(inputName,
                                          outputNameRescale,
                                          input_scale,
                                          static_cast<int32_t>(input_zp),
                                          0,
                                          false,
                                          false,
                                          true,
                                          true,
                                          &rescaleOp1);

                break;
            case ReduceOperation::Mean:
            {
                // Convert the input/output scale ratio into a fixed-point multiplier and shift.
                ComputeMultiplierAndShiftTosaScale32
                (
                    static_cast<double>(inputs[0]->GetQuantizationScale()) /
                    static_cast<double>(outputs[0]->GetQuantizationScale()),
                    output_multiplier,
                    output_shift
                );

                // Fold the division by the number of reduced elements into the multiplier,
                // keeping as much precision as the 32-bit multiplier allows.
                int shift = 63 - __builtin_clzl(static_cast<uint64_t>(numElemsOnReducedAxis));
                shift = std::min(shift, 32);
                shift = std::min(shift, 62 - output_shift);

                output_multiplier = static_cast<int32_t>(
                    (static_cast<int64_t>(output_multiplier) << shift) / numElemsOnReducedAxis);

                output_shift += shift;

                CreateRawRescaleTosaOperator(inputName,
                                             outputNameRescale,
                                             {input_multiplier},
                                             {input_shift},
                                             static_cast<int32_t>(input_zp),
                                             0,
                                             false,
                                             false,
                                             true,
                                             true,
                                             false,
                                             &rescaleOp1);
                break;
            }
            default:
                throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
        }

        operators.emplace_back(rescaleOp1);

        tensors.emplace_back(new TosaSerializationTensor(outputNameRescale,
                                                         inputShape,
                                                         DType_INT32,
                                                         {}));
    }
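
    // Emit one REDUCE_SUM per requested axis, each reading the previous intermediate tensor.
    // Mean is expressed as REDUCE_SUM followed by a scaling step (MUL or RESCALE) further down.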
    std::string outputNameReduce;
    bool reuseOutputName = !inputQuantised && reduceDescriptor->m_ReduceOperation == ReduceOperation::Sum;

    for (const auto axis : axes)
    {
        auto rank = static_cast<int64_t>(inputs[0]->GetNumDimensions());

        if (axis < 0 || axis >= rank)
        {
            throw armnn::Exception("ConvertReduceOperator: Axis value not within range of input tensor dimensions.");
        }

        TosaAxisAttribute reduceAttribute(axis);

        std::vector<int32_t> outputShapeReduce = tensors.back()->GetShape();
        outputShapeReduce[static_cast<std::size_t>(axis)] = 1;

        outputNameReduce = (reuseOutputName && outputShapeReduce == outputShape)
                               ? outputName
                               : "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();

        switch (reduceDescriptor->m_ReduceOperation)
        {
            case ReduceOperation::Sum:
            case ReduceOperation::Mean:
                operators.emplace_back(new TosaSerializationOperator(Op_REDUCE_SUM,
                                                                     Attribute_AxisAttribute,
                                                                     &reduceAttribute,
                                                                     { tensors.back()->GetName() },
                                                                     { outputNameReduce }));
                break;
            default:
                throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
        }

        tensors.emplace_back(new TosaSerializationTensor(outputNameReduce,
                                                         outputShapeReduce,
                                                         tensors.back()->GetDtype(),
                                                         {}));
    }
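
    // Decide whether a RESHAPE is needed: it is skipped when the input and output ranks match and the only
    // differing dimensions are the reduced axes; it is required when the ranks differ.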
    std::string outputNameReshape;
    bool reshapeLogic = false;

    if (inputShape.size() == outputShape.size() && inputShape != outputShape && !axes.empty())
    {
        bool onlyMeanAxisChanged = true;

        for (size_t i = 0; i < inputShape.size(); ++i)
        {
            if (inputShape[i] != outputShape[i] &&
                std::find(axes.begin(), axes.end(), static_cast<int64_t>(i)) == axes.end())
            {
                onlyMeanAxisChanged = false;
                break;
            }
        }

        reshapeLogic = !onlyMeanAxisChanged;
    }
    else if (inputShape.size() != outputShape.size())
    {
        reshapeLogic = true;
    }

    std::string outputNameRescale;
    if (inputQuantised)
    {
        outputNameRescale = "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID();
    }

    if (reshapeLogic)
    {
        TosaReshapeAttribute reshapeAttribute(outputShape);
        outputNameReshape = !inputQuantised && reduceDescriptor->m_ReduceOperation == ReduceOperation::Mean
                                ? "intermediate" + std::to_string(intermediateCounter++) + "_" + GetUniqueTosaMappingID()
                                : outputName;

        if (!outputNameRescale.empty())
        {
            outputNameReshape = outputNameRescale;
        }

        operators.emplace_back(new TosaSerializationOperator(Op_RESHAPE,
                                                             Attribute_ReshapeAttribute,
                                                             &reshapeAttribute,
                                                             { tensors.back()->GetName() },
                                                             { outputNameReshape }));
        if (outputNameReshape != outputName)
        {
            tensors.emplace_back(new TosaSerializationTensor(outputNameReshape,
                                                             outputShape,
                                                             tensors.back()->GetDtype(),
                                                             {}));
        }
    }
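
    // Quantised path: rescale the INT32 reduction result back to the quantised output type.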
    if (inputQuantised)
    {
        TosaSerializationOperator* rescaleOp2 = nullptr;

        switch (reduceDescriptor->m_ReduceOperation)
        {
            case ReduceOperation::Sum:
                CreateRescaleTosaOperator(tensors.back()->GetName(),
                                          outputName,
                                          output_scale,
                                          0,
                                          static_cast<int32_t>(output_zp),
                                          false,
                                          false,
                                          true,
                                          true,
                                          &rescaleOp2);
                break;
            case ReduceOperation::Mean:
                CreateRawRescaleTosaOperator(tensors.back()->GetName(),
                                             outputName,
                                             {output_multiplier},
                                             {output_shift},
                                             0,
                                             static_cast<int32_t>(output_zp),
                                             false,
                                             false,
                                             true,
                                             true,
                                             false,
                                             &rescaleOp2);
                break;
            default:
                throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented.");
        }

        operators.emplace_back(rescaleOp2);
    }
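
    // Non-quantised Mean: divide the accumulated sum by the number of reduced elements using a
    // constant holding 1/N and an elementwise MUL.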
    if (!inputQuantised && reduceDescriptor->m_ReduceOperation == ReduceOperation::Mean)
    {
        std::string constNameDivScale = "constant_" + GetUniqueTosaMappingID();

        inputNames.emplace_back(constNameDivScale);

        operators.push_back(new TosaSerializationOperator(Op_CONST,
                                                          Attribute_NONE,
                                                          nullptr,
                                                          {},
                                                          { constNameDivScale }));

        float divScale = 1.0f / static_cast<float>(numElemsOnReducedAxis);

        std::vector<uint8_t> uint8DivScale;
        switch (inputType)
        {
            case DType_FP32:
                TosaSerializationHandler::ConvertF32toU8({divScale}, uint8DivScale);
                break;
            case DType_FP16:
                TosaSerializationHandler::ConvertF16toU8({divScale}, uint8DivScale);
                break;
            default:
                throw armnn::Exception("ConvertReduceOperator: Unsupported data type for Reduce Mean.");
        }
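
        // Give the divisor constant a rank-matched shape of 1s so it broadcasts against the reduce output.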
        std::vector<int32_t> divConstantShape(outputShape.size(), 1);

        tensors.push_back(new TosaSerializationTensor(constNameDivScale,
                                                      divConstantShape,
                                                      inputType,
                                                      uint8DivScale));
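
        // Multiply by 1/N. The second MUL operand is the reshaped tensor when a RESHAPE was emitted,
        // otherwise the raw reduce output.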
        int8_t shift = 0;
        TosaMulAttribute mulAttribute(shift);

        if (reshapeLogic && !outputNameReshape.empty())
        {
            operators.emplace_back(new TosaSerializationOperator(Op_MUL,
                                                                 Attribute_MulAttribute,
                                                                 &mulAttribute,
                                                                 { constNameDivScale, outputNameReshape },
                                                                 { outputName }));
        }
        else if (!outputNameReduce.empty())
        {
            operators.emplace_back(new TosaSerializationOperator(Op_MUL,
                                                                 Attribute_MulAttribute,
                                                                 &mulAttribute,
                                                                 { constNameDivScale, outputNameReduce },
                                                                 { outputName }));
        }
    }

    if (tensors.back()->GetName() != outputName)
    {
        tensors.emplace_back(new TosaSerializationTensor(outputName,
                                                         outputShape,
                                                         inputType,
                                                         {}));
    }
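
    // Package the collected operators and tensors into a single TOSA basic block for this layer.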
    return new TosaSerializationBasicBlock(blockName,
                                           mainName,
                                           operators,
                                           tensors,
                                           inputNames,
                                           { outputName });
}