18 const std::vector<const TensorInfo*>& inputs,
19 const std::vector<const TensorInfo*>& outputs)
21 bool tosaRefBackend {
false};
24 "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
26 "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
28 std::string inputName = std::string(
"input_");
29 std::string outputName = std::string(
"output0_");
39 tosaRefBackend = (layer->
GetBackendId().Get().find(
"TosaRef") != std::string::npos);
54 scale = (scale != 0) ? (1 / scale) : scale;
56 std::vector<TosaSerializationTensor*> tensors;
60 bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
65 if(inputName.find(
"input_") != std::string::npos)
67 DType tmp = inputDType0;
75 tensors.push_back(
new TosaSerializationTensor(inputName, inputShape0, tmp, {}));
82 throw armnn::Exception(fmt::format(
"ConvertQuantizeToTosaOperator: {} intermediate input"
83 " layer not supported.",EnumNamesDType()[inputDType0]));
105 TosaSerializationOperator* zeroPointOp =
nullptr;
106 TosaSerializationTensor* zeroPointTensor =
nullptr;
113 tensors.push_back(zeroPointTensor);
116 TosaSerializationOperator* scaleOp =
nullptr;
117 TosaSerializationTensor* scaleTensor =
nullptr;
124 tensors.push_back(scaleTensor);
128 TosaMulAttribute mulAttribute(shift);
129 TosaSerializationOperator* mulOp =
new TosaSerializationOperator(Op_MUL,
130 Attribute_MulAttribute,
132 {inputName, outputNameScale},
134 tensors.push_back(
new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
137 TosaSerializationOperator* addOp =
new TosaSerializationOperator(Op_ADD,
140 {outputNameMul, outputNameZeroPoint},
142 tensors.push_back(
new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
145 TosaSerializationOperator* castOp =
new TosaSerializationOperator(Op_CAST,
151 tensors.push_back(
new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
155 return new TosaSerializationBasicBlock(blockName,
157 {zeroPointOp, scaleOp, mulOp, addOp, castOp},
164 double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
165 int32_t input_zp = inputs[0]->GetQuantizationOffset();
166 int32_t output_zp = outputs[0]->GetQuantizationOffset();
168 TosaSerializationOperator* rescaleOp =
nullptr;
187 tensors.push_back(
new TosaSerializationTensor(outputName,
193 return new TosaSerializationBasicBlock(blockName,
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
Creates a TOSA rescale operator.