17 const std::vector<const TensorInfo*>& inputs,
18 const std::vector<const TensorInfo*>& outputs,
21 std::vector<std::string> inputNames;
22 std::vector<std::string> fcInputNames;
23 std::string outputName = std::string(
"output0_");
26 DType inputDType0 =
ArmNNToDType(inputs[0]->GetDataType());
27 DType outputDType0 =
ArmNNToDType(outputs[0]->GetDataType());
32 inputNames.emplace_back(
"input_0");
33 inputNames.emplace_back(
"constant_1");
36 inputNames.emplace_back(
"constant_2");
45 for (uint32_t i = 0; i < inputs.size(); ++i)
48 inputNames.push_back(inputName);
55 std::vector<TosaSerializationTensor*> tensors;
56 std::vector<TosaSerializationOperator*> operators;
63 if(inputNames[0].find(
"input_") != std::string::npos)
65 tensors.push_back(
new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
70 if(!inputs[1]->IsConstant() || layer ==
nullptr)
73 DType inputDType1 =
ArmNNToDType(inputs[1]->GetDataType());
74 tensors.push_back(
new TosaSerializationTensor(inputNames[1], inputShape1, inputDType1, {}));
79 if(!inputs[2]->IsConstant() || layer ==
nullptr)
82 DType inputDType2 =
ArmNNToDType(inputs[2]->GetDataType());
83 tensors.push_back(
new TosaSerializationTensor(inputNames[2], inputShape2, inputDType2, {}));
91 operators.push_back(
new TosaSerializationOperator(Op_CONST, Attribute_NONE,
nullptr, {}, {constantName}));
93 const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
94 std::vector<float> data(outputs[0]->GetShape()[1], 0);
96 std::vector<uint8_t> uint8Data;
97 TosaSerializationHandler::ConvertF32toU8(data, uint8Data);
99 tensors.push_back(
new TosaSerializationTensor(constantName,
100 {
static_cast<int32_t
>(outputs[0]->GetShape()[1])},
103 inputNames.emplace_back(constantName);
106 fcInputNames = inputNames;
111 uint32_t num_elems = inputs[1]->GetShape()[1];
112 uint32_t num_batch = inputs[0]->GetShape().GetNumElements() / num_elems;
115 const std::vector<int32_t>& targetShape = {
static_cast<int32_t
>(num_batch),
static_cast<int32_t
>(num_elems)};
118 auto* reshapeOp =
new TosaSerializationOperator(Op_RESHAPE,
119 Attribute_ReshapeAttribute,
122 {outputReshapeName});
123 operators.push_back(reshapeOp);
125 tensors.push_back(
new TosaSerializationTensor(outputReshapeName, targetShape, inputDType0, {}));
127 fcInputNames[0] = outputReshapeName;
133 std::string fcOutputName;
134 bool isInputInt8 = (inputDType0 == DType_INT8);
138 tensors.push_back(
new TosaSerializationTensor(fcOutputName, outputShape0, DType_INT32, {}));
142 tensors.push_back(
new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
146 TosaFullyConnectedAttribute attribute(inputs[0]->GetQuantizationOffset(),
147 inputs[1]->GetQuantizationOffset());
149 std::string& fcOutStr = isInputInt8 ? fcOutputName : outputName;
150 auto* fullyConnected_op =
new TosaSerializationOperator(Op_FULLY_CONNECTED,
151 Attribute_FullyConnectedAttribute,
155 operators.push_back(fullyConnected_op);
159 int32_t output_zp = outputs[0]->GetQuantizationOffset();
160 double output_scale = outputs[0]->GetQuantizationScales()[0];
161 double input_scale = inputs[0]->GetQuantizationScales()[0];
162 const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();
164 TosaSerializationOperator* rescaleOp =
nullptr;
175 operators.push_back(rescaleOp);
176 tensors.push_back(
new TosaSerializationTensor(outputName,
183 return new TosaSerializationBasicBlock(blockName,