ArmNN
 25.11
Loading...
Searching...
No Matches
FullyConnectedOperator.cpp
Go to the documentation of this file.
1//
2// Copyright © 2024-2025 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
6// SPDX-License-Identifier: Apache-2.0
7//
8
9#include <numeric>
12
13
14// This function is paraphrased from:
15// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc from function ConvertTFLFullyConnectedOp
16TosaSerializationBasicBlock* ConvertFullyConnectedToTosaOperator(const Layer* layer,
17 const std::vector<const TensorInfo*>& inputs,
18 const std::vector<const TensorInfo*>& outputs,
19 const FullyConnectedDescriptor* fcDescriptor)
20{
21 std::string inputName;
22 std::vector<std::string> inputNames;
23 std::vector<std::string> fcInputNames;
24 std::string outputName = std::string("output0_");
25 std::string blockName = std::string("Op_FULLY_CONNECTED_block_") + GetUniqueTosaMappingID();
26
27 DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
28 DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
29
30 // Set input names for validation purposes only.
31 if(layer == nullptr)
32 {
33 inputNames.emplace_back("input_0");
34 inputNames.emplace_back("constant_1");
35 if(fcDescriptor->m_BiasEnabled)
36 {
37 inputNames.emplace_back("constant_2");
38 }
39 }
40 // If a layer is present then the block will be used for execution, so input and output names need to be
41 // determined using the previous and following layers so the graph is connected correctly.
42 // For validation this doesn't matter.
43 else
44 {
45 inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
46 inputNames.push_back(inputName);
47
48 inputName = GenerateUniqueInputName(layer->GetInputSlot(1));
49 inputNames.push_back(inputName);
50
51 if(fcDescriptor->m_BiasEnabled)
52 {
53 inputName = GenerateUniqueInputName(layer->GetInputSlot(2));
54 inputNames.push_back(inputName);
55 }
56
57 // Determine unique output tensor name.
58 outputName = GenerateUniqueOutputName(*layer);
59 }
60
61 std::vector<TosaSerializationTensor*> tensors;
62 std::vector<TosaSerializationOperator*> operators;
63
64 // Setup input Tensor
65 // Only add tensor if connected layer is an input layer.
66 // As intermediate or constant tensors will be created separately.
67 // There also can't be duplicate tensors.
68 if(inputNames[0].find("input_") != std::string::npos)
69 {
70 std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
71 tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
72 }
73
74 // Only add input tensors if weights and bias are not constant or if running validation.
75 // Constant tensors will be created in the ConvertConstantToTosaOperator function.
76 if(layer == nullptr || (!inputs[1]->IsConstant() && !WeightFromDifferentLayer(*layer)))
77 {
78 std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
79 DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
80 tensors.push_back(new TosaSerializationTensor(inputNames[1], inputShape1, inputDType1, {}));
81 }
82
83 if(fcDescriptor->m_BiasEnabled)
84 {
85 if(!inputs[2]->IsConstant() || layer == nullptr)
86 {
87 std::vector<int32_t> inputShape2 = GetTosaTensorShape(inputs[2]->GetShape());
88 DType inputDType2 = ArmNNToDType(inputs[2]->GetDataType());
89 tensors.push_back(new TosaSerializationTensor(inputNames[2], inputShape2, inputDType2, {}));
90 }
91 }
92 else
93 {
94 // If bias is disabled, create a constant bias of 0 as three inputs are required.
95 inputName = std::string("constant_") + GetUniqueTosaMappingID();
96 inputNames.push_back(inputName);
97
98 operators.push_back(new TosaSerializationOperator(Op_CONST, Attribute_NONE, nullptr, {}, {inputName}));
99
100 const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
101 std::vector<float> data(outputs[0]->GetShape()[1], 0);
102
103 std::vector<uint8_t> uint8Data;
104 TosaSerializationHandler::ConvertF32toU8(data, uint8Data);
105
106 tensors.push_back(new TosaSerializationTensor(inputName,
107 {static_cast<int32_t>(outputs[0]->GetShape()[1])},
108 dType,
109 uint8Data));
110 }
111
112 fcInputNames = inputNames;
113
114 // Set up Reshape operator. TOSA Fully Connected only accepts 2D rank tensors.
115 if (inputs[0]->GetShape().GetNumDimensions() != 2)
116 {
117 uint32_t num_elems = inputs[1]->GetShape()[1];
118 uint32_t num_batch = inputs[0]->GetShape().GetNumElements() / num_elems;
119
120 std::string outputReshapeName = std::string("layer_intermediate0_") + GetUniqueTosaMappingID();
121 const std::vector<int32_t>& targetShape = {static_cast<int32_t>(num_batch), static_cast<int32_t>(num_elems)};
122 TosaReshapeAttribute attribute(GetTosaTensorShape(TensorShape({num_batch, num_elems})));
123
124 auto* reshapeOp = new TosaSerializationOperator(Op_RESHAPE,
125 Attribute_ReshapeAttribute,
126 &attribute,
127 {inputNames[0]},
128 {outputReshapeName});
129 operators.push_back(reshapeOp);
130
131 tensors.push_back(new TosaSerializationTensor(outputReshapeName, targetShape, inputDType0, {}));
132
133 fcInputNames[0] = outputReshapeName;
134 }
135
136
137 // Setup Output Tensor
138 std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())};
139 std::string fcOutputName;
140 bool isInputInt8 = (inputDType0 == DType_INT8);
141 if (isInputInt8)
142 {
143 fcOutputName = std::string("layer_intermediate0_") + GetUniqueTosaMappingID();
144 tensors.push_back(new TosaSerializationTensor(fcOutputName, outputShape0, DType_INT32, {}));
145 }
146 else
147 {
148 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
149 }
150
151 // Set up Fully Connected operator
152 TosaFullyConnectedAttribute attribute(inputs[0]->GetQuantizationOffset(), // input_zp
153 inputs[1]->GetQuantizationOffset()); // weight_zp
154
155 std::string& fcOutStr = isInputInt8 ? fcOutputName : outputName;
156 auto* fullyConnected_op = new TosaSerializationOperator(Op_FULLY_CONNECTED,
157 Attribute_FullyConnectedAttribute,
158 &attribute,
159 fcInputNames,
160 {fcOutStr});
161 operators.push_back(fullyConnected_op);
162
163 if (isInputInt8)
164 {
165 int32_t output_zp = outputs[0]->GetQuantizationOffset();
166 double output_scale = outputs[0]->GetQuantizationScales()[0];
167 double input_scale = inputs[0]->GetQuantizationScales()[0];
168 const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();
169
170 TosaSerializationOperator* rescaleOp = nullptr;
172 outputName,
173 0,
174 output_zp,
175 false,
176 false,
177 true,
178 true,
179 input_scale,
180 output_scale,
181 weight_scales,
182 &rescaleOp);
183 operators.push_back(rescaleOp);
184 tensors.push_back(new TosaSerializationTensor(outputName,
185 outputShape0,
186 DType_INT8, {}));
187 }
188
189 // operatorInputNames/operatorOutputNames ends up being the same as
190 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
191 return new TosaSerializationBasicBlock(blockName, // name
192 mainName, // region name
193 operators, // operators
194 tensors, // tensors
195 inputNames, // inputs
196 {outputName}); // outputs
197}
TosaSerializationBasicBlock * ConvertFullyConnectedToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs, const FullyConnectedDescriptor *fcDescriptor)
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
const std::string mainName
DType ArmNNToDType(const DataType &type)
bool WeightFromDifferentLayer(const Layer &layer)
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
std::string GetUniqueTosaMappingID()
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
void CreateRescaleTosaOperatorForWeights(const std::string &inputName, const std::string &outputName, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, double input_scale, double output_scale, const std::vector< float > &weight_scales, TosaSerializationOperator **op)
Creates a TOSA rescale operator for weight tensors.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.