FullyConnectedOperator.cpp
//
// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//

#include <numeric>
#include "FullyConnectedOperator.hpp"
#include "TosaRescaleOperatorUtils.hpp"

// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc from function ConvertTFLFullyConnectedOp
TosaSerializationBasicBlock* ConvertFullyConnectedToTosaOperator(const Layer* layer,
                                                                 const std::vector<const TensorInfo*>& inputs,
                                                                 const std::vector<const TensorInfo*>& outputs,
                                                                 const FullyConnectedDescriptor* fcDescriptor)
{
    std::vector<std::string> inputNames;
    std::vector<std::string> fcInputNames;
    std::string outputName = std::string("output0_");
    std::string blockName = std::string("Op_FULLY_CONNECTED_block_") + GetUniqueTosaMappingID();

    DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());

    // Set input names for validation purposes only.
    if(layer == nullptr)
    {
        inputNames.emplace_back("input_0");
        inputNames.emplace_back("constant_1");
        if(fcDescriptor->m_BiasEnabled)
        {
            inputNames.emplace_back("constant_2");
        }
    }
    // If a layer is present then the block will be used for execution, so input and output names need to be
    // determined using the previous and following layers so the graph is connected correctly.
    // For validation this doesn't matter.
    else
    {
        // Get the layer connected to the input slot and determine unique tensor names.
        for (uint32_t i = 0; i < inputs.size(); ++i)
        {
            std::string inputName = GenerateUniqueInputName(layer->GetInputSlot(i));
            inputNames.push_back(inputName);
        }

        // Determine unique output tensor name.
        outputName = GenerateUniqueOutputName(*layer);
    }

    std::vector<TosaSerializationTensor*> tensors;
    std::vector<TosaSerializationOperator*> operators;

    // Set up the input tensor.
    // Only add the tensor if the connected layer is an input layer, as intermediate or constant
    // tensors will be created separately. There also can't be duplicate tensors.
    std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
    if(inputNames[0].find("input_") != std::string::npos)
    {
        tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
    }

    // Only add input tensors if weights and bias are not constant or if running validation.
    // Constant tensors will be created in the ConvertConstantToTosaOperator function.
    if(!inputs[1]->IsConstant() || layer == nullptr)
    {
        std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
        DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
        tensors.push_back(new TosaSerializationTensor(inputNames[1], inputShape1, inputDType1, {}));
    }

    if(fcDescriptor->m_BiasEnabled)
    {
        if(!inputs[2]->IsConstant() || layer == nullptr)
        {
            std::vector<int32_t> inputShape2 = GetTosaTensorShape(inputs[2]->GetShape());
            DType inputDType2 = ArmNNToDType(inputs[2]->GetDataType());
            tensors.push_back(new TosaSerializationTensor(inputNames[2], inputShape2, inputDType2, {}));
        }
    }
    else
    {
        // If bias is disabled, create a constant bias of 0 as three inputs are required.
        std::string constantName = std::string("constant_") + GetUniqueTosaMappingID();

        operators.push_back(new TosaSerializationOperator(Op_CONST, Attribute_NONE, nullptr, {}, {constantName}));

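        // For a quantized INT8 input TOSA FULLY_CONNECTED accumulates into INT32, so the zero
        // bias below is emitted as INT32; otherwise it simply matches the output data type.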
        const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
        std::vector<float> data(outputs[0]->GetShape()[1], 0);

        std::vector<uint8_t> uint8Data;
        TosaSerializationHandler::ConvertF32toU8(data, uint8Data);

        tensors.push_back(new TosaSerializationTensor(constantName,
                                                      {static_cast<int32_t>(outputs[0]->GetShape()[1])},
                                                      dType,
                                                      uint8Data));
        inputNames.emplace_back(constantName);
    }

    fcInputNames = inputNames;

    // Set up Reshape operator. TOSA Fully Connected only accepts 2D rank tensors.
    if (inputs[0]->GetShape().GetNumDimensions() != 2)
    {
        uint32_t num_elems = inputs[1]->GetShape()[1];
        uint32_t num_batch = inputs[0]->GetShape().GetNumElements() / num_elems;

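        // For example, a {1, 4, 4, 8} input (128 elements) with {outputChannels, 128} weights
        // gives num_elems = 128 and num_batch = 1, so the RESHAPE below produces a {1, 128} tensor.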
        std::string outputReshapeName = std::string("intermediate0_") + GetUniqueTosaMappingID();
        const std::vector<int32_t>& targetShape = {static_cast<int32_t>(num_batch), static_cast<int32_t>(num_elems)};
        TosaReshapeAttribute attribute(GetTosaTensorShape(TensorShape({num_batch, num_elems})));

        auto* reshapeOp = new TosaSerializationOperator(Op_RESHAPE,
                                                        Attribute_ReshapeAttribute,
                                                        &attribute,
                                                        {inputNames[0]},
                                                        {outputReshapeName});
        operators.push_back(reshapeOp);

        tensors.push_back(new TosaSerializationTensor(outputReshapeName, targetShape, inputDType0, {}));

        fcInputNames[0] = outputReshapeName;
    }

    // Setup Output Tensor
    std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())};
    std::string fcOutputName;
    bool isInputInt8 = (inputDType0 == DType_INT8);
    if (isInputInt8)
    {
        fcOutputName = std::string("intermediate0_") + GetUniqueTosaMappingID();
        tensors.push_back(new TosaSerializationTensor(fcOutputName, outputShape0, DType_INT32, {}));
    }
    else
    {
        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
    }

    // Set up Fully Connected operator
    TosaFullyConnectedAttribute attribute(inputs[0]->GetQuantizationOffset(),  // input_zp
                                          inputs[1]->GetQuantizationOffset()); // weight_zp

    std::string& fcOutStr = isInputInt8 ? fcOutputName : outputName;
    auto* fullyConnected_op = new TosaSerializationOperator(Op_FULLY_CONNECTED,
                                                            Attribute_FullyConnectedAttribute,
                                                            &attribute,
                                                            fcInputNames,
                                                            {fcOutStr});
    operators.push_back(fullyConnected_op);

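    // For INT8 inputs the FULLY_CONNECTED result above is INT32, so append a per-channel RESCALE
    // that maps the INT32 accumulator back to the INT8 output using the input, weight and output
    // quantization parameters, then register the final INT8 output tensor.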
    if (isInputInt8)
    {
        int32_t output_zp = outputs[0]->GetQuantizationOffset();
        double output_scale = outputs[0]->GetQuantizationScales()[0];
        double input_scale = inputs[0]->GetQuantizationScales()[0];
        const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();

        TosaSerializationOperator* rescaleOp = nullptr;
        CreateRescaleTosaOperatorPerChannel(fcOutputName,
                                            outputName,
                                            0,
                                            output_zp,
                                            true,
                                            true,
                                            input_scale,
                                            output_scale,
                                            weight_scales,
                                            &rescaleOp);
        operators.push_back(rescaleOp);
        tensors.push_back(new TosaSerializationTensor(outputName,
                                                      outputShape0,
                                                      DType_INT8, {}));
    }

    // operatorInputNames/operatorOutputNames ends up being the same as
    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
    return new TosaSerializationBasicBlock(blockName,     // name
                                           mainName,      // region name
                                           operators,     // operators
                                           tensors,       // tensors
                                           inputNames,    // inputs
                                           {outputName}); // outputs
}
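
The conversion can be exercised on its own in the validation mode described above (layer == nullptr), where placeholder tensor names such as input_0 and constant_1 are used. Below is a minimal sketch of such a call; the TensorInfo/TensorShape construction and the surrounding setup are illustrative assumptions rather than code from this file.

    // Sketch only: drives ConvertFullyConnectedToTosaOperator in validation mode (layer == nullptr).
    armnn::TensorInfo inputInfo (armnn::TensorShape({1, 64}),  armnn::DataType::Float32);
    armnn::TensorInfo weightInfo(armnn::TensorShape({10, 64}), armnn::DataType::Float32, 0.0f, 0, true);
    armnn::TensorInfo biasInfo  (armnn::TensorShape({10}),     armnn::DataType::Float32, 0.0f, 0, true);
    armnn::TensorInfo outputInfo(armnn::TensorShape({1, 10}),  armnn::DataType::Float32);

    armnn::FullyConnectedDescriptor descriptor;
    descriptor.m_BiasEnabled = true;

    std::vector<const armnn::TensorInfo*> inputs  = { &inputInfo, &weightInfo, &biasInfo };
    std::vector<const armnn::TensorInfo*> outputs = { &outputInfo };

    // With no layer, placeholder names (input_0, constant_1, constant_2) are generated and the
    // returned block should contain a single FULLY_CONNECTED operator for this float configuration.
    TosaSerializationBasicBlock* block =
        ConvertFullyConnectedToTosaOperator(nullptr, inputs, outputs, &descriptor);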
armnn::FullyConnectedDescriptor
    A FullyConnectedDescriptor for the FullyConnectedLayer.
    Definition: Descriptors.hpp:507

TosaRescaleOperatorUtils.hpp

GenerateUniqueOutputName
    std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
    Definition: TosaOperatorUtils.hpp:120

armnn::Layer::GetInputSlot
    const InputSlot & GetInputSlot(unsigned int index) const override
    Get a const input slot handle by slot index.
    Definition: Layer.hpp:337

armnn::Layer
    Definition: Layer.hpp:230

mainName
    const std::string mainName
    Definition: TosaOperatorUtils.hpp:19

armnn::TensorShape
    Definition: Tensor.hpp:20

armnn::TensorShape::GetNumDimensions
    unsigned int GetNumDimensions() const
    Function that returns the tensor rank.
    Definition: Tensor.cpp:174

ArmNNToDType
    DType ArmNNToDType(const DataType &type)
    Definition: TosaOperatorUtils.hpp:22

armnn::FullyConnectedDescriptor::m_BiasEnabled
    bool m_BiasEnabled
    Enable/disable bias.
    Definition: Descriptors.hpp:526

CreateRescaleTosaOperatorPerChannel
    void CreateRescaleTosaOperatorPerChannel(const std::string &inputName, const std::string &outputName, int32_t input_zp, int32_t output_zp, bool double_round, bool scale32, double input_scale, double output_scale, const std::vector< float > &weight_scales, TosaSerializationOperator **op)
    Definition: TosaRescaleOperatorUtils.hpp:176

ConvertFullyConnectedToTosaOperator
    TosaSerializationBasicBlock * ConvertFullyConnectedToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs, const FullyConnectedDescriptor *fcDescriptor)
    Definition: FullyConnectedOperator.cpp:16

GetTosaTensorShape
    std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
    Definition: TosaOperatorUtils.hpp:79

FullyConnectedOperator.hpp

GenerateUniqueInputName
    std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
    Definition: TosaOperatorUtils.hpp:109

GetUniqueTosaMappingID
    std::string GetUniqueTosaMappingID()
    Definition: TosaOperatorUtils.hpp:138