ArmNN 24.08 — NeonBackendOptimizationUtils.hpp
Source listing for this file (see the documentation of this file for details).
//
// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "ArmComputeSubgraphUtils.hpp"
10 namespace armnn
11 {
12 
13 // Changes shapes of the form [1, 1, ..., W] to [ W ]
15 {
16  unsigned int numDimensions = in.GetNumDimensions();
17  for (unsigned int i = 0; i < (numDimensions-1); ++i)
18  {
19  if (in.GetShape()[i] != 1)
20  {
21  return false;
22  }
23  }
24 
25  unsigned int w = in.GetShape()[numDimensions-1];
26  out = in;
27  out.SetShape({w});
28 
29  return true;
30 }
31 
//
// Build slot and tensor info lists for Add/Mul/Add replacement
//
// Records, per layer of the Add->Mul->Add(->ReLu) pattern, which input slots
// are fed from outside the pattern and which output slots feed layers outside
// it. The first Add consumes both of its inputs externally; Mul and the second
// Add each consume only slot 1 externally; the optional ReLu consumes nothing
// new. Only the final layer's output (slot 0) leaves the pattern, plus the
// first Add's output when 'multipleOutputs' is set.
template<typename SlotListType>
void BuildAddMulAddSlotLists(bool handleReLu,
                             bool multipleOutputs,
                             std::vector<SlotListType>& inputLayersSlotLists,
                             std::vector<SlotListType>& outputLayersSlotLists)
{
    // External input slots, one entry per pattern layer.
    inputLayersSlotLists.push_back({0, 1}); // Add
    inputLayersSlotLists.push_back({1});    // Mul
    inputLayersSlotLists.push_back({1});    // Add
    if (handleReLu)
    {
        inputLayersSlotLists.push_back({}); // Relu
    }

    // External output slots: first Add exposes its output only when the
    // caller needs the intermediate result as a second output.
    outputLayersSlotLists.push_back(multipleOutputs ? SlotListType{0}
                                                    : SlotListType{}); // Add
    outputLayersSlotLists.push_back({});                               // Mul

    // The last layer in the pattern (second Add, or ReLu when fused)
    // always exposes slot 0.
    outputLayersSlotLists.push_back(handleReLu ? SlotListType{}
                                               : SlotListType{0});     // Add
    if (handleReLu)
    {
        outputLayersSlotLists.push_back({0});                          // Relu
    }
}
70 
71 inline void GetFusedName(Layer *layerList[4], std::string& fusedName)
72 {
73  // Build the fused name string
74  fusedName = "fused";
75  for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76  {
77  if (! layerList[layerIdx])
78  {
79  break;
80  }
81  fusedName += "-";
82  fusedName += layerList[layerIdx]->GetNameStr();
83  }
84 }
85 
86 template<typename Type>
87 bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
88  unsigned int& numInputs,
89  unsigned int& numOutputs,
90  std::vector<TensorInfo>& inputInfos,
91  std::vector<TensorInfo>& outputInfos,
92  const ActivationDescriptor*& activationDescriptor,
93  bool& fuseReLu)
94 {
95  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
96  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
97  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
98 
102 
103  auto is1D = [](const TensorInfo expanded)
104  {
105  TensorInfo collapsed;
106  if (CollapseLeadingUnitDimensions(expanded, collapsed))
107  {
108  return (collapsed.GetNumDimensions() == 1);
109  }
110  else
111  {
112  return (expanded.GetNumDimensions() == 1);
113  }
114  };
115 
116  // One of the 2 inputs for MUL and the Second ADD must be 1D
117  // ref: clframework/src/cpu/kernels/CpuAddMulAddKernel.cpp
118  auto& mulLayer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[1]));
119  auto& add2Layer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[2]));
120 
121  Layer& mulInput0 = mulLayer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
122  Layer& mulInput1 = mulLayer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
123  Layer& add2Input0 = add2Layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
124  Layer& add2Input1 = add2Layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
125  if (!is1D(mulInput0.GetOutputSlot(0).GetTensorInfo()) && !is1D(mulInput1.GetOutputSlot(0).GetTensorInfo()))
126  {
127  return false;
128  }
129  if (!is1D(add2Input0.GetOutputSlot(0).GetTensorInfo()) && !is1D(add2Input1.GetOutputSlot(0).GetTensorInfo()))
130  {
131  return false;
132  }
133 
134  fuseReLu = (layerList[3] != nullptr);
135  if (fuseReLu)
136  {
137  activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
139  (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
140  }
141 
142  numInputs = 0;
143  numOutputs = 0;
144 
145  // Ensure that there are 6 input slots in the add/mul/add layers
146  // we are going to replace
147  unsigned int layerIdx = 0;
148  unsigned int inputSlotCount = 0;
149  for (layerIdx = 0; layerIdx < 3; ++layerIdx)
150  {
151  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
152  {
153  InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
154  OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
155  if (outputSlot)
156  {
157  if (layerIdx == 0)
158  {
159  // Always count the input connections of the first add
160  inputInfos.push_back(inputSlot->GetTensorInfo());
161  numInputs++;
162  }
163  else
164  {
165  // For subsequent layers, we skip connections to the previous layers in the counting
166  if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
167  {
168  TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
169  if (numInputs == 2 || numInputs == 3)
170  {
171  // Workaround the broadcast optimization to collapse shapes such as
172  // [1, 1, 1, 2] to [2] as required by backend
173  if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
174  {
175  OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
176  if (previousLayerSlot)
177  {
178  if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
179  {
180  // First update the TensorInfo in the constant owning layer
181  previousLayerSlot->SetTensorInfo(inputSlotInfo);
182  // Then update the TensorInfo in the workload for the owning layer
183  ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
184  &previousLayerSlot->GetOwningLayer());
185  layer->m_LayerOutput
186  = std::make_unique<ScopedTensorHandle>(
187  ConstTensor(inputSlotInfo,
188  layer->m_LayerOutput.get()->GetConstTensor<void>()));
189  }
190  }
191  }
192  }
193  inputInfos.push_back(inputSlotInfo);
194  numInputs++;
195  }
196  }
197  inputSlotCount++;
198  }
199  }
200  }
201 
202  // Check the input counts
203  bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
204  if (! validInputCount)
205  {
206  return false;
207  }
208 
209  const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
210  for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
211  {
212  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
213  {
214  OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
215 
216  for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
217  {
218  InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
219  if (layerIdx < (maxIdx-1))
220  {
221  if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
222  {
223  outputInfos.push_back(outputSlot->GetTensorInfo());
224  numOutputs++;
225  }
226  }
227  else if (layerList[layerIdx] != nullptr)
228  {
229  outputInfos.push_back(outputSlot->GetTensorInfo());
230  numOutputs++;
231  }
232  }
233  }
234  }
235 
236  // Check the output count
237  bool validOutputCount = (outputInfos.size() > 0);
238  if (! validOutputCount)
239  {
240  return false;
241  }
242 
243  return true;
244 }
245 
246 }
armnn::BinaryOperation::Mul
@ Mul
armnn::ActivationDescriptor
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
armnn::BinaryOperation::Add
@ Add
armnn::InputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::CollapseLeadingUnitDimensions
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
Definition: NeonBackendOptimizationUtils.hpp:14
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::OutputSlot::SetTensorInfo
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:95
armnn::BuildAddMulAddSlotLists
void BuildAddMulAddSlotLists(bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
Definition: NeonBackendOptimizationUtils.hpp:36
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, LayerType type)
Definition: SubgraphUtils.hpp:362
armnn::GetFusedName
void GetFusedName(Layer *layerList[4], std::string &fusedName)
Definition: NeonBackendOptimizationUtils.hpp:71
armnn::TensorInfo::GetNumDimensions
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339
armnn::ActivationFunction::BoundedReLu
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
armnn::Layer
Definition: Layer.hpp:230
armnn::InputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition: Layer.cpp:614
armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132
ARMNN_THROW_INVALIDARG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
Definition: Exceptions.hpp:212
armnn::OutputSlot::GetNumConnections
unsigned int GetNumConnections() const override
Definition: Layer.hpp:158
armnn::ActivationDescriptor::m_Function
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...
Definition: Descriptors.hpp:59
armnn::BuildAddMulAddTensorInfoLists
bool BuildAddMulAddTensorInfoLists(Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
Definition: NeonBackendOptimizationUtils.hpp:87
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:240
armnn::InputSlot
Definition: Layer.hpp:42
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::ConstantLayer::m_LayerOutput
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Definition: ConstantLayer.hpp:46
armnn::ActivationFunction::ReLu
@ ReLu
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::ConstantLayer
A layer that the constant data can be bound to.
Definition: ConstantLayer.hpp:15
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::OutputSlot::GetConnection
const InputSlot * GetConnection(unsigned int index) const override
Definition: Layer.cpp:83
ArmComputeSubgraphUtils.hpp
armnn::LayerType::Constant
@ Constant