ArmNN
 25.11
Loading...
Searching...
No Matches
NeonBackendOptimizationUtils.hpp
Go to the documentation of this file.
1//
2// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#pragma once
7
9
10namespace armnn
11{
12
13// Changes shapes of the form [1, 1, ..., W] to [ W ]
15{
16 unsigned int numDimensions = in.GetNumDimensions();
17 for (unsigned int i = 0; i < (numDimensions-1); ++i)
18 {
19 if (in.GetShape()[i] != 1)
20 {
21 return false;
22 }
23 }
24
25 unsigned int w = in.GetShape()[numDimensions-1];
26 out = in;
27 out.SetShape({w});
28
29 return true;
30}
31
32//
33// Build slot and tensor info lists for Add/Mul/Add replacement
34//
//
// Build slot and tensor info lists for Add/Mul/Add replacement
//
template<typename SlotListType>
void BuildAddMulAddSlotLists(bool handleReLu,
                             bool multipleOutputs,
                             std::vector<SlotListType>& inputLayersSlotLists,
                             std::vector<SlotListType>& outputLayersSlotLists)
{
    // Input slot indices recorded per layer of the Add/Mul/Add (+ optional
    // ReLu) sequence.
    inputLayersSlotLists.push_back(SlotListType{0, 1});   // Add
    inputLayersSlotLists.push_back(SlotListType{1});      // Mul
    inputLayersSlotLists.push_back(SlotListType{1});      // Add
    if (handleReLu)
    {
        inputLayersSlotLists.push_back(SlotListType{});   // Relu
    }

    // Output slot indices recorded per layer. The first Add only publishes
    // slot 0 when multiple outputs are requested; the final layer of the
    // sequence (second Add, or Relu when fused) always publishes slot 0.
    outputLayersSlotLists.push_back(multipleOutputs ? SlotListType{0}     // Add
                                                    : SlotListType{});
    outputLayersSlotLists.push_back(SlotListType{});                      // Mul
    outputLayersSlotLists.push_back(handleReLu ? SlotListType{}           // Add
                                               : SlotListType{0});
    if (handleReLu)
    {
        outputLayersSlotLists.push_back(SlotListType{0});                 // Relu
    }
}
70
71inline void GetFusedName(Layer *layerList[4], std::string& fusedName)
72{
73 // Build the fused name string
74 fusedName = "fused";
75 for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76 {
77 if (! layerList[layerIdx])
78 {
79 break;
80 }
81 fusedName += "-";
82 fusedName += layerList[layerIdx]->GetNameStr();
83 }
84}
85
86template<typename Type>
87bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
88 unsigned int& numInputs,
89 unsigned int& numOutputs,
90 std::vector<TensorInfo>& inputInfos,
91 std::vector<TensorInfo>& outputInfos,
92 const ActivationDescriptor*& activationDescriptor,
93 bool& fuseReLu)
94{
98
102
103 auto is1D = [](const TensorInfo expanded)
104 {
105 TensorInfo collapsed;
106 if (CollapseLeadingUnitDimensions(expanded, collapsed))
107 {
108 return (collapsed.GetNumDimensions() == 1);
109 }
110 else
111 {
112 return (expanded.GetNumDimensions() == 1);
113 }
114 };
115
116 // One of the 2 inputs for MUL and the Second ADD must be 1D
117 // ref: clframework/src/cpu/kernels/CpuAddMulAddKernel.cpp
118 auto& mulLayer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[1]));
119 auto& add2Layer = *(PolymorphicDowncast<ElementwiseBinaryLayer*>(layerList[2]));
120
121 Layer& mulInput0 = mulLayer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
122 Layer& mulInput1 = mulLayer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
123 Layer& add2Input0 = add2Layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
124 Layer& add2Input1 = add2Layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
125 if (!is1D(mulInput0.GetOutputSlot(0).GetTensorInfo()) && !is1D(mulInput1.GetOutputSlot(0).GetTensorInfo()))
126 {
127 return false;
128 }
129 if (!is1D(add2Input0.GetOutputSlot(0).GetTensorInfo()) && !is1D(add2Input1.GetOutputSlot(0).GetTensorInfo()))
130 {
131 return false;
132 }
133
134 fuseReLu = (layerList[3] != nullptr);
135 if (fuseReLu)
136 {
137 activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
139 (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
140 }
141
142 numInputs = 0;
143 numOutputs = 0;
144
145 // Ensure that there are 6 input slots in the add/mul/add layers
146 // we are going to replace
147 unsigned int layerIdx = 0;
148 unsigned int inputSlotCount = 0;
149 for (layerIdx = 0; layerIdx < 3; ++layerIdx)
150 {
151 for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
152 {
153 InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
154 OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
155 if (outputSlot)
156 {
157 if (layerIdx == 0)
158 {
159 // Always count the input connections of the first add
160 inputInfos.push_back(inputSlot->GetTensorInfo());
161 numInputs++;
162 }
163 else
164 {
165 // For subsequent layers, we skip connections to the previous layers in the counting
166 if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
167 {
168 TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
169 if (numInputs == 2 || numInputs == 3)
170 {
171 // Workaround the broadcast optimization to collapse shapes such as
172 // [1, 1, 1, 2] to [2] as required by backend
173 if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
174 {
175 OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
176 if (previousLayerSlot)
177 {
178 if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
179 {
180 // First update the TensorInfo in the constant owning layer
181 previousLayerSlot->SetTensorInfo(inputSlotInfo);
182 // Then update the TensorInfo in the workload for the owning layer
184 &previousLayerSlot->GetOwningLayer());
185 layer->m_LayerOutput
186 = std::make_unique<ScopedTensorHandle>(
187 ConstTensor(inputSlotInfo,
188 layer->m_LayerOutput.get()->GetConstTensor<void>()));
189 }
190 }
191 }
192 }
193 inputInfos.push_back(inputSlotInfo);
194 numInputs++;
195 }
196 }
197 inputSlotCount++;
198 }
199 }
200 }
201
202 // Check the input counts
203 bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
204 if (! validInputCount)
205 {
206 return false;
207 }
208
209 const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
210 for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
211 {
212 for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
213 {
214 OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
215
216 for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
217 {
218 InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
219 if (layerIdx < (maxIdx-1))
220 {
221 if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
222 {
223 outputInfos.push_back(outputSlot->GetTensorInfo());
224 numOutputs++;
225 }
226 }
227 else if (layerList[layerIdx] != nullptr)
228 {
229 outputInfos.push_back(outputSlot->GetTensorInfo());
230 numOutputs++;
231 }
232 }
233 }
234 }
235
236 // Check the output count
237 bool validOutputCount = (outputInfos.size() > 0);
238 if (! validOutputCount)
239 {
240 return false;
241 }
242
243 return true;
244}
245
246}
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition Tensor.hpp:330
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Layer & GetOwningLayer() const
Definition Layer.hpp:53
const OutputSlot * GetConnectedOutputSlot() const
Definition Layer.hpp:56
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition Layer.cpp:614
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
const std::string & GetNameStr() const
Definition Layer.hpp:240
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition Layer.hpp:339
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition Layer.hpp:286
const InputSlot * GetConnection(unsigned int index) const override
Definition Layer.cpp:83
unsigned int GetNumConnections() const override
Definition Layer.hpp:158
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition Layer.cpp:95
Layer & GetOwningLayer() const
Definition Layer.hpp:132
const TensorInfo & GetTensorInfo() const override
Definition Layer.cpp:100
const TensorShape & GetShape() const
Definition Tensor.hpp:193
unsigned int GetNumDimensions() const
Definition Tensor.hpp:197
void SetShape(const TensorShape &newShape)
Definition Tensor.hpp:195
Copyright (c) 2021 ARM Limited and Contributors.
bool IsSequenceLayerType(Layer &layer, LayerType type)
void GetFusedName(Layer *layerList[4], std::string &fusedName)
void BuildAddMulAddSlotLists(bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
Definition Types.hpp:92
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
bool BuildAddMulAddTensorInfoLists(Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
An ActivationDescriptor for the ActivationLayer.
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...