ArmNN
 24.08
FoldPadIntoLayer2d.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2021-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include "Optimization.hpp"
9 
11 
14 
15 namespace armnn
16 {
17 namespace optimizations
18 {
19 namespace pad_fold
20 {
21 inline float GetZeroElement(const TensorInfo& tensorInfo)
22 {
23  return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0);
24 }
25 
26 inline float GetLowestElement(const TensorInfo& tensorInfo)
27 {
28  constexpr float negativeInfinity = -std::numeric_limits<float>::infinity();
29  const float scale = tensorInfo.GetQuantizationScale();
30  const int32_t offset = tensorInfo.GetQuantizationOffset();
31 
32  switch (tensorInfo.GetDataType())
33  {
34  case DataType::Float16:
35  return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset);
36  case DataType::Float32:
37  return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset);
38  case DataType::QAsymmU8:
39  return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset);
40  case DataType::QSymmS16:
41  return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset);
42  case DataType::QSymmS8:
43  // Fall-through
44  case DataType::QAsymmS8:
45  return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset);
46  case DataType::BFloat16:
47  return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset);
48  default:
49  {
50  ARMNN_ASSERT_MSG(false, "Unsupported DataType");
51  return NAN;
52  }
53  }
54 }
55 
56 inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue)
57 {
58  return tensorValue == GetZeroElement(tensorInfo);
59 }
60 
62  const TensorInfo& tensorInfo,
63  const float tensorValue)
64 {
65  return tensorValue == GetZeroElement(tensorInfo);
66 }
67 
68 inline bool IsNeutralElement(
69  const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue)
70 {
71  return (descriptor.m_PoolType == PoolingAlgorithm::Max)
72  ? tensorValue <= GetLowestElement(tensorInfo)
73  : tensorValue == GetZeroElement(tensorInfo);
74 }
75 
76 inline bool IsPooling2dPadded(const Pooling2dDescriptor& poolDescriptor)
77 {
78  const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
79  poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
80  if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
81  {
82  return true;
83  }
84  return false;
85 }
86 
87 template <typename Descriptor>
89  const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
90 {
91  armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout);
92  constexpr unsigned int batchIndex = 0;
93 
94  constexpr auto noPad = std::make_pair(0U, 0U);
95 
96  if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) ||
97  (padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad))
98  {
99  return false;
100  }
101 
102  const auto& padList = padDescriptor.m_PadList;
103 
104  // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are defined as paddings
105  // on width dimension whereas padTop and padBottom - paddings on height dimension, so updating
106  // these according to data layout
107  layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first;
108  layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second;
109  layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first;
110  layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second;
111 
112  return true;
113 }
114 
115 inline bool TryFoldPadIntoLayer2d(const PadDescriptor& padDescriptor,
116  Pooling2dDescriptor& poolDescriptor,
117  const TensorInfo& tensorInfo,
118  bool isBackendOptimization = false)
119 {
120  // Cannot fold Average or L2 pooling if padding exists and the padding method is Exclude.
121  if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max &&
122  IsPooling2dPadded(poolDescriptor) &&
123  poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)
124  {
125  return false;
126  }
127 
128  // Cannot fold Average pooling if data type is quantized and layout is NHWC in Neon backend.
129  // Therefore, this specific case will become a backend specific optimization.
130  if (!isBackendOptimization &&
131  tensorInfo.IsQuantized() &&
132  poolDescriptor.m_PoolType == PoolingAlgorithm::Average &&
133  poolDescriptor.m_DataLayout == DataLayout::NHWC)
134  {
135  return false;
136  }
137 
139 
140  return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
141 }
142 
143 template <typename Layer2dT>
144 Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
145 {
146  PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
147  Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());
148 
149  const PadDescriptor& padDescriptor = padLayer.GetParameters();
150  auto newLayer2dDescriptor = layer2d.GetParameters();
151 
152  if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
153  {
154  return nullptr;
155  }
156 
157  // Workaround an issue in the compute library. The conv2d algorithm that the
158  // compute library is choosing is not handling the 1x1 filter case when
159  // the padding size >= filter size
160  if (layer2d.GetType() == armnn::LayerType::Convolution2d)
161  {
162  // Get filter width and height
163  armnnUtils::DataLayoutIndexed dataLayoutIndex(newLayer2dDescriptor.m_DataLayout);
164  const TensorShape& filterShape = layer2d.GetInputSlot(1).GetTensorInfo().GetShape();
165  unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()];
166  unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()];
167  // Calculate total padding and check conditions
168  auto horizontalPadding = newLayer2dDescriptor.m_PadLeft + newLayer2dDescriptor.m_PadRight;
169  auto verticalPadding = newLayer2dDescriptor.m_PadTop + newLayer2dDescriptor.m_PadBottom;
170  if ((filterWidth == 1) && (horizontalPadding >= filterWidth))
171  {
172  return nullptr;
173  }
174  else if ((filterHeight == 1) && (verticalPadding >= filterHeight))
175  {
176  return nullptr;
177  }
178  }
179 
180  // Save original parent output slot of the pad layer
181  OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();
182 
183  // Insert new layer2d layer between the pad layer and its parent layer.
184  const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
185  auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());
186 
187  newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);
188  // Start at 1 to connect only weights and bias
189  for (unsigned int i = 1; i < layer2d.GetNumInputSlots(); ++i)
190  {
191  if (layer2d.GetInputSlot(i).GetConnectedOutputSlot() != nullptr)
192  {
193  Layer& tgtLayer = layer2d.GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
194  // Remove old connection and connect to new layer2d
195  tgtLayer.GetOutputSlot(0).Disconnect(layer2d.GetInputSlot(i));
196  tgtLayer.GetOutputSlot(0).Connect(newLayer2d.GetInputSlot(i));
197  }
198  }
199 
200  // Moves connections in old layer2d layer output to new layer.
201  // Old layer2d layer will be removed as it's left unconnected.
202  // Pad layer will be removed if left unconnected.
203  layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());
204 
205  return &newLayer2d;
206 }
207 
209 {
210 public:
211  void Run(Graph& graph, InputSlot& connection) const
212  {
213  const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection);
214 
215  if (newConv2dLayer != nullptr)
216  {
217  const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer());
218  ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
219  "FoldPadIntoConvolution2d: New convolution layer is missing connection to weights layer");
220 
221  if (conv2dLayer->GetParameters().m_BiasEnabled)
222  {
223  ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
224  "FoldPadIntoConvolution2d: New convolution layer is missing "
225  "connection to bias layer.");
226  }
227  }
228  }
229 
230 protected:
231  FoldPadIntoConvolution2dImpl() = default;
232  ~FoldPadIntoConvolution2dImpl() = default;
233 };
234 
236 {
237 public:
238  void Run(Graph& graph, InputSlot& connection) const
239  {
240  const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);
241 
242  if (newConv2dLayer != nullptr)
243  {
244  const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
245  ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
246  "FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
247  "connection to weights layer");
248 
249  if (conv2dLayer->GetParameters().m_BiasEnabled)
250  {
251  ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
252  "FoldPadIntoConvolution2d: New convolution layer is missing "
253  "connection to bias layer.");
254  }
255  }
256  }
257 protected:
260 };
261 
263 {
264 public:
265  void Run(Graph& graph, InputSlot& connection) const
266  {
267  FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection);
268  }
269 
270 protected:
271  FoldPadIntoPooling2dImpl() = default;
272  ~FoldPadIntoPooling2dImpl() = default;
273 };
274 } // namespace pad_fold
275 
282 using FoldPadIntoPooling2d =
284 
285 } // namespace optimizations
286 } // namespace armnn
287 
288 
armnn::optimizations::pad_fold::IsPooling2dPadded
bool IsPooling2dPadded(const Pooling2dDescriptor &poolDescriptor)
Definition: FoldPadIntoLayer2d.hpp:76
armnn::optimizations::pad_fold::FoldPadIntoConvolution2dImpl::FoldPadIntoConvolution2dImpl
FoldPadIntoConvolution2dImpl()=default
armnn::Pooling2dDescriptor::m_PaddingMethod
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
Definition: Descriptors.hpp:425
armnn::optimizations::pad_fold::FoldPadIntoDepthwiseConvolution2dImpl
Definition: FoldPadIntoLayer2d.hpp:235
armnn::InputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::DataLayout::NHWC
@ NHWC
QuantizeHelper.hpp
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::DepthwiseConvolution2dLayer
This layer represents a depthwise convolution 2d operation.
Definition: DepthwiseConvolution2dLayer.hpp:15
armnn::TensorInfo::GetQuantizationScale
float GetQuantizationScale() const
Definition: Tensor.cpp:461
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::optimizations::FoldPadIntoConvolution2d
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Definition: FoldPadIntoLayer2d.hpp:277
armnnUtils::DataLayoutIndexed
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
Definition: DataLayoutIndexed.hpp:17
armnn::DataType::Float32
@ Float32
armnn::Layer::GetOutputSlot
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition: Layer.hpp:339
armnn::Pooling2dDescriptor::m_PadTop
uint32_t m_PadTop
Padding top value in the height dimension.
Definition: Descriptors.hpp:411
armnn::optimizations::pad_fold::FoldPadIntoPooling2dImpl::~FoldPadIntoPooling2dImpl
~FoldPadIntoPooling2dImpl()=default
armnn::DataType::QAsymmU8
@ QAsymmU8
armnn::DataType::QSymmS8
@ QSymmS8
armnn::optimizations::pad_fold::FoldPadIntoDepthwiseConvolution2dImpl::Run
void Run(Graph &graph, InputSlot &connection) const
Definition: FoldPadIntoLayer2d.hpp:238
armnn::OutputSlot::Connect
int Connect(InputSlot &destination)
Definition: Layer.cpp:123
ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
Optimization.hpp
armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:337
armnn::LayerWithParameters::GetParameters
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
Definition: LayerWithParameters.hpp:19
armnn::DataType::QSymmS16
@ QSymmS16
armnn::DataType::BFloat16
@ BFloat16
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::optimizations::pad_fold::FoldPadIntoPooling2dImpl
Definition: FoldPadIntoLayer2d.hpp:262
armnnUtils::DataLayoutIndexed::GetHeightIndex
unsigned int GetHeightIndex() const
Definition: DataLayoutIndexed.hpp:24
armnn::Layer
Definition: Layer.hpp:230
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132
armnn::optimizations::pad_fold::TryFoldPadIntoLayer2d
bool TryFoldPadIntoLayer2d(const PadDescriptor &padDescriptor, Descriptor &layerDescriptor, const TensorInfo &tensorInfo)
Definition: FoldPadIntoLayer2d.hpp:88
armnn::DataType::Float16
@ Float16
armnn::optimizations::pad_fold::GetLowestElement
float GetLowestElement(const TensorInfo &tensorInfo)
Definition: FoldPadIntoLayer2d.hpp:26
armnn::Pooling2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:427
armnn::OutputSlot::Disconnect
void Disconnect(InputSlot &slot)
Definition: Layer.cpp:131
armnn::Pooling2dDescriptor::m_PadBottom
uint32_t m_PadBottom
Padding bottom value in the height dimension.
Definition: Descriptors.hpp:413
armnn::Pooling2dDescriptor::m_PadRight
uint32_t m_PadRight
Padding right value in the width dimension.
Definition: Descriptors.hpp:409
armnn::optimizations::pad_fold::FoldPadIntoPooling2dImpl::Run
void Run(Graph &graph, InputSlot &connection) const
Definition: FoldPadIntoLayer2d.hpp:265
armnn::PadDescriptor
A PadDescriptor for the PadLayer.
Definition: Descriptors.hpp:1196
armnn::PaddingMethod::Exclude
@ Exclude
The padding fields don't count and are ignored.
PolymorphicDowncast.hpp
armnn::TensorInfo::IsQuantized
bool IsQuantized() const
Definition: Tensor.cpp:508
armnn::optimizations::pad_fold::IsNeutralElement
bool IsNeutralElement(const Convolution2dDescriptor &, const TensorInfo &tensorInfo, const float tensorValue)
Definition: FoldPadIntoLayer2d.hpp:56
armnn::PaddingMethod::IgnoreValue
@ IgnoreValue
The padding fields count, but are ignored.
armnn::PadDescriptor::m_PadValue
float m_PadValue
Optional value to use for padding, defaults to 0.
Definition: Descriptors.hpp:1221
armnn::optimizations::pad_fold::FoldPadIntoConvolution2dImpl::Run
void Run(Graph &graph, InputSlot &connection) const
Definition: FoldPadIntoLayer2d.hpp:211
armnn::optimizations::pad_fold::FoldPadIntoDepthwiseConvolution2dImpl::FoldPadIntoDepthwiseConvolution2dImpl
FoldPadIntoDepthwiseConvolution2dImpl()=default
armnnUtils::DataLayoutIndexed::GetWidthIndex
unsigned int GetWidthIndex() const
Definition: DataLayoutIndexed.hpp:25
armnn::optimizations::pad_fold::FoldPadIntoConvolution2dImpl::~FoldPadIntoConvolution2dImpl
~FoldPadIntoConvolution2dImpl()=default
armnn::Pooling2dDescriptor::m_PadLeft
uint32_t m_PadLeft
Padding left value in the width dimension.
Definition: Descriptors.hpp:407
armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition: Tensor.hpp:200
armnn::PoolingAlgorithm::Average
@ Average
armnn::InputSlot
Definition: Layer.hpp:42
armnn::Convolution2dDescriptor
A Convolution2dDescriptor for the Convolution2dLayer.
Definition: Descriptors.hpp:534
armnn::DataType::QAsymmS8
@ QAsymmS8
armnn::PadDescriptor::m_PadList
std::vector< std::pair< unsigned int, unsigned int > > m_PadList
Specifies the padding for input dimension.
Definition: Descriptors.hpp:1218
armnn::optimizations::pad_fold::FoldPadIntoLayer2dImpl
Layer2dT * FoldPadIntoLayer2dImpl(Graph &graph, InputSlot &connection)
Definition: FoldPadIntoLayer2d.hpp:144
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnnUtils::DataLayoutIndexed::GetChannelsIndex
unsigned int GetChannelsIndex() const
Definition: DataLayoutIndexed.hpp:23
armnn::PoolingAlgorithm::Max
@ Max
armnn::optimizations::pad_fold::FoldPadIntoDepthwiseConvolution2dImpl::~FoldPadIntoDepthwiseConvolution2dImpl
~FoldPadIntoDepthwiseConvolution2dImpl()=default
armnn::LayerType::Convolution2d
@ Convolution2d
armnn::Pooling2dDescriptor
A Pooling2dDescriptor for the Pooling2dLayer.
Definition: Descriptors.hpp:371
armnn::TensorInfo::GetQuantizationOffset
int32_t GetQuantizationOffset() const
Definition: Tensor.cpp:482
armnn::DepthwiseConvolution2dDescriptor
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
Definition: Descriptors.hpp:659
armnn::optimizations::pad_fold::FoldPadIntoPooling2dImpl::FoldPadIntoPooling2dImpl
FoldPadIntoPooling2dImpl()=default
DataLayoutIndexed.hpp
armnn::Graph
Definition: Graph.hpp:30
armnn::Graph::InsertNewLayer
LayerT * InsertNewLayer(InputSlot &insertBefore, Args &&... args)
Inserts a new layer between the output slot currently connected to insertBefore and insertBefore itself.
Definition: Graph.hpp:481
armnn::PadLayer
This layer represents a pad operation.
Definition: PadLayer.hpp:14
armnn::Pooling2dDescriptor::m_PoolType
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max. Average, L2).
Definition: Descriptors.hpp:405
armnn::optimizations::pad_fold::GetZeroElement
float GetZeroElement(const TensorInfo &tensorInfo)
Definition: FoldPadIntoLayer2d.hpp:21
armnn::optimizations::pad_fold::FoldPadIntoConvolution2dImpl
Definition: FoldPadIntoLayer2d.hpp:208
armnn::OptimizeForExclusiveConnection
Definition: Optimization.hpp:173