ArmNN
 24.08
ConvImpl.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017, 2024 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ConvImpl.hpp"
7 
8 #include <cmath>
9 #include <limits>
10 
11 namespace armnn
12 {
13 
15 {
16  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(multiplier >= 0.0f && multiplier < 1.0f,
17  "QuantizedMultiplierSmallerThanOne: multiplier must be between 0.0f and 1.0f.");
18  if (multiplier == 0.0f)
19  {
20  m_Multiplier = 0;
21  m_RightShift = 0;
22  }
23  else
24  {
25  const double q = std::frexp(multiplier, &m_RightShift);
26  m_RightShift = -m_RightShift;
27  int64_t qFixed = static_cast<int64_t>(::round(q * (1ll << 31)));
28  if (qFixed == (1ll << 31))
29  {
30  qFixed /= 2;
31  --m_RightShift;
32  }
33  m_Multiplier = static_cast<int32_t>(qFixed);
34  }
35 }
36 
38 {
39  int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
40  return RoundingDivideByPOT(x, m_RightShift);
41 }
42 
43 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
44 {
45  // Check for overflow.
46  if (a == b && a == std::numeric_limits<int32_t>::min())
47  {
48  return std::numeric_limits<int32_t>::max();
49  }
50  int64_t a_64(a);
51  int64_t b_64(b);
52  int64_t ab_64 = a_64 * b_64;
53  int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
54  int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
55  return ab_x2_high32;
56 }
57 
58 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
59 {
60  ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(exponent >= 0 && exponent <= 31,
61  "RoundingDivideByPOT: exponent must be between 0 and 31.");
62  int32_t mask = (1 << exponent) - 1;
63  int32_t remainder = x & mask;
64  int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
65  return (x >> exponent) + (remainder > threshold ? 1 : 0);
66 }
67 
// Naive reference convolution over decoded float values. Supports regular and
// depthwise convolution, NCHW and NHWC data layouts, stride, dilation and
// top/left zero padding. The whole input, filter and (optional) bias tensors
// are decoded to float up front; each output element is accumulated with a
// plain loop nest and written through the encoder.
//
// @param rInputShape    Input tensor shape; indexed via dataLayout.
// @param rInputDecoder  Decoder yielding the input as floats.
// @param rOutputShape   Output tensor shape; indexed via dataLayout.
// @param rOutputEncoder Encoder the results are written through.
// @param rFilterShape   Weights shape: Conv2d [O,H,W,I], depthwise [1,H,W,O].
// @param rFilterDecoder Decoder yielding the weights as floats.
// @param biasEnabled    When true, pBiasDecoder must be non-null.
// @param pBiasDecoder   Optional per-output-channel bias (shape [O]).
// @param dataLayout     NCHW or NHWC; selects the indexing scheme.
// @param paddingTop     Implicit zero rows above the input.
// @param paddingLeft    Implicit zero columns left of the input.
// @param xStride/yStride     Output-to-input step sizes.
// @param xDilation/yDilation Spacing between sampled filter taps.
// @param depthwise      Selects depthwise semantics (see channel mapping below).
// @throws InvalidArgumentException if biasEnabled is set without a bias decoder.
void Convolve(const TensorShape& rInputShape,
              Decoder<float>& rInputDecoder,
              const TensorShape& rOutputShape,
              Encoder<float>& rOutputEncoder,
              const TensorShape& rFilterShape,
              Decoder<float>& rFilterDecoder,
              bool biasEnabled,
              Decoder<float>* pBiasDecoder,
              DataLayout dataLayout,
              unsigned int paddingTop,
              unsigned int paddingLeft,
              unsigned int xStride,
              unsigned int yStride,
              unsigned int xDilation,
              unsigned int yDilation,
              bool depthwise)
{
    if (biasEnabled && !pBiasDecoder)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();

    // Weights layout:
    // Conv2d: [O,H,W,I]
    // Depthwise: [1,H,W,O]
    const unsigned int inputChannels   = rInputShape[channelsIndex];
    const unsigned int outputChannels  = rOutputShape[channelsIndex];
    // For depthwise, each input channel fans out to depthMultiplier output channels
    // (assumes outputChannels is an exact multiple of inputChannels).
    const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;

    const unsigned int batchSize    = rOutputShape[0];
    const unsigned int outputHeight = rOutputShape[heightIndex];
    const unsigned int outputWidth  = rOutputShape[widthIndex];
    const unsigned int inputHeight  = rInputShape[heightIndex];
    const unsigned int inputWidth   = rInputShape[widthIndex];

    // Depthwise weights are always [1,H,W,O], so H/W sit at fixed positions 1 and 2.
    const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
    const unsigned int filterWidth  = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];

    // Decode everything to float once, then index the flat vectors below.
    const std::vector<float> inputVec  = rInputDecoder.DecodeTensor(rInputShape);
    const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);

    const TensorShape biasShape{outputChannels};
    const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();

    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                {
                    // This loop goes over each output element.
                    float sum = 0.0f;

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For normal, must loop over each input channel.
                    // NOTE: in the depthwise case this loop runs once and cInput is
                    // overwritten inside the body (from cOutput); that is safe only
                    // because the loop bound is 1 when depthwise is true.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                    {
                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                            {
                                // This loop goes over each input element for each output element.
                                unsigned int filterIndex = 0;

                                // Since dimensionality of kernel depends on depthwiseness, so does index.
                                if (depthwise)
                                {
                                    // Map the output channel back to its source input channel.
                                    cInput = cOutput / depthMultiplier;
                                    // filterDepth = outputChannels;
                                    // Flat index into [1,H,W,O] weights.
                                    filterIndex = xFilter * outputChannels + cOutput +
                                                  yFilter * filterWidth * outputChannels;
                                }
                                else
                                {
                                    // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
                                    // performance regression.
                                    if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                                    {
                                        // Flat index into [O,H,W,I] weights.
                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                      yFilter * filterWidth * inputChannels +
                                                      xFilter * inputChannels +
                                                      cInput;
                                    }
                                    else
                                    {
                                        // Flat index into [O,I,H,W] weights.
                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                      cInput * filterWidth * filterHeight +
                                                      yFilter * filterWidth +
                                                      xFilter;
                                    }
                                }

                                // Coordinates in the (conceptually) padded input.
                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;

                                float inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
                                {
                                    inputValue = 0.0f;
                                }
                                else
                                {
                                    unsigned int inputIndex = 0;

                                    // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
                                    // performance regression.
                                    if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                                    {
                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                     (yInput - paddingTop) * inputWidth * inputChannels +
                                                     (xInput - paddingLeft) * inputChannels +
                                                     cInput;
                                    }
                                    else
                                    {
                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                     inputWidth * inputHeight * cInput +
                                                     inputWidth * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }
                                    inputValue = inputVec[inputIndex];
                                }

                                sum += filterVec[filterIndex] * inputValue;
                            }
                        }
                    }

                    if (biasEnabled)
                    {
                        sum += biasVec[cOutput];
                    }

                    unsigned int outIdx;
                    if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
                    {
                        outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
                                 yOutput * outputWidth * outputChannels +
                                 xOutput * outputChannels +
                                 cOutput;
                    }
                    else
                    {
                        outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
                                 cOutput * outputHeight * outputWidth +
                                 yOutput * outputWidth +
                                 xOutput;
                    }

                    // operator[] appears to position the encoder at outIdx before
                    // Set writes the value — confirm against the Encoder interface.
                    rOutputEncoder[outIdx];
                    rOutputEncoder.Set(sum);
                }
            }
        }
    }
}
234 
235 } // namespace armnn
armnn::Decoder< float >
armnn::Encoder::Set
virtual void Set(IType right)=0
armnn::DataLayout
DataLayout
Definition: Types.hpp:62
armnn::DataLayout::NHWC
@ NHWC
armnnUtils::DataLayoutIndexed
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
Definition: DataLayoutIndexed.hpp:17
armnnUtils::DataLayoutIndexed::GetDataLayout
armnn::DataLayout GetDataLayout() const
Definition: DataLayoutIndexed.hpp:22
ConvImpl.hpp
armnn::QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne
QuantizedMultiplierSmallerThanOne(float multiplier)
Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
Definition: ConvImpl.cpp:14
armnnUtils::DataLayoutIndexed::GetHeightIndex
unsigned int GetHeightIndex() const
Definition: DataLayoutIndexed.hpp:24
armnn::QuantizedMultiplierSmallerThanOne::operator*
int32_t operator*(int32_t rhs) const
The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne.
Definition: ConvImpl.cpp:37
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::Encoder< float >
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::Decoder::DecodeTensor
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
armnnUtils::DataLayoutIndexed::GetWidthIndex
unsigned int GetWidthIndex() const
Definition: DataLayoutIndexed.hpp:25
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnnUtils::DataLayoutIndexed::GetChannelsIndex
unsigned int GetChannelsIndex() const
Definition: DataLayoutIndexed.hpp:23
armnn::Convolve
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
Definition: ConvImpl.cpp:68
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
Definition: Exceptions.hpp:210