ArmNN
 25.11
Loading...
Searching...
No Matches
ConvImpl.cpp
Go to the documentation of this file.
1//
2// Copyright © 2017, 2024 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "ConvImpl.hpp"
7
8#include <cmath>
9#include <limits>
10
11namespace armnn
12{
13
15{
16 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(multiplier >= 0.0f && multiplier < 1.0f,
17 "QuantizedMultiplierSmallerThanOne: multiplier must be between 0.0f and 1.0f.");
18 if (multiplier == 0.0f)
19 {
20 m_Multiplier = 0;
21 m_RightShift = 0;
22 }
23 else
24 {
25 const double q = std::frexp(multiplier, &m_RightShift);
26 m_RightShift = -m_RightShift;
27 int64_t qFixed = static_cast<int64_t>(::round(q * (1ll << 31)));
28 if (qFixed == (1ll << 31))
29 {
30 qFixed /= 2;
31 --m_RightShift;
32 }
33 m_Multiplier = static_cast<int32_t>(qFixed);
34 }
35}
36
38{
39 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
40 return RoundingDivideByPOT(x, m_RightShift);
41}
42
43int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
44{
45 // Check for overflow.
46 if (a == b && a == std::numeric_limits<int32_t>::min())
47 {
48 return std::numeric_limits<int32_t>::max();
49 }
50 int64_t a_64(a);
51 int64_t b_64(b);
52 int64_t ab_64 = a_64 * b_64;
53 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
54 int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
55 return ab_x2_high32;
56}
57
58int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
59{
60 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(exponent >= 0 && exponent <= 31,
61 "RoundingDivideByPOT: exponent must be between 0 and 31.");
62 int32_t mask = (1 << exponent) - 1;
63 int32_t remainder = x & mask;
64 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
65 return (x >> exponent) + (remainder > threshold ? 1 : 0);
66}
67
68void Convolve(const TensorShape& rInputShape,
69 Decoder<float>& rInputDecoder,
70 const TensorShape& rOutputShape,
71 Encoder<float>& rOutputEncoder,
72 const TensorShape& rFilterShape,
73 Decoder<float>& rFilterDecoder,
74 bool biasEnabled,
75 Decoder<float>* pBiasDecoder,
76 DataLayout dataLayout,
77 unsigned int paddingTop,
78 unsigned int paddingLeft,
79 unsigned int xStride,
80 unsigned int yStride,
81 unsigned int xDilation,
82 unsigned int yDilation,
83 bool depthwise)
84{
85 if (biasEnabled && !pBiasDecoder)
86 {
87 throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
88 }
89 const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
90
91 const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
92 const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
93 const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
94
95 // Weights layout:
96 // Conv2d: [O,H,W,I]
97 // Depthwise: [1,H,W,O]
98 const unsigned int inputChannels = rInputShape[channelsIndex];
99 const unsigned int outputChannels = rOutputShape[channelsIndex];
100 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
101
102 const unsigned int batchSize = rOutputShape[0];
103 const unsigned int outputHeight = rOutputShape[heightIndex];
104 const unsigned int outputWidth = rOutputShape[widthIndex];
105 const unsigned int inputHeight = rInputShape[heightIndex];
106 const unsigned int inputWidth = rInputShape[widthIndex];
107
108 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
109 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
110
111 const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
112 const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
113
114 const TensorShape biasShape{outputChannels};
115 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
116
117 for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
118 {
119 for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
120 {
121 for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
122 {
123 for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
124 {
125 // This loop goes over each output element.
126 float sum = 0.0f;
127
128 // For depthwise, each output channel corresponds to exactly one input channel.
129 // For normal, must loop over each input channel.
130 for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
131 {
132 for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
133 {
134 for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
135 {
136 // This loop goes over each input element for each output element.
137 unsigned int filterIndex = 0;
138
139 // Since dimensionality of kernel depends on depthwiseness, so does index.
140 if (depthwise)
141 {
142 cInput = cOutput / depthMultiplier;
143 // filterDepth = outputChannels;
144 filterIndex = xFilter * outputChannels + cOutput +
145 yFilter * filterWidth * outputChannels;
146 }
147 else
148 {
149 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
150 // performance regression.
151 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
152 {
153 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
154 yFilter * filterWidth * inputChannels +
155 xFilter * inputChannels +
156 cInput;
157 }
158 else
159 {
160 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
161 cInput * filterWidth * filterHeight +
162 yFilter * filterWidth +
163 xFilter;
164 }
165 }
166
167 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
168 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
169
170 float inputValue;
171
172 // Check if we're in the padding.
173 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
174 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
175 {
176 inputValue = 0.0f;
177 }
178 else
179 {
180 unsigned int inputIndex = 0;
181
182 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
183 // performance regression.
184 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
185 {
186 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
187 (yInput - paddingTop) * inputWidth * inputChannels +
188 (xInput - paddingLeft) * inputChannels +
189 cInput;
190 }
191 else
192 {
193 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
194 inputWidth * inputHeight * cInput +
195 inputWidth * (yInput - paddingTop) +
196 xInput - paddingLeft;
197 }
198 inputValue = inputVec[inputIndex];
199 }
200
201 sum += filterVec[filterIndex] * inputValue;
202 }
203 }
204 }
205
206 if (biasEnabled)
207 {
208 sum += biasVec[cOutput];
209 }
210
211 unsigned int outIdx;
212 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
213 {
214 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
215 yOutput * outputWidth * outputChannels +
216 xOutput * outputChannels +
217 cOutput;
218 }
219 else
220 {
221 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
222 cOutput * outputHeight * outputWidth +
223 yOutput * outputWidth +
224 xOutput;
225 }
226
227 rOutputEncoder[outIdx];
228 rOutputEncoder.Set(sum);
229 }
230 }
231 }
232 }
233}
234
235} // namespace armnn
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout.
unsigned int GetHeightIndex() const
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const
Copyright (c) 2021 ARM Limited and Contributors.
DataLayout
Definition Types.hpp:63
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
Definition ConvImpl.cpp:68
QuantizedMultiplierSmallerThanOne(float multiplier)
Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
Definition ConvImpl.cpp:14
int32_t operator*(int32_t rhs) const
The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
Definition ConvImpl.cpp:37