// NOTE(review): garbled extraction of the QuantizedMultiplierSmallerThanOne
// constructor — interleaved numbers are original source line numbers, and the
// constructor head, braces and several statements are not visible here.
// Comments annotate only the visible fragments.
// Zero multiplier: presumably handled by a not-visible branch that leaves
// m_Multiplier and m_RightShift at 0 — TODO confirm against the full source.
19 if (multiplier == 0.0f)
// Decompose multiplier as q * 2^exp with q in [0.5, 1); negate the exponent
// so it can be applied later as a right shift.
26 const double q = std::frexp(multiplier, &m_RightShift);
27 m_RightShift = -m_RightShift;
// Convert q to a Q0.31 fixed-point integer, rounding to nearest.
28 int64_t qFixed =
static_cast<int64_t
>(std::round(q * (1ll << 31)));
// Rounding can push qFixed to exactly 2^31, which does not fit in int32_t;
// the not-visible body presumably halves qFixed and decrements the shift to
// compensate — TODO confirm.
30 if (qFixed == (1ll << 31))
36 ARMNN_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
// Narrow the fixed-point value into the stored 32-bit multiplier.
37 m_Multiplier =
static_cast<int32_t
>(qFixed);
// Fragment of operator*(int32_t): multiply rhs by the stored Q0.31 multiplier
// with a saturating, rounding, doubling high-mul, then apply the stored
// right shift with round-to-nearest division by a power of two.
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
47 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
50 if (a == b && a == std::numeric_limits<int32_t>::min())
52 return std::numeric_limits<int32_t>::max();
56 int64_t ab_64 = a_64 * b_64;
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
58 int32_t ab_x2_high32 =
static_cast<std::int32_t
>((ab_64 + nudge) / (1ll << 31));
62 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x,
int exponent)
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
// NOTE(review): garbled extraction of armnn::Convolve(...) — the interleaved
// numbers are original source line numbers; braces, branch conditions and
// several statements were lost in extraction. Comments annotate only the
// visible fragments.
// Tail of the (partially visible) parameter list.
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
84 unsigned int xDilation,
85 unsigned int yDilation,
// Guard: a bias decoder must be supplied when bias is enabled (the
// not-visible body presumably reports an error — TODO confirm).
88 if (biasEnabled && !pBiasDecoder)
// Resolve the H/W axis positions for the active data layout (NCHW vs NHWC).
95 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
// Tensor geometry. For depthwise, outputChannels is a multiple of
// inputChannels and depthMultiplier = outputChannels / inputChannels.
101 const unsigned int inputChannels = rInputShape[channelsIndex];
102 const unsigned int outputChannels = rOutputShape[channelsIndex];
103 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
105 const unsigned int batchSize = rOutputShape[0];
106 const unsigned int outputHeight = rOutputShape[heightIndex];
107 const unsigned int outputWidth = rOutputShape[widthIndex];
108 const unsigned int inputHeight = rInputShape[heightIndex];
109 const unsigned int inputWidth = rInputShape[widthIndex];
// Depthwise filters keep H/W at shape indices 1 and 2 regardless of
// dataLayout (presumably a [1, H, W, O] filter layout — TODO confirm).
111 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
// Dequantize entire tensors up front into float vectors.
114 const std::vector<float> inputVec = rInputDecoder.
DecodeTensor(rInputShape);
115 const std::vector<float> filterVec = rFilterDecoder.
DecodeTensor(rFilterShape, depthwise);
118 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->
DecodeTensor(biasShape) : std::vector<float>();
// Naive nested-loop convolution: one accumulation per output element.
120 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
122 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
124 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
126 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
// Depthwise convolves each channel independently, so only a single
// input-channel iteration is needed in that case.
133 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
135 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
137 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
140 unsigned int filterIndex = 0;
// Depthwise: derive the input channel from the output channel.
145 cInput = cOutput / depthMultiplier;
// Depthwise filter index, consistent with an [H, W, O] interior layout.
147 filterIndex = xFilter * outputChannels + cOutput +
148 yFilter * filterWidth * outputChannels;
// Non-depthwise: two alternative filter indexings, selected by a branch
// that is not visible here (presumably NHWC/OHWI vs NCHW/OIHW — TODO
// confirm); both fragments are missing their trailing channel term line.
156 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157 yFilter * filterWidth * inputChannels +
158 xFilter * inputChannels +
163 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164 cInput * filterWidth * filterHeight +
165 yFilter * filterWidth +
// Map the output coordinate back into padded input space.
170 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
// Coordinates landing in the padding region contribute zero (the skip
// itself is on a line not visible here).
176 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
183 unsigned int inputIndex = 0;
// NHWC input indexing (padding offsets removed from the coordinates).
189 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190 (yInput - paddingTop) * inputWidth * inputChannels +
191 (xInput - paddingLeft) * inputChannels +
// NCHW input indexing.
196 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197 inputWidth * inputHeight * cInput +
198 inputWidth * (yInput - paddingTop) +
199 xInput - paddingLeft;
201 inputValue = inputVec[inputIndex];
// Multiply-accumulate into the per-output-element sum (declared on a
// line not visible here).
204 sum += filterVec[filterIndex] * inputValue;
// Add the per-output-channel bias once per output element.
211 sum += biasVec[cOutput];
// NHWC output indexing.
217 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218 yOutput * outputWidth * outputChannels +
219 xOutput * outputChannels +
// NCHW output indexing (both fragments are missing their final term line).
224 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225 cOutput * outputHeight * outputWidth +
226 yOutput * outputWidth +
// Position the encoder at outIdx, then quantize and write the accumulator.
230 rOutputEncoder[outIdx];
231 rOutputEncoder.
Set(sum);
unsigned int GetWidthIndex() const
int32_t operator*(int32_t rhs) const
The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Copyright (c) 2021 ARM Limited and Contributors.
unsigned int GetHeightIndex() const
QuantizedMultiplierSmallerThanOne(float multiplier)
Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
Provides access to the appropriate indexes for Channels, Height and Width based on a given DataLayout.
#define ARMNN_ASSERT(COND)
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const