19 if (multiplier == 0.0f)
26 const double q = std::frexp(multiplier, &m_RightShift);
27 m_RightShift = -m_RightShift;
28 int64_t qFixed =
static_cast<int64_t
>(std::round(q * (1ll << 31)));
30 if (qFixed == (1ll << 31))
36 ARMNN_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
37 m_Multiplier =
static_cast<int32_t
>(qFixed);
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
47 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
50 if (a == b && a == std::numeric_limits<int32_t>::min())
52 return std::numeric_limits<int32_t>::max();
56 int64_t ab_64 = a_64 * b_64;
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
58 int32_t ab_x2_high32 =
static_cast<std::int32_t
>((ab_64 + nudge) / (1ll << 31));
62 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x,
int exponent)
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
// NOTE(review): garbled extraction — the Convolve(...) header, braces, and
// several declarations (`sum`, `inputValue`, `outIdx`, `channelsIndex`,
// `dataLayoutIndexed`, `biasShape`, the stride parameters) fall in gaps
// between the visible lines. Comments annotate only what the visible
// fragments establish; verify the full function against the original
// ConvImpl.cpp before relying on them.
//
// Tail of the parameter list: padding and dilation (strides sit in a gap).
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
84 unsigned int xDilation,
85 unsigned int yDilation,
// Guard: a bias decoder must be supplied whenever biasEnabled is set.
88 if (biasEnabled && !pBiasDecoder)
// Resolve the H/W axis positions for the active data layout (NCHW vs NHWC).
95 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
// Depthwise filters index shape as [multiplier, inChannels, fH, fW]
// (indices 0-3 below); regular filters take out-channels from index 0 and
// in-channels from the layout's channels index.
98 const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
99 const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
100 const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
// Spatial dimensions read via the layout-resolved indices.
102 const unsigned int batchSize = rOutputShape[0];
103 const unsigned int outputHeight = rOutputShape[heightIndex];
104 const unsigned int outputWidth = rOutputShape[widthIndex];
105 const unsigned int inputHeight = rInputShape[heightIndex];
106 const unsigned int inputWidth = rInputShape[widthIndex];
108 const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
109 const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
// Decode whole tensors to float up front; bias is decoded only when enabled.
111 const std::vector<float> inputVec = rInputDecoder.
DecodeTensor(rInputShape);
112 const std::vector<float> filterVec = rFilterDecoder.
DecodeTensor(rFilterShape, depthMultiplier, depthwise);
115 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->
DecodeTensor(biasShape) : std::vector<float>();
117 unsigned int depthwiseMultiplierIdx = 0;
// Loop nest: batch -> output channel -> output row -> output column.
118 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
120 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
122 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
124 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
// Depthwise touches a single input channel per output channel; the regular
// path accumulates over every input channel.
131 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
// Depthwise branch: derive the source channel and the multiplier slot from
// the output channel (overwrites the loop's cInput).
135 depthwiseMultiplierIdx = 0;
136 cInput = cOutput / depthMultiplier;
137 depthwiseMultiplierIdx = cOutput % depthMultiplier;
// Walk the filter window.
140 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
142 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
145 unsigned int filterIndex = 0;
// Flat filter index, depthwise layout (final "+ xFilter" term is in a gap).
150 filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
151 cInput * filterWidth * filterHeight +
152 yFilter * filterWidth +
// Flat filter index, regular NHWC branch (trailing "+ cInput" in a gap).
161 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
162 yFilter * filterWidth * inputChannels +
163 xFilter * inputChannels +
// Flat filter index, regular NCHW branch (trailing "+ xFilter" in a gap).
168 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
169 cInput * filterWidth * filterHeight +
170 yFilter * filterWidth +
// Map the output coordinate back into input space via stride and dilation.
175 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
176 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
// Taps landing in the padding border contribute nothing (the branch body,
// presumably setting a zero input value, sits in a gap).
181 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
182 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
188 unsigned int inputIndex = 0;
// Flat input index, NHWC branch (trailing "+ cInput" in a gap); padding
// offsets are subtracted to address the unpadded tensor.
194 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
195 (yInput - paddingTop) * inputWidth * inputChannels +
196 (xInput - paddingLeft) * inputChannels +
// Flat input index, NCHW branch.
201 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
202 inputWidth * inputHeight * cInput +
203 inputWidth * (yInput - paddingTop) +
204 xInput - paddingLeft;
206 inputValue = inputVec[inputIndex];
// Multiply-accumulate into the running sum for this output element.
209 sum += filterVec[filterIndex] * inputValue;
// After the accumulation loops: add the per-output-channel bias if enabled.
216 sum += biasVec[cOutput];
// Flat output index, NHWC branch (trailing "+ cOutput" in a gap).
222 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
223 yOutput * outputWidth * outputChannels +
224 xOutput * outputChannels +
// Flat output index, NCHW branch (trailing "+ xOutput" in a gap).
229 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
230 cOutput * outputHeight * outputWidth +
231 yOutput * outputWidth +
// Position the encoder at outIdx, then write the accumulated value.
235 rOutputEncoder[outIdx];
236 rOutputEncoder.
Set(sum);
unsigned int GetWidthIndex() const
int32_t operator*(int32_t rhs) const
The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne function.
virtual void Set(IType right)=0
Copyright (c) 2021 ARM Limited and Contributors.
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, const unsigned int channelMultiplier=1, bool isDepthwise=false)=0
unsigned int GetHeightIndex() const
QuantizedMultiplierSmallerThanOne(float multiplier)
Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
#define ARMNN_ASSERT(COND)
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const