// Fragment: converts the floating-point `multiplier` into a fixed-point pair
// (m_Multiplier, m_RightShift). The enclosing signature and several intermediate
// lines are not visible in this excerpt.
// Zero multiplier is special-cased (the handling itself is not shown here).
19 if (multiplier == 0.0f)
// std::frexp splits `multiplier` into a fraction q and a power-of-two exponent;
// the exponent is written directly into m_RightShift.
26 const double q = std::frexp(multiplier, &m_RightShift);
// Flip the sign so the stored value is expressed as a right-shift amount.
27 m_RightShift = -m_RightShift;
// Scale the fraction by 2^31 and round to the nearest integer, kept in 64 bits
// because the rounded result can reach 2^31.
28 int64_t qFixed =
static_cast<int64_t
>(::round(q * (1ll << 31)));
// Rounding landed exactly on 2^31, which does not fit in int32_t; the adjustment
// made for this case is on lines not shown in this excerpt.
30 if (qFixed == (1ll << 31))
// Guard the narrowing conversion below.
36 ARMNN_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
37 m_Multiplier =
static_cast<int32_t
>(qFixed);
// Fragment: applies the stored fixed-point multiplier to `rhs`. The enclosing
// signature is not visible in this excerpt (presumably an operator or method of
// QuantizedMultiplierSmallerThanOne -- confirm against the full file).
// Step 1: high multiply of rhs by m_Multiplier.
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
// Step 2: rounding division by 2^m_RightShift.
44 return RoundingDivideByPOT(x, m_RightShift);
// Computes a rounded (a * b) / 2^31 for two int32_t operands, saturating in the
// single overflowing case. The opening brace, the declarations of a_64 / b_64 and
// the final return statement are on lines not visible in this excerpt.
47 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
// Only INT32_MIN * INT32_MIN overflows: (2^62) / 2^31 = 2^31 is not representable,
// so the result saturates to INT32_MAX.
50 if (a == b && a == std::numeric_limits<int32_t>::min())
52 return std::numeric_limits<int32_t>::max();
// 64-bit product (a_64 / b_64 are presumably the operands widened to int64_t --
// their declarations are not shown).
56 int64_t ab_64 = a_64 * b_64;
// Rounding bias added before the truncating division: +2^30 for non-negative
// products, 1 - 2^30 for negative ones.
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
// Dividing by 2^31 yields the high 32 bits of the doubled product.
58 int32_t ab_x2_high32 =
static_cast<std::int32_t
>((ab_64 + nudge) / (1ll << 31));
// Divides x by 2^exponent with rounding to nearest; exact halves round away from
// zero (this relies on `x >> exponent` being an arithmetic shift for negative x).
// The opening brace and any lines between the signature and the body are not
// visible in this excerpt.
// NOTE(review): `1 << exponent` is undefined for exponent < 0 or >= 32 and overflows
// int at exponent == 31; no bound check is visible here -- confirm one exists.
62 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x,
int exponent)
// Low `exponent` bits set: selects the bits discarded by the shift.
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
// Half of the divisor (minus one); raised by one for negative x so that the
// comparison below rounds negative halves towards the more negative result.
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
// Floor division by the shift, plus one when the discarded part exceeds the threshold.
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
// Fragment of a reference 2D convolution routine (regular and depthwise). The start
// of the signature, all braces, and a number of statements -- including the branch
// conditions that choose between the two index formulas below -- are not visible in
// this excerpt.
// Visible parameters: padding applied before the top/left edge and the dilation
// factors per axis.
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
84 unsigned int xDilation,
85 unsigned int yDilation,
// Bias was requested but no bias decoder was supplied (handling not shown).
88 if (biasEnabled && !pBiasDecoder)
// Dimension positions are taken from the data-layout helper.
95 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
101 const unsigned int inputChannels = rInputShape[channelsIndex];
102 const unsigned int outputChannels = rOutputShape[channelsIndex];
// For depthwise convolution each input channel produces depthMultiplier outputs.
// NOTE(review): divides by inputChannels; a zero channel count would be a division
// by zero -- confirm shapes are validated upstream.
103 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
// Batch size is read from dimension 0 of the output shape.
105 const unsigned int batchSize = rOutputShape[0];
106 const unsigned int outputHeight = rOutputShape[heightIndex];
107 const unsigned int outputWidth = rOutputShape[widthIndex];
108 const unsigned int inputHeight = rInputShape[heightIndex];
109 const unsigned int inputWidth = rInputShape[widthIndex];
// Depthwise filters take height/width from fixed dimensions 1 and 2 of the filter
// shape; otherwise the data-layout indices are used.
111 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
// Decode the full input, filter and (optional) bias tensors into float vectors up
// front so the loops below work on plain indexed data.
114 const std::vector<float> inputVec = rInputDecoder.
DecodeTensor(rInputShape);
115 const std::vector<float> filterVec = rFilterDecoder.
DecodeTensor(rFilterShape, depthwise);
118 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->
DecodeTensor(biasShape) : std::vector<float>();
// Outer loops: one iteration per output element (batch, output channel, y, x).
120 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
122 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
124 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
126 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
// Inner loops: accumulate over input channels and filter taps. For depthwise
// convolution the channel loop runs exactly once.
133 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
135 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
137 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
140 unsigned int filterIndex = 0;
// Overwrites the loop variable with the input channel that feeds this output
// channel (presumably on the depthwise path -- the enclosing condition is not shown).
145 cInput = cOutput / depthMultiplier;
// Filter offset with the output channel as the fastest-varying dimension.
147 filterIndex = xFilter * outputChannels + cOutput +
148 yFilter * filterWidth * outputChannels;
// Filter offset ordered output channel, y, x, ... (the trailing term of this
// expression is on a line not shown).
156 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157 yFilter * filterWidth * inputChannels +
158 xFilter * inputChannels +
// Filter offset ordered output channel, input channel, y, ... (the trailing term of
// this expression is on a line not shown).
163 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164 cInput * filterWidth * filterHeight +
165 yFilter * filterWidth +
// Coordinates of the sampled element in the padded input, from stride and dilation.
170 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
// True when the sample falls in the padding region rather than in the real input
// (the handling is not shown; presumably the sample contributes zero -- confirm).
176 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
183 unsigned int inputIndex = 0;
// Input offset with padding removed, ordered batch, y, x, ... (the trailing term of
// this expression is on a line not shown).
189 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190 (yInput - paddingTop) * inputWidth * inputChannels +
191 (xInput - paddingLeft) * inputChannels +
// Input offset with padding removed, ordered batch, channel, y, x.
196 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197 inputWidth * inputHeight * cInput +
198 inputWidth * (yInput - paddingTop) +
199 xInput - paddingLeft;
201 inputValue = inputVec[inputIndex];
// Multiply-accumulate for this filter tap.
204 sum += filterVec[filterIndex] * inputValue;
// Bias is indexed per output channel (presumably guarded by biasEnabled -- the
// condition is not shown).
211 sum += biasVec[cOutput];
// Output offset ordered batch, y, x, ... (the trailing term of this expression is
// on a line not shown).
217 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218 yOutput * outputWidth * outputChannels +
219 xOutput * outputChannels +
// Output offset ordered batch, channel, y, ... (the trailing term of this expression
// is on a line not shown).
224 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225 cOutput * outputHeight * outputWidth +
226 yOutput * outputWidth +
// Index the encoder at outIdx, then store the accumulated value through it.
230 rOutputEncoder[outIdx];
231 rOutputEncoder.
Set(sum);