17 "QuantizedMultiplierSmallerThanOne: multiplier must be between 0.0f and 1.0f.");
18 if (multiplier == 0.0f)
25 const double q = std::frexp(multiplier, &m_RightShift);
26 m_RightShift = -m_RightShift;
27 int64_t qFixed =
static_cast<int64_t
>(::round(q * (1ll << 31)));
28 if (qFixed == (1ll << 31))
33 m_Multiplier =
static_cast<int32_t
>(qFixed);
// Fragment of a function body; the enclosing signature is not visible in this excerpt.
// Step 1: multiply rhs by the stored fixed-point multiplier (m_Multiplier).
39 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
// Step 2: divide the product by 2^m_RightShift with rounding and return the result.
40 return RoundingDivideByPOT(x, m_RightShift);
// Computes the high 32 bits of 2*a*b with rounding, clamping the one case that
// does not fit in int32_t.
// NOTE: this excerpt omits some lines of the body (braces, the declarations of
// a_64 / b_64 and the final return statement).
43 int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
// When both operands are INT32_MIN the mathematical result would be 2^31, which
// does not fit in int32_t, so INT32_MAX is returned instead.
46 if (a == b && a == std::numeric_limits<int32_t>::min())
48 return std::numeric_limits<int32_t>::max();
// a_64 and b_64 are declared on lines not shown here; presumably they are 64-bit
// copies of a and b so the product cannot overflow -- TODO confirm.
52 int64_t ab_64 = a_64 * b_64;
// Rounding offset of (almost) half the divisor. Integer division truncates toward
// zero, so a negative product needs a negative offset to round to nearest.
53 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
// Dividing the 64-bit product by 2^31 yields the upper 32 bits of 2*a*b.
54 int32_t ab_x2_high32 =
static_cast<std::int32_t
>((ab_64 + nudge) / (1ll << 31));
// Divides x by 2^exponent, rounding to the nearest integer.
// NOTE: this excerpt omits the braces and the opening of the assertion below.
58 int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x,
int exponent)
// Tail of an assertion whose opening line is not shown; the message gives the
// accepted exponent range as 0..31 inclusive.
61 "RoundingDivideByPOT: exponent must be between 0 and 31.");
// Bit mask selecting the low `exponent` bits of x.
// NOTE(review): `1 << exponent` is evaluated as int; for exponent == 31 (allowed
// by the message above) the shift/subtraction overflows a 32-bit int. A 64-bit
// literal (1ll) would avoid this -- confirm whether exponent 31 is reachable.
62 int32_t mask = (1 << exponent) - 1;
// The bits that the right shift below discards.
63 int32_t remainder = x & mask;
// Half of the divisor (minus one); bumped by one for negative x so that, together
// with the shift below, exact ties round away from zero.
64 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
// Shift, then add one when the discarded bits exceed the threshold.
// Assumes >> on a negative int32_t is an arithmetic shift -- TODO confirm for the
// target compilers.
65 return (x >> exponent) + (remainder > threshold ? 1 : 0);
// Fragment of a convolution routine. The start of the signature, all braces and
// several branch conditions are on lines not included in this excerpt, so the
// comments below describe only what the visible lines do.
77 unsigned int paddingTop,
78 unsigned int paddingLeft,
81 unsigned int xDilation,
82 unsigned int yDilation,
// Guard: bias was requested but no bias decoder was supplied. The handling of this
// case is on lines not shown.
85 if (biasEnabled && !pBiasDecoder)
// Positions of the height and width dimensions, as reported by dataLayoutIndexed.
92 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
93 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
// Channel counts; channelsIndex is defined on a line not shown.
98 const unsigned int inputChannels = rInputShape[channelsIndex];
99 const unsigned int outputChannels = rOutputShape[channelsIndex];
// For depthwise mode: output channels per input channel (integer division).
100 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
// Batch size is taken from dimension 0 of the output shape.
102 const unsigned int batchSize = rOutputShape[0];
103 const unsigned int outputHeight = rOutputShape[heightIndex];
104 const unsigned int outputWidth = rOutputShape[widthIndex];
105 const unsigned int inputHeight = rInputShape[heightIndex];
106 const unsigned int inputWidth = rInputShape[widthIndex];
// Depthwise filters read height/width from fixed dimensions 1 and 2; otherwise the
// same layout-dependent indices as the input/output tensors are used.
108 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
109 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
// Decode the input and filter tensors into float vectors up front.
111 const std::vector<float> inputVec = rInputDecoder.
DecodeTensor(rInputShape);
112 const std::vector<float> filterVec = rFilterDecoder.
DecodeTensor(rFilterShape, depthwise);
// The bias is decoded only when enabled; otherwise an empty vector is used.
// biasShape is defined on a line not shown.
115 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->
DecodeTensor(biasShape) : std::vector<float>();
// Outer loops: one iteration per output element (batch, channel, row, column).
117 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
119 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
121 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
123 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
// Inner loops: accumulate over input channels (a single iteration in depthwise
// mode) and over every filter tap.
130 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
132 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
134 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
// Flat offset into filterVec. One of three formulas below is used; the conditions
// selecting between them are on lines not shown.
137 unsigned int filterIndex = 0;
// Overwrites the loop counter with the input channel that feeds this output
// channel. Presumably this is the depthwise branch (loop bound 1) -- TODO confirm.
142 cInput = cOutput / depthMultiplier;
// Variant 1: filter row, then filter column, with the output channel varying fastest.
144 filterIndex = xFilter * outputChannels + cOutput +
145 yFilter * filterWidth * outputChannels;
// Variant 2: output channel, filter row, filter column, each scaled by
// inputChannels; the final term of this expression is not shown.
153 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
154 yFilter * filterWidth * inputChannels +
155 xFilter * inputChannels +
// Variant 3: output channel, input channel, filter row; the final term of this
// expression is not shown.
160 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
161 cInput * filterWidth * filterHeight +
162 yFilter * filterWidth +
// Input coordinates in the padded frame (padding has not been subtracted yet).
// yStride / xStride are declared on lines not shown.
167 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
168 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
// True when the tap falls in the padding border rather than on real input data.
// The handling is on lines not shown; presumably a zero value is used -- TODO confirm.
173 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
174 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
// Flat offset into inputVec. Two formulas follow; the condition selecting between
// them is on lines not shown. Both subtract the padding to return to unpadded
// coordinates.
180 unsigned int inputIndex = 0;
// Variant 1: batch, row, column, each scaled by inputChannels; the final term of
// this expression is not shown.
186 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
187 (yInput - paddingTop) * inputWidth * inputChannels +
188 (xInput - paddingLeft) * inputChannels +
// Variant 2: batch, channel, row, column, with the column varying fastest.
193 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
194 inputWidth * inputHeight * cInput +
195 inputWidth * (yInput - paddingTop) +
196 xInput - paddingLeft;
198 inputValue = inputVec[inputIndex];
// Accumulate this tap's contribution; sum is declared on a line not shown.
201 sum += filterVec[filterIndex] * inputValue;
// Add the bias for this output channel. Presumably guarded by biasEnabled on a
// line not shown (biasVec is empty otherwise) -- TODO confirm.
208 sum += biasVec[cOutput];
// Flat offset of the output element. Two formulas follow; the condition selecting
// between them and the final term of each expression are not shown.
214 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
215 yOutput * outputWidth * outputChannels +
216 xOutput * outputChannels +
221 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
222 cOutput * outputHeight * outputWidth +
223 yOutput * outputWidth +
// operator[] is invoked with its result discarded; presumably it positions the
// encoder at outIdx so that Set() writes sum there -- confirm against the encoder type.
227 rOutputEncoder[outIdx];
228 rOutputEncoder.
Set(sum);