// Rescale factor from the "high-resolution" input domain to the output domain.
// The input scale is divided by 128 here; this pairs with the table loop, where each
// zero-point-adjusted input value is multiplied by (1 << 7) before further processing.
95 const float hiresInputScale = (1.0f / 128.0f) * inputScale;
96 const float outputMultiplier = hiresInputScale / outputScale;
97 int outputMultiplierExponent;
98 int16_t outputMultiplierFixedpointInt16;
99 int32_t outputMultiplierFixedpointInt32;
// quantizeMultiplier fills the int32 fixed-point value and the exponent from the float multiplier.
// NOTE(review): presumably a mantissa / power-of-two decomposition -- helper not visible here, confirm.
101 quantizeMultiplier(outputMultiplier, &outputMultiplierFixedpointInt32, &outputMultiplierExponent);
// Produces the int16 form of the multiplier used by the 16-bit arithmetic below.
// NOTE(review): rounding/saturation behaviour of this helper is not visible here -- confirm.
102 downScaleInt32ToInt16Multiplier(outputMultiplierFixedpointInt32, &outputMultiplierFixedpointInt16);
// Rescale factor from the high-resolution input domain to the "reluish" domain,
// computed the same way as the output multiplier but against a fixed scale of 3/32768.
// NOTE(review): presumably this scale maps the real interval [-3, 3] onto the int16 range -- confirm.
106 const float reluishScale = 3.0f / 32768.0f;
107 const float reluishMultiplier = hiresInputScale / reluishScale;
108 int reluishMultiplierExponent;
109 int16_t reluishMultiplierFixedpointInt16;
110 int32_t reluishMultiplierFixedpointInt32;
// Same two-step conversion as for the output multiplier: float -> (int32 value, exponent) -> int16 value.
112 quantizeMultiplier(reluishMultiplier, &reluishMultiplierFixedpointInt32, &reluishMultiplierExponent);
113 downScaleInt32ToInt16Multiplier(reluishMultiplierFixedpointInt32, &reluishMultiplierFixedpointInt16);
// Lookup table built by the loop below: one value is appended per iteration.
115 std::vector<int16_t> table;
// Iterates i over -128..127, i.e. every value representable in int8 (256 iterations).
// NOTE(review): the braces delimiting the loop/if bodies are not visible in this excerpt;
// presumably everything through table.push_back() belongs to the loop body -- confirm.
117 for (int32_t i = -128; i < 128; i++)
// Remove the input zero point, then multiply by 128 (1 << 7) to move into the
// high-resolution input domain that hiresInputScale (= inputScale / 128) describes.
119 const int16_t inputValue =
static_cast<int16_t
>(i - inputZp);
120 const int16_t inputValueHiresInputScale =
static_cast<int16_t
>(inputValue * (1 << 7));
// --- "reluish" value: the high-resolution input rescaled by reluishMultiplier ---
122 int16_t reluishValue = inputValueHiresInputScale;
// For a positive exponent, all but one bit of the left shift is applied before the multiply...
123 if (reluishMultiplierExponent > 0)
125 reluishValue = gemmlowp::ShiftLeft(reluishValue, reluishMultiplierExponent - 1);
128 reluishValue = gemmlowp::SaturatingRoundingDoublingHighMul(reluishValue, reluishMultiplierFixedpointInt16);
// ...and the remaining single-bit shift is applied after it. A negative exponent is instead
// applied after the multiply as a rounding right shift. An exponent of 0 needs no shift.
130 if (reluishMultiplierExponent > 0)
132 reluishValue = gemmlowp::ShiftLeft(reluishValue, 1);
134 else if (reluishMultiplierExponent < 0)
136 reluishValue = gemmlowp::RoundingDivideByPOT(reluishValue, -reluishMultiplierExponent);
// Add 32768 and halve: an int16 input in [-32768, 32767] becomes a value in [0, 32767].
// The addition is carried out in int (integer promotion), so it cannot overflow int16.
139 reluishValue =
static_cast<int16_t
>((reluishValue + (1 << 15)) >> 1);
// --- output path: input rescaled by the output multiplier, then multiplied by the reluish value ---
// Only the fixed-point multiply is applied here; the exponent is applied after the product below.
141 const int16_t inputValPreshiftOutputScale =
142 gemmlowp::SaturatingRoundingDoublingHighMul(inputValueHiresInputScale, outputMultiplierFixedpointInt16);
// NOTE(review): saturatingDoublingHighMul is a helper not visible here; by its name it presumably
// differs from the gemmlowp call above by not rounding -- confirm.
144 const int16_t preshiftOutputValue = saturatingDoublingHighMul(reluishValue, inputValPreshiftOutputScale);
// Apply the output multiplier's exponent as a rounding right shift.
// NOTE(review): the negated exponent is passed as the shift amount, so this appears to assume
// outputMultiplierExponent <= 0 -- confirm that is guaranteed for all supported scales.
146 int16_t outputValue = gemmlowp::RoundingDivideByPOT(preshiftOutputValue, -outputMultiplierExponent);
// Add the output zero point and clamp to the int8 range [-128, 127] before storing.
148 outputValue =
static_cast<int16_t
>(outputValue + outputZp);
149 outputValue = std::min<int16_t>(outputValue, std::numeric_limits<int8_t>::max());
150 outputValue = std::max<int16_t>(outputValue, std::numeric_limits<int8_t>::min());
// Stored as int16_t elements even though the value has been clamped to the int8 range.
152 table.push_back(outputValue);
// Function-like macro taking a single argument (_cond). As it appears here its replacement
// list is empty, so an invocation would expand to nothing.
// NOTE(review): the name suggests it should throw when _cond is false; the real body is
// presumably defined elsewhere or was cut off in this excerpt -- confirm.
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)