Compute Library
 22.02
arm_compute::quantization Namespace Reference

Functions

Status calculate_quantized_multiplier (float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
 Calculate quantized representation of multiplier. More...
 
Status calculate_quantized_multiplier_less_than_one (float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon=false)
 Calculate quantized representation of multiplier with value less than one. More...
 
Status calculate_quantized_multiplier_greater_than_one (float multiplier, int32_t *quantized_multiplier, int32_t *left_shift)
 Calculate quantized representation of multiplier having value greater than one. More...
 
Status calculate_quantized_multipliers (const QuantizationInfo &iq_info, const QuantizationInfo &wq_info, const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info)
 Calculate quantized representation of per-channel multipliers. More...
 
std::pair< int, int > get_min_max_values_from_quantized_data_type (DataType data_type)
 Get minimum and maximum values for the input quantized data type. More...
 
void compute_quantized_multipliers_and_shifts (const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr)
 Compute quantized per-channel multipliers and shifts. More...
 
int32_t rounding_divide_by_pow2 (int32_t x, int exponent)
 Round to the nearest division by a power-of-two using exponent, copied from NEMath. More...
 
int32_t saturating_rounding_doubling_highmul (int32_t a, int32_t b)
 Compute multiplication of two integers. More...
 
int32_t multiply_by_quantized_multiplier (int32_t input, int32_t qmul, int32_t shift)
 Compute the value multiplied by given quantized multiplier and shift. More...
 
int32_t saturating_rounding_multiply_by_pow2 (int32_t exponent, int32_t v)
 Compute the value multiplied by the power-of-two. More...
 
void get_invsqrt_quantized_multiplier_exp (int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift)
 Compute quantized multiplier and shift for the inverse square root of input. More...
 

Variables

constexpr int64_t fixed_point_one_Q0 = (1LL << 31)
 
constexpr float epsilon = 0.00001f
 

Function Documentation

◆ calculate_quantized_multiplier()

Status calculate_quantized_multiplier ( float  multiplier,
int32_t *  quant_multiplier,
int32_t *  shift,
bool  ignore_epsilon = false 
)

Calculate quantized representation of multiplier.

Parameters
[in] multiplier: Real multiplier.
[out] quant_multiplier: Integer multiplier.
[out] shift: Bit shift. A negative value indicates a left shift, while a positive value indicates a right shift.
[in] ignore_epsilon: When true, ignore the pre-defined epsilon value. Defaults to false.
Returns
a status

Definition at line 39 of file AsymmHelpers.cpp.

References calculate_quantized_multiplier_greater_than_one(), and calculate_quantized_multiplier_less_than_one().

Referenced by calculate_quantized_multipliers(), CLQLSTMLayer::CLQLSTMLayer(), compute_quantized_multipliers_and_shifts(), NEQLSTMLayerNormalizationKernel::configure(), CpuDepthwiseConv2dNativeKernel::configure(), ClDirectConv2dKernel::configure(), CLQLSTMLayerNormalizationKernel::configure(), ClDirectConv3dKernel::configure(), CLDepthwiseConvolutionLayerNativeKernel::configure(), NELSTMLayerQuantized::configure(), CLLSTMLayerQuantized::configure(), NEQLSTMLayer::configure(), CLQLSTMLayer::configure(), arm_compute::test::convolution_3d::detail::convolution3d(), arm_compute::cpu::directconv3d_quantized_neon_ndhwc(), CpuPool2dAssemblyWrapperKernel::is_configured(), NEQLSTMLayer::NEQLSTMLayer(), arm_compute::test::validation::reference::qlstm_layer_normalization(), CpuPool2dAssemblyWrapperKernel::validate(), NELSTMLayerQuantized::validate(), CLLSTMLayerQuantized::validate(), NEQLSTMLayer::validate(), and CLQLSTMLayer::validate().

40 {
41  if(multiplier >= 1.f)
42  {
43  Status status = calculate_quantized_multiplier_greater_than_one(multiplier, quant_multiplier, shift);
44  *shift *= -1;
45  return status;
46  }
47  else
48  {
49  return calculate_quantized_multiplier_less_than_one(multiplier, quant_multiplier, shift, ignore_epsilon);
50  }
51 }
Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift)
Calculate quantized representation of multiplier having value greater than one.
Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier with value less than one.

◆ calculate_quantized_multiplier_greater_than_one()

Status calculate_quantized_multiplier_greater_than_one ( float  multiplier,
int32_t *  quantized_multiplier,
int32_t *  left_shift 
)

Calculate quantized representation of multiplier having value greater than one.

Parameters
[in] multiplier: Real multiplier.
[out] quantized_multiplier: Integer multiplier.
[out] left_shift: Left bit shift.
Returns
a status

Definition at line 89 of file AsymmHelpers.cpp.

References ARM_COMPUTE_RETURN_ERROR_ON, and arm_compute::support::cpp11::round().

Referenced by calculate_quantized_multiplier().

92 {
93  ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr);
94  ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr);
95  ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f);
96 
97  int shift_exp = 0;
98  const double q = std::frexp(multiplier, &shift_exp);
99  *left_shift = shift_exp;
100  auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0));
102  if(q_fixed == fixed_point_one_Q0)
103  {
104  q_fixed /= 2;
105  ++*left_shift;
106  }
107  ARM_COMPUTE_RETURN_ERROR_ON(*left_shift < 0);
108  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
109  *quantized_multiplier = static_cast<int32_t>(q_fixed);
110 
111  return Status{};
112 }
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
constexpr int64_t fixed_point_one_Q0

◆ calculate_quantized_multiplier_less_than_one()

Status calculate_quantized_multiplier_less_than_one ( float  multiplier,
int32_t *  quant_multiplier,
int32_t *  right_shift,
bool  ignore_epsilon = false 
)

Calculate quantized representation of multiplier with value less than one.

Parameters
[in] multiplier: Real multiplier.
[out] quant_multiplier: Integer multiplier.
[out] right_shift: Right bit shift.
[in] ignore_epsilon: When true, ignore the pre-defined epsilon value. Defaults to false.
Returns
a status

Definition at line 53 of file AsymmHelpers.cpp.

References ARM_COMPUTE_RETURN_ERROR_ON, epsilon, and arm_compute::support::cpp11::round().

Referenced by calculate_quantized_multiplier(), and main().

57 {
58  const float internal_epsilon = ignore_epsilon ? 0.0f : epsilon;
59 
60  ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
61  ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
62  ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -internal_epsilon);
63  ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + internal_epsilon);
64 
65  int shift_exp = 0;
66  const double q = std::frexp(multiplier, &shift_exp);
67  *right_shift = -1 * shift_exp;
68  auto q_fixed = static_cast<int64_t>(support::cpp11::round(q * fixed_point_one_Q0));
70  if(q_fixed == fixed_point_one_Q0)
71  {
72  q_fixed /= 2;
73  --*right_shift;
74  }
75 
76  if(ignore_epsilon && *right_shift > 31)
77  {
78  *right_shift = 0;
79  q_fixed = 0;
80  }
81 
82  ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0);
83  ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
84  *quant_multiplier = static_cast<int32_t>(q_fixed);
85 
86  return Status{};
87 }
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
constexpr int64_t fixed_point_one_Q0

◆ calculate_quantized_multipliers()

arm_compute::Status calculate_quantized_multipliers ( const QuantizationInfo iq_info,
const QuantizationInfo wq_info,
const QuantizationInfo oq_info,
GEMMLowpOutputStageInfo stage_info 
)

Calculate quantized representation of per-channel multipliers.

Parameters
[in] iq_info: Input quantization info.
[in] wq_info: Weights quantization info.
[in] oq_info: Output quantization info.
[in,out] stage_info: GemmLowp output stage info.
Returns
a status

Definition at line 114 of file AsymmHelpers.cpp.

References ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ON_ERROR, calculate_quantized_multiplier(), GEMMLowpOutputStageInfo::gemmlowp_multiplier, GEMMLowpOutputStageInfo::gemmlowp_multipliers, GEMMLowpOutputStageInfo::gemmlowp_shift, GEMMLowpOutputStageInfo::gemmlowp_shifts, and QuantizationInfo::scale().

118 {
119  ARM_COMPUTE_RETURN_ERROR_ON(iq_info.scale().empty());
120  ARM_COMPUTE_RETURN_ERROR_ON(wq_info.scale().empty());
121  ARM_COMPUTE_RETURN_ERROR_ON(oq_info.scale().empty());
122 
123  const unsigned int size = wq_info.scale().size();
124 
125  auto &quant_multipliers = stage_info.gemmlowp_multipliers;
126  auto &quant_shifts = stage_info.gemmlowp_shifts;
127  quant_multipliers.resize(size);
128  quant_shifts.resize(size);
129 
130  const auto &w_scales = wq_info.scale();
131  const float i_scale = iq_info.scale().at(0);
132  const float o_scale = oq_info.scale().at(0);
133 
134  for(unsigned int i = 0; i < size; ++i)
135  {
136  const float multiplier = i_scale * w_scales[i] / o_scale;
137  int32_t quant_multiplier = 0;
138  int32_t quant_shift = 0;
139  ARM_COMPUTE_RETURN_ON_ERROR(calculate_quantized_multiplier(multiplier, &quant_multiplier, &quant_shift));
140  quant_multipliers[i] = quant_multiplier;
141  quant_shifts[i] = quant_shift;
142  }
143 
144  // Legacy part
145  stage_info.gemmlowp_shift = quant_shifts[0];
146  stage_info.gemmlowp_multiplier = quant_multipliers[0];
147 
148  return Status{};
149 }
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296

◆ compute_quantized_multipliers_and_shifts()

void compute_quantized_multipliers_and_shifts ( const ITensorInfo input,
const ITensorInfo weights,
const ITensorInfo output,
int32_t *  output_multipliers_ptr,
int32_t *  output_shifts_ptr 
)

Compute quantized per-channel multipliers and shifts.

As many multipliers and shifts as output channels are computed. If weights are not quantized per-channel, multipliers and shifts will end up being the same for each channel.

Parameters
[in] input: Input tensor info.
[in] weights: Weights tensor info.
[in] output: Output tensor info.
[out] output_multipliers_ptr: Pointer to the buffer where the per-channel multipliers are stored.
[out] output_shifts_ptr: Pointer to the buffer where the per-channel shifts are stored.
Returns
Nothing (the function is void); the results are written to output_multipliers_ptr and output_shifts_ptr.

Definition at line 179 of file AsymmHelpers.cpp.

References calculate_quantized_multiplier(), ITensorInfo::quantization_info(), UniformQuantizationInfo::scale, QuantizationInfo::scale(), and QuantizationInfo::uniform().

Referenced by ClGemmConv2d::configure(), CLDepthwiseConvolutionLayer::prepare(), and ClGemmConv2d::validate().

184 {
185  const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
186  const QuantizationInfo wq_info = weights->quantization_info();
187  const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
188 
189  const unsigned int num_filters = wq_info.scale().size();
190 
191  for(unsigned int i = 0; i < num_filters; ++i)
192  {
193  int32_t output_multiplier = 0;
194  int32_t output_shift = 0;
195  const float multiplier = iq_info.scale * wq_info.scale()[i] / oq_info.scale;
196  calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
197 
198  output_multipliers_ptr[i] = output_multiplier;
199  output_shifts_ptr[i] = output_shift;
200  }
201 }
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.

◆ get_invsqrt_quantized_multiplier_exp()

void get_invsqrt_quantized_multiplier_exp ( int32_t  input,
int32_t  reverse_shift,
int32_t &  output_inv_sqrt,
int32_t &  output_shift 
)

Compute quantized multiplier and shift for the inverse square root of input.

Using 3-bit fixed point and 5 iterations of the Newton-Raphson method.

Parameters
[in] input: Input to use.
[in] reverse_shift: -1 to reverse the shift direction.
[out] output_inv_sqrt: Quantized multiplier for the inverse square root.
[out] output_shift: Shift for the inverse square root.

Definition at line 255 of file AsymmHelpers.cpp.

References ARM_COMPUTE_ERROR_ON, arm_compute::test::validation::b, rounding_divide_by_pow2(), saturating_rounding_doubling_highmul(), and saturating_rounding_multiply_by_pow2().

Referenced by arm_compute::test::validation::reference::qlstm_layer_normalization(), and NEQLSTMLayerNormalizationKernel::run().

256 {
258 
259  if(input <= 1)
260  {
261  // dealing the inputs (0 and 1) separately to avoid overflow
262  output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
263  output_shift = 0;
264  return;
265  }
266 
267  // prepare input for fixed point operation and compute shift value
268  output_shift = 11;
269  while(input >= (1 << 29))
270  {
271  input /= 4;
272  ++output_shift;
273  }
274 
275  const uint32_t max_left_shift_bits = __builtin_clz(static_cast<uint32_t>(input)) - 1;
276  const uint32_t max_left_shift_bits_pairs = max_left_shift_bits / 2;
277  const uint32_t left_shift_bit_pairs = max_left_shift_bits_pairs - 1;
278  output_shift -= left_shift_bit_pairs;
279  input <<= 2 * left_shift_bit_pairs;
280 
281  // Calculation in fixed point domain with 3 integer bits.
282  using FixedPointRawType = int32_t;
283  constexpr uint32_t fixedpoint_position = 3;
284  constexpr uint32_t fixedpoint_int_position = sizeof(FixedPointRawType) * 8 - 1 - fixedpoint_position;
285  using FixedPoint3 = FixedPointRawType;
286  using FixedPoint0 = FixedPointRawType;
287 
288  // fixed point representation of input divided by 2 and 1.5 for Newton-Raphson iteration
289  const FixedPoint3 fixedpoint_input = (input >> 1);
290  const FixedPoint3 fixedpoint_half_input = rounding_divide_by_pow2(fixedpoint_input, 1);
291  const FixedPoint3 fixedpoint_half_three = (0x1 << fixedpoint_int_position) + (0x1 << (fixedpoint_int_position - 1));
292 
293  // initial guess (1) in fixed point representation
294  FixedPoint3 x = 0x1 << fixedpoint_int_position;
295 
296  // multiplication of two fixed point numbers, defined for readability
297  auto fixed_point_mul = [](FixedPointRawType a, FixedPointRawType b) -> FixedPointRawType
298  {
299  return saturating_rounding_doubling_highmul(a, b);
300  };
301 
302  // rescaling of fixed point to have dst_bit integer bits, defined for readability
303  auto fixed_point_rescale = [](FixedPointRawType a, uint32_t src_bit, uint32_t dst_bit) -> FixedPointRawType
304  {
305  const uint32_t exponent = src_bit - dst_bit;
306  return saturating_rounding_multiply_by_pow2(exponent, a);
307  };
308 
309  // 5 iterations of Newton-Raphson method for inverse square root - 1.5 * x_n = input/2 * (x_n)^3
310  constexpr int32_t num_iteration = 5;
311  for(int32_t i = 0; i < num_iteration; ++i)
312  {
313  const auto x3 = fixed_point_rescale(fixed_point_mul(fixed_point_mul(x, x), x), 9, fixedpoint_position);
314  x = fixed_point_rescale(fixed_point_mul(fixedpoint_half_three, x) - fixed_point_mul(fixedpoint_half_input, x3), 6, fixedpoint_position);
315  }
316 
317  // fixed point representation of sqrt(1/2)
318  const FixedPoint0 fixedpoint_half_sqrt_2 = 1518500250;
319  x = fixed_point_mul(fixedpoint_half_sqrt_2, x);
320  output_inv_sqrt = x;
321  if(output_shift < 0)
322  {
323  output_inv_sqrt <<= -output_shift;
324  output_shift = 0;
325  }
326  // convert right shift to left shift
327  output_shift *= reverse_shift;
328 }
SimpleTensor< float > b
Definition: DFT.cpp:157
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b)
Compute multiplication of two integers.
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
Round to the nearest division by a power-of-two using exponent, copied from NEMath.
int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v)
Compute the value multiplied by the power-of-two.

◆ get_min_max_values_from_quantized_data_type()

std::pair< int, int > get_min_max_values_from_quantized_data_type ( DataType  data_type)

Get minimum and maximum values for the input quantized data type.

Returns
min and max values for the quantized data type

Definition at line 151 of file AsymmHelpers.cpp.

References ARM_COMPUTE_ERROR, arm_compute::QASYMM16, arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, arm_compute::QSYMM16, and arm_compute::QSYMM8.

Referenced by ClQuantizeKernel::configure(), ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel::configure(), ClGemmLowpQuantizeDownInt32ScaleKernel::configure(), CLQLSTMLayerNormalizationKernel::configure(), and CpuGemmLowpQuantizeDownInt32ScaleKernel::configure().

152 {
153  int min_quant_val = 0;
154  int max_quant_val = 0;
155  switch(data_type)
156  {
157  case DataType::QASYMM8:
158  min_quant_val = std::numeric_limits<uint8_t>::min();
159  max_quant_val = std::numeric_limits<uint8_t>::max();
160  break;
161  case DataType::QSYMM8:
162  case DataType::QASYMM8_SIGNED:
163  min_quant_val = std::numeric_limits<int8_t>::min();
164  max_quant_val = std::numeric_limits<int8_t>::max();
165  break;
166  case DataType::QASYMM16:
167  min_quant_val = std::numeric_limits<uint16_t>::min();
168  max_quant_val = std::numeric_limits<uint16_t>::max();
169  break;
170  case DataType::QSYMM16:
171  min_quant_val = std::numeric_limits<int16_t>::min();
172  max_quant_val = std::numeric_limits<int16_t>::max();
173  break;
174  default:
175  ARM_COMPUTE_ERROR("Unsupported data type");
176  }
177  return std::make_pair(min_quant_val, max_quant_val);
178 }
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
const DataType data_type
Definition: Im2Col.cpp:150

◆ multiply_by_quantized_multiplier()

int32_t multiply_by_quantized_multiplier ( int32_t  input,
int32_t  qmul,
int32_t  shift 
)

Compute the value multiplied by given quantized multiplier and shift.

Parameters
[in] input: Target value to multiply.
[in] qmul: Quantized multiplier.
[in] shift: Left bit shift. A positive value shifts left; a negative value shifts right by its magnitude.
Returns
The multiplied value

Definition at line 222 of file AsymmHelpers.cpp.

References rounding_divide_by_pow2(), and saturating_rounding_doubling_highmul().

Referenced by arm_compute::test::validation::reference::qlstm_layer_normalization(), and NEQLSTMLayerNormalizationKernel::run().

223 {
224  const auto left_shift = shift > 0 ? shift : 0;
225  const auto right_shift = shift > 0 ? 0 : -shift;
226  return rounding_divide_by_pow2(saturating_rounding_doubling_highmul(input * (1 << left_shift), qmul), right_shift);
227 }
int32_t saturating_rounding_doubling_highmul(int32_t a, int32_t b)
Compute multiplication of two integers.
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
Round to the nearest division by a power-of-two using exponent, copied from NEMath.

◆ rounding_divide_by_pow2()

int32_t rounding_divide_by_pow2 ( int32_t  x,
int  exponent 
)
inline

Round to the nearest division by a power-of-two using exponent, copied from NEMath.

Note
This function calculates x / 2^n rounded to the nearest integer, with ties rounded away from zero, where n = exponent
Parameters
[in] x: Element to divide.
[in] exponent: Integer value used to round to nearest division by a power-of-two.
Returns
the nearest division by a power-of-two using exponent

Definition at line 215 of file AsymmHelpers.cpp.

Referenced by get_invsqrt_quantized_multiplier_exp(), multiply_by_quantized_multiplier(), and saturating_rounding_multiply_by_pow2().

216 {
217  const int32_t mask = (1 << exponent) - 1;
218  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
219  return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
220 }

◆ saturating_rounding_doubling_highmul()

int32_t saturating_rounding_doubling_highmul ( int32_t  a,
int32_t  b 
)

Compute multiplication of two integers.

Parameters
[in] a: One integer to multiply.
[in] b: Another integer to multiply.
Returns
The multiplied value

Definition at line 203 of file AsymmHelpers.cpp.

References arm_compute::support::cpp11::signbit().

Referenced by get_invsqrt_quantized_multiplier_exp(), and multiply_by_quantized_multiplier().

204 {
205  bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
206  int64_t a_64(a);
207  int64_t b_64(b);
208  int64_t ab_64 = a_64 * b_64;
209  bool is_positive_or_zero = a == 0 || b == 0 || (std::signbit(a) == std::signbit(b));
210  int32_t nudge = is_positive_or_zero ? (1 << 30) : (1 - (1 << 30));
211  int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
212  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
213 }
SimpleTensor< float > b
Definition: DFT.cpp:157

◆ saturating_rounding_multiply_by_pow2()

int32_t saturating_rounding_multiply_by_pow2 ( int32_t  exponent,
int32_t  v 
)

Compute the value multiplied by the power-of-two.

Parameters
[in] exponent: Exponent used to calculate the power-of-two.
[in] v: Target value to multiply.
Returns
The multiplied value

Definition at line 229 of file AsymmHelpers.cpp.

References rounding_divide_by_pow2().

Referenced by get_invsqrt_quantized_multiplier_exp().

230 {
231  if(exponent == 0)
232  {
233  return v;
234  }
235  else if(exponent < 0)
236  {
237  return rounding_divide_by_pow2(v, -exponent);
238  }
239  else
240  {
241  constexpr auto min = std::numeric_limits<int32_t>::min();
242  constexpr auto max = std::numeric_limits<int32_t>::max();
243  const auto width = sizeof(int32_t) * 8;
244 
245  const int32_t threshold = ((1 << (width - 1 - exponent)) - 1);
246  bool pos_mask = v > threshold;
247  bool neg_mask = v < -threshold;
248  int32_t result = v << exponent;
249  result = pos_mask ? max : result;
250  result = neg_mask ? min : result;
251  return result;
252  }
253 }
int32_t rounding_divide_by_pow2(int32_t x, int exponent)
Round to the nearest division by a power-of-two using exponent, copied from NEMath.

Variable Documentation

◆ epsilon

constexpr float epsilon = 0.00001f

Definition at line 37 of file AsymmHelpers.cpp.

Referenced by GraphBuilder::add_batch_normalization_node(), GraphBuilder::add_l2_normalize_node(), arm_compute::test::validation::reference::batch_normalization_layer(), calculate_quantized_multiplier_less_than_one(), NEMeanStdDevNormalizationKernel::configure(), CLMeanStdDevNormalizationKernel::configure(), NEFuseBatchNormalizationKernel::configure(), NEBatchNormalizationLayerKernel::configure(), CLL2NormalizeLayerKernel::configure(), NEInstanceNormalizationLayer::configure(), CLBatchNormalizationLayerKernel::configure(), CLFuseBatchNormalizationKernel::configure(), arm_compute::graph::backends::detail::create_batch_normalization_layer(), arm_compute::graph::backends::detail::create_fused_convolution_batch_normalization_layer(), arm_compute::graph::backends::detail::create_fused_convolution_batch_normalization_with_post_op(), arm_compute::graph::backends::detail::create_fused_depthwise_convolution_batch_normalization_layer(), arm_compute::graph::backends::detail::create_l2_normalize_layer(), AssetsLibrary::fill_boxes(), arm_compute::cpu::fp32_neon_batch_normalization(), arm_compute::test::validation::reference::fuse_batch_normalization_conv_layer(), arm_compute::graph::detail::fuse_convolution_batch_normalization_with_post_ops(), arm_compute::graph::detail::fuse_convolution_with_batch_normalization(), arm_compute::graph::detail::fuse_depthwise_convolution_with_batch_normalization(), arm_compute::test::validation::reference::instance_normalization(), arm_compute::cpu::instance_normalization_nchw(), arm_compute::helpers::float_ops::is_one(), arm_compute::helpers::float_ops::is_zero(), arm_compute::test::validation::reference::l2_normalize(), arm_compute::test::validation::reference::mean_std_normalization_layer(), arm_compute::cpu::mean_stddev_normalization(), NEL2NormalizeLayerKernel::name(), NEFuseBatchNormalizationKernel::name(), NEInstanceNormalizationLayerKernel::name(), NEBatchNormalizationLayerKernel::name(), NEMeanStdDevNormalizationKernel::name(), 
arm_compute::cpu::neon_fp32_instancenorm(), arm_compute::cpu::neon_fp32_meanstddevnorm(), compare< RelativeTolerance< U > >::operator bool(), AbsoluteTolerance< U >::operator U(), RelativeTolerance< U >::operator value_type(), RangedUniformDistribution< T >::RangedUniformDistribution(), arm_compute::test::round_half_even(), arm_compute::utils::rounding::round_half_even(), NEInstanceNormalizationLayer::validate(), and arm_compute::graph::backends::detail::validate_l2_normalize_layer().

◆ fixed_point_one_Q0

constexpr int64_t fixed_point_one_Q0 = (1LL << 31)

Definition at line 36 of file AsymmHelpers.cpp.