Compute Library 21.02
neon_gemm_qasymm8.cpp File Reference


Functions

void find_min_max (int size, const float *data, float *min, float *max)
 
QuantizationInfo choose_quantization_params (float min, float max)
 
void quantize_values (int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo)
 
int main (int argc, char **argv)
 

Function Documentation

◆ choose_quantization_params()

QuantizationInfo choose_quantization_params(float min, float max)

Definition at line 51 of file neon_gemm_qasymm8.cpp.

References arm_compute::test::validation::qinfo, arm_compute::support::cpp11::round(), and arm_compute::test::validation::scale.

Referenced by main().

{
    // Extend the [min,max] interval to contain 0 so we can represent it exactly
    min = std::min(min, 0.f);
    max = std::max(max, 0.f);

    // Set the quantized min and max in float values
    const float qmin = 0;
    const float qmax = 255;

    // Determine the scale
    const float scale = (max - min) / (qmax - qmin);

    // Determine the zero-point; using the affine equation val = (qval - zero_point) * scale
    const float zero_point_real = qmin - min / scale;

    // But we need to nudge the zero_point to an integer (exact quantized value)
    std::uint8_t zero_point_nudged = 0;
    if(zero_point_real < qmin)
    {
        zero_point_nudged = qmin;
    }
    else if(zero_point_real > qmax)
    {
        zero_point_nudged = qmax;
    }
    else
    {
        zero_point_nudged = static_cast<std::uint8_t>(support::cpp11::round(zero_point_real));
    }

    QuantizationInfo qinfo = QuantizationInfo(scale, zero_point_nudged);
    return qinfo;
}
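The scale maps the extended float range onto [0, 255], and the zero point is the (nudged) quantized value that represents 0.0f exactly. A minimal usage sketch with hypothetical inputs, not taken from the example file:

    // For data spanning [-1.0f, 5.0f]: scale = 6 / 255 ≈ 0.0235, and
    // zero_point_real = 0 - (-1.0f) / scale = 42.5, which rounds to 43.
    const QuantizationInfo qinfo = choose_quantization_params(-1.0f, 5.0f);
    std::cout << "scale=" << qinfo.scale()[0] << ", offset=" << qinfo.offset()[0] << "\n";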

◆ find_min_max()

void find_min_max(int size, const float *data, float *min, float *max)

Definition at line 38 of file neon_gemm_qasymm8.cpp.

Referenced by main().

{
    *min = *max = data[0];
    for(int i = 0; i < size; i++)
    {
        const float val = data[i];
        *min = std::min(*min, val);
        *max = std::max(*max, val);
    }
}
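A minimal usage sketch with hypothetical data, not taken from the example file:

    const float data[] = { 3.f, -1.f, 7.f, 0.5f };
    float min = 0.f;
    float max = 0.f;
    find_min_max(4, data, &min, &max); // min == -1.f, max == 7.f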

◆ main()

int main(int argc, char **argv)

Definition at line 95 of file neon_gemm_qasymm8.cpp.

References TensorAllocator::allocate(), Tensor::allocator(), Tensor::buffer(), arm_compute::quantization::calculate_quantized_multiplier_less_than_one(), choose_quantization_params(), NEQuantizationLayer::configure(), NEGEMMLowpMatrixMultiplyCore::configure(), NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(), arm_compute::F32, arm_compute::utils::fill_random_tensor(), find_min_max(), TensorAllocator::init(), K, M, N, UniformQuantizationInfo::offset, QuantizationInfo::offset(), ITensor::print(), arm_compute::QASYMM8, INESimpleFunctionNoBorder::run(), NEGEMMLowpMatrixMultiplyCore::run(), arm_compute::S32, UniformQuantizationInfo::scale, QuantizationInfo::scale(), and QuantizationInfo::uniform().

{
    Tensor src1;
    Tensor src2;
    Tensor dst0;
    Tensor q_src1;
    Tensor q_src2;
    Tensor q_dst0;
    Tensor q_res;
    Tensor q_res_output;
    size_t M = 4;
    size_t N = 4;
    size_t K = 4;
    bool   default_input = true;

    // Parse args
    if(argc < 4) /* case default matrix sizes */
    {
        // Print help
        std::cout << "Usage: ./build/neon_gemm_qasymm8 M N K\n";
        std::cout << "Too few or no inputs provided. Using default M=4, N=4, K=4\n\n";
    }
    else /* case M N K arguments provided */
    {
        M             = strtol(argv[1], nullptr, 10);
        N             = strtol(argv[2], nullptr, 10);
        K             = strtol(argv[3], nullptr, 10);
        default_input = false;
    }

    /*** Floating point matrix multiplication ***/

    // Initialise input matrices
    NEGEMM fgemm{};

    src1.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    src2.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    dst0.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
    fgemm.configure(&src1, &src2, nullptr, &dst0, 1, 0);

    // Allocate matrices
    src1.allocator()->allocate();
    src2.allocator()->allocate();
    dst0.allocator()->allocate();

    // Fill the tensors; by default use known data for easy testing
    auto *src1_ptr = reinterpret_cast<float *>(src1.buffer());
    auto *src2_ptr = reinterpret_cast<float *>(src2.buffer());
    auto *dst0_ptr = reinterpret_cast<float *>(dst0.buffer());

    // src1: identity matrix
    for(size_t i = 0; i < M * K; i++)
    {
        src1_ptr[i] = 0;
    }
    for(size_t i = 0; i < std::min(M, K); i++) // guard against M > K
    {
        src1_ptr[i * K + i] = 1.0f;
    }

    // src2: sequential values
    for(size_t i = 0; i < K * N; i++)
    {
        src2_ptr[i] = i * 1.123f;
    }

    // If M, N, K were given on the command line, use random values instead
    if(!default_input)
    {
        fill_random_tensor(src1, 0.f, 1.f);
        fill_random_tensor(src2, 0.f, 1.f);
    }

    // Run single precision gemm and print result
    fgemm.run();

#if ARM_COMPUTE_DEBUG_ENABLED
    std::cout << "Result matrix:\n";
    src1.print(std::cout);
    src2.print(std::cout);
    dst0.print(std::cout);
#endif // ARM_COMPUTE_DEBUG_ENABLED

    /*** Quantised asymmetric 8bit matrix multiplication ***/

    // Start by finding the quantisation parameters for each set of values
    float src1_min;
    float src1_max;
    float src2_min;
    float src2_max;
    float dst0_min;
    float dst0_max;

    find_min_max(M * K, src1_ptr, &src1_min, &src1_max);
    find_min_max(K * N, src2_ptr, &src2_min, &src2_max);
    find_min_max(M * N, dst0_ptr, &dst0_min, &dst0_max);

    const QuantizationInfo src1_qinfo = choose_quantization_params(src1_min, src1_max);
    const QuantizationInfo src2_qinfo = choose_quantization_params(src2_min, src2_max);
    const QuantizationInfo dst0_qinfo = choose_quantization_params(dst0_min, dst0_max);

    std::cout << "Matrix 1: min=" << src1_min << ", max=" << src1_max << ", ";
    std::cout << "QuantisationInfo(" << src1_qinfo.scale()[0] << ", " << src1_qinfo.offset()[0] << ")\n";
    std::cout << "Matrix 2: min=" << src2_min << ", max=" << src2_max << ", ";
    std::cout << "QuantisationInfo(" << src2_qinfo.scale()[0] << ", " << src2_qinfo.offset()[0] << ")\n";
    std::cout << "Result : min=" << dst0_min << ", max=" << dst0_max << ", ";
    std::cout << "QuantisationInfo(" << dst0_qinfo.scale()[0] << ", " << dst0_qinfo.offset()[0] << ")\n";

    // We now have the quantisation info and can configure the quantised tensors
    q_src1.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::QASYMM8, src1_qinfo));
    q_src2.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::QASYMM8, src2_qinfo));
    q_dst0.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::QASYMM8, dst0_qinfo));

    // In this approach we use the QuantizationLayer construct to perform quantization
    NEQuantizationLayer q1;
    NEQuantizationLayer q2;
    NEQuantizationLayer q3;
    q1.configure(&src1, &q_src1);
    q2.configure(&src2, &q_src2);
    q3.configure(&dst0, &q_dst0);

    // Configure low precision gemm and initialise result tensor (pre-output)
    NEGEMMLowpMatrixMultiplyCore qgemm;
    q_res.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::S32));
    qgemm.configure(&q_src1, &q_src2, nullptr, &q_res);

    // Configure output stage after computing shift and multiplier parameters
    NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint gemmlowp_output_stage;
    int   output_multiplier;
    int   output_shift;
    float multiplier = (src1_qinfo.uniform().scale * src2_qinfo.uniform().scale) / dst0_qinfo.uniform().scale;
    quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
    std::cout << "(q_multiplier, q_shift) = (" << output_multiplier << ", " << output_shift << ")\n\n";
    gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, output_multiplier, output_shift, dst0_qinfo.uniform().offset);

    // Allocate all tensors
    q_src1.allocator()->allocate();
    q_src2.allocator()->allocate();
    q_dst0.allocator()->allocate();
    q_res.allocator()->allocate();
    q_res_output.allocator()->allocate();

    // Run quantization layers (quantizes values of each tensor)
    q1.run();
    q2.run();
    q3.run();
    // Run low precision matrix multiply kernel
    qgemm.run();
    // Run output stage kernel
    gemmlowp_output_stage.run();
    std::cout << "Done\n";

#if ARM_COMPUTE_DEBUG_ENABLED
    // Print quantized source matrices
    q_src1.print(std::cout);
    q_src2.print(std::cout);
    // Print result matrix in int32 form - before output stage processing
    std::cout << "Lowp GEMM output (int32):\n";
    q_res.print(std::cout);
    // Print QASYMM8 (quantized) matrix
    std::cout << "Output pipeline result matrix:\n";
    q_res_output.print(std::cout);

    // Expected result
    std::cout << "Expected result:\n";
    q_dst0.print(std::cout);
#endif // ARM_COMPUTE_DEBUG_ENABLED
}
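The (q_multiplier, q_shift) pair printed above encodes the real multiplier (src1_scale * src2_scale) / dst0_scale in fixed point: multiplier ≈ q_multiplier * 2^-31 * 2^-q_shift, with q_multiplier in [2^30, 2^31). A minimal sketch of that encoding, assuming the standard gemmlowp convention used by calculate_quantized_multiplier_less_than_one(); the multiplier value below is hypothetical:

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        const float multiplier = 0.000437f; // hypothetical (src1_scale * src2_scale) / dst0_scale

        int          exp  = 0;
        const double frac = std::frexp(multiplier, &exp); // multiplier = frac * 2^exp, frac in [0.5, 1)
        const auto   q_multiplier = static_cast<int64_t>(std::round(frac * (1ll << 31)));
        // Rounding can push q_multiplier up to 2^31; the library folds that back
        // by halving the multiplier and decrementing the shift.
        std::cout << "(q_multiplier, q_shift) = (" << q_multiplier << ", " << -exp << ")\n";
    }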

◆ quantize_values()

void quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo)

Definition at line 86 of file neon_gemm_qasymm8.cpp.

References arm_compute::quantize_qasymm8().

{
    for(int i = 0; i < size; i++)
    {
        output[i] = quantize_qasymm8(input[i], qinfo);
    }
    std::cout << "\n";
}
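quantize_qasymm8() applies the inverse of the affine mapping used in choose_quantization_params(), i.e. q = clamp(round(value / scale) + offset, 0, 255). A minimal scalar sketch, assuming per-tensor uniform quantization; this illustrates the math, not the library's actual implementation:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Scalar affine quantization: the inverse of val = (q - offset) * scale.
    uint8_t quantize_sketch(float value, float scale, int32_t offset)
    {
        const int32_t q = offset + static_cast<int32_t>(std::lround(value / scale));
        return static_cast<uint8_t>(std::max(0, std::min(255, q))); // clamp to the uint8 range
    }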