29 #include "utils/Utils.h" 35 using namespace utils;
/** Find the minimum and maximum value in a float array.
 *
 * @param[in]  size Number of elements in @p data. If size <= 0 (or data is null),
 *                  *min and *max are left untouched.
 * @param[in]  data Pointer to the input values.
 * @param[out] min  Receives the smallest element of the array.
 * @param[out] max  Receives the largest element of the array.
 */
void find_min_max(int size, const float *data, float *min, float *max)
{
    // Guard against an empty input: the unconditional read of data[0] below
    // would otherwise be undefined behaviour.
    if(size <= 0 || data == nullptr)
    {
        return;
    }
    *min = *max = data[0];
    // Element 0 already seeded the result, so the scan can start at index 1.
    for(int i = 1; i < size; i++)
    {
        const float val = data[i];
        *min = std::min(*min, val);
        *max = std::max(*max, val);
    }
}
// --- Fragment of choose_quantization_params(float min, float max) ---
// NOTE(review): this capture is incomplete — the function opening, the qmin
// constant declaration, the final else-branch and the return statement are
// missing from the extraction; do not treat this span as compilable.
// Extend [min, max] so it always contains 0, so that 0 is exactly representable.
54 min = std::min(min, 0.f);
55 max = std::max(max, 0.f);
// Top of the quantized uint8 range; qmin is used below but its declaration
// (presumably const float qmin = 0) is not visible in this capture — verify.
59 const float qmax = 255;
// Affine scale that maps the real range [min, max] onto [qmin, qmax].
62 const float scale = (max - min) / (qmax - qmin);
// Real-valued zero point derived from the affine relation val = (qval - zero_point) * scale.
65 const float zero_point_real = qmin - min /
scale;
// Nudge the zero point to an integer, clamped into [qmin, qmax].
68 std::uint8_t zero_point_nudged = 0;
69 if(zero_point_real < qmin)
71 zero_point_nudged = qmin;
73 else if(zero_point_real > qmax)
75 zero_point_nudged = qmax;
// --- Fragment of quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo) ---
// Per-element quantization loop; the loop body (presumably a quantize_qasymm8
// call per element — see the tooltip text later in this page) is elided here.
88 for(
int i = 0; i < size; i++)
// --- Fragment of main(int argc, char **argv) ---
// NOTE(review): large spans of the original example (tensor declarations,
// allocations, the quantization-parameter computation, fgemm/qgemm run calls)
// were dropped by the extraction; only the lines below survive.
95 int main(
int argc,
char **argv)
// Track whether the default M=N=K=4 sizes are in use or came from argv.
108 bool default_input =
true;
114 std::cout <<
"Usage: ./build/neon_gemm_qasymm8 M N K\n";
115 std::cout <<
"Too few or no inputs provided. Using default M=4, N=4, K=4\n\n";
// Parse the three matrix dimensions from the command line (base 10).
119 M = strtol(argv[1],
nullptr, 10);
120 N = strtol(argv[2],
nullptr, 10);
121 K = strtol(argv[3],
nullptr, 10);
122 default_input =
false;
// Configure the reference FP32 GEMM: dst0 = 1 * src1 * src2 + 0 (no bias tensor).
133 fgemm.configure(&src1, &src2,
nullptr, &dst0, 1, 0);
// Raw float views onto the tensors' CPU buffers for manual initialisation.
141 auto *src1_ptr = reinterpret_cast<float *>(src1.
buffer());
142 auto *src2_ptr = reinterpret_cast<float *>(src2.
buffer());
143 auto *dst0_ptr = reinterpret_cast<float *>(dst0.
buffer());
// Zero-fill loop over src1 (M x K); its body is elided in this capture.
147 for(
size_t i = 0; i <
M *
K; i++) {
// Set src1's main diagonal to 1 (identity-like pattern).
150 for(
size_t i = 0; i <
M; i++) {
151 src1_ptr[i *
K + i] = 1.0f;
// Fill src2 (K x N) with a deterministic ramp of values.
155 for(
size_t i = 0; i <
K *
N; i++) {
156 src2_ptr[i] = i * 1.123f;
// Debug-only dump of the float inputs and the FP32 GEMM result.
169 #if ARM_COMPUTE_DEBUG_ENABLED 170 std::cout <<
"Result matrix:\n";
171 src1.
print(std::cout);
172 src2.
print(std::cout);
173 dst0.
print(std::cout);
// Report the observed min/max of each tensor and the quantization parameters
// (scale, offset) chosen from them.
174 #endif // ARM_COMPUTE_DEBUG_ENABLED 194 std::cout <<
"Matrix 1: min=" << src1_min <<
", max=" << src1_max <<
", ";
195 std::cout <<
"QuantisationInfo(" << src1_qinfo.
scale()[0] <<
", " << src1_qinfo.
offset()[0] <<
")\n";
196 std::cout <<
"Matrix 2: min=" << src2_min <<
", max=" << src2_max <<
", ";
197 std::cout <<
"QuantisationInfo(" << src2_qinfo.
scale()[0] <<
", " << src2_qinfo.
offset()[0] <<
")\n";
198 std::cout <<
"Result : min=" << dst0_min <<
", max=" << dst0_max <<
", ";
199 std::cout <<
"QuantisationInfo(" << dst0_qinfo.
scale()[0] <<
", " << dst0_qinfo.
offset()[0] <<
")\n";
// Configure the low-precision GEMM core: q_res (int32) from the two quantized
// uint8 inputs, no bias.
217 qgemm.
configure(&q_src1, &q_src2,
nullptr, &q_res);
// Fixed-point multiplier/shift pair used by the int32 -> uint8 output stage
// (computed from the scales; that computation is elided in this capture).
221 int output_multiplier;
225 std::cout <<
"(q_multiplier, q_shift) = (" << output_multiplier <<
", " << output_shift <<
")\n\n";
// Configure the output stage: requantize q_res to uint8 using the multiplier,
// shift and the result tensor's zero-point offset.
226 gemmlowp_output_stage.
configure(&q_res,
nullptr, &q_res_output, output_multiplier, output_shift, dst0_qinfo.
uniform().
offset);
// Run the requantization pipeline.
242 gemmlowp_output_stage.
run();
243 std::cout <<
"Done\n";
// Debug-only dump of the quantized inputs, the raw int32 GEMM output, the
// requantized result, and the expected (directly quantized FP32) result.
245 #if ARM_COMPUTE_DEBUG_ENABLED 247 q_src1.
print(std::cout);
248 q_src2.
print(std::cout);
250 std::cout <<
"Lowp GEMM output (int32):\n";
251 q_res.
print(std::cout);
253 std::cout <<
"Output pipeline result matrix:\n";
254 q_res_output.
print(std::cout);
257 std::cout <<
"Expected result:\n";
258 q_dst0.
print(std::cout);
259 #endif // ARM_COMPUTE_DEBUG_ENABLED
const std::vector< int32_t > & offset() const
Offset vector accessor.
void run() override final
Run the kernels contained in the function.
Basic function to execute GEMM.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint.
void quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo)
1 channel, 1 F32 per channel
Basic function to run a quantization layer using cpu::CpuQuantization.
void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min=std::numeric_limits< int32_t >::lowest(), int max=std::numeric_limits< int32_t >::max())
Initialise the kernel's inputs, output.
Includes all the Arm® Neon™ functions at once.
Copyright (c) 2017-2021 Arm Limited.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void fill_random_tensor(TensorType &tensor, std::random_device::result_type seed, T lower_bound=std::numeric_limits< T >::lowest(), T upper_bound=std::numeric_limits< T >::max())
int main(int argc, char **argv)
1 channel, 1 S32 per channel
Quantization information.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
const std::vector< float > & scale() const
Scale vector accessor.
Basic implementation of the tensor interface.
void configure(const ITensor *input, ITensor *output)
Set the input and output tensors.
T round(T value)
Round floating-point value with half value rounding away from zero.
const QuantizationInfo qinfo
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
void run() override
Run the kernels contained in the function.
Store the tensor's metadata.
void print(std::ostream &s, IOFormatInfo io_fmt=IOFormatInfo()) const
Print a tensor to a given stream using user defined formatting information.
Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier with value less than one.
Basic function to execute GEMMLowpMatrixMultiplyCore.
QuantizationInfo choose_quantization_params(float min, float max)
void find_min_max(int size, const float *data, float *min, float *max)