#include "utils/Utils.h"

#include <algorithm>
#include <cmath>
#include <cstdint>

using namespace utils;
// Scan `size` elements of `data` and write the smallest value to *min and
// the largest to *max.
//
// @param size Number of elements in `data`; must be >= 1 (the first element
//             seeds both results, so size == 0 would read out of bounds).
// @param data Input array (not modified).
// @param min  Output: minimum value found.
// @param max  Output: maximum value found.
void find_min_max(int size, const float *data, float *min, float *max)
{
    // Seed both results with the first element, then scan the remainder
    // (the original loop started at 0 and re-compared the seed element).
    *min = *max = data[0];
    for(int i = 1; i < size; i++)
    {
        const float val = data[i];
        *min = std::min(*min, val);
        *max = std::max(*max, val);
    }
}
54 min = std::min(min, 0.f);
55 max = std::max(max, 0.f);
59 const float qmax = 255;
62 const float scale = (max - min) / (qmax - qmin);
65 const float zero_point_real = qmin - min /
scale;
68 std::uint8_t zero_point_nudged = 0;
69 if(zero_point_real < qmin)
71 zero_point_nudged = qmin;
73 else if(zero_point_real > qmax)
75 zero_point_nudged = qmax;
// NOTE(review): extraction fragment — only this loop header of
// quantize_values(size, output, input, qinfo) survived; the loop body
// (presumably a per-element quantize_qasymm8 call, per the API index) is
// missing from this excerpt. Confirm against the original file.
88 for(
int i = 0; i < size; i++)
// NOTE(review): main() survives only as extraction fragments (original line
// numbers 95-270 are fused into the text); the declarations of M/N/K, the
// tensor objects, the allocator()/allocate() calls and the GEMM run()
// invocations are missing from this excerpt. The comments below describe
// only what the surviving fragments show — confirm everything else against
// the original file.
95 int main(
int argc,
char **argv)
// Whether we fell back to the default problem size (M = N = K = 4).
108 bool default_input =
true;
// Too few command-line arguments: print usage and keep the defaults.
114 std::cout <<
"Usage: ./build/neon_gemm_qasymm8 M N K\n";
115 std::cout <<
"Too few or no inputs provided. Using default M=4, N=4, K=4\n\n";
// Parse the matrix dimensions M, N, K from argv in base 10.
119 M = strtol(argv[1],
nullptr, 10);
120 N = strtol(argv[2],
nullptr, 10);
121 K = strtol(argv[3],
nullptr, 10);
122 default_input =
false;
// Configure the float GEMM: inputs src1, src2, no bias, output dst0.
// The trailing 1, 0 arguments are presumably alpha/beta — confirm.
133 fgemm.configure(&src1, &src2,
nullptr, &dst0, 1, 0);
// Raw float views over the tensor buffers, used to fill the inputs below.
141 auto *src1_ptr =
reinterpret_cast<float *
>(src1.
buffer());
142 auto *src2_ptr =
reinterpret_cast<float *
>(src2.
buffer());
143 auto *dst0_ptr =
reinterpret_cast<float *
>(dst0.
buffer());
// Fill src1 (M*K elements); the loop body is missing from this excerpt.
147 for(
size_t i = 0; i < M *
K; i++)
// Set the leading diagonal of src1 to 1.0f.
151 for(
size_t i = 0; i <
M; i++)
153 src1_ptr[i * K + i] = 1.0f;
// Fill src2 (K*N elements) with a deterministic ramp of values.
157 for(
size_t i = 0; i < K *
N; i++)
159 src2_ptr[i] = i * 1.123f;
// Debug-only dump of the float inputs and result.
172 #if ARM_COMPUTE_DEBUG_ENABLED 173 std::cout <<
"Result matrix:\n";
174 src1.
print(std::cout);
175 src2.
print(std::cout);
176 dst0.
print(std::cout);
// Report the observed min/max range and the quantization parameters
// (scale, zero-point offset) chosen for each matrix.
177 #endif // ARM_COMPUTE_DEBUG_ENABLED 197 std::cout <<
"Matrix 1: min=" << src1_min <<
", max=" << src1_max <<
", ";
198 std::cout <<
"QuantisationInfo(" << src1_qinfo.
scale()[0] <<
", " << src1_qinfo.
offset()[0] <<
")\n";
199 std::cout <<
"Matrix 2: min=" << src2_min <<
", max=" << src2_max <<
", ";
200 std::cout <<
"QuantisationInfo(" << src2_qinfo.
scale()[0] <<
", " << src2_qinfo.
offset()[0] <<
")\n";
201 std::cout <<
"Result : min=" << dst0_min <<
", max=" << dst0_max <<
", ";
202 std::cout <<
"QuantisationInfo(" << dst0_qinfo.
scale()[0] <<
", " << dst0_qinfo.
offset()[0] <<
")\n";
// Configure the quantized (lowp) GEMM on the uint8 tensors, no bias.
220 qgemm.
configure(&q_src1, &q_src2,
nullptr, &q_res);
// Fixed-point multiplier/shift pair used to re-quantize the accumulator
// (presumably filled by calculate_quantized_multiplier_less_than_one, per
// the API index — the call itself is missing from this excerpt).
224 int output_multiplier;
228 std::cout <<
"(q_multiplier, q_shift) = (" << output_multiplier <<
", " << output_shift <<
")\n\n";
// Configure the output stage that maps q_res down to q_res_output.
238 gemmlowp_output_stage.
configure(&q_res,
nullptr, &q_res_output, info);
// Run the output stage and report completion.
254 gemmlowp_output_stage.
run();
255 std::cout <<
"Done\n";
// Debug-only dump of the quantized inputs, the lowp GEMM output, the
// output-stage result, and the reference q_dst0.
257 #if ARM_COMPUTE_DEBUG_ENABLED 259 q_src1.
print(std::cout);
260 q_src2.
print(std::cout);
262 std::cout <<
"Lowp GEMM output (int32):\n";
263 q_res.
print(std::cout);
265 std::cout <<
"Output pipeline result matrix:\n";
266 q_res_output.
print(std::cout);
269 std::cout <<
"Expected result:\n";
270 q_dst0.
print(std::cout);
271 #endif // ARM_COMPUTE_DEBUG_ENABLED
const std::vector< int32_t > & offset() const
Offset vector accessor.
virtual ITensorInfo & set_num_channels(int num_channels)=0
Set the number of channels to the specified value.
int32_t gemmlowp_multiplier
GEMMLowp output stage multiplier used for quantizing to QASYMM8.
Quantize using a fixed point multiplication.
Basic function to execute GEMM.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
void run() override
Run the kernels contained in the function.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
void quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo)
1 channel, 1 F32 per channel
Basic function to run a quantization layer using cpu::CpuQuantize.
int32_t gemmlowp_offset
GEMMLowp output stage offset used for quantizing to QASYMM8.
GEMMLowpOutputStageType type
GEMMLowp output stage type.
Includes all the Arm® Neon™ functions at once.
Copyright (c) 2017-2021 Arm Limited.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
void fill_random_tensor(TensorType &tensor, std::random_device::result_type seed, T lower_bound=std::numeric_limits< T >::lowest(), T upper_bound=std::numeric_limits< T >::max())
int main(int argc, char **argv)
1 channel, 1 S32 per channel
Quantization information.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
GEMMLowp output stage info.
const std::vector< float > & scale() const
Scale vector accessor.
Basic implementation of the tensor interface.
void configure(const ITensor *input, ITensor *output)
Set the input and output tensors.
virtual ITensorInfo & set_data_type(DataType data_type)=0
Set the data type to the specified value.
int32_t gemmlowp_shift
GEMMLowp output stage shift used for quantizing to uint8.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
T round(T value)
Round floating-point value with half value rounding away from zero.
const QuantizationInfo qinfo
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
Basic function to execute GEMMLowpQuantizeDown kernels.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory.
void run() override
Run the kernels contained in the function.
Store the tensor's metadata.
void print(std::ostream &s, IOFormatInfo io_fmt=IOFormatInfo()) const
Print a tensor to a given stream using user defined formatting information.
Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier with value less than one.
DataType output_data_type
Output tensor data type to use if the output is not initialized.
Function to run Gemm on quantized types.
QuantizationInfo choose_quantization_params(float min, float max)
void find_min_max(int size, const float *data, float *min, float *max)