24.02.1
Go to the source code of this file.
◆ choose_quantization_params()
QuantizationInfo choose_quantization_params(float min, float max)
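No documentation body for this entry survived extraction. As a hedged sketch only: QASYMM8 parameters are conventionally chosen by mapping the observed range [min, max] onto [0, 255], after extending the range to contain 0 so that real 0.0 has an exact quantized representation. The helper below illustrates that standard scheme; its name and body are assumptions for illustration, not the verified implementation in neon_gemm_qasymm8.cpp.

#include <algorithm>
#include <cmath>
#include "arm_compute/core/QuantizationInfo.h"

// Sketch of standard asymmetric min/max parameter selection (assumed,
// not the verified body of choose_quantization_params in this file).
arm_compute::QuantizationInfo choose_quantization_params_sketch(float min, float max)
{
    // Extend the range to contain 0 so a valid zero-point exists.
    min = std::min(min, 0.f);
    max = std::max(max, 0.f);

    const float scale = (max - min) / 255.f;
    if (scale == 0.f)
    {
        return arm_compute::QuantizationInfo(1.f, 0); // degenerate all-zero range
    }

    // The quantized value that represents real 0.0, clamped to [0, 255].
    const float zero_point = std::max(0.f, std::min(255.f, -min / scale));
    const int   offset     = static_cast<int>(std::lround(zero_point));

    return arm_compute::QuantizationInfo(scale, offset);
}

For example, with min = -1.5 and max = 3.75 this yields scale = 5.25 / 255 ≈ 0.0206 and offset ≈ 73.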
◆ find_min_max()
void find_min_max(int size, const float *data, float *min, float *max)
Definition at line 39 of file neon_gemm_qasymm8.cpp.
*min = *max = data[0];
for (int i = 0; i < size; i++)
{
    const float val = data[i];
    *min = std::min(*min, val);
    *max = std::max(*max, val);
}
Referenced by main().
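A minimal usage sketch for the helper above (the array contents are arbitrary):

#include <iostream>

// Assumes find_min_max() from neon_gemm_qasymm8.cpp is in scope.
int main()
{
    const float data[] = { -1.5f, 0.25f, 3.75f, 2.0f };

    float min = 0.f;
    float max = 0.f;
    find_min_max(4, data, &min, &max);

    std::cout << "min=" << min << ", max=" << max << "\n"; // prints min=-1.5, max=3.75
    return 0;
}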
◆ main()
int main(int argc, char **argv)
Definition at line 96 of file neon_gemm_qasymm8.cpp.
Abridged listing (elided source lines are marked with // ...):
bool default_input = true;
// ...
std::cout << "Usage: ./build/neon_gemm_qasymm8 M N K\n";
std::cout << "Too few or no inputs provided. Using default M=4, N=4, K=4\n\n";
// ...
M = strtol(argv[1], nullptr, 10);
N = strtol(argv[2], nullptr, 10);
K = strtol(argv[3], nullptr, 10);
default_input = false;
// ...
fgemm.configure(&src1, &src2, nullptr, &dst0, 1, 0);
// ...
auto *src1_ptr = reinterpret_cast<float *>(src1.buffer());
auto *src2_ptr = reinterpret_cast<float *>(src2.buffer());
auto *dst0_ptr = reinterpret_cast<float *>(dst0.buffer());
// ...
for (size_t i = 0; i < M * K; i++)
    // ...
for (size_t i = 0; i < M; i++)
    src1_ptr[i * K + i] = 1.0f;
// ...
for (size_t i = 0; i < K * N; i++)
    src2_ptr[i] = i * 1.123f;
// ...
#if ARM_COMPUTE_DEBUG_ENABLED
std::cout << "Result matrix:\n";
src1.print(std::cout);
src2.print(std::cout);
dst0.print(std::cout);
#endif // ARM_COMPUTE_DEBUG_ENABLED
// ...
std::cout << "Matrix 1: min=" << src1_min << ", max=" << src1_max << ", ";
std::cout << "QuantisationInfo(" << src1_qinfo.scale()[0] << ", " << src1_qinfo.offset()[0] << ")\n";
std::cout << "Matrix 2: min=" << src2_min << ", max=" << src2_max << ", ";
std::cout << "QuantisationInfo(" << src2_qinfo.scale()[0] << ", " << src2_qinfo.offset()[0] << ")\n";
std::cout << "Result : min=" << dst0_min << ", max=" << dst0_max << ", ";
std::cout << "QuantisationInfo(" << dst0_qinfo.scale()[0] << ", " << dst0_qinfo.offset()[0] << ")\n";
// ...
qgemm.configure(&q_src1, &q_src2, nullptr, &q_res);
// ...
int output_multiplier;
// ...
std::cout << "(q_multiplier, q_shift) = (" << output_multiplier << ", " << output_shift << ")\n\n";
// ...
info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
info.gemmlowp_multiplier = output_multiplier;
info.gemmlowp_shift = output_shift;
// ...
info.output_data_type = DataType::QASYMM8;
// ...
gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, info);
// ...
gemmlowp_output_stage.run();
std::cout << "\nTest Passed\n";
// ...
#if ARM_COMPUTE_DEBUG_ENABLED
// ...
q_src1.print(std::cout);
q_src2.print(std::cout);
// ...
std::cout << "Lowp GEMM output (int32):\n";
q_res.print(std::cout);
// ...
std::cout << "Output pipeline result matrix:\n";
q_res_output.print(std::cout);
// ...
std::cout << "Expected result:\n";
q_dst0.print(std::cout);
#endif // ARM_COMPUTE_DEBUG_ENABLED
References TensorAllocator::allocate(), Tensor::allocator(), Tensor::buffer(), arm_compute::quantization::calculate_quantized_multiplier_less_than_one(), choose_quantization_params(), NEQuantizationLayer::configure(), NEGEMMLowpOutputStage::configure(), NEGEMMLowpMatrixMultiplyCore::configure(), arm_compute::F32, arm_compute::utils::fill_random_tensor(), find_min_max(), Tensor::info(), arm_compute::test::validation::info, TensorAllocator::init(), K, M, N, UniformQuantizationInfo::offset, QuantizationInfo::offset(), ITensor::print(), arm_compute::QASYMM8, arm_compute::QUANTIZE_DOWN_FIXEDPOINT, NEQuantizationLayer::run(), NEGEMMLowpOutputStage::run(), NEGEMMLowpMatrixMultiplyCore::run(), arm_compute::S32, UniformQuantizationInfo::scale, QuantizationInfo::scale(), ITensorInfo::set_data_type(), ITensorInfo::set_num_channels(), and QuantizationInfo::uniform().
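Inside main(), the real-valued rescale factor (for quantized GEMM this is typically src1_scale * src2_scale / dst0_scale) is converted by calculate_quantized_multiplier_less_than_one() into the integer (q_multiplier, q_shift) pair printed above and fed to the QUANTIZE_DOWN_FIXEDPOINT output stage. Below is a standalone sketch of that gemmlowp-style Q0.31 decomposition; it is illustrative arithmetic under those assumptions, not the library's exact implementation.

#include <cmath>
#include <cstdint>
#include <iostream>

// Decompose a real multiplier m in (0, 1) as m ≈ q * 2^(-shift), with q a
// Q0.31 fixed-point value. Assumed gemmlowp-style scheme; not the verified
// body of calculate_quantized_multiplier_less_than_one().
void quantize_multiplier_sketch(double multiplier, int32_t *quant_multiplier, int32_t *right_shift)
{
    int          exponent = 0;
    const double q        = std::frexp(multiplier, &exponent); // multiplier = q * 2^exponent, q in [0.5, 1)
    *right_shift          = -exponent;

    auto q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
    if (q_fixed == (1LL << 31)) // rounding pushed q up to 1.0: renormalise
    {
        q_fixed /= 2;
        --(*right_shift);
    }
    *quant_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
    const double multiplier = 0.0072615; // e.g. scale_src1 * scale_src2 / scale_dst

    int32_t q_mult = 0;
    int32_t shift  = 0;
    quantize_multiplier_sketch(multiplier, &q_mult, &shift);
    std::cout << "(q_multiplier, q_shift) = (" << q_mult << ", " << shift << ")\n";
    return 0;
}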
◆ quantize_values()
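The documentation body for this entry did not survive extraction. Given the quantize_qasymm8() helper this library provides, a plausible reconstruction is an element-wise quantization loop; both the signature and the body below are assumptions for illustration, not the verified definition in neon_gemm_qasymm8.cpp.

#include <cstdint>
#include "arm_compute/core/QuantizationInfo.h"

// Hypothetical reconstruction: quantize each float element to QASYMM8 with
// the given parameters. Signature and body are assumptions.
void quantize_values(int size, uint8_t *output, const float *input, const arm_compute::QuantizationInfo &qinfo)
{
    const arm_compute::UniformQuantizationInfo uqinfo = qinfo.uniform();
    for (int i = 0; i < size; i++)
    {
        // quantize_qasymm8() rounds and clamps to [0, 255].
        output[i] = arm_compute::quantize_qasymm8(input[i], uqinfo);
    }
}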