24 #ifndef ARM_COMPUTE_CL 25 #error "This example needs to be built with -DARM_COMPUTE_CL" 35 #include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" 37 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" 38 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" 39 #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" 40 #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" 41 #include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" 42 #include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" 43 #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" 44 #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" 45 #include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" 46 #include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" 47 #include "src/core/CL/kernels/CLIm2ColKernel.h" 48 #include "src/core/CL/kernels/CLWeightsReshapeKernel.h" 59 #include "utils/Utils.h" 68 using namespace utils;
// Command-line option set for the CL GEMM validation example.
// NOTE(review): this is a fragmentary extract -- interior lines of the
// original file (e.g. the class-body braces, lines 98-109 including the
// data_type option registration) are missing here. Comments describe only
// what is visible in this fragment.
80 class GEMMCommandLineOptions final
// Registers every option on the supplied parser and stores the raw option
// pointers returned by add_option(). Presumably the parser owns the option
// objects and these pointers are non-owning -- TODO confirm against
// CommandLineParser's ownership contract.
83 explicit GEMMCommandLineOptions(CommandLineParser &
parser) noexcept
84 :
// --help: toggle, prints usage and exits (handled by the caller).
help(parser.add_option<ToggleOption>(
"help")),
// --add_bias: toggle; per its help text below, only used in QASYMM8 runs.
85 add_bias(parser.add_option<ToggleOption>(
"add_bias")),
// GEMM dimensions M/N/K and batch count B, with small defaults (7,3,5,1).
86 M(parser.add_option<SimpleOption<int>>(
"m", 7)),
87 N(parser.add_option<SimpleOption<int>>(
"n", 3)),
88 K(parser.add_option<SimpleOption<int>>(
"k", 5)),
89 B(parser.add_option<SimpleOption<int>>(
"b", 1)),
// GEMM scalars: alpha defaults to 1 (use product), beta to 0 (ignore C).
90 alpha(parser.add_option<SimpleOption<float>>(
"alpha", 1.f)),
91 beta(parser.add_option<SimpleOption<float>>(
"beta", 0.f)),
// Quantization offsets for the two inputs and the output (QASYMM8 path).
92 offset_src0(parser.add_option<SimpleOption<int>>(
"offset_i0", 10)),
93 offset_src1(parser.add_option<SimpleOption<int>>(
"offset_i1", 10)),
94 offset_dst(parser.add_option<SimpleOption<int>>(
"offset_o", 10)),
// Quantization scales; 1/255 maps the full uint8 range onto [0, 1].
95 scale_src0(parser.add_option<SimpleOption<float>>(
"scale_i0", 1.f / 255)),
96 scale_src1(parser.add_option<SimpleOption<float>>(
"scale_i1", 1.f / 255)),
97 scale_dst(parser.add_option<SimpleOption<float>>(
"scale_o", 1.f / 255)),
// Allowed values for the (not visible here) data_type EnumOption.
101 const std::set<arm_compute::DataType> supported_data_types
// Help strings for each registered option.
110 help->set_help(
"Show this help message");
111 add_bias->set_help(
"Add bias to the GEMM. Used when running in QASYMM8");
112 M->set_help(
"M value");
113 N->set_help(
"N value");
114 K->set_help(
"K value");
115 B->set_help(
"B value - number of batches");
116 alpha->set_help(
"Alpha value");
117 beta->set_help(
"Beta value");
118 offset_src0->set_help(
"Offset of first input. Used when running in QASYMM8");
119 offset_src1->set_help(
"Offset of second input. Used when running in QASYMM8");
120 offset_dst->set_help(
"Offset of output. Used when running in QASYMM8");
121 scale_src0->set_help(
"Scale of first input. Used when running in QASYMM8");
122 scale_src1->set_help(
"Scale of second input. Used when running in QASYMM8");
123 scale_dst->set_help(
"Scale of output. Used when running in QASYMM8");
// Non-copyable (the stored option pointers are tied to one parser),
// but movable; destructor is trivial since nothing here is owned.
127 GEMMCommandLineOptions(
const GEMMCommandLineOptions &) =
delete;
129 GEMMCommandLineOptions &operator=(
const GEMMCommandLineOptions &) =
delete;
131 GEMMCommandLineOptions(GEMMCommandLineOptions &&) noexcept(
true) =
default;
133 GEMMCommandLineOptions &operator=(GEMMCommandLineOptions &&) noexcept(
true) =
default;
135 ~GEMMCommandLineOptions() =
default;
// Raw, non-owning pointers to the parser-registered options, read back
// later via ->value() / ->is_set() (see consume_params in this file).
139 ToggleOption *add_bias;
140 SimpleOption<int> *
M;
141 SimpleOption<int> *
N;
142 SimpleOption<int> *
K;
143 SimpleOption<int> *
B;
144 SimpleOption<float> *alpha;
145 SimpleOption<float> *beta;
146 SimpleOption<int> *offset_src0;
147 SimpleOption<int> *offset_src1;
148 SimpleOption<int> *offset_dst;
149 SimpleOption<float> *scale_src0;
150 SimpleOption<float> *scale_src1;
151 SimpleOption<float> *scale_dst;
152 EnumOption<arm_compute::DataType> *
data_type;
// Validation example that runs GEMM on OpenCL and compares against a
// host-side reference implementation (see do_validate below).
// NOTE(review): fragmentary extract -- many interior lines (braces, tensor
// shape setup, the F16/F32/QASYMM8 switch bodies) are missing; comments
// describe only the statements visible here.
156 class CLGEMMValidateExample :
public ValidateExample
// Parses CLI options, configures the tensors and the GEMM (or GEMMLowp)
// functions, and allocates all tensor backing memory.
159 bool do_setup(
int argc,
char **argv)
override 165 GEMMCommandLineOptions gemm_options(parser);
166 parser.parse(argc, argv);
// --help only counts if actually set on the command line.
169 const bool print_help = gemm_options.help->is_set() ? gemm_options.help->value() :
false;
172 parser.print_help(argv[0]);
// Copy parsed option values into the members used below.
177 consume_params(gemm_options);
178 print_parameters_internal();
// Requantization multiplier for the QASYMM8 output stage:
// combined input scales divided by the output scale.
185 float multiplier = scale_src0 * scale_src1 / scale_dst;
// Attach per-tensor quantization info before configuring GEMMLowp.
198 src0.info()->set_quantization_info(
QuantizationInfo(scale_src0, offset_src0));
199 src1.info()->set_quantization_info(
QuantizationInfo(scale_src1, offset_src1));
// QASYMM8 path: int32 accumulation into tmp_dst, then a fixed-point
// requantization output stage (bias optional via --add_bias).
205 mm_gemmlowp.configure(&src0, &src1,
nullptr, &tmp_dst);
208 mm_gemmlowp_output_stage.configure(&tmp_dst, add_bias ? &biases :
nullptr, &
dst, dst_multiplier, dst_shift, offset_dst);
209 tmp_dst.allocator()->allocate();
210 biases.allocator()->allocate();
// Float path: plain GEMM dst = alpha * src0 * src1 + beta * src2.
216 mm_gemm.configure(&src0, &src1, &src2, &
dst, alpha, beta);
220 src0.allocator()->allocate();
221 src1.allocator()->allocate();
222 dst.allocator()->allocate();
223 src2.allocator()->allocate();
// Prints the parsed run configuration (body not visible in this extract).
232 void print_parameters_internal()
// Computes the reference result on the host and compares it to the CL
// output (per-type tolerances are declared elsewhere in the file).
256 void do_validate()
override 270 SimpleTensor<half> ref_dst = reference::gemm<half>(ref_src0, ref_src1, ref_src2, alpha, beta);
284 SimpleTensor<float> ref_dst = reference::gemm<float>(ref_src0, ref_src1, ref_src2, alpha, beta);
// Reference requantization takes per-channel vectors; wrap the single
// multiplier/shift computed in do_setup.
300 const std::vector<int32_t> dst_multiplier_vec = { dst_multiplier };
301 const std::vector<int32_t> dst_shift_vec = { dst_shift };
// With bias vs. without bias reference output stage.
308 ref_dst = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(ref_tmp_dst, biases, dst_multiplier_vec, dst_shift_vec, offset_dst);
312 ref_dst = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(ref_tmp_dst, dst_multiplier_vec, dst_shift_vec, offset_dst);
// Executes the configured function(s); in the QASYMM8 path this includes
// the requantization output stage.
321 void do_run()
override 329 mm_gemmlowp_output_stage.run();
// Fills a tensor with deterministic pseudo-random data, keyed by seed i:
// uniform floats in [-1, 1] for float types, uniform ints in
// [-6000, 6000] for the integer case, library default otherwise.
342 template <
typename U>
343 void fill(
U &&tensor,
int i)
345 switch(tensor.data_type())
355 std::uniform_real_distribution<float>
distribution(-1.0f, 1.0f);
356 library->fill(tensor, distribution, i);
362 std::uniform_int_distribution<>
distribution(-6000, 6000);
363 library->fill(tensor, distribution, i);
367 library->fill_tensor_uniform(tensor, i);
// Copies parsed option values into the example's members.
371 void consume_params(
const GEMMCommandLineOptions &opts)
381 alpha = opts.alpha->value();
382 beta = opts.beta->value();
383 offset_src0 = opts.offset_src0->value();
384 offset_src1 = opts.offset_src1->value();
385 offset_dst = opts.offset_dst->value();
386 scale_src0 = opts.scale_src0->value();
387 scale_src1 = opts.scale_src1->value();
388 scale_dst = opts.scale_dst->value();
// Bias defaults to ON unless --add_bias was explicitly given.
389 add_bias = opts.add_bias->is_set() ? opts.add_bias->value() :
true;
// Members: defaults mirror the GEMMCommandLineOptions defaults above.
398 CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint mm_gemmlowp_output_stage{};
400 size_t M{ 7 },
N{ 3 },
K{ 5 },
B{ 1 };
402 float alpha{ 1.0 }, beta{ 0.0 };
403 int offset_src0{ 10 }, offset_src1{ 10 }, offset_dst{ 10 };
404 float scale_src0{ 1.0f / 255 }, scale_src1{ 1.0f / 255 }, scale_dst{ 1.0f / 255 };
// Fixed-point requantization parameters, computed in do_setup from
// multiplier = scale_src0 * scale_src1 / scale_dst.
405 int32_t dst_multiplier{ 0 }, dst_shift{ 0 };
406 bool add_bias{
true };
// Entry point: delegates to the utils example runner, which drives
// do_setup / do_run / do_validate on CLGEMMValidateExample.
415 int main(
int argc,
char **argv)
417 return utils::run_example<CLGEMMValidateExample>(argc, argv);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
RelativeTolerance< float > tolerance_f32(0.001f)
F32 Tolerance value for comparing reference's output against implementation's output for floating point data types.
constexpr float tolerance_num_f16
F16 Tolerance number.
static CLScheduler & get()
Access the scheduler singleton.
std::string to_string(T &&value)
Convert integer and float values to string.
void default_init(ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Initialises the context and command queue used by the scheduler to default values and sets a default ...
int main(int argc, char **argv)
Main program for gemm test.
half_float::half half
16-bit floating point type
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
1 channel, 1 S32 per channel
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
library fill(src, distribution, 0)
std::unique_ptr< AssetsLibrary > library
Basic function to execute GEMM on OpenCL.
quantized, asymmetric fixed-point 8-bit number unsigned
Accessor implementation for CLTensor objects.
std::uniform_real_distribution< float > distribution(-5.f, 5.f)
void sync()
Blocks until all commands in the associated command queue have finished.
Simple tensor object that stores elements in a consecutive chunk of memory.
Class representing a relative tolerance value.
Store the tensor's metadata.
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
RelativeTolerance< half_float::half > tolerance_f16(half(0.2))
F16 Tolerance value for comparing reference's output against implementation's output for floating point data types.
DataType
Available data types.
void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt)
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference's output against implementation's output for flo...
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
Basic implementation of the OpenCL tensor interface.