24.02.1
|
Go to the documentation of this file.
24 #ifndef ARM_COMPUTE_CL
25 #error "This example needs to be built with -DARM_COMPUTE_CL"
33 #include "utils/Utils.h"
38 using namespace utils;
40 class CLSGEMMExample :
public Example
43 bool do_setup(
int argc,
char **argv)
override
56 stream.open(argv[1], std::fstream::in);
59 if (argc < 3 || (argc < 4 && stream.bad()))
62 std::cout <<
"Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha "
64 std::cout <<
" 2) ./build/cl_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n";
65 std::cout <<
"Too few or no input_matrices provided. Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n";
84 stream.open(argv[3], std::fstream::in);
93 alpha = strtof(argv[4],
nullptr);
98 beta = strtof(argv[5],
nullptr);
104 alpha = strtof(argv[3],
nullptr);
108 beta = strtof(argv[4],
nullptr);
115 size_t M = strtol(argv[1],
nullptr, 10);
116 size_t N = strtol(argv[2],
nullptr, 10);
117 size_t K = strtol(argv[3],
nullptr, 10);
125 alpha = strtof(argv[4],
nullptr);
129 beta = strtof(argv[5],
nullptr);
138 sgemm.configure(&src0, &src1, (src2.info()->total_size() > 0) ? &src2 :
nullptr, &
dst, alpha, beta);
141 src0.allocator()->allocate();
142 src1.allocator()->allocate();
143 dst.allocator()->allocate();
151 output_filename =
"sgemm_out.npy";
156 src2.allocator()->allocate();
162 src2.allocator()->allocate();
174 void do_run()
override
182 void do_teardown()
override
184 if (!output_filename.empty())
197 float alpha{}, beta{};
199 std::string output_filename{};
207 int main(
int argc,
char **argv)
209 return utils::run_example<CLSGEMMExample>(argc, argv);
void init_sgemm_output(T &dst, T &src0, T &src1, arm_compute::DataType dt)
bool is_open()
Return true if a NPY file is currently open.
void fill_random_tensor(TensorType &tensor, std::random_device::result_type seed, T lower_bound=std::numeric_limits< T >::lowest(), T upper_bound=std::numeric_limits< T >::max())
void sync()
Blocks until all commands in the associated command queue have finished.
void fill_tensor(T &tensor)
Fill a tensor with the content of the currently open NPY file.
Basic implementation of the OpenCL tensor interface.
bool is_fortran()
Return true if a NPY file is in Fortran order.
void init_tensor(T &tensor, arm_compute::DataType dt)
Initialise the tensor's metadata with the dimensions of the NPY file currently open.
void save_to_npy(T &tensor, const std::string &npy_filename, bool fortran_order)
Template helper function to save a tensor image to a NPY file.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Basic function to execute GEMM on OpenCL.
static CLScheduler & get()
Access the scheduler singleton.
void open(const std::string &npy_filename, DataLayout file_layout=DataLayout::NCHW)
Open a NPY file and read its metadata.
Store the tensor's metadata.
void default_init(ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Initialises the context and command queue used by the scheduler to default values and sets a default ...
Copyright (c) 2017-2024 Arm Limited.
int main(int argc, char **argv)
Main program for sgemm test.
@ F32
32-bit floating-point number
Basic implementation of the OpenCL tuner interface.