23.11
|
Go to the documentation of this file.
57 v0 = std::max(std::min(
static_cast<int>(m / m0),
static_cast<int>(v0)),
static_cast<int>(1));
62 h0 = std::max(n / n0, 1
U);
66 h0 = std::max(std::min(
static_cast<int>(n / n0),
static_cast<int>(h0)),
static_cast<int>(1));
72 return std::make_pair(lhs_info, rhs_info);
75 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
77 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
84 "The fallback GeMM configuration cannot have export_to_cl_image = true");
102 constexpr
unsigned int num_floats_per_pixel = 4;
104 const unsigned int stride_y_in_elements =
tensor->strides_in_bytes()[1] /
tensor->element_size();
108 if (pixel_alignment == 0)
113 const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
114 const unsigned int round_up_width =
115 ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
116 const unsigned int padding = round_up_width - stride_y_in_elements;
126 "Export to cl_image only supported with n0 = 4, 8 or 16");
128 "Export to cl_image only supported with k0 = 4, 8 or 16");
132 "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
134 "Impossible to retrieve the cl_image pitch alignment");
142 "Not supported width for cl_image");
145 "Not supported height for cl_image");
152 const unsigned int n,
153 const unsigned int k,
154 const unsigned int b,
156 unsigned int &best_m0,
157 unsigned int &best_n0)
161 const unsigned int mmul_k0 = 4;
166 const unsigned int m_div_m0 = ceil_to_multiple_m_m0 / best_m0;
167 const unsigned int ceil_to_multiple_m_div_m0_mmul_k0 =
ceil_to_multiple(m_div_m0, mmul_k0);
168 const unsigned int gws_y = ceil_to_multiple_m_div_m0_mmul_k0 / mmul_k0;
170 return ((k % mmul_k0) == 0) && (gws_y > 4);
173 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
176 size_t min_acc = std::numeric_limits<size_t>::max();
180 const size_t num_rows = configs.size();
181 const size_t num_cols = configs[0].size();
184 "N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
189 for (
size_t y = 0; y < num_rows; ++y)
191 size_t mc0 =
static_cast<size_t>(configs[y][0]);
192 size_t nc0 =
static_cast<size_t>(configs[y][1]);
193 size_t kc0 =
static_cast<size_t>(configs[y][2]);
194 size_t bc0 =
static_cast<size_t>(configs[y][3]);
197 acc += (m - mc0) * (m - mc0);
198 acc += (n - nc0) * (n - nc0);
199 acc += (k - kc0) * (k - kc0);
200 acc += (
b - bc0) * (
b - bc0);
201 acc = std::sqrt(acc);
210 const int m0 = configs[min_idx][4];
211 const int n0 = configs[min_idx][5];
212 const int k0 = configs[min_idx][6];
213 const int v0 = configs[min_idx][7];
214 const int h0 = configs[min_idx][8];
215 const int i_lhs = configs[min_idx][9];
216 const int i_rhs = configs[min_idx][10];
217 const int t_lhs = configs[min_idx][11];
218 const int t_rhs = configs[min_idx][12];
219 const int im_rhs = configs[min_idx][13];
221 return configure_lhs_rhs_info(m, n, m0, n0, k0, v0, h0, i_lhs, i_rhs, t_lhs, t_rhs, im_rhs);
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image)
Configure GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.
bool export_to_cl_image(const ITensorInfo *tensor)
std::vector< std::vector< int32_t > > GeMMConfigsMatrix
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
unsigned int n0
Number of columns processed by the matrix multiplication.
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info)
Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix.
bool export_to_cl_image
True if the reshaped rhs has to be exported to cl_image.
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Find the preferred configurations for the LHS and RHS tensor using the GeMMConfigsMatrix provided by ...
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Manages the compilation and caching of all OpenCL kernels and provides accessors for the OpenCL context.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true, an error message is printed and an exception is thrown.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Wrapper to configure the Khronos OpenCL C++ header.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
CLTensor * tensor
Pointer to the auxiliary tensor.
void update_padding_for_cl_image(ITensorInfo *tensor)
Update padding required to export the OpenCL buffer to OpenCL image2d.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
Calculate the Right Hand Side matrix reshaped shape.
GEMM LHS (Left Hand Side) matrix information.
Store the tensor's metadata.
BorderSize PaddingSize
Container for 2D padding size.
bool transpose
True if the (k0xn0) block has to be transposed before being stored.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Copyright (c) 2017-2023 Arm Limited.
@ F16
16-bit floating-point number
size_t get_cl_image_pitch_alignment(const cl::Device &device)
Helper function to get the cl_image pitch alignment in pixels.
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > select_lhs_rhs_info(std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_img, std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_buf, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Select GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.
Store the tensor's metadata.
@ F32
32-bit floating-point number
GEMM RHS (Right Hand Side) matrix information.
bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b, const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
Determine if the MMUL kernels should be preferred.
DataType
Available data types.
const cl::Device & get_device()
Gets the CL device for which the programs are created.