Data Structures
class	CLGEMMConfigArray
	Basic container for the OpenCL GEMM configuration functions. More...

class	ClGemmDefaultConfigNativeBifrost
	Bifrost based OpenCL GEMMNative configuration. More...

class	ClGemmDefaultConfigNativeMidgard
	Midgard based OpenCL GEMMNative configuration. More...

class	ClGemmDefaultConfigNativeValhall
	Valhall based OpenCL GEMMNative configuration. More...

class	ClGemmDefaultConfigReshapedBifrost
	Bifrost based OpenCL GEMMReshaped configuration. More...

class	ClGemmDefaultConfigReshapedRhsOnlyBifrost
	Bifrost based OpenCL GEMMReshapedOnlyRHS configuration. More...

class	ClGemmDefaultConfigReshapedRhsOnlyValhall
	Valhall based OpenCL GEMMReshapedOnlyRHS configuration. More...

class	ClGemmDefaultConfigReshapedValhall
	Valhall based OpenCL GEMMReshaped configuration. More...

class	ClGemmNativeKernelConfigurationFactory
	CLGEMMNative factory class. More...

class	ClGemmReshapedKernelConfigurationFactory
	CLGEMMReshaped factory class. More...

class	ClGemmReshapedOnlyRhsKernelConfigurationFactory
	CLGEMMReshapedOnlyRHS factory class. More...

class	IClGemmKernelConfig
	Basic interface for the GEMM kernel configuration. More...

Typedefs
using	GeMMConfigsMatrix = std::vector< std::vector< int32_t > >

Functions
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo >	configure_lhs_rhs_info (unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image=false)
	Configure GEMMLHSMatrixInfo and GEMMRHSMatrixInfo. More...

std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo >	select_lhs_rhs_info (std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_img, std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_buf, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
	Select GEMMLHSMatrixInfo and GEMMRHSMatrixInfo. More...

void	update_padding_for_cl_image (ITensorInfo *tensor)
	Update padding required to export the OpenCL buffer to OpenCL image2d. More...

Status	validate_image2d_support_on_rhs (const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info)
	Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix. More...

bool	is_mmul_kernel_preferred (const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b, const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
	Determine if the MMUL kernels should be preferred. More...

std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo >	find_lhs_rhs_info (const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
	Find the preferred configurations for the LHS and RHS tensor using the GeMMConfigsMatrix provided by the user. More...

Typedef Documentation

◆ GeMMConfigsMatrix

using GeMMConfigsMatrix = std::vector<std::vector<int32_t> >

Definition at line 38 of file ClGemmHelpers.h.

Function Documentation

◆ configure_lhs_rhs_info()

std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > configure_lhs_rhs_info	(	unsigned int	m,
		unsigned int	n,
		unsigned int	m0,
		unsigned int	n0,
		unsigned int	k0,
		unsigned int	v0,
		unsigned int	h0,
		bool	lhs_interleave,
		bool	rhs_interleave,
		bool	lhs_transpose,
		bool	rhs_transpose,
		bool	export_to_cl_image = false
	)

Configure GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.

Parameters

[in]	m	Number of rows (M) in the LHS matrix not reshaped
[in]	n	Number of columns (N) in the RHS matrix not reshaped
[in]	m0	Number of rows processed by each thread/work-item
[in]	n0	Number of columns processed by each thread/work-item
[in]	k0	Number of inner accumulations performed by each thread/work-item
[in]	v0	Number of vertical blocks of size (m0xk0) stored on the same output row
[in]	h0	Number of horizontal blocks of size (k0xn0) stored on the same output row
[in]	lhs_interleave	True if the v0 (m0xk0) blocks have to be interleaved in the output row
[in]	rhs_interleave	True if the h0 (k0xn0) blocks have to be interleaved in the output row
[in]	lhs_transpose	True if the (m0xk0) block has to be transposed before being stored
[in]	rhs_transpose	True if the (k0xn0) block has to be transposed before being stored
[in]	export_to_cl_image	(Optional) True if the RHS reshaped matrix has to be exported to cl_image

Returns: GEMMLHSMatrixInfo and GEMMRHSMatrixInfo

Definition at line 42 of file ClGemmHelpers.cpp.

 {
     ARM_COMPUTE_ERROR_ON(m0 == 0 || n0 == 0);
     ARM_COMPUTE_ERROR_ON(v0 == 0);
     v0 = std::max(std::min(static_cast<int>(m / m0), static_cast<int>(v0)), static_cast<int>(1));
  
     if (h0 == 0)
     {
         // When h0 is 0, we should take the maximum H0 possible
         h0 = std::max(n / n0, 1U);
     }
     else
     {
         h0 = std::max(std::min(static_cast<int>(n / n0), static_cast<int>(h0)), static_cast<int>(1));
     }
  
     const GEMMLHSMatrixInfo lhs_info(m0, k0, v0, lhs_transpose, lhs_interleave);
     const GEMMRHSMatrixInfo rhs_info(n0, k0, h0, rhs_transpose, rhs_interleave, export_to_cl_image);
  
     return std::make_pair(lhs_info, rhs_info);
 }

References ARM_COMPUTE_ERROR_ON, arm_compute::export_to_cl_image(), and arm_compute::utils::cast::U.

Referenced by find_lhs_rhs_info(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped(), and arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs().

◆ find_lhs_rhs_info()

std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > find_lhs_rhs_info	(	const GeMMConfigsMatrix &	configs,
		unsigned int	m,
		unsigned int	n,
		unsigned int	k,
		unsigned int	b
	)

Find the preferred configurations for the LHS and RHS tensor using the GeMMConfigsMatrix provided by the user.

Parameters

[in]	configs	List of best configurations for a limited number of GeMM shapes
[in]	m	Number of rows of the LHS matrix
[in]	n	Number of columns of the RHS matrix
[in]	k	Number of columns of the LHS matrix, rows of the RHS matrix
[in]	b	Batch size

Returns: GEMMLHSMatrixInfo and GEMMRHSMatrixInfo

Definition at line 174 of file ClGemmHelpers.cpp.

 {
     size_t min_acc = std::numeric_limits<size_t>::max();
     size_t min_idx = 0;
  
     ARM_COMPUTE_ERROR_ON(configs.size() == 0);
     const size_t num_rows = configs.size();
     const size_t num_cols = configs[0].size();
  
     ARM_COMPUTE_ERROR_ON_MSG(num_cols != 14U, "The entry should have 14 integer values representing: M, N, K, B, M0, "
                                               "N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
     ARM_COMPUTE_UNUSED(num_cols);
  
     // Find nearest GeMM workload
     // Note: the workload does not depend on the K dimension
     for (size_t y = 0; y < num_rows; ++y)
     {
         size_t mc0 = static_cast<size_t>(configs[y][0]);
         size_t nc0 = static_cast<size_t>(configs[y][1]);
         size_t kc0 = static_cast<size_t>(configs[y][2]);
         size_t bc0 = static_cast<size_t>(configs[y][3]);
  
         size_t acc = 0;
         acc += (m - mc0) * (m - mc0);
         acc += (n - nc0) * (n - nc0);
         acc += (k - kc0) * (k - kc0);
         acc += (b - bc0) * (b - bc0);
         acc = std::sqrt(acc);
         if (acc < min_acc)
         {
             min_acc = acc;
             min_idx = y;
         }
     }
  
     // Get the configuration from the nearest GeMM shape
     const int m0     = configs[min_idx][4];
     const int n0     = configs[min_idx][5];
     const int k0     = configs[min_idx][6];
     const int v0     = configs[min_idx][7];
     const int h0     = configs[min_idx][8];
     const int i_lhs  = configs[min_idx][9];
     const int i_rhs  = configs[min_idx][10];
     const int t_lhs  = configs[min_idx][11];
     const int t_rhs  = configs[min_idx][12];
     const int im_rhs = configs[min_idx][13];
  
     return configure_lhs_rhs_info(m, n, m0, n0, k0, v0, h0, i_lhs, i_rhs, t_lhs, t_rhs, im_rhs);
 }

References ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_ERROR_ON_MSG, ARM_COMPUTE_UNUSED, arm_compute::test::validation::b, configure_lhs_rhs_info(), and arm_compute::utils::cast::U.

◆ is_mmul_kernel_preferred()

bool is_mmul_kernel_preferred	(	const unsigned int	m,
		const unsigned int	n,
		const unsigned int	k,
		const unsigned int	b,
		const DataType	data_type,
		unsigned int &	best_m0,
		unsigned int &	best_n0
	)

Determine if the MMUL kernels should be preferred.

Parameters

[in]	m	Number of rows of the LHS matrix
[in]	n	Number of columns of the RHS matrix
[in]	k	Number of columns of the LHS matrix, rows of the RHS matrix
[in]	b	Batch size
[in]	data_type	Data type FP32/FP16
[out]	best_m0	Suggested M0 (number of rows of the output block) for the kernel
[out]	best_n0	Suggested N0 (number of columns of the output block) for the kernel

Returns: true if MMUL kernel is preferred over kernels w/o MMUL, false otherwise

Definition at line 151 of file ClGemmHelpers.cpp.

 {
     ARM_COMPUTE_UNUSED(n, k, b, data_type);
  
     const unsigned int mmul_k0 = 4;
     best_m0                    = 4;
     best_n0                    = 4;
  
     const unsigned int ceil_to_multiple_m_m0             = ceil_to_multiple(m, best_m0);
     const unsigned int m_div_m0                          = ceil_to_multiple_m_m0 / best_m0;
     const unsigned int ceil_to_multiple_m_div_m0_mmul_k0 = ceil_to_multiple(m_div_m0, mmul_k0);
     const unsigned int gws_y                             = ceil_to_multiple_m_div_m0_mmul_k0 / mmul_k0;
  
     return ((k % mmul_k0) == 0) && (gws_y > 4);
 }

References ARM_COMPUTE_UNUSED, arm_compute::test::validation::b, arm_compute::ceil_to_multiple(), and arm_compute::test::validation::data_type.

◆ select_lhs_rhs_info()

std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > select_lhs_rhs_info	(	std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo >	info_img,
		std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo >	info_buf,
		unsigned int	n,
		unsigned int	k,
		unsigned int	b,
		DataType	data_type
	)

Select GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.

This function accepts two pairs of GEMMLHSMatrixInfo/GEMMRHSMatrixInfo, of which only the first has cl_image2d support, and selects between them by validating the GEMMRHSMatrixInfo of the first pair. If the validation passes, the function returns the first GEMMLHSMatrixInfo/GEMMRHSMatrixInfo pair (with cl_image2d support); otherwise it returns the second (fallback) pair.

Parameters

[in]	info_img	GEMMLHSMatrixInfo/GEMMRHSMatrixInfo with cl_image2d support
[in]	info_buf	GEMMLHSMatrixInfo/GEMMRHSMatrixInfo to fall-back if cl_image2d cannot be used
[in]	n	Number of columns (N) in the RHS matrix not reshaped
[in]	k	Number of rows (K) in the RHS matrix not reshaped
[in]	b	Batch size
[in]	data_type	Data type

Returns: GEMMLHSMatrixInfo and GEMMRHSMatrixInfo

Definition at line 76 of file ClGemmHelpers.cpp.

 {
     ARM_COMPUTE_ERROR_ON_MSG(info_buf.second.export_to_cl_image == true,
                              "The fallback GeMM configuration cannot have export_to_cl_image = true");
  
     const TensorInfo  tensor_rhs_info(TensorShape(n, k, b), 1, data_type);
     const TensorShape shape = misc::shape_calculator::compute_rhs_reshaped_shape(tensor_rhs_info, info_img.second);
     const TensorInfo  tensor_reshaped_info(shape, 1, data_type);
  
     if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)))
     {
         return info_img;
     }
     else
     {
         return info_buf;
     }
 }

References ARM_COMPUTE_ERROR_ON_MSG, arm_compute::test::validation::b, arm_compute::misc::shape_calculator::compute_rhs_reshaped_shape(), arm_compute::test::validation::data_type, arm_compute::test::validation::shape, and validate_image2d_support_on_rhs().

◆ update_padding_for_cl_image()

void update_padding_for_cl_image ( ITensorInfo * tensor )

Update padding required to export the OpenCL buffer to OpenCL image2d.

Parameters

[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d

Definition at line 100 of file ClGemmHelpers.cpp.

 {
     constexpr unsigned int num_floats_per_pixel = 4;
  
     const unsigned int stride_y_in_elements = tensor->strides_in_bytes()[1] / tensor->element_size();
     const unsigned int pixel_alignment      = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device());
  
     ARM_COMPUTE_ERROR_ON_MSG(pixel_alignment == 0, "Cannot retrieve cl_image pitch alignment");
     if (pixel_alignment == 0)
     {
         return;
     }
  
     const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
     const unsigned int round_up_width =
         ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
     const unsigned int padding = round_up_width - stride_y_in_elements;
  
     tensor->extend_padding(PaddingSize(0, tensor->padding().right + padding, 0, 0));
 }

References ARM_COMPUTE_ERROR_ON_MSG, CLKernelLibrary::get(), arm_compute::get_cl_image_pitch_alignment(), and tensor.

Referenced by ClMatMulNativeKernel::configure(), ClIndirectConv2dKernel::configure(), ClDirectConv2dKernel::configure(), CLDepthwiseConvolutionLayerNativeKernel::configure(), GpuDepthwiseConv2d::create_op(), and GpuConv2d::create_op().

◆ validate_image2d_support_on_rhs()

Status validate_image2d_support_on_rhs	(	const ITensorInfo &	tensor_reshaped_info,
		const GEMMRHSMatrixInfo &	rhs_info
	)

Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix.

Parameters

[in]	tensor_reshaped_info	TensorInfo for the RHS reshaped matrix
[in]	rhs_info	GEMMRHSMatrixInfo

Returns: Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix

Definition at line 121 of file ClGemmHelpers.cpp.

 {
     if (rhs_info.export_to_cl_image)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 == 2) || (rhs_info.n0 == 3)) && rhs_info.transpose == false,
                                         "Export to cl_image only supported with n0 = 4, 8 or 16");
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.k0 == 2) || (rhs_info.k0 == 3)) && rhs_info.transpose == true,
                                         "Export to cl_image only supported with k0 = 4, 8 or 16");
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(&tensor_reshaped_info, DataType::F32, DataType::F16);
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(
             !image2d_from_buffer_supported(CLKernelLibrary::get().get_device()),
             "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0,
                                         "Impossible to retrieve the cl_image pitch alignment");
  
         // Check the width and height of the output tensor.
         // Since we cannot create a 3d image from a buffer, the third dimension is collapsed on the second dimension
         const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
         const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
  
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[0] > max_image_w * 4,
                                         "Not supported width for cl_image");
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(
             tensor_reshaped_info.tensor_shape()[1] * tensor_reshaped_info.tensor_shape()[2] > max_image_h,
             "Not supported height for cl_image");
     }
  
     return Status{};
 }

References ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MSG, GEMMRHSMatrixInfo::export_to_cl_image, arm_compute::F16, arm_compute::F32, CLKernelLibrary::get(), arm_compute::get_cl_image_pitch_alignment(), CLKernelLibrary::get_device(), arm_compute::image2d_from_buffer_supported(), GEMMRHSMatrixInfo::k0, GEMMRHSMatrixInfo::n0, ITensorInfo::tensor_shape(), and GEMMRHSMatrixInfo::transpose.

Referenced by select_lhs_rhs_info().

Data Structures

Typedefs

Functions

Typedef Documentation

◆ GeMMConfigsMatrix

Function Documentation

◆ configure_lhs_rhs_info()

◆ find_lhs_rhs_info()

◆ is_mmul_kernel_preferred()

◆ select_lhs_rhs_info()

◆ update_padding_for_cl_image()

◆ validate_image2d_support_on_rhs()