24.02.1
|
Go to the documentation of this file.
50 using ElementsProcessed = Steps;
53 const ITensorInfo *src1,
54 const ITensorInfo *
dst,
55 const GEMMLHSMatrixInfo &lhs_info,
56 const GEMMRHSMatrixInfo &rhs_info,
57 const GEMMReshapeInfo &gemm_info)
71 "The number of dimensions for the LHS matrix must be <= 4");
73 "The number of dimensions for the RHS matrix must be <= 3");
76 "Only 2,3,4,8,16 are supported for k0");
80 "Only 2,3,4,8,16 are supported for n0");
83 const int m = gemm_info.m();
84 const int n = gemm_info.n();
85 const int k = gemm_info.k();
94 if (gemm_info.reinterpret_input_as_3d())
103 if (
dst->total_size() != 0)
105 const TensorInfo tensor_info_dst =
117 const GEMMLHSMatrixInfo &lhs_info,
118 const GEMMRHSMatrixInfo &rhs_info,
119 const GEMMReshapeInfo &gemm_info,
120 ElementsProcessed &num_elements_processed)
122 unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
123 unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
124 bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
125 bool reinterpret_dst_as_3d = (gemm_info.depth_output_gemm3d() != 0);
128 bool window_changed =
false;
132 if (reinterpret_input_as_3d == reinterpret_dst_as_3d)
134 reinterpret_dst_as_3d =
false;
142 TensorInfo tmp_info(*
dst);
144 if (reinterpret_dst_as_3d)
148 TensorShape tmp_shape(
dst->tensor_shape());
149 tmp_shape.collapse(2U, 1U);
150 tmp_info.set_tensor_shape(tmp_shape);
154 num_elems_processed_per_iteration_x = rhs_info.n0;
155 num_elems_processed_per_iteration_y = lhs_info.m0;
158 calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
161 AccessWindowStatic src1_access(
162 src1, 0, 0,
ceil_to_multiple(src1->dimension(0), num_elems_processed_per_iteration_x), src1->dimension(1));
168 Window collapsed = win;
169 const unsigned int dimension_to_collapse = std::min(
static_cast<unsigned int>(
dst->num_dimensions()), 2u);
170 collapsed = win.collapse(win, dimension_to_collapse);
174 return std::make_pair(err, collapsed);
204 if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
206 _reinterpret_input_as_3d =
false;
207 _reinterpret_output_as_3d =
false;
212 _slide_matrix_b = (src1->
num_dimensions() >= num_dimensions_src0);
214 ElementsProcessed num_elements_processed{};
220 ICLKernel::configure_internal(win_config.second);
225 const unsigned int internal_m = _reinterpret_output_as_3d ? gemm_info.
m() :
dst->dimension(1);
227 const unsigned int partial_store_m0 = internal_m % lhs_info.
m0;
228 const unsigned int partial_store_n0 = gemm_info.
n() % rhs_info.
n0;
232 const unsigned int internal_m0 = std::min(internal_m, lhs_info.
m0);
236 build_opts.
add_option_if(_reinterpret_input_as_3d,
"-DREINTERPRET_INPUT_AS_3D");
237 build_opts.
add_option_if(_reinterpret_output_as_3d,
"-DREINTERPRET_OUTPUT_AS_3D");
238 build_opts.
add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
240 build_opts.
add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
243 build_opts.
add_option_if(_use_dummy_work_items,
"-DDUMMY_WORK_ITEMS");
267 _config_id += (_reinterpret_input_as_3d ?
"3di_" :
"");
268 _config_id += (_reinterpret_output_as_3d ?
"3do_" :
"");
293 ElementsProcessed num_elements_processed{};
296 dst->clone().get(), lhs_info, rhs_info, gemm_info,
297 num_elements_processed)
314 if (src1->info()->num_dimensions() < 3)
326 if (_reinterpret_input_as_3d)
330 const unsigned int total_cross_plane_pad = src0->info()->padding().top + src0->info()->padding().bottom;
331 _kernel.setArg<cl_uint>(idx0,
static_cast<unsigned int>(total_cross_plane_pad));
334 if (_reinterpret_output_as_3d)
338 const unsigned int total_cross_plane_pad =
dst->info()->padding().top +
dst->info()->padding().bottom;
339 _kernel.setArg<cl_uint>(idx0,
static_cast<unsigned int>(total_cross_plane_pad));
347 if (!_slide_matrix_b)
349 slice_b = slice_matrix_b;
352 unsigned int idx = 0;
356 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(src0->info()->strides_in_bytes()[2]));
357 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(src1->info()->strides_in_bytes()[2]));
358 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(
dst->info()->strides_in_bytes()[2]));
std::string to_string(T &&value)
Convert integer and float values to string.
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported.
unsigned int n0
Number of columns processed by the matrix multiplication.
const StringSet & options() const
Gets the current options list set.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
bool preferred_dummy_work_items_support(const cl::Device &device)
Helper function to check if "dummy work-items" are preferred to have a power of two NDRange. In case d...
TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
Calculate the matrix multiplication output shape of two tensors.
GEMM reshape information class.
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
int k() const
Number of matrix A columns or matrix B rows.
int n() const
Number of matrix B columns.
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number (unsigned)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
constexpr static unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
@ QSYMM8
quantized, symmetric fixed-point 8-bit number
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&...patterns)
Update window and padding size for each of the access patterns.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
std::string upper_string(const std::string &val)
Raise a given string to upper case.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
bool reinterpret_input_as_3d() const
Flag which specifies if the input tensor has to be reinterpreted as 3D.
int m() const
Number of matrix A rows.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
@ RUNTIME_ERROR
Generic runtime error.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void add_option(std::string option)
Adds option to the existing build option list.
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
unsigned int m0
Number of rows processed by the matrix multiplication.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number (signed)
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
std::pair< Status, Window > validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst)
Wrapper to configure the Khronos OpenCL C++ header.
ClGemmLowpMatrixMultiplyNativeKernel()
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Describe one of the image's dimensions with a start, end and step.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Window first_slice_window_3D() const
First 3D slice of the window.
const Window & window() const
The maximum window the kernel can be executed on.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMM LHS (Left Hand Side) matrix information.
Describe a multidimensional execution window.
int depth_output_gemm3d() const
Depth (third dimension) of the output tensor to be used with the GEMM3D kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Copyright (c) 2017-2024 Arm Limited.
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
@ S32
signed 32-bit number
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
GEMM RHS (Right Hand Side) matrix information.
@ GEMM
GEMM CL kernel type.
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo * > infos)
Stores padding information before configuring a kernel.
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank).
void configure(const CLCompileContext &compile_context, const ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info)
Initialise the kernel's input and dst.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.