49 Status validate_arguments(
const ITensorInfo *
src,
const ITensorInfo *
dst,
const GEMMLHSMatrixInfo &lhs_info,
bool reinterpret_input_as_3d)
62 if(dst->total_size() != 0)
73 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst,
const GEMMLHSMatrixInfo &lhs_info,
bool reinterpret_input_as_3d)
75 const unsigned int num_elems_processed_per_iteration_x = lhs_info.k0;
76 const unsigned int num_elems_processed_per_iteration_y = lhs_info.m0;
77 bool window_changed =
false;
79 TensorInfo tmp_info(*src);
81 if(reinterpret_input_as_3d)
85 TensorShape tmp_shape(src->tensor_shape());
86 tmp_shape.collapse(2U, 1U);
87 tmp_info.set_tensor_shape(tmp_shape);
94 Window win =
calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
95 Window win_in =
calculate_max_window(*src, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
97 AccessWindowStatic src_access(src, 0, 0,
100 AccessWindowStatic dst_access(dst, 0, 0, dst->dimension(0), dst->dimension(1));
110 return std::make_pair(err, collapsed);
128 _reinterpret_input_as_3d = reinterpret_input_as_3d;
130 const unsigned int src_w = src->
dimension(0);
132 const unsigned int partial_load_m0 = src_h % lhs_info.
m0;
133 const unsigned int partial_load_k0 = src_w % lhs_info.
k0;
143 build_opts.
add_option_if(_reinterpret_input_as_3d,
"-DREINTERPRET_INPUT_AS_3D");
150 std::string
kernel_name(
"gemm_reshape_lhs_matrix_");
151 kernel_name += lhs_info.
transpose ?
"t" :
"nt";
157 auto win_config = validate_and_configure_window(src, dst, lhs_info, reinterpret_input_as_3d);
159 ICLKernel::configure_internal(win_config.second);
162 _config_id =
"gemm_reshape_lhs_matrix_";
163 _config_id += (_reinterpret_input_as_3d ?
"3d_" :
"");
205 if(_reinterpret_input_as_3d)
209 const unsigned int total_cross_plane_pad = src->info()->padding().top + src->info()->padding().bottom;
210 _kernel.setArg<cl_uint>(idx0,
static_cast<unsigned int>(total_cross_plane_pad));
215 unsigned int idx = 0;
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
const Window & window() const
The maximum window the kernel can be executed on.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
const StringSet & options() const
Gets the current options list set.
unsigned int v0
Number of vertical blocks of size (m0xk0) stored on the same output row.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
GEMM LHS (Left Hand Side) matrix information.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
std::string lower_string(const std::string &val)
Lower a given string.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
bool interleave
True if the v0 (m0xk0) blocks have to be interleaved in the output row.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void add_option(std::string option)
Adds option to the existing build option list.
bool transpose
True if the (m0xk0) block has to be transposed before being stored.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d=false)
Calculate the Left Hand Side matrix reshaped shape.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_src_as_3d)
Static function to check if given info will lead to a valid configuration.
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Elementwise CL kernel type.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_src_as_3d=false)
Initialise the kernel's input and output.
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
Translates the element size to an unsigned integer data type.
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo *> infos)
Stores padding information before configuring a kernel.
Wrapper to configure the Khronos OpenCL C++ header.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
ClGemmReshapeLhsMatrixKernel()
unsigned int m0
Number of rows processed by the matrix multiplication.
Window first_slice_window_3D() const
First 3D slice of the window.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)