24.02.1
|
Go to the documentation of this file.
37 namespace experimental
39 namespace dynamic_fusion
56 return "elementwise_binary";
68 //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
75 TILE(uint, M0, 1, g_dst_indirect_y);
83 TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
91 TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
104 {{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
105 T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
113 {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
114 T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
120 T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
128 LOOP_UNROLLING(int, i, 0, 1, M0,
130 g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
131 g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h);
139 //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
146 const ComponentGroup &comp_group)
const
156 const ComponentGroup &comp_group)
const
161 lut[
"meta_kernel_id"] =
id();
168 lut[
"arg_dst"] = vtable.
get_variable(comp_group.get_any_dst_tensor());
173 lut[
"ELTWISE_OP"] =
"ADD";
176 lut[
"ELTWISE_OP"] =
"SUB";
179 lut[
"ELTWISE_OP"] =
"MUL";
196 const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1;
197 const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1;
198 const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1;
199 const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1;
200 const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1;
201 const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1;
203 const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z;
204 const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z;
206 lut[
"lhs_n0"] = (lhs_broadcast_x) ?
"1" :
"N0";
207 lut[
"lhs_start_ind_0"] = (lhs_broadcast_x) ?
"0" :
"g_ind_0";
208 lut[
"rhs_n0"] = (rhs_broadcast_x) ?
"1" :
"N0";
209 lut[
"rhs_start_ind_0"] = (rhs_broadcast_x) ?
"0" :
"g_ind_0";
211 lut[
"lhs_m0"] = (lhs_broadcast_yz) ?
"1" :
"M0";
212 lut[
"lhs_start_ind_1"] = (lhs_broadcast_yz) ?
"0" :
"g_ind_1";
213 lut[
"rhs_m0"] = (rhs_broadcast_yz) ?
"1" :
"M0";
214 lut[
"rhs_start_ind_1"] = (rhs_broadcast_yz) ?
"0" :
"g_ind_1";
216 lut[
"BROADCAST_OP"] = (lhs_broadcast_yz) ?
"BROADCAST_LHS_X_" : (rhs_broadcast_yz) ?
"BROADCAST_RHS_X_" :
"";
225 const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
226 const unsigned int n0 = root_window.x().step();
227 const unsigned int m0 = root_window.y().step();
228 const unsigned int partial_store_n0 = _dst->
dimension(0) % n0;
240 std::string config_id{};
254 return std::set<std::string>{
"helpers.h",
"tile_helpers.h"};
std::string to_string(T &&value)
Convert integer and float values to string.
std::string get_component_code(const ComponentGroup &comp_group) const override
Generate kernel component code template.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
constexpr unsigned int vector_size_byte_opencl
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
static constexpr GpuKernelArgumentInfo::Type common_tensor_type
For now all kernel intermediate/destination tensors are expected to be of type Tensor_4D_t_Buffer.
std::unordered_map< Tag, TagVal > TagLUT
Tag lookup table.
std::string lower_string(const std::string &val)
Lower a given string.
ComponentPtr get_root_component() const
Get the root (first) component of this group.
TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Generate the tag look-up table used to instantiate the component code.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes)
Constructor.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Window get_window() const override
Generate the execution window for the component.
ElementwiseBinaryCommonAttributes & operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
Set operation.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
An interface used by ClTemplateWriter to write source code for a kernel component.
This is a generic class that packs the arguments of an operator.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Declare all variables used by the component in the vtable.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
ComponentId id() const
Get component id.
Contain information required to set up a kernel argument at run time.
std::set< std::string > get_headers_list() const override
Generate the header list used in the component.
TensorVariable get_variable(const ITensorInfo *tensor) const
Get the TensorVariable associated with tensor.
CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override
Generate the build options used in the component.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
bool is_input_tensor(const ITensorInfo *tensor) const
Check if an ITensorInfo is an input tensor of the group.
Copyright (c) 2017-2024 Arm Limited.
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
std::string get_config_id() const override
Generate the component config id string used for tuning.
std::string get_name() const override
Generate kernel component name.
A group of gpu kernel components to be fused together PRECONDITIONS:
A table of all the variables used in the kernel.
void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias="unnamed")
Declare a TensorVariable for a corresponding tensor info.
unsigned int num_elems_processed_per_iteration
ComponentId id() const
Get component id.
void collapse(size_t n, size_t first=0)
Collapse the first n dimensions.