24.02.1
|
Go to the documentation of this file.
36 namespace experimental
38 namespace dynamic_fusion
42 constexpr
unsigned int serial_vector_size = 8;
59 return "logits_1d_max_shift_exp_sum";
66 std::string code = R
"_(
67 //------------------ START KERNEL {{meta_kernel_id}} ---------------------
68 #define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0)
69 #define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0)
71 __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z;
72 __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z;
73 Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}});
74 VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}});
77 const bool beta_defined = (_attributes.
beta() != 1.f);
82 VEC_TYPE beta = (VEC_TYPE){{BETA}};
86 constexpr unsigned int _serial_vector_size = 8;
87 const unsigned int reduction_dim_size = _src->
dimension(0);
89 const bool non_multiple_of_n0 = ((reduction_dim_size %
vector_size) != 0);
91 if (non_multiple_of_n0)
94 VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
95 SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0);
96 max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx));
101 for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
103 VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
104 max_val_vec = max(data, max_val_vec);
107 {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0);
111 if (non_multiple_of_n0)
126 VSTORE_PARTIAL(N0, PARTIAL_N0)
127 (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
129 data = select(0, data, widx);
136 data = select(0, data, widx);
137 VSTORE_PARTIAL(N0, PARTIAL_N0)
138 (data, 0, (__global {{DATA_TYPE}} *)dst_addr);
147 for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0)
149 VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}})));
164 (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
173 (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}})));
183 *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0);
185 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
214 lut[
"meta_kernel_id"] =
id();
231 const unsigned int reduction_dim_size = _src->
dimension(0);
254 return std::set<std::string>{
"helpers.h",
"tile_helpers.h"};
Class to describe a number of elements in each dimension.
std::string to_string(T &&value)
Convert integer and float values to string.
constexpr size_t vector_size
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
std::string get_component_code(const ComponentGroup &comp_group) const override
Generate kernel component code template.
std::unordered_map< Tag, TagVal > TagLUT
Tag lookup table.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
SoftmaxAttributes & beta(float beta)
Set beta.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
An interface used by ClTemplateWriter to write source code for a kernel component.
This is a generic class that packs the arguments of an operator.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Declare all variables used by the component in the vtable.
Window collapse(const Window &full_window, size_t first, size_t last=Coordinates::num_max_dimensions) const
Collapse the dimensions between first and last.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
ClTemplateLogits1DMaxShiftExpSum(ComponentId id, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes)
Constructor.
std::string get_config_id() const override
Generate the component config id string used for tuning.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
ComponentId id() const
Get component id.
Contains information required to set up a kernel argument at run time.
#define ARM_COMPUTE_UNUSED(...)
Avoids unused-variable warnings.
SoftmaxAttributes & is_log_softmax(bool is_log)
Set is_log.
TensorVariable get_variable(const ITensorInfo *tensor) const
Get the TensorVariable associated with tensor.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override
Generate the build options used in the component.
std::set< std::string > get_headers_list() const override
Generate the header list used in the component.
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
std::string get_name() const override
Generate kernel component name.
@ F16
16-bit floating-point number
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Window get_window() const override
Generate the execution window for the component.
A group of GPU kernel components to be fused together. PRECONDITIONS:
TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Generate the tag look-up table used to instantiate the component code.
A table of all the variables used in the kernel.
void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias="unnamed")
Declare a TensorVariable for a corresponding tensor info.
DataType
Available data types.