24.02.1
|
Go to the documentation of this file.
34 namespace experimental
36 namespace dynamic_fusion
52 return (src_size >= dst_size) ?
"cast_down" :
"cast_up";
62 std::string code = R
"_(
63 //------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
70 // OUT(dst, accum) {{dst}}
72 TILE(uint, M0, 1, g_dst_indirect_y);
74 {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z;
76 TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}});
77 T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}});
82 LOOP_UNROLLING(int, m0, 0, 1, M0,
89 {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
97 {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
103 {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
114 LOOP_UNROLLING(int, i, 0, 1, M0,
116 g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
117 g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
118 g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
125 //------------------ END KERNEL {{meta_kernel_id}} CAST ---------------------
149 lut[
"tmp"] = (is_root) ? lut[
"src"].value +
"_in_data" : lut[
"src"];
155 lut[
"meta_kernel_id"] =
id();
168 const unsigned int n0 = root_window.
x().
step();
169 const unsigned int m0 = root_window.y().step();
182 std::string config_id{};
198 return std::set<std::string>{
"helpers.h",
"tile_helpers.h"};
Class to describe a number of elements in each dimension.
std::string to_string(T &&value)
Convert integer and float values to string.
CastAttributes & convert_policy(const ConvertPolicy &policy)
Set Policy.
TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Generate the tag look-up table used to instantiate the component code.
virtual const IGpuTemplateComponentWriter * template_writer() const
Get writer for the component.
ClTemplateCast(ComponentId id, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes)
Constructor.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
std::unordered_map< Tag, TagVal > TagLUT
Tag lookup table.
void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Declare all variables used by the component in the vtable.
constexpr int step() const
Return the step of the dimension.
std::string lower_string(const std::string &val)
Lower a given string.
ComponentPtr get_root_component() const
Get the root (first) component of this group.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override
Generate the build options used in the component.
const ITensorInfo * get_any_dst_tensor() const
Get one of the destination tensors of this group.
An interface used by ClTemplateWriter to write source code for a kernel component.
This is a generic class that packs the arguments of an operator.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
std::set< std::string > get_headers_list() const override
Generate the header list used in the component.
void add_option(std::string option)
Adds option to the existing build option list.
std::string get_component_code(const ComponentGroup &comp_group) const override
Generate kernel component code template.
Window collapse(const Window &full_window, size_t first, size_t last=Coordinates::num_max_dimensions) const
Collapse the dimensions between first and last.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
Window get_window() const override
Generate the execution window for the component.
ComponentId id() const
Get component id.
Contains the information required to set up a kernel argument at run time.
#define ARM_COMPUTE_UNUSED(...)
Avoids unused-variable warnings.
TensorVariable get_variable(const ITensorInfo *tensor) const
Get the TensorVariable associated with tensor.
std::string get_config_id() const override
Generate the component config id string used for tuning.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
A group of GPU kernel components to be fused together. PRECONDITIONS: (the list is cut off in this listing; see the full class documentation).
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
virtual Window get_window() const
Generate the execution window for the component.
A table of all the variables used in the kernel.
void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias="unnamed")
Declare a TensorVariable for a corresponding tensor info.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
std::string get_name() const override
Generate kernel component name.
ComponentId id() const
Get component id.