24.02.1
|
Go to the documentation of this file.
37 namespace experimental
39 namespace dynamic_fusion
61 std::string code = R
"_(
62 //------------------ START KERNEL {{meta_kernel_id}} ---------------------
63 TILE(uint, 1, 1, g_dst_indirect_y);
65 const int yo = g_ind_2 % {{arg_dst}}_h;
66 const int bout = g_ind_2 / {{arg_dst}}_h;
74 float xi_f = (g_ind_1 * {{SCALE_X}});
75 float yi_f = (yo * {{SCALE_Y}});
81 float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}});
82 float yi_f = ((yo + 0.5f) * {{SCALE_Y}});
95 const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1);
96 const int yi0 = clamp((int)yi_f, 0, (int){{src}}_h - 1);
98 T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
106 float xi_f = (g_ind_1 * {{SCALE_X}});
107 float yi_f = (yo * {{SCALE_Y}});
113 float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f);
114 float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f);
119 const int xi = (int)floor(xi_f);
120 const int yi = (int)floor(yi_f);
122 TILE({{SRC_DATA_TYPE}}, 1, N0, in00);
123 TILE({{SRC_DATA_TYPE}}, 1, N0, in01);
124 TILE({{SRC_DATA_TYPE}}, 1, N0, in10);
125 TILE({{SRC_DATA_TYPE}}, 1, N0, in11);
127 in00[0].v = {{CONSTANT_VALUE}};
128 in01[0].v = {{CONSTANT_VALUE}};
129 in10[0].v = {{CONSTANT_VALUE}};
130 in11[0].v = {{CONSTANT_VALUE}};
132 const int xi0 = clamp(xi, 0, (int){{src}}_w - 1);
133 const int yi0 = clamp(yi, 0, (int){{src}}_h - 1);
134 const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1);
135 const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1);
137 T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00);
138 T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01);
139 T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10);
140 T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
146 const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
147 const {{SRC_DATA_TYPE}} b = ({{SRC_DATA_TYPE}})(1.f - a);
148 const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi);
149 const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1);
151 // Calculate the output
152 {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
158 const float a = (xi_f - (float)xi);
159 const float b = (1.f - a);
160 const float a1 = (yi_f - (float)yi);
161 const float b1 = (1.f - a1);
163 {{dst}}[0].v = CONVERT_SAT(
164 (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
165 (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
166 (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
167 (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
177 g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h);
179 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
208 lut[
"meta_kernel_id"] =
id();
210 lut[
"SRC_TENSOR_TYPE"] =
"BUFFER";
228 const unsigned int n0 = root_window.
x().
step();
229 const unsigned int m0 = root_window.
y().
step();
230 const unsigned int partial_n0 = _dst->
dimension(0) % n0;
243 std::string config_id{};
245 config_id +=
"resize_";
265 return std::set<std::string>{
"helpers.h",
"tile_helpers.h"};
Class to describe a number of elements in each dimension.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual const IGpuTemplateComponentWriter * template_writer() const
Get writer for the component.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
@ TOP_LEFT
Samples are taken at the pixel's top-left corner.
std::unordered_map< Tag, TagVal > TagLUT
Tag lookup table.
constexpr int step() const
Return the step of the dimension.
ComponentPtr get_root_component() const
Get the root (first) component of this group.
std::string string_from_pixel_value(const PixelValue &value, const DataType data_type)
Convert a PixelValue to a string, represented through the specific data type.
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
const ITensorInfo * get_any_dst_tensor() const
Get one of the destination tensors of this group.
An interface used by ClTemplateWriter to write source code for a kernel component.
This is a generic class that packs the arguments of an operator.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
std::string get_config_id() const override
Generate the component config id string used for tuning.
void add_option(std::string option)
Adds option to the existing build option list.
Window collapse(const Window &full_window, size_t first, size_t last=Coordinates::num_max_dimensions) const
Collapse the dimensions between first and last.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
std::set< std::string > get_headers_list() const override
Generate the header list used in the component.
ClTemplateResize(ComponentId id, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes)
Constructor.
ComponentId id() const
Get component id.
std::string get_component_code(const ComponentGroup &comp_group) const override
Generate kernel component code template.
Contains information required to set up a kernel argument at run time.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
@ BILINEAR
Output values are defined by bilinear interpolation between the pixels.
TensorVariable get_variable(const ITensorInfo *tensor) const
Get the TensorVariable associated with tensor.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
std::string get_name() const override
Generate kernel component name.
void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Declare all variables used by the component in the vtable.
ResizeAttributes & align_corners(bool align_corners)
Set align corners.
Describe a multidimensional execution window.
ResizeAttributes & interpolation_policy(InterpolationPolicy interpolation_policy)
Set interpolation policy.
Window get_window() const override
Generate the execution window for the component.
Copyright (c) 2017-2024 Arm Limited.
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_corners=false)
Returns resize ratio between input and output with consideration of aligned corners.
A group of GPU kernel components to be fused together (its preconditions are listed in the full class documentation).
@ NEAREST_NEIGHBOR
Output values are defined to match the source pixel whose center is nearest to the sample position.
@ CENTER
Samples are taken at the pixel center.
virtual Window get_window() const
Generate the execution window for the component.
A table of all the variables used in the kernel.
void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias="unnamed")
Declare a TensorVariable for a corresponding tensor info.
TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Generate the tag look-up table used to instantiate the component code.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override
Generate the build options used in the component.
ResizeAttributes & sampling_policy(SamplingPolicy sampling_policy)
Set sampling policy.