24.02.1
|
Go to the documentation of this file.
37 namespace experimental
39 namespace dynamic_fusion
71 return get_2x2_kernel_code();
75 return get_MxN_kernel_code();
79 std::string ClTemplatePool2d::get_MxN_kernel_code()
const
81 const auto pool_type = _attributes.
pool_type();
85 std::string pool_op = (pool_type ==
PoolingType::AVG) ? R
"_(#define POOL_OP(x,y) ((x) + (y)))_"
86 : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
91 std::string code = R
"_(
92 //------------------ START KERNEL {{meta_kernel_id}} ---------------------
94 // OUT(dst, accum) {{dst}}
97 const int idx_out_c = g_ind_0;
98 const int idx_out_w = g_ind_1;
102 code +=
"\n" + pool_op +
"\n";
105 const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
106 const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
111 __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
113 __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
115 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
116 res0 = {{INITIAL_VALUE}};
118 const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
119 const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
121 const int pool_x_s = max((int)0, -idx_in_w);
122 const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w);
123 const int pool_y_s = max((int)0, -idx_in_h);
124 const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h);
131 const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
137 const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}};
148 // Global pooling path
149 for(int y = 0; y < {{POOL_SIZE_Y}}; ++y)
152 for(int x = 0; x < {{POOL_SIZE_X}}; ++x)
154 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
161 for(int y = pool_y_s; y < pool_y_e; ++y)
164 for(int x = pool_x_s; x < pool_x_e; ++x)
166 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
173 if (fp_mixed_precision)
177 data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
178 res0 = POOL_OP(res0, data0);
186 data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z));
187 res0 = POOL_OP(res0, data0);
197 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
202 if (fp_mixed_precision)
205 VEC_DATA_TYPE({{DATA_TYPE}}, N0)
206 res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
207 STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
214 STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
219 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
226 std::string ClTemplatePool2d::get_2x2_kernel_code()
const
228 const auto pool_type = _attributes.
pool_type();
230 std::string pool_op = (pool_type ==
PoolingType::AVG) ? R
"_(#define POOL_OP(x,y) ((x) + (y)))_"
231 : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
233 std::string code = R"_(
234 //------------------ START KERNEL {{meta_kernel_id}} ---------------------
236 // OUT(dst, accum) {{dst}}
238 #define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
241 const int idx_out_c = g_ind_0;
242 const int idx_out_w = g_ind_1;
246 code +=
"\n" + pool_op +
"\n";
250 const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
251 const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
255 const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
256 const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
258 __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
259 __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
261 const int pool_x_s = max((int)0, -idx_in_w);
262 const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w);
263 const int pool_y_s = max((int)0, -idx_in_h);
264 const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h);
266 const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s);
267 const int x0 = pool_x_s + idx_in_w;
268 const int y0 = pool_y_s + idx_in_h;
269 const int x1 = pool_x_e - 1 + idx_in_w;
270 const int y1 = pool_y_e - 1 + idx_in_h;
272 REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
275 if (fp_mixed_precision)
279 data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
280 data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
281 data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
282 data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
288 data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z));
289 data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z));
290 data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z));
291 data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z));
301 SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0;
302 SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1);
303 SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0;
304 SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1);
306 data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s));
307 data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s));
308 data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e));
309 data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e));
315 VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
317 res0 = POOL_OP(res0, data1);
318 res0 = POOL_OP(res0, data2);
319 res0 = POOL_OP(res0, data3);
328 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
334 res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4;
340 if (fp_mixed_precision)
343 VEC_DATA_TYPE({{DATA_TYPE}}, N0)
344 res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
345 STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
351 STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
356 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
383 lut[
"meta_kernel_id"] =
id();
386 const auto padding = _attributes.
pad();
387 const auto stride = _attributes.
stride();
388 const auto pool_size = _attributes.
pool_size();
390 const auto use_fp_mixed_precision =
392 const std::string max_initial_value =
397 lut[
"STRIDE_X"] = stride.x();
398 lut[
"STRIDE_Y"] = stride.y();
399 lut[
"PAD_X"] = padding.left;
400 lut[
"PAD_Y"] = padding.top;
401 lut[
"POOL_SIZE_X"] = pool_size.width;
402 lut[
"POOL_SIZE_Y"] = pool_size.height;
421 const unsigned int n0 = root_window.
x().
step();
422 const unsigned int partial_store_n0 = _dst->
dimension(0) % n0;
436 std::string config_id{};
437 config_id +=
"pooling_layer_2d_";
453 return std::set<std::string>{
"helpers.h",
"tile_helpers.h",
"repeat.h"};
Class to describe a number of elements in each dimension.
std::string to_string(T &&value)
Convert integer and float values to string.
size_t y() const
Semantic accessor for height as y.
bool use_inf_as_limit() const
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
Operator backend specific settings.
virtual const IGpuTemplateComponentWriter * template_writer() const
Get writer for the component.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
DataLayout
[DataLayout enum definition]
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Window get_window() const override
Generate the execution window for the component.
std::unordered_map< Tag, TagVal > TagLUT
Tag lookup table.
constexpr int step() const
Return the step of the dimension.
std::string lower_string(const std::string &val)
Lower a given string.
ComponentPtr get_root_component() const
Get the root (first) component of this group.
Class for specifying the size of an image or rectangle.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
An interface used by ClTemplateWriter to write source code for a kernel component.
This is a generic class that packs the arguments of an operator.
std::string get_config_id() const override
Generate the component config id string used for tuning.
constexpr auto data_layout
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
ClTemplatePool2d(ComponentId id, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes, const Settings &settings)
Constructor.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
PoolingType pool_type() const
void add_option(std::string option)
Adds option to the existing build option list.
std::set< std::string > get_headers_list() const override
Generate the header list used in the component.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
std::string get_component_code(const ComponentGroup &comp_group) const override
Generate kernel component code template.
std::string get_name() const override
Generate kernel component name.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
ComponentId id() const
Get component id.
size_t x() const
Semantic accessor for width as x.
Contain information required to set up a kernel argument at run time.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
bool exclude_padding() const
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
TensorVariable get_variable(const ITensorInfo *tensor) const
Get the TensorVariable associated with tensor.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Declare all variables used by the component in the vtable.
@ F16
16-bit floating-point number
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
A group of gpu kernel components to be fused together PRECONDITIONS:
virtual Window get_window() const
Generate the execution window for the component.
@ F32
32-bit floating-point number
TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override
Generate the tag look-up table used to instantiate the component code.
A table of all the variables used in the kernel.
void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias="unnamed")
Declare a TensorVariable for a corresponding tensor info.
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
DataType
Available data types.
CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override
Generate the build options used in the component.