33 namespace experimental
35 namespace dynamic_fusion
// NOTE(review): this view is a partial extract. The enclosing function header, its braces and
// several statements are not visible. The later calls replace_tags(..., var_lut) suggest this
// is the body of replace_tags(code_template, tags); confirm against the full source.
//
// Walks code_template one character at a time. Outside a pattern, characters are copied into
// replaced_code. Two consecutive opening braces switch to pattern scanning, where characters
// are collected into pattern_found until two consecutive closing braces are seen; the value
// stored under that key in tags is then appended to replaced_code.
40 std::string replaced_code =
"";
41 bool scanning_pattern =
false;
42 std::string pattern_found =
"";
// NOTE(review): size() is unsigned, so size() - 1 wraps around when code_template is empty.
// With this bound the final character is, in the visible code, only ever read through
// code_template[i + 1] and never copied on its own; verify that this is intended.
43 for (
size_t i = 0; i < code_template.size() - 1; ++i)
45 if (!scanning_pattern)
// Look ahead one character to detect the two-brace opening delimiter.
47 if (code_template[i] ==
'{' && code_template[i + 1] ==
'{')
50 scanning_pattern =
true;
// Plain text: copy through unchanged.
55 replaced_code += code_template[i];
// Look ahead one character to detect the two-brace closing delimiter.
60 if (code_template[i] ==
'}' && code_template[i + 1] ==
'}')
63 scanning_pattern =
false;
// Message for a missing tag; the statement that consumes err is not visible in this view.
64 std::string err =
"Pattern " + pattern_found +
" not found in tags";
// NOTE(review): find() is dereferenced directly here. No end() check is visible in this view;
// presumably one exists on a hidden line that uses err. Confirm.
66 replaced_code += tags.find(pattern_found)->second.value;
// Still inside a pattern: accumulate the tag name.
70 pattern_found += code_template[i];
// Returns the result of write_kernel_name() unchanged.
// NOTE(review): the enclosing function header is not visible in this view.
85 return write_kernel_name();
// Appends the config id of every component to config_id, wrapping each one in a leading and
// trailing double dash.
// NOTE(review): the enclosing function header and the declaration of config_id are not
// visible in this view.
94 for (
const auto &comp : _components)
96 config_id +=
"--" + comp->template_writer()->get_config_id() +
"--";
// Adds the build options reported by each component's template writer to build_opts.
// NOTE(review): the enclosing function header and the declaration of build_opts are not
// visible in this view.
106 for (
const auto &comp : _components)
108 build_opts.
add_options(comp->template_writer()->get_build_options(_components).options());
// Returns the window reported by the template writer of root_comp.
// NOTE(review): how root_comp is obtained, and the enclosing function header, are not visible
// in this view.
118 return root_comp->template_writer()->get_window();
// Builds a map keyed by tensor id. Each entry pairs the tensor info *t with the kernel argument
// info that _vtable holds for that tensor.
// NOTE(review): the loop that supplies t, the return statement and the enclosing function
// header are not visible in this view.
124 std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
127 tensors.emplace(
t->id(),
GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
// Assembles the kernel source text from all components: header includes, the global
// coordinate section, tile declarations and finally the per-component code.
// NOTE(review): this view is a partial extract. Braces and several statements of this function
// (including its return) are not visible, so the comments below cover only what is shown.
132 std::string ClTemplateWriter::write_code()
// Sets are used for headers and macros, so repeated entries are stored only once.
137 std::set<std::string> headers_list{};
138 std::set<std::string> additional_macros{};
139 std::vector<std::string> component_codes{};
// First pass: every component declares its variables in the shared variable table.
142 for (
auto &component : _components)
144 component->template_writer()->declare_variables(_vtable, _components);
// Second pass: collect headers, macros and code from each component.
147 for (
auto &component : _components)
149 const auto component_writer = component->template_writer();
150 auto curr_headers_list = component_writer->get_headers_list();
151 auto curr_additional_macros = component_writer->get_additional_macros();
152 auto curr_component_code = component_writer->get_component_code(_components);
// Tag lookup table for this component; the arguments of get_tag_lut are on lines not shown.
153 const auto var_lut = component_writer->get_tag_lut(
// Substitute the tags in the component code before storing it.
156 component_codes.push_back(
replace_tags(curr_component_code, var_lut));
158 headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
// NOTE(review): this tests the accumulated set additional_macros rather than the current
// component's curr_additional_macros. The set starts empty and the insert below is the only
// one visible, so it looks like the insert can never run. Verify whether
// curr_additional_macros was intended.
159 if (!additional_macros.empty())
161 additional_macros.insert(
replace_tags(curr_additional_macros, var_lut));
166 std::string code =
"";
// Emit one include line per collected header. The EMBEDDED_KERNELS branch body is not
// visible in this view; the other branch writes a quoted #include directive.
168 for (
auto &
header : headers_list)
170 #if defined(EMBEDDED_KERNELS)
172 #else // defined(EMBEDDED_KERNELS)
173 code +=
"#include \"" +
header +
"\"\n";
174 #endif // defined(EMBEDDED_KERNELS)
// NOTE(review): the token after auto on the next line looks like a mis-encoded reference
// declarator followed by the name macros (an HTML-entity style corruption). Restore it from
// the original file. The loop body is not visible in this view.
177 for (
auto ¯os : additional_macros)
// Order the argument tensors by ascending tensor id.
182 auto arguments = _components.get_argument_tensors();
183 std::sort(arguments.begin(), arguments.end(),
184 [](
const ITensorInfo *l,
const ITensorInfo *r) { return l->id() < r->id(); });
// Global coordinate section, bracketed by marker comments in the generated source.
189 code +=
" //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
190 code += write_global_section();
191 code +=
" //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
// Tile declarations: one TILE(data_type, M0, N0, name) line per tile.
194 const auto tiles = _components.get_tiles();
195 std::stringstream tiles_ss;
197 tiles_ss <<
" //------------------ START TILE DECLARATION ---------------------\n";
// NOTE(review): auto tile copies each element by value; var and data_type are defined on
// lines not visible in this view.
199 for (
auto tile : tiles)
203 const auto var_name = var.uniq_name;
205 tiles_ss <<
" TILE(" <<
data_type <<
", M0, N0, " << var_name <<
");\n";
208 tiles_ss <<
" //------------------ END TILE DECLARATION ---------------------\n";
210 code += tiles_ss.str();
// Append the tag-substituted component codes in the order they were collected.
213 for (
const auto &component_code : component_codes)
215 code += component_code;
// Builds the OpenCL snippet that declares the global indices g_ind_0, g_ind_1 and g_ind_2
// through GET_SPATIAL_IDX, plus the booleans g_cond_x and g_cond_y.
// NOTE(review): this view is a partial extract. Braces, the definitions of tile_w and tile_h,
// and the return statement are not visible.
223 std::string ClTemplateWriter::write_global_section()
const
225 const auto dst_info = _components.get_any_dst_tensor();
// Dimension 0 of the destination tensor.
226 const auto dst_w = dst_info->dimension(0);
// Remainder of the destination width when divided by tile_w.
229 auto leftover_w = dst_w % tile_w;
231 std::string code =
"";
// NOTE(review): this statement ends with a trailing + and continues on a line that is not
// visible in this view, so the last argument passed for g_ind_0 cannot be seen here.
232 code += std::string(
" int g_ind_0 = GET_SPATIAL_IDX(0, ") +
std::to_string(tile_w) +
", " +
// g_ind_1 uses tile_h as the second argument and a literal 0 as the third.
234 code += std::string(
" int g_ind_1 = GET_SPATIAL_IDX(1, ") +
std::to_string(tile_h) +
", " +
"0);\n";
// g_ind_2 uses fixed arguments 1 and 0.
235 code += std::string(
" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
// Flags that are true when the corresponding index is zero.
237 code +=
" const bool g_cond_x = (g_ind_0 == 0);\n";
238 code +=
" const bool g_cond_y = (g_ind_1 == 0);\n";
// Produces the kernel parameter declaration text for one tensor variable, selected by
// var.kernel_argument_info.type. Every declaration is prefixed with a newline and is built
// from var.uniq_name.
// NOTE(review): the case labels, break statements, the declaration of code and the return are
// not visible in this view, so which enum value maps to which declaration cannot be confirmed
// from here.
242 std::string ClTemplateWriter::write_argument_declaration(
const GpuKernelVariableTable::TensorVariable &var)
const
245 switch (var.kernel_argument_info.type)
// Branch emitting a VECTOR_DECLARATION macro.
249 code +=
"\n VECTOR_DECLARATION(" + var.uniq_name +
")";
// Branch emitting an IMAGE_DECLARATION macro.
254 code +=
"\n IMAGE_DECLARATION(" + var.uniq_name +
")";
// Branch emitting an IMAGE_DECLARATION macro followed by an extra unsigned int parameter
// named <uniq_name>_stride_z.
259 code +=
"\n IMAGE_DECLARATION(" + var.uniq_name +
"),";
260 code +=
"\n unsigned int " + var.uniq_name +
"_stride_z";
// Branch emitting a read-only image2d_t parameter named <uniq_name>_img followed by an
// unsigned int parameter named <uniq_name>_stride_z.
265 code +=
"\n __read_only image2d_t " + var.uniq_name +
"_img,";
266 code +=
"\n unsigned int " + var.uniq_name +
"_stride_z";
// Branch emitting TENSOR4D_T with the BUFFER qualifier.
271 code +=
"\n TENSOR4D_T(" + var.uniq_name +
", BUFFER)";
// Branch emitting TENSOR4D_T with the IMAGE qualifier.
276 code +=
"\n TENSOR4D_T(" + var.uniq_name +
", IMAGE)";
// Branch emitting a TENSOR3D_DECLARATION macro.
281 code +=
"\n TENSOR3D_DECLARATION(" + var.uniq_name +
")";
// Reports an error for argument types without a declaration; presumably the default branch
// (label not visible here).
286 ARM_COMPUTE_ERROR(
"Unsupported declaration generation for GpuKernelArgumentInfo::Type");
// Builds the opening of the kernel signature: the __kernel void prefix, the kernel name from
// write_kernel_name(), an opening parenthesis and the argument declarations.
// NOTE(review): the enclosing function header, the closing of the signature and the return
// are not visible in this view. The block name used for this edit is inferred.
293 std::string code =
"\n__kernel void " + write_kernel_name() +
"(";
// All arguments except the last are followed by a comma. The size is cast to int before
// subtracting, so an empty argument_list gives -1 and the loop body does not run.
295 for (
int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
297 code += write_argument_declaration(argument_list[i]) +
",";
// The last argument, if there is one, is written without a trailing comma.
299 if (
static_cast<int>(argument_list.size()) - 1 >= 0)
301 code += write_argument_declaration(argument_list[argument_list.size() - 1]);
// Builds the kernel name from the names of the components' template writers.
// NOTE(review): this view is a partial extract. Braces, at least one statement inside the loop
// and the final return are not visible.
308 std::string ClTemplateWriter::write_kernel_name()
const
// With no components a fixed placeholder name is returned.
310 if (_components.empty())
312 return "empty_kernel";
// Start from the name of the first component.
// NOTE(review): if the return above is guarded by the empty() check, as it appears to be, the
// empty() test in this conditional expression can never be true. Confirm, then simplify.
314 std::string
name = _components.empty() ?
"" : _components[0]->template_writer()->get_name();
// Append the names of the remaining components in order.
315 for (
size_t i = 1; i < _components.size(); ++i)
318 name += _components[i]->template_writer()->get_name();