Compute Library
 22.08
Common.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
25 
26 #ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
27 #define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
28 
31 #include "arm_compute/core/Error.h"
33 #include "src/core/common/Macros.h"
34 #include "support/Requires.h"
35 #include "support/StringSupport.h"
36 
38 
39 #include <iostream>
40 #include <queue>
41 #include <stack>
42 #include <string>
43 #include <unordered_set>
44 
45 namespace arm_compute
46 {
47 namespace experimental
48 {
49 namespace dynamic_fusion
50 {
51 /** We introduce the concept of *Shared Variables* in the context of kernel building.
52  * They are variables that can be accessed / shared among all the kernel components within a single kernel.
53  * For now we consider 2 groups of shared variables:
54  * Argument: The argument variables (parameters) of a kernel
55  * Automatic: The automatic variables declared inside a kernel
56  * All Shared Variables have the same kernel scope, and are thus visible to all kernel components
57 */
58 
59 enum class SharedVarIO
60 {
61  Input,
62  Output
63 };
64 
65 enum class SharedVarGroup
66 {
67  Argument, // Parameters to a kernel function == dst or src tensors of the whole blueprint graph
68  Automatic // Automatic variables declared within the kernel body == intermediate tensors of the whole blueprint graph
69 };
70 
71 /** Specifies a shared variable link for a component.
72  * It describes all the information that's available when a component is constructed / added:
73  * e.g. its linkage (via ArgumentID and io) and its group
74  * This is not shared variable on its own, but is used for instantiating a SharedVar when building the code
75  */
77 {
78  ArgumentID arg_id{ g_arg_placeholder };
80  bool is_empty() const
81  {
82  return arg_id == g_arg_placeholder;
83  }
84 };
85 
86 /** A table of all the variables used in the kernel / blueprint
87  * Because we limit the DependencyGraph in the blueprint to a Linear Sequence for now, we only allow ** a single global variable (the accumulator) **
88  *
89  * NOTE: the order they appear in the table is the order of their "declaration" in the component code, and is also their ID
90  * NOTE: the variables all have the scope of the full kernel function
91  */
93 {
94 public:
95  /** A fully realized SharedVarLink
96  */
97  struct SharedVar
98  {
99  ArgumentID arg_id{ g_arg_placeholder };
102  std::string uniq_name{}; // Unique name, also the final variable name used in the built code
103  ClKernelArgDescriptor desc{}; // Automatic variables can and should still be described using this struct
104  bool is_empty() const
105  {
106  return arg_id == g_arg_placeholder;
107  }
108  };
109 
110  class Arguments
111  {
112  public:
113  Arguments() = default;
114  void add_var(const SharedVar &var)
115  {
117  _vars.push_back(var);
118  }
119  std::vector<SharedVar> get_all_vars() const
120  {
121  return _vars;
122  }
123  std::vector<SharedVar> get_src_vars() const
124  {
125  std::vector<SharedVar> src_vars;
126  std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(src_vars), [](const SharedVar & var)
127  {
128  return var.io == SharedVarIO::Input;
129  });
130  return src_vars;
131  }
133  {
134  std::vector<SharedVar> dst_vars;
135  std::copy_if(_vars.begin(), _vars.end(), std::back_inserter(dst_vars), [](const SharedVar & var)
136  {
137  return var.io == SharedVarIO::Output;
138  });
139  ARM_COMPUTE_ERROR_ON(dst_vars.size() != 1);
140  return dst_vars.at(0);
141  }
142 
143  private:
144  std::vector<SharedVar> _vars{};
145  };
146 
147  /** Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID). If one has already been created for the SharedVarLink, simply return it instead of creating a new one
148  *
149  * @note: The order of insertion is important. There is one precondition:
150  * PRECOND: The components have been sorted topologically / is being traversed in topological order
151  * This ensures that all the consumer var links (Output, Automatic Links) can consume (return) the producer var links when they're referred
152  */
153  void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name = "unnamed")
154  {
155  ARM_COMPUTE_ERROR_ON_MSG(var_link.is_empty(), "Non-empty SharedVarLink expected");
156  if(!get(var_link).is_empty())
157  {
158  return;
159  }
160 
161  auto var_id = _num_var;
162  std::stringstream ss;
163  ss << name << "_" << var_id;
164  const auto uniq_name = ss.str();
165  SharedVar var{ var_link.arg_id, var_link.io, group, uniq_name, runtime_desc };
166 
167  if(group == SharedVarGroup::Argument)
168  {
169  _arguments.emplace(var_id, var);
170  _arg_id_map.emplace(var_link.arg_id, var_id);
171  _num_var++;
172  }
173  else if(group == SharedVarGroup::Automatic)
174  {
175  if(_global_vars.empty())
176  {
177  if(var_link.io == SharedVarIO::Output)
178  {
179  _global_vars.emplace(var_id, var);
180  _arg_id_map.emplace(var_link.arg_id, var_id);
181  _num_var++;
182  }
183  else
184  {
185  ARM_COMPUTE_ERROR("Component likely not traversed in topological order");
186  }
187  }
188  else
189  {
190  // Associate additional SharedVarLinks with the single global shared variable
191  const auto global_var_id = _global_vars.begin()->first;
192  _arg_id_map[var_link.arg_id] = global_var_id;
193  }
194  }
195  else
196  {
197  ARM_COMPUTE_ERROR("Unrecognised SharedVarGroup");
198  }
199  }
200 
201  /** Get the SharedVar associated with @p var_link
202  *
203  * @param var_link
204  * @return SharedVar
205  */
206  SharedVar get(const SharedVarLink &var_link) const
207  {
208  const SharedVar empty_var{};
209  if(_arg_id_map.find(var_link.arg_id) != _arg_id_map.end())
210  {
211  const auto var_id = _arg_id_map.at(var_link.arg_id);
212  const auto arg_var = _arguments.find(var_id);
213  if(arg_var != _arguments.end())
214  {
215  return arg_var->second;
216  }
217  else
218  {
219  return _global_vars.at(var_id);
220  }
221  }
222  return empty_var;
223  }
224 
225  /** @note The arguments are returned in the order they are added
226  */
228  {
229  Arguments args{};
230  for(const auto &a : _arguments)
231  {
232  args.add_var(a.second);
233  }
234  return args;
235  }
236 
237 private:
238  using VarID = int32_t;
239 
240 private:
241  std::map<VarID, SharedVar> _global_vars{}; // Shared, global variable
242  std::map<VarID, SharedVar> _arguments{};
243  std::map<ArgumentID, VarID> _arg_id_map{}; // Track ArgumentIDs that have already been added
244  VarID _num_var{ 0 };
245 };
246 
247 enum class ComponentType
248 {
249  Simple,
250  Complex,
251  Store
252 };
253 
255 using ComponentList = std::vector<ComponentID>;
257 {
258 public:
260  using Tag = std::string;
261  struct TagVal
262  {
263  TagVal() = default;
265  : value{ var.uniq_name }
266  {
267  }
268 
269  template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
270  TagVal(T val)
271  : value{ support::cpp11::to_string(val) }
272  {
273  }
274 
275  TagVal(const std::string &val)
276  : value{ val }
277  {
278  }
279 
280  TagVal(const char *val)
281  : value{ std::string(val) }
282  {
283  }
284 
286  : value{ get_cl_type_from_data_type(data_type) }
287  {
288  }
289 
290  std::string value{};
291  };
292  using TagLUT = std::unordered_map<Tag, TagVal>; // Used to instantiating a code template / replacing tags
293 public:
295  : _blueprint(blueprint)
296  {
297  }
298 
300 
301  virtual ~IClKernelComponent() = default;
302  virtual ComponentType get_component_type() const = 0;
303  virtual std::vector<Link> get_links() const = 0;
304  virtual std::string name() const = 0;
305 
306  // @note: some tags can be unused since they could be used only for the macros, or only for the component code
307  static std::string replace_tags(const std::string &code_template, const TagLUT &tags)
308  {
309  std::string replaced_code = "";
310  bool scanning_pattern = false;
311  std::string pattern_found = "";
312  for(size_t i = 0; i < code_template.size() - 1; ++i)
313  {
314  if(!scanning_pattern)
315  {
316  if(code_template[i] == '{' && code_template[i + 1] == '{')
317  {
318  i += 1;
319  scanning_pattern = true;
320  pattern_found = "";
321  }
322  else
323  {
324  replaced_code += code_template[i];
325  }
326  }
327  else
328  {
329  if(code_template[i] == '}' && code_template[i + 1] == '}')
330  {
331  i += 1;
332  scanning_pattern = false;
333  std::string err = "Pattern " + pattern_found + " not found in tags";
334  ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
335  replaced_code += tags.find(pattern_found)->second.value;
336  }
337  else
338  {
339  pattern_found += code_template[i];
340  }
341  }
342  }
343 
344  return replaced_code;
345  }
346  ComponentID id() const
347  {
348  return _id;
349  }
351  {
352  _id = id;
353  }
354 
355  virtual std::set<std::string> get_headers_list() const
356  {
357  return std::set<std::string> {};
358  }
359 
360  virtual std::string get_additional_macros() const
361  {
362  return "";
363  }
364 
365  virtual std::string get_component_code() const
366  {
367  return "";
368  }
369 
370  virtual Window get_window() const
371  {
372  return Window{};
373  }
374 
375  /** Get the tag look-up table used to instantiate the component code.
376  *
377  * @param vtable
378  * @return TagLUT
379  */
380  virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const = 0;
381 
382  /** Allocate all shared variables used by the component in the @p vtable
383  *
384  * @param vtable
385  */
386  virtual void allocate_shared_vars(SharedVarTable &vtable) const = 0;
387 
388  virtual std::string get_dst_addr_calculation() const
389  {
390  return "";
391  }
392 
393  /** Generate config id of the component
394  *
395  * @return std::string
396  */
397  virtual std::string generate_config_id() const
398  {
399  return "";
400  }
401 
403  {
404  return CLBuildOptions{};
405  }
406 
407 protected:
408  ClKernelBlueprint *_blueprint;
409 
410 private:
411  ComponentID _id{};
412 };
413 
414 using ComponentUniquePtr = std::unique_ptr<IClKernelComponent>;
415 
416 /** Intermediate representation of the final, complete kernel source.
417  */
419 {
420 public:
421  Implementation() = default;
422  ~Implementation() = default;
423 
424 public:
426  {
427  return _graph.update_merge_point(t_id, merge_point);
428  }
429 
431  {
432  const auto id = _graph.add_tensor(merge_point);
433  if(_kernel_tensors.find(id) == _kernel_tensors.end())
434  {
435  _kernel_tensors.insert(std::make_pair(id, tensor_info));
436  }
437  return id;
438  }
439 
440  void set_tile_info(const TileDescriptor &tile_info)
441  {
442  _tile_info = tile_info;
443  }
444 
446  {
447  if(arg_id == g_arg_placeholder)
448  {
449  // In case of placeholder, don't care what we return;
451  }
452  return _shared_var_group_lut.at(arg_id);
453  }
454 
455  void validate_arg_ids(std::initializer_list<ArgumentID> args) const
456  {
457  for(const auto arg_id : args)
458  {
459  ARM_COMPUTE_UNUSED(arg_id);
460  ARM_COMPUTE_ERROR_ON_MSG(_kernel_tensors.find(arg_id) == _kernel_tensors.end() && arg_id != g_arg_placeholder,
461  "Trying to use an argument that hasn't been added to the blueprint");
462  }
463  }
464 
466  {
467  if(component->get_component_type() == ComponentType::Complex)
468  {
469  ++_num_complex_components;
470  ARM_COMPUTE_ERROR_ON_MSG(_num_complex_components > 1, "Only one complex component per blueprint is supported.");
471  }
472 
473  // Get an unique ID for the component that's being added
474  std::vector<ArgumentID> src_tensors;
475  std::vector<ArgumentID> dst_tensors;
476  for(const auto &link : component->get_links())
477  {
478  if(link.is_empty())
479  {
480  continue;
481  }
482  if(link.io == SharedVarIO::Input)
483  {
484  src_tensors.push_back(link.arg_id);
485  }
486  else
487  {
488  dst_tensors.push_back(link.arg_id);
489  }
490  }
491  const ComponentID component_id = _graph.add_operator(src_tensors, dst_tensors).second;
492  component->set_id(component_id);
493 
494  // Add this component to the component graph. Don't connect it to anything yet
495  _component_graph.emplace(component_id, ComponentList{});
496 
497  // For every { arg_id, arg_io } passed along with this component...
498  for(const auto &link : component->get_links())
499  {
500  const ArgumentID &arg_id = link.arg_id;
501  const SharedVarIO &arg_io = link.io;
502 
503  // Add the arg_id to the map describing the input/output relationship between an argument and the components that use it, if it doesn't yet exist there
504  if(_outgoing_components.find(arg_id) == _outgoing_components.end())
505  {
506  _outgoing_components.emplace(arg_id, ComponentList{});
507  _incoming_components.emplace(arg_id, ComponentList{});
508  }
509 
510  // If it's an input argument, connect any other component that has it as output with this component
511  // Additionally, set this component as one that treats this argument as "Input" (append to index 0)
512  // This is used so that we keep track of whether two components use the same argument, one as input and one as output
513  if(arg_io == SharedVarIO::Input)
514  {
515  for(const auto &prev_component : _incoming_components[arg_id])
516  {
517  _component_graph[prev_component].push_back(component_id);
518  }
519 
520  _outgoing_components[arg_id].push_back(component_id);
521  }
522  // If it's an output argument, connect this component with any other component that has it as input
523  // Additionally, set this component as one that treats this argument as "Output" (append to index 1)
524  else
525  {
526  if(component->get_component_type() == ComponentType::Store)
527  {
528  ARM_COMPUTE_ERROR_ON_MSG(_dst_id >= 0, "Trying to add more than one dst argument to the graph");
529  _dst_id = arg_id;
530  }
531 
532  for(const auto &subseq_component : _outgoing_components[arg_id])
533  {
534  _component_graph[component_id].push_back(subseq_component);
535  }
536 
537  _incoming_components[arg_id].push_back(component_id);
538  }
539  }
540 
541  ARM_COMPUTE_ERROR_ON_MSG(_graph.get_root_ops().size() != 1, "Trying to add more than one root to the graph");
542 
543  // Finally, add this component to the dictionary of components
544  _components.insert(std::make_pair(component_id, std::move(component)));
545  }
546 
547  std::string build_kernel_name() const
548  {
549  std::string name = "";
550 
551  traverse([&](std::stack<ComponentID> stack)
552  {
553  name += _components.find(stack.top())->second->name() + (stack.size() > 2 ? "___" : "");
554  });
555 
556  return name;
557  }
558 
559  std::string build_code()
560  {
561  ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
562 
563  // These data structures will hold the data from all the components in the blueprint
564  std::set<std::string> headers_list{};
565  std::set<std::string> additional_macros{};
566  std::vector<std::string> component_codes{}; // vector because order matters
567 
568  // Step 1: Allocate all kernel argument shared variables before generating the component code
569  auto stack = topological_sort();
570  while(!stack.empty())
571  {
572  auto curr_component_id = stack.top();
573  auto &curr_component = _components.find(curr_component_id)->second;
574 
575  curr_component->allocate_shared_vars(_vtable);
576 
577  stack.pop();
578  }
579  // Step 2: Generate component codes
580  stack = topological_sort();
581  while(!stack.empty())
582  {
583  auto curr_component_id = stack.top();
584  auto &curr_component = _components.find(curr_component_id)->second;
585 
586  auto curr_headers_list = curr_component->get_headers_list();
587  auto curr_additional_macros = curr_component->get_additional_macros();
588  auto curr_component_code = curr_component->get_component_code();
589  const auto var_lut = curr_component->get_tag_lut(_vtable); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
590  component_codes.push_back(IClKernelComponent::replace_tags(curr_component_code, var_lut));
591 
592  headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
593  if(!curr_additional_macros.empty()) // Some components might not have any
594  {
595  additional_macros.insert(IClKernelComponent::replace_tags(curr_additional_macros, var_lut));
596  }
597 
598  stack.pop();
599  }
600 
601  // Step 3: Assemble the data gathered by traversing the graph into the string "code"
602  std::string code = "";
603 
604  for(auto &header : headers_list)
605  {
606 #if defined(EMBEDDED_KERNELS)
607  code += CLKernelLibrary::get().get_program(header).first;
608 #else // defined(EMBEDDED_KERNELS)
609  code += "#include \"" + header + "\"\n";
610 #endif // defined(EMBEDDED_KERNELS)
611  }
612 
613  for(auto &macros : additional_macros)
614  {
615  code += macros;
616  }
617 
618  code += generate_kernel_signature(_vtable.get_kernel_arguments());
619 
620  code += "\n{\n\n";
621 
622  code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
623  code += generate_global_section();
624  code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
625 
626  for(auto &component_code : component_codes)
627  {
628  code += component_code;
629  }
630 
631  code += "}\n";
632 
633  return code;
634  }
635 
636  /** Generate config id of the entire kernel
637  *
638  * Format: kernel_name--comp0_config_id--comp1_config_id--...
639  *
640  * @return std::string
641  */
642  std::string build_config_id() const
643  {
644  std::string config_id = build_kernel_name();
645  traverse([&](std::stack<ComponentID> stack)
646  {
647  config_id += "--" + _components.find(stack.top())->second->generate_config_id() + "--";
648  });
649 
650  return config_id;
651  }
652 
654  {
655  CLBuildOptions build_opts{};
656 
657  traverse([&](std::stack<ComponentID> stack)
658  {
659  build_opts.add_options(_components.find(stack.top())->second->generate_build_options().options());
660  });
661 
662  return build_opts;
663  }
664 
666  {
667  return _tile_info;
668  }
669 
670  // Get the global execution window, i.e. that of the root component
672  {
673  ARM_COMPUTE_ERROR_ON_MSG(_graph_root == -1, "No root found in the component graph");
674  ARM_COMPUTE_ERROR_ON_MSG(_dst_id == -1, "Destination Tensor Id should be ready before calling get_execution_window()");
675 
676  return _components.find(_graph_root)->second->get_window();
677  }
678 
680  {
681  return _dst_id;
682  }
683 
685  {
686  ClKernelArgList arg_list{};
687  for(const auto &arg_var : _vtable.get_kernel_arguments().get_all_vars())
688  {
689  arg_list[arg_var.desc.arg_id] = arg_var.desc;
690  }
691  return arg_list;
692  }
693 
694  /** Get the arguments as shared vars from the vtable
695  *
696  * @return SharedVarTable::Arguments
697  */
699  {
700  return _vtable.get_kernel_arguments();
701  }
702 
704  {
705  auto it = _kernel_tensors.find(id);
706  if(it != _kernel_tensors.end())
707  {
708  return it->second;
709  }
710  return nullptr;
711  }
712 
714  {
715  auto it = _kernel_tensors.find(id);
716  if(it != _kernel_tensors.end())
717  {
718  return it->second;
719  }
720  return nullptr;
721  }
722  /** Finalize graph construction. Graph is expected to not mutate after being finalized
723  */
724  void finalize()
725  {
726  cache_root_component();
727  assign_shared_var_group();
728  }
729 
731  {
732  return _graph;
733  }
734 
735 private:
736  void cache_root_component()
737  {
738  const auto roots = _graph.get_root_ops();
739  ARM_COMPUTE_ERROR_ON_MSG(roots.size() != 1, "Trying to add more than one root to the graph");
740  _graph_root = roots.at(0);
741  }
742  /** Assign the group for each shared var. Can only be performed at the end of the graph construction, before building
743  */
744  void assign_shared_var_group()
745  {
746  for(const auto &tensor : _kernel_tensors)
747  {
748  const auto tensor_id = tensor.first;
749  if(_graph.is_src_tensor(tensor_id) || _graph.is_dst_tensor(tensor_id))
750  {
751  _shared_var_group_lut[tensor_id] = SharedVarGroup::Argument;
752  }
753  else
754  {
755  _shared_var_group_lut[tensor_id] = SharedVarGroup::Automatic;
756  }
757  }
758  }
759 
760  void topological_sort_utility(ComponentID component_id, std::unordered_set<ComponentID> &visited, std::stack<ComponentID> &stack) const
761  {
762  visited.insert(component_id);
763 
764  for(auto connected_component : _component_graph.find(component_id)->second)
765  {
766  if(visited.find(connected_component) == visited.end())
767  {
768  topological_sort_utility(connected_component, visited, stack);
769  }
770  }
771 
772  stack.push(component_id);
773  }
774 
775  std::stack<ComponentID> topological_sort() const
776  {
777  std::stack<ComponentID> stack{};
778  std::unordered_set<ComponentID> visited{};
779 
780  topological_sort_utility(_graph_root, visited, stack);
781 
782  return stack;
783  }
784 
785  void traverse(const std::function<void(std::stack<ComponentID>)> &func) const
786  {
787  std::stack<ComponentID> stack = topological_sort();
788 
789  while(!stack.empty())
790  {
791  func(stack);
792  stack.pop();
793  }
794  }
795 
796  std::string generate_argument_declaration(const SharedVarTable::SharedVar &var) const
797  {
798  ARM_COMPUTE_ERROR_ON_MSG(var.group != SharedVarGroup::Argument, "An argument declaration can only be generated from a kernel argument");
799  std::string code;
800  switch(var.desc.tensor_arg_type)
801  {
803  {
804  code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
805  break;
806  }
808  {
809  code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
810  break;
811  }
813  {
814  code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
815  code += "\n uint " + var.uniq_name + "_stride_z";
816  break;
817  }
819  {
820  code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
821  code += "\n uint " + var.uniq_name + "_stride_z";
822  break;
823  }
825  {
826  code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
827  break;
828  }
830  {
831  code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
832  break;
833  }
834  default:
835  {
836  ARM_COMPUTE_ERROR("Unsupported declaration generation for ClKernelTensorArgType");
837  }
838  }
839  return code;
840  }
841 
842  std::string generate_kernel_signature(const SharedVarTable::Arguments &argument_list) const
843  {
844  std::string code = "\n__kernel void " + build_kernel_name() + "(";
845 
846  for(const auto &arg : argument_list.get_all_vars())
847  {
848  code += generate_argument_declaration(arg) + ",";
849  }
850 
851  code[code.length() - 1] = ')';
852 
853  return code;
854  }
855 
856  std::string generate_global_section() const
857  {
858  auto dst_info = get_kernel_argument_info(_dst_id);
859  auto dst_w = dst_info->dimension(0);
860  const auto tile_w = std::max(1, get_execution_window().x().step());
861  const auto tile_h = std::max(1, get_execution_window().y().step());
862  auto leftover_w = dst_w % tile_w;
863 
864  std::string code = "";
865  code += std::string(" int cout = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
866  code += std::string(" int mout = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
867  code += std::string(" int bout = GET_SPATIAL_IDX(2, 1, 0);\n\n");
868 
869  switch(_tile_info.clipping)
870  {
872  code += " const bool g_cond_x = (cout == 0);\n";
873  code += " const bool g_cond_y = (mout == 0);\n";
874  break;
876  code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
877  code += " const bool g_cond_y = (mout == 0);\n";
878  break;
880  code += " const bool g_cond_x = (cout == 0);\n";
881  code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
882  break;
884  code += " const bool g_cond_x = ((cout + 1) * " + std::to_string(tile_w) + " >= " + std::to_string(_tile_info.boundaries.x()) + ");\n";
885  code += " const bool g_cond_y = ((mout + 1) * " + std::to_string(tile_h) + " >= " + std::to_string(_tile_info.boundaries.y()) + ");\n";
886  break;
887  default:
888  ARM_COMPUTE_ERROR("Unsupported clipping strategy");
889  }
890 
891  return code;
892  }
893 
894  TileDescriptor _tile_info{};
895 
896  int32_t _num_complex_components{};
897 
898  ArgumentID _dst_id{ -1 }; // Initially set to -1, which means the graph has no dst yet, since node IDs are positive numbers
899 
900  DependencyGraph _graph{};
901 
902  // Tensors, components and IDs with corresponding ptrs (except intermediate)
903  std::unordered_map<ComponentID, ComponentUniquePtr> _components{};
904  std::unordered_map<ArgumentID, ITensorInfo *> _kernel_tensors{};
905  // Argument group lookup. Can be replaced by extending the ArgumentID type to include group info
906  std::unordered_map<ArgumentID, SharedVarGroup> _shared_var_group_lut{};
907 
908  // Tracks all variables (e.g.: kernel arguments, kernel "global variables")
909  SharedVarTable _vtable{};
910 
911  // Component directed graph (represented by an adjecency list of Component IDs)
912  // This is used to understand the ordering and bindings between components when generating the kernel
913  // It's initially set to -1 which means the graph has no root yet, since node IDs are positive numbers
914  ComponentID _graph_root{ -1 };
915  std::unordered_map<ComponentID, ComponentList> _component_graph{};
916 
917  // Additional data structures used to define the relationships between components and arguments
918  // For each argument, it contains the list of components that consider it as an incoming or an outgoing argument
919  // E.g. tensor0 -> component0 -> tensor1
920  // _outgoing_components[tensor0] == {component0} (component0 is the outgoing component of tensor0. Component0 treats tensor0 as an input tensor)
921  // _incoming_components[tensor1] == {component0} (component0 is the incoming component of tensor1. Component1 treats tensor1 as an output tensor)
922  std::unordered_map<ArgumentID, ComponentList> _outgoing_components{};
923  std::unordered_map<ArgumentID, ComponentList> _incoming_components{};
924 };
925 
926 } // namespace dynamic_fusion
927 } // namespace experimental
928 } // namespace arm_compute
929 #endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
930 #endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
std::vector< ComponentID > ComponentList
Definition: Common.h:255
The dependency graph of a workload, where the nodes are of 2 types: Tensor or Operator Represented as...
virtual CLBuildOptions generate_build_options() const
Definition: Common.h:402
std::vector< Id > get_root_ops() const
Get all root ops.
std::string build_config_id() const
Generate config id of the entire kernel.
Definition: Common.h:642
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
std::string to_string(T &&value)
Convert integer and float values to string.
std::string to_string(const CLBuildOptions &cl_build_opts)
Definition: Utils.h:51
SharedVarTable::Arguments get_argument_shared_vars() const
Get the arguments as shared vars from the vtable.
Definition: Common.h:698
std::stringstream ss(mlgo_str)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
std::pair< std::string, bool > get_program(const std::string &program_name) const
Gets the source of the selected program.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor&#39;s metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(TypeName)
Definition: Macros.h:27
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Status class.
Definition: Error.h:52
std::vector< const ClKernel * > traverse(const ClKernelFusionGroup &group)
Copyright (c) 2017-2022 Arm Limited.
std::map< int, ClKernelArgDescriptor > ClKernelArgList
Definition: ClWorkload.h:87
Describes all the info required to add a kernel argument at run time.
Definition: ClWorkload.h:70
Intermediate representation of the final, complete kernel source.
virtual std::set< std::string > get_headers_list() const
Definition: Common.h:355
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
ClKernelTensorArgType tensor_arg_type
tensor argument type
Definition: ClWorkload.h:83
A table of all the variables used in the kernel / blueprint Because we limit the DependencyGraph in t...
Definition: Common.h:92
static std::string replace_tags(const std::string &code_template, const TagLUT &tags)
Definition: Common.h:307
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
const ITensorInfo * get_kernel_argument_info(const ArgumentID id) const
Definition: Common.h:703
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
virtual std::string generate_config_id() const
Generate config id of the component.
Definition: Common.h:397
std::unique_ptr< IClKernelComponent > ComponentUniquePtr
Definition: Common.h:414
const char * name
SharedVarIO
We introduce the concept of Shared Variables in the context of kernel building.
Definition: Common.h:59
constexpr int step
Definition: fp32.cpp:35
Intermediate representation of the final, complete kernel source.
Definition: Common.h:418
void add(SharedVarLink var_link, SharedVarGroup group, ClKernelArgDescriptor runtime_desc, const std::string &name="unnamed")
Create a SharedVar for a corresponding SharedVarLink (contains ArgumentID).
Definition: Common.h:153
Status update_merge_point(ArgumentID t_id, ArgumentID merge_point)
Definition: Common.h:425
DataType
Available data types.
Definition: Types.h:79
void header(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:481
Describe a multidimensional execution window.
Definition: Window.h:39
ArgumentID add_kernel_tensor(ITensorInfo *tensor_info, ArgumentID merge_point=DependencyGraph::empty_id())
Definition: Common.h:430
void validate_arg_ids(std::initializer_list< ArgumentID > args) const
Definition: Common.h:455