Compute Library 22.11
ClWorkload.h
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H

#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Window.h"

#include "arm_compute/core/experimental/IWorkload.h"
#include "arm_compute/core/experimental/OperatorGraph.h"

#include <map>

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
/** Verbose and explicit way to enumerate all the tensor argument variants used by
 * all kernel implementations. This avoids any ambiguity about which kernel arguments are passed
 */
enum class ClKernelTensorArgType : int
{
    Scalar,

    Vector,

    Image,
    Image_Reinterpret_As_3D,
    Image_Export_To_ClImage2D,

    Image_3D, // 3D Tensor represented as a 2D Image + stride_z
    Image_3D_Export_To_ClImage2D,

    Tensor_3D,
    Tensor_4D,
    Tensor_4D_t_Buffer,
    Tensor_4D_t_Image
};

/** Describes all the info required to add a kernel argument at run time
 *
 * @note This struct can later be expanded into a more concise and formal way to specify how to set up
 * arguments for a kernel inside a @ref ClUnitWorkload
 */
struct ClKernelArgDescriptor
{
    ClKernelArgDescriptor() = default;
    ClKernelArgDescriptor(int arg_id, ClKernelTensorArgType type, bool slide_along_dimz = true)
        : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz }
    {
    }
    ~ClKernelArgDescriptor() = default;
    friend bool operator==(const ClKernelArgDescriptor &arg0, const ClKernelArgDescriptor &arg1)
    {
        return (arg0.tensor_arg_type == arg1.tensor_arg_type) && (arg0.slide_along_dimz == arg1.slide_along_dimz);
    }
    int                   arg_id{ -1 };                                    /**< Arg ID in the blueprint, -1 means empty / uninitialized */
    ClKernelTensorArgType tensor_arg_type{ ClKernelTensorArgType::Image }; /**< tensor argument type */
    bool                  slide_along_dimz{ true };                        /**< @note slide_along_dimz will be moved out of this descriptor in later iterations */
};

using ClKernelArgList = std::map<int, ClKernelArgDescriptor>;

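// Illustration only (not part of the original header): a ClKernelArgList keys each
// ClKernelArgDescriptor by its blueprint argument id, e.g.
//
//     ClKernelArgList args{};
//     args[0] = ClKernelArgDescriptor(0, ClKernelTensorArgType::Tensor_4D);    // e.g. a source tensor
//     args[1] = ClKernelArgDescriptor(1, ClKernelTensorArgType::Image, false); // e.g. a destination, not slid along dim z
//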
/** Descriptor containing information required to run a single ClWorkload
 */
struct ClExecutionDescriptor
{
    cl::NDRange suggested_lws{};               /**< Suggested local work-group size for optimal performance if not zero */
    cl::NDRange gws{};                         /**< Global work-group to be used */
    bool        skip_sliding_window{ false };  /**< Skip sliding window slices during execution loop */
};

/** Contains kernel code to be compiled and run in a ClUnitWorkload
 */
struct ClKernelCode
{
    friend bool operator==(const ClKernelCode &code0, const ClKernelCode &code1)
    {
        return (code0.name == code1.name) && (code0.code == code1.code) && (code0.config_id == code1.config_id) && (code0.build_options == code1.build_options) && (code0.window == code1.window)
               && (code0.arguments == code1.arguments);
    }
    std::string     name{};          /**< Kernel name */
    std::string     code{};          /**< Kernel source code */
    std::string     config_id{};     /**< Generated from blueprint based on complex component */
    CLBuildOptions  build_options{}; /**< Kernel build options */
    Window          window{};        /**< Execution window */
    ClKernelArgList arguments{};     /**< Kernel argument descriptors. map key is kernel ArgumentID */
};

/** A descriptor of ClWorkload Tensors.
 */
struct ClWorkloadTensor : public IWorkload::WorkloadTensor
{
    ClWorkloadTensor() = default;
    ClWorkloadTensor(Id id, ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg)
        : WorkloadTensor{ id, info, memory_type, memory_info }, kernel_arg{ kernel_arg }
    {
    }
    ClKernelArgDescriptor kernel_arg{};
    friend bool operator==(const ClWorkloadTensor &t0, const ClWorkloadTensor &t1)
    {
        return t0.info == t1.info && t0.memory_info == t1.memory_info && t0.memory_type == t1.memory_type && t0.kernel_arg == t1.kernel_arg;
    }
};

/** The basic atomic unit in a @ref ClWorkload. It contains exactly one kernel to run.
 */
struct ClUnitWorkload : public IWorkload::UnitWorkload
{
    ClUnitWorkload() = default;
    ClUnitWorkload(Id id, UnitWorkloadStage stage, const ClKernelCode &code)
        : UnitWorkload{ id, stage }, code{ code }
    {
    }
    friend bool operator==(const ClUnitWorkload &uworkload0, const ClUnitWorkload &uworkload1)
    {
        return uworkload0.stage == uworkload1.stage && uworkload0.code == uworkload1.code;
    }
    ClKernelCode code{};
};

/** GPU information for @ref ClWorkloadContext
 */
struct GpuInfo
{
    friend bool operator==(const GpuInfo &info0, const GpuInfo &info1)
    {
        return info0.target == info1.target;
    }
    GPUTarget target{ GPUTarget::UNKNOWN };
};

/** Context (device capabilities, platform details) associated with a ClWorkload
 *
 * It is required for building the @ref ClKernelCode and could also be used by the runtime (e.g. schedulers)
 */
struct ClWorkloadContext
{
    friend bool operator==(const ClWorkloadContext &ctx0, const ClWorkloadContext &ctx1)
    {
        return ctx0.gpu_info == ctx1.gpu_info;
    }
    GpuInfo gpu_info{};
};

/** Workload for Cl backend
 */
struct ClWorkload : public IWorkload
{
    Tid add_workload_tensor(ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg, Tid merge_point)
    {
        Tid id = graph.add_tensor(merge_point);
        if(tensors.find(id) == tensors.end())
        {
            tensors[id] = ClWorkloadTensor(id, info, memory_type, memory_info, kernel_arg);
        }
        return id;
    }
    UnitWorkId add_unit_workload(UnitWorkloadStage stage, const ClKernelCode &code, const std::vector<Tid> &inputs, const std::vector<Tid> &outputs)
    {
        auto op = graph.add_operator(inputs, outputs);
        auto id = op.second;
        unit_workloads[id] = ClUnitWorkload(id, stage, code);
        return id;
    }
    friend bool operator==(const ClWorkload &workload0, const ClWorkload &workload1)
    {
        return std::make_tuple(
                   workload0.graph, workload0.context, workload0.unit_workloads, workload0.tensors, workload0.op_tensor_id_lut)
               == std::make_tuple(
                   workload1.graph, workload1.context, workload1.unit_workloads, workload1.tensors, workload1.op_tensor_id_lut);
    }
    ClWorkloadContext context{};                            /**< Workload context */
    std::map<UnitWorkId, ClUnitWorkload> unit_workloads{};  /**< Unit workloads to run */
    std::map<Tid, ClWorkloadTensor> tensors{};              /**< Workload tensors */
    std::map<Tid, OpTensor::Id> op_tensor_id_lut{};         /**< Map from ClWorkloadTensor to SRC and DST Operator Tensors (no need to store "intermediate" Operator Tensors) */
    Status status{};                                        /**< For compatibility with the IOperator validate method. Store if the workload is valid or not. */
};

/** Build a @ref ClWorkload from an @ref OperatorGraph.
 *
 * @param[out] workload Built ClWorkload
 * @param[in]  op_graph OperatorGraph to build the workload from
 * @param[in]  ctx      Context (device capabilities) of the workload
 * @return Status
 */
Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx);

} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute

#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
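
As a usage illustration (not part of the file above): a minimal sketch of how this header is typically consumed, assuming an OperatorGraph has already been populated through the OperatorGraph.h API. The helper name build_cl_workload and the GPU target chosen below are illustrative assumptions.

#include "arm_compute/core/experimental/ClWorkload.h"
#include "arm_compute/core/experimental/OperatorGraph.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

// Sketch: turn an already-populated OperatorGraph into a runnable ClWorkload.
Status build_cl_workload(const OperatorGraph &op_graph, ClWorkload &workload)
{
    ClWorkloadContext ctx{};
    ctx.gpu_info.target = GPUTarget::G77; // illustrative target; in real code query the actual device

    // build() (declared at the end of this header) fills workload.unit_workloads with the
    // ClUnitWorkload objects (kernel code + execution stage) and workload.tensors with the
    // per-tensor descriptors, keyed by their ids in the workload's dependency graph.
    return build(workload, op_graph, ctx);
}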