38 namespace experimental
40 namespace dynamic_fusion
// Rule-of-five special members of DataView, all defaulted: the view only
// aggregates a non-owning tensor pointer plus by-value descriptors (see the
// (tensor, tensor_info, memory_info) fields used later in this file), so the
// compiler-generated copy/move/destroy semantics are correct.
// Defaulted destructor (non-virtual: DataView is a plain value type).
62 ~DataView() =
default;
// Defaulted copy constructor.
63 DataView(
const DataView &other) =
default;
// Defaulted copy assignment.
64 DataView &operator=(
const DataView &other) =
default;
// Defaulted move constructor.
65 DataView(DataView &&other) =
default;
// Defaulted move assignment.
66 DataView &operator=(DataView &&other) =
default;
// Accessors for the collected auxiliary-tensor views. The bodies are elided
// from this extract -- presumably both return _tensors; confirm against the
// full file.
// Non-const overload.
73 std::vector<DataView> get_tensors()
// Const overload.
77 std::vector<DataView> get_tensors()
const
// create_aux_tensors is granted friendship so it can reach the non-public
// add_aux_tensor() below (presumably private -- the access specifier is on
// an elided line) while populating a ClAuxTensors from the workload source.
82 friend Status create_aux_tensors(ClAuxTensors *aux_tensors,
const GpuWorkloadSourceCode &code);
// Returns the CLTensor associated with the given tensor info, creating and
// taking ownership of a new one on first request. De-duplication is keyed by
// a tensor-info id: t_id is used below but its initialisation (presumably
// tensor_info.id()) is on a line elided from this extract -- confirm.
92 CLTensor *add_aux_tensor(
const ITensorInfo &
tensor_info,
const AuxMemoryInfo &aux_memory_info)
// Fast path: an aux tensor with this id has already been registered; hand
// back the existing owned object.
95 auto find_tensor_pair = _owned_tensors.find(t_id);
96 if (find_tensor_pair != _owned_tensors.end())
98 return find_tensor_pair->second.get();
// Slow path: allocate a new owned tensor, index it by id, and record a
// non-owning DataView alongside the caller-supplied descriptors.
102 auto tensor = std::make_unique<CLTensor>();
103 auto inserted_pair = _owned_tensors.emplace(t_id, std::move(
tensor)).first;
104 auto new_tensor = inserted_pair->second.get();
105 _tensors.emplace_back(new_tensor,
tensor_info, aux_memory_info);
// NOTE(review): the return of new_tensor is on a line elided from this view.
// Owning storage: each auxiliary CLTensor is kept alive by unique_ptr for
// the lifetime of this object; keyed by tensor-info id for de-duplication
// in add_aux_tensor().
110 std::map<ITensorInfo::Id, std::unique_ptr<CLTensor>> _owned_tensors{};
// Non-owning views (tensor pointer + by-value descriptors) in insertion
// order; exposed via get_tensors().
111 std::vector<DataView> _tensors{};
// Walks every tensor id in the workload source code and registers those that
// carry auxiliary-memory descriptors with *aux_tensors. Braces, the branch
// selecting auxiliary tensors, the error path, and the final Status return
// are elided from this extract.
122 Status create_aux_tensors(ClAuxTensors *aux_tensors,
const GpuWorkloadSourceCode &code)
124 for (
auto t_id : code.tensors())
// Look up the workload argument describing this tensor id.
127 const auto workload_arg = code.query_tensor(t_id);
128 ICLTensor *tensor_object =
nullptr;
// Copy the tensor info by value: the DataView later stored by
// add_aux_tensor() keeps its own TensorInfo, independent of the source-code
// structures.
132 const TensorInfo
tensor_info = *workload_arg->tensor_info();
134 const auto aux_memory_info = workload_arg->memory_descriptor()->aux_memory_info;
135 tensor_object = aux_tensors->add_aux_tensor(
tensor_info, aux_memory_info);
// A null result means registration failed; the error branch is elided here.
137 if (tensor_object ==
nullptr)
// Interior of find_tensor_pack(): returns a pointer into the map when the
// unit-workload id is known; the not-found return (presumably nullptr, as
// the callers in create_tensor_lut() treat nullptr as "no pack yet") is on
// an elided line.
159 auto tensor_pack = _tensor_packs.find(uwk_id);
160 if (tensor_pack != _tensor_packs.end())
162 return &(tensor_pack->second);
// Interior of the checked accessor: std::map::at throws std::out_of_range
// for an unknown unit-workload id.
174 return _tensor_packs.at(uwk_id);
// create_tensor_lut populates this LUT from the workload source code plus
// the user-provided and auxiliary tensors; friendship lets it reach the
// non-public helpers below.
177 friend Status create_tensor_lut(ClTensorLUT *tensor_lut,
178 const GpuWorkloadSourceCode &code,
179 const std::vector<CLTensor *> &user_tensors,
180 const ClAuxTensors &aux_tensors);
// Inserts or overwrites the tensor pack bound to a unit workload.
// operator[] default-constructs the pack if absent, then copy-assigns.
188 void add_tensor_pack(
UnitWorkloadId uwk_id,
const ITensorPack &tensor_pack)
190 _tensor_packs[uwk_id] = tensor_pack;
// One ITensorPack per unit workload, keyed by its id.
192 std::map<UnitWorkloadId, ITensorPack> _tensor_packs{};
// Builds the unit-workload-id -> ITensorPack lookup table:
//   1) index the user tensors by tensor-info id,
//   2) merge in the auxiliary tensors,
//   3) validate each mapped tensor, then add it to the pack of every unit
//      workload that references it.
// Braces, error branches and the final Status return are elided from this
// extract.
204 Status create_tensor_lut(ClTensorLUT *tensor_lut,
205 const GpuWorkloadSourceCode &code,
206 const std::vector<CLTensor *> &user_tensors,
207 const ClAuxTensors &aux_tensors)
// Phase 1: user tensors, keyed by the id carried in each tensor's info.
210 std::map<ITensorInfo::Id, CLTensor *> tensor_map;
211 for (
auto tensor : user_tensors)
213 const auto t_id =
tensor->info()->id();
// Id collision between user tensors: keep BOTH tensors by minting a fresh
// id (current max + 1) for the newcomer instead of overwriting.
// NOTE(review): workloads can then never address the re-keyed tensor by its
// original id -- confirm this collision policy is intended.
215 if (tensor_map.find(t_id) != tensor_map.end())
218 std::vector<ITensorInfo::Id> ids;
219 for (
auto &
t : tensor_map)
221 ids.push_back(
t.first);
223 ITensorInfo::Id new_id = *std::max_element(ids.begin(), ids.end()) + 1;
224 tensor_map[new_id] =
tensor;
// No collision: index the tensor under its own id.
228 tensor_map[t_id] =
tensor;
// Phase 2: auxiliary tensors. The duplicate-id branch body (line 235's
// consequence) is elided from this extract.
231 for (
const auto &data : aux_tensors.get_tensors())
233 const auto t_id = data.tensor_info.id();
234 const auto tensor = data.tensor;
235 if (tensor_map.find(t_id) != tensor_map.end())
239 tensor_map[t_id] =
tensor;
// Phase 3: validate every mapped tensor and distribute it to the packs of
// all unit workloads that consume it.
243 for (
auto id_tensor : tensor_map)
245 const auto t_id = id_tensor.first;
246 const auto tensor_object = id_tensor.second;
// Null tensor object: error branch elided.
247 if (tensor_object ==
nullptr)
// Unallocated tensor (zero backing size): error branch elided.
251 if (tensor_object->allocator()->info().total_size() == 0U)
256 for (
auto uwk_id : code.get_unit_workloads_from_tensor(t_id))
258 ITensorPack *tensor_pack = tensor_lut->find_tensor_pack(uwk_id);
// First tensor for this unit workload creates its pack; later tensors are
// appended to the existing pack.
259 if (tensor_pack ==
nullptr)
261 tensor_lut->add_tensor_pack(uwk_id, ITensorPack{{t_id, tensor_object}});
265 tensor_pack->add_tensor(t_id, tensor_object);
// Pimpl state for ClWorkloadRuntime (the struct's opening/closing braces
// are on lines elided from this extract).
275 struct ClWorkloadRuntime::Implementation
// Kernels executed on every run, keyed by unit-workload id.
277 std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels{};
// Kernels executed once, in prepare().
278 std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels_prep{};
// Set at the end of configure().
279 bool _is_configured{
false};
// Latch that makes prepare() idempotent.
280 bool _is_prepared{
false};
// Per-unit-workload tensor packs, populated by create_tensor_lut().
281 ClTensorLUT _tensor_lut{};
// Auxiliary (intermediate/scratch) tensors owned by the runtime.
282 ClAuxTensors _aux_tensors{};
// The compiled workload description this runtime executes.
283 GpuWorkloadSourceCode _source_code{};
// configure() fragment: validates the sketch targets OpenCL (the macro
// wrapping this message is on an elided line), then builds one
// ClKernelRuntime per unit workload, partitioning them by stage into the
// run-every-time and prepare-once maps. The stage dispatch and the kernel
// configuration calls are elided from this extract.
300 "ClWorkloadRuntime cannot be configured with non-OpenCL workload sketch");
304 for (
auto uwk_id : _impl->_source_code.unit_workloads())
306 const auto work = _impl->_source_code.query_unit_workload(uwk_id);
307 const auto stage = work.stage().stage;
308 auto k = std::make_unique<ClKernelRuntime>();
// Run-stage kernel: executed on every run. (Only one of the two emplace
// lines executes per iteration; the branch on `stage` is elided.)
315 _impl->_kernels.emplace(work.id(), std::move(k));
// Prepare-stage kernel: executed once in prepare().
320 _impl->_kernels_prep.emplace(work.id(), std::move(k));
// Register the auxiliary tensors required by the workload.
// NOTE(review): the Status returned by create_aux_tensors appears unchecked
// on the visible lines -- confirm against the full file.
330 create_aux_tensors(&_impl->_aux_tensors, _impl->_source_code);
331 _impl->_is_configured =
true;
// Runs every preparation-stage kernel exactly once; subsequent calls are
// no-ops thanks to the _is_prepared latch. The actual kernel invocation
// (consuming uwk_id / kernel / flush_queue) is on lines elided from this
// extract.
335 void ClWorkloadRuntime::prepare()
337 if (!_impl->_is_prepared)
339 for (
auto &id_kernel_pair : _impl->_kernels_prep)
// The CL queue is not flushed per kernel (presumably to batch submissions
// -- confirm).
341 const bool flush_queue =
false;
342 const auto uwk_id = id_kernel_pair.first;
343 auto kernel = id_kernel_pair.second.get();
347 _impl->_is_prepared =
true;
// run() fragment: (re)builds the tensor LUT from the user-supplied tensors
// plus the owned auxiliary tensors, then iterates the run-stage kernels.
// The Status check on `st`, the tensor-pack lookup and the kernel run call
// are on lines elided from this extract.
355 const auto st = create_tensor_lut(&_impl->_tensor_lut, _impl->_source_code, tensors, _impl->_aux_tensors);
358 for (
auto &id_kernel_pair : _impl->_kernels)
// As in prepare(): no per-kernel CL queue flush.
361 const bool flush_queue =
false;
362 const auto uwk_id = id_kernel_pair.first;
363 auto kernel = id_kernel_pair.second.get();
// Fragment exposing the auxiliary tensors as (tensor, info, aux-memory)
// tuples -- presumably so callers can allocate/import backing memory before
// running the workload; the enclosing function's signature and return are
// elided from this extract.
371 std::vector<std::tuple<CLTensor *, TensorInfo, AuxMemoryInfo>> aux_tensors;
372 for (
const auto &data : _impl->_aux_tensors.get_tensors())
374 aux_tensors.emplace_back(data.tensor, data.tensor_info, data.memory_info);