30 #include <common/include/Processes.hpp>
32 #include <fmt/format.h>
/// Composes a human-readable error message from a prefix and an exception.
/// The result is "<prefix> <error.what()>" (prefix, one space, what-text).
/// @param prefix C-string prepended verbatim to the message.
/// @param error  Any exception-like object exposing a what() member.
/// @return The composed message as a std::string.
/// NOTE(review): reconstructed from a corrupted fragment — the original span
/// used `ss` without declaring it and had no return statement; this restores
/// the obvious stringstream composition.
template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
// Registers a layer with the profiling timeline: creates a named, typed child
// entity under the network GUID and records a retention-link connection for
// each input slot that has a connected output slot.
// NOTE(review): corrupted/elided fragment — the leading numbers are residue of
// the extraction; the `layer` parameter, braces and several call arguments are
// missing from view. Recover the full text from version control before editing.
51 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
53 ProfilingGuid networkGuid)
// Layers without a name are labelled "<Unnamed>" in the timeline.
56 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
57 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
60 LabelsAndEventClasses::LAYER_GUID);
61 for (
auto&& input : layer.GetInputSlots())
63 const IOutputSlot* source = input.GetConnectedOutputSlot();
// Connects the producing layer's GUID to this layer in the timeline.
68 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
69 source->GetOwningLayerGuid(),
// Registers a workload with the profiling timeline: creates a typed entity for
// the workload GUID, labels it with the owning layer's backend id, and records
// a parent/child retention link (the elided call at original line 85-88).
// NOTE(review): corrupted/elided fragment — braces, the Layer& parameter and
// some arguments are missing from view; recover from version control.
74 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
75 std::unique_ptr<IWorkload>& workload,
79 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
80 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
81 layer.GetBackendId().Get(),
82 LabelsAndEventClasses::BACKENDID_GUID);
85 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
88 LabelsAndEventClasses::CHILD_GUID);
// Fragment (enclosing function signature not visible): validates that the
// memory sources requested via networkProperties agree with the
// "ImportEnabled"/"ExportEnabled" flags recorded under the "Global" backend
// options when the network was optimized; throws InvalidArgumentException on
// mismatch.
// NOTE(review): corrupted/elided fragment — surrounding conditionals and
// braces are missing from view; recover from version control before editing.
107 const vector<BackendOptions>::iterator& backendItr =
108 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
109 if (backend.GetBackendId().Get() ==
"Global")
// Defaults when no "Global" options entry exists: both disabled.
118 bool importEnabled =
false;
119 bool exportEnabled =
false;
120 if (backendItr != optimizedOptions.end())
123 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
125 const BackendOptions::BackendOption& option = backendItr->GetOption(i);
126 if (option.GetName() ==
"ImportEnabled")
128 importEnabled = option.GetValue().AsBool();
130 if (option.GetName() ==
"ExportEnabled")
132 exportEnabled = option.GetValue().AsBool();
// Input source vs. import flag consistency check.
142 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.m_InputSource);
145 message.append(
" requires that memory import be enabled. However, "
146 "it was disabled when this network was optimized.");
150 message.append(
" requires that memory import be disabled. However, "
151 "it was enabled when this network was optimized.");
153 throw InvalidArgumentException(message);
// Output source vs. export flag consistency check (mirror of the above).
159 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.m_OutputSource);
162 message.append(
" requires that memory export be enabled. However, "
163 "it was disabled when this network was optimized.");
167 message.append(
" requires that memory export be disabled. However, "
168 "it was enabled when this network was optimized.");
170 throw InvalidArgumentException(message);
// Factory fragment (presumably LoadedNetwork::MakeLoadedNetwork — signature
// start not visible): constructs a LoadedNetwork from an optimized network,
// converting construction-time exceptions into an error message via the Fail
// lambda and returning an empty unique_ptr on failure.
// NOTE(review): corrupted/elided fragment — try/catch structure and further
// catch clauses are partially missing from view.
175 std::string& errorMessage,
177 arm::pipe::IProfilingService* profilingService)
179 std::unique_ptr<LoadedNetwork> loadedNetwork;
// On any caught exception: record the message and return a null network.
181 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
183 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ",
error);
186 return std::unique_ptr<LoadedNetwork>();
191 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
201 catch (
const std::runtime_error&
error)
206 return loadedNetwork;
// LoadedNetwork constructor (fragment): takes ownership of the optimized
// network, instantiates one backend + workload factory per backend id found in
// the graph, creates tensor handles and workloads for every layer, wires up
// profiling timeline entities, precomputes input/output workload slot pairs
// and pre-imported tensor handles for the synchronous path, and finally sets
// up internal/external memory management and executes constant workloads.
// NOTE(review): heavily corrupted/elided fragment — the leading numbers are
// extraction residue and many braces, case labels and arguments are missing
// from view. Do not edit this span without recovering the full text from
// version control.
209 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
211 arm::pipe::IProfilingService* profilingService) :
212 m_OptimizedNetwork(
std::move(net)),
213 m_NetworkProperties(networkProperties),
214 m_TensorHandleFactoryRegistry(),
215 m_ProfilingService(profilingService)
219 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
229 m_NetworkProperties);
236 bool useExternalMemoryManager =
false;
237 bool useInternalMemoryManager =
false;
238 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Per-binding flags tracking whether input/output handles are pre-imported.
245 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
246 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
// Pass 1: create each distinct backend once and its workload factory;
// async-enabled networks require AsyncExecution + ExternallyManagedMemory
// capabilities on every backend.
249 for (
auto&& layer : order)
251 auto const& backendId = layer->GetBackendId();
252 if (m_Backends.count(backendId) == 0)
255 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
257 IBackendInternal* backend = it.first->second.get();
263 backend->GetCapabilities()))
265 std::string er = backend->GetId();
266 er +=
" does not support AsyncExecution";
267 throw BackendCapabilityException(er);
270 backend->GetCapabilities()))
272 std::string er = backend->GetId();
273 er +=
" does not support ExternallyManagedMemory\n";
274 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
275 throw BackendCapabilityException(er);
277 m_SupportsExternallyManagedMemory[backend->GetId()] =
true;
278 useExternalMemoryManager =
true;
282 m_SupportsExternallyManagedMemory[backend->GetId()] =
false;
283 useInternalMemoryManager =
true;
287 if (backend->SupportsTensorAllocatorAPI())
289 workloadFactory = backend->CreateWorkloadFactory(
290 m_TensorHandleFactoryRegistry,
291 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
297 m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
298 workloadFactory = backend->CreateWorkloadFactory(
299 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
301 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
// Pass 2: create tensor handles per layer; special-casing by layer type
// (the switch cases are elided here) and for layers feeding Output layers.
307 for (
auto&& layer : order)
309 auto& workloadFactory = GetWorkloadFactory(*layer);
310 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
312 switch (layer->GetType())
319 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
321 !supportsExternalManager &&
327 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
334 if ((layer->GetNumOutputSlots() == 1) &&
335 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
336 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
338 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
340 !supportsExternalManager &&
345 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
347 !supportsExternalManager);
// Profiling timeline: register the network entity, start-of-life event, and
// the owning process id as a label.
354 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
355 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
356 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
359 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
361 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
363 int processID = arm::pipe::GetCurrentProcessId();
364 std::stringstream ss;
366 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
369 std::vector<IWorkload*> ConstWorkloads;
// Pass 3: create a workload per layer; constant layers are kept separately
// when async or external memory management is in use.
374 for (
auto&& layer: order)
379 AddLayerStructure(timelineUtils, *layer, networkGuid);
382 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
384 switch (layer->GetType())
394 auto workload = layer->CreateWorkload(workloadFactory);
398 const char*
const layerName =
399 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
400 throw InvalidArgumentException(
401 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
402 layerName,
static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
409 AddWorkloadStructure(timelineUtils, workload, *layer);
414 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
417 m_ConstantTensorHandles[layer->GetGuid()] =
418 layer->GetOutputSlot(0).GetOutputHandler().GetData();
419 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
423 m_WorkloadQueue.push_back(std::move(workload));
428 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
// Constant tensor data can be released once workloads have copied/bound it.
432 layer->ReleaseConstantData();
// Synchronous path only: precompute, per binding id, which workload/slot
// pairs consume each input and produce each output, and pre-create import
// tensor handles where the workloads support handle replacement.
440 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
442 const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
446 for (
const BindableLayer* layer: order.GetInputLayers())
448 const auto bindingId = layer->GetBindingId();
450 bool supportsReplacement =
true;
452 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
// Workload indices are graph positions offset by the number of input layers.
454 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
455 workloadIndex -= noOfInputs;
457 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
458 armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
463 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
464 supportsReplacement &= workload->SupportsTensorHandleReplacement();
473 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
475 if (supportsReplacement && importFactory)
477 m_PreImportedInputHandles.emplace_back(
478 bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
// nullptr marks bindings whose workloads cannot accept replaced handles.
482 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
488 for (
const BindableLayer* layer: order.GetOutputLayers())
490 const auto bindingId = layer->GetBindingId();
492 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
493 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
498 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
499 workloadIndex -= noOfInputs;
501 indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
502 outputSlot->CalculateIndexOnOwner()};
504 bool supportsReplacement =
true;
505 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
506 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
508 for (
auto &inputSlot: outputSlot->GetConnections())
512 auto inWorkloadIndex = std::distance(order.begin(),
513 order.GetPosInGraph(inputSlot->GetOwningLayer()));
514 inWorkloadIndex -= noOfInputs;
515 indices.m_InputSlotIndices.emplace_back(
516 WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
517 inputSlot->GetSlotIndex()});
518 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
519 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
527 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
529 if (supportsReplacement && importFactory)
531 m_PreImportedOutputHandles.emplace_back(
532 bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(),
false));
536 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
// Let each backend's factory finalize after all workloads exist.
542 for (
auto&& workloadFactory : m_WorkloadFactories)
544 workloadFactory.second->AfterWorkloadsCreated();
550 timelineUtils->Commit();
// Memory management setup: external manager uses memory profiles + bin
// optimization strategies; internal manager allocates dynamic buffers.
553 if (useExternalMemoryManager)
557 CreateMemoryProfileAsync();
561 CreateMemoryProfile();
565 for (
auto& backendMemoryProfile : m_MemBlockMap)
567 const BackendId& backendId = backendMemoryProfile.first;
568 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
570 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
574 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
580 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
// Keep tensor memory ordered by output slot id for deterministic import.
583 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
584 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
585 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
587 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
596 if (useInternalMemoryManager)
599 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
602 for (
auto &workload : m_WorkloadQueue)
604 workload->PostAllocationConfigure();
608 if (useExternalMemoryManager)
612 AllocateAndExecuteConstantWorkloads();
616 AllocateAndExecuteConstantWorkloadsAsync();
622 for (
auto workload: ConstWorkloads)
630 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
633 for (
auto& pair : m_ConstantWorkloads)
635 auto tensorHandle = m_ConstantTensorHandles[pair.first];
636 tensorHandle->Allocate();
637 pair.second->Execute();
// Async variant: for each (constant — the filtering condition is elided)
// layer, creates its tensor handles, allocates the output handle, and runs
// the corresponding constant workload via ExecuteAsync with a
// WorkingMemDescriptor holding that single output.
// NOTE(review): corrupted/elided fragment — braces and the layer-type filter
// are missing from view; recover from version control before editing.
641 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
644 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
645 for (
auto&& layer : order)
649 const auto& outSlot = layer->GetOutputSlots()[0];
650 const auto factoryId = outSlot.GetTensorHandleFactoryId();
655 auto& workloadFactory = GetWorkloadFactory(*layer);
657 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
658 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
660 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
661 tensorHandle->Allocate();
663 auto& backend = m_Backends.at(layer->GetBackendId());
665 WorkingMemDescriptor memDesc;
666 memDesc.m_Outputs.push_back(tensorHandle);
668 ExecutionData executionData = backend->CreateExecutionData(memDesc);
669 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
// SendNetworkStructure fragment: walks the topologically-sorted graph,
// registers every layer (and each queued workload) with the profiling
// timeline against the network GUID, then commits. The trailing return is the
// body of GetNetworkGuid (original line 714) — its signature is elided.
// NOTE(review): corrupted/elided fragment — function signatures, braces and
// switch cases are missing from view.
677 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
678 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
680 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
681 TimelineUtilityMethods::GetTimelineUtils(profilingService);
683 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
685 for (
auto&& layer : order)
688 AddLayerStructure(timelineUtils, *layer, networkGuid);
689 switch (layer->GetType())
699 for (
auto& workload : m_WorkloadQueue)
702 AddWorkloadStructure(timelineUtils, workload, *layer);
709 timelineUtils->Commit();
714 return m_OptimizedNetwork->GetGuid();
// Three accessor fragments (signatures elided):
//  - GetInputTensorInfo: scans input layers for a matching binding id and
//    returns the tensor info of its single output slot.
//  - GetOutputTensorInfo: same for output layers via their single input slot.
//  - GetWorkloadFactory: looks up the factory registered for the layer's
//    backend id, throwing RuntimeException when none exists.
// NOTE(review): corrupted/elided fragment — braces, asserts and fall-through
// error handling are missing from view.
719 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
721 if (inputLayer->GetNumOutputSlots() != 1)
726 if (inputLayer->GetBindingId() == layerId)
728 return inputLayer->GetOutputSlot(0).GetTensorInfo();
737 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
739 if (outputLayer->GetNumInputSlots() != 1)
744 if (!outputLayer->GetInputSlot(0).GetConnection())
749 if (outputLayer->GetBindingId() == layerId)
751 return outputLayer->GetInputSlot(0).GetTensorInfo();
762 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
763 if (it == m_WorkloadFactories.end())
765 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
771 workloadFactory = it->second.get();
773 if (!workloadFactory)
778 return *workloadFactory;
// TensorPin fragment: value type pairing an owned ITensorHandle with its
// TensorInfo and a layer binding id, exposing const accessors.
// NOTE(review): corrupted/elided fragment — the class header, the
// m_TensorInfo/m_Id initializers and GetBindingId are missing from view.
787 TensorPin(std::unique_ptr<ITensorHandle> handle,
const TensorInfo& info,
LayerBindingId id)
788 : m_TensorHandle(
std::move(handle))
794 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
795 const TensorInfo&
GetTensorInfo()
const {
return m_TensorInfo; }
799 std::unique_ptr<ITensorHandle> m_TensorHandle;
800 TensorInfo m_TensorInfo;
// GetTensorPin fragment (free helper — signature start elided): finds the pin
// whose binding id matches `id`, returning it when found and otherwise
// throwing InvalidArgumentException naming the binding point.
805 const std::vector<TensorPin>& pins,
806 char const* bindingPointDesc)
808 auto it = std::find_if(pins.begin(), pins.end(),
809 [
id](
const TensorPin& pin)
811 return pin.GetBindingId() == id;
814 if (it != pins.end())
820 throw InvalidArgumentException(fmt::format(
"No tensor supplied for {0} {1}", bindingPointDesc,
id));
// WorkloadData fragment: wraps caller-supplied input/output tensors in
// (Const)PassthroughTensorHandle pins keyed by binding id, with
// GetInputTensorPin/GetOutputTensorPin lookups delegating to GetTensorPin.
// NOTE(review): corrupted/elided fragment — the class/constructor headers and
// layerId extraction are missing from view.
830 m_InputTensorPins.reserve(inputTensors.size());
831 m_OutputTensorPins.reserve(outputTensors.size());
833 for (
auto inputTensorPair : inputTensors)
835 auto inputTensor = inputTensorPair.second;
// Const handle: input memory is only read, never written.
837 std::unique_ptr<ITensorHandle> tensorHandle =
838 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
841 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
844 for (
auto outputTensorPair : outputTensors)
846 auto outputTensor = outputTensorPair.second;
848 std::unique_ptr<ITensorHandle> tensorHandle =
849 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
852 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
858 return GetTensorPin(
id, m_InputTensorPins,
"input");
863 return GetTensorPin(
id, m_OutputTensorPins,
"output");
868 std::vector<TensorPin> m_InputTensorPins;
869 std::vector<TensorPin> m_OutputTensorPins;
// EnqueueWorkload fragment: validates tensor counts against the graph, builds
// the input queue (replacing workload input handles for pre-imported inputs,
// restoring output-handler handles when an import is revoked, otherwise
// enqueuing copy workloads), builds the output queue analogously (including a
// SyncMemGeneric workload per pre-imported output), then records the
// inference in the profiling timeline and runs Execute.
// NOTE(review): heavily corrupted/elided fragment — braces, else branches and
// several declarations (e.g. inputLayer/outputLayer iteration) are missing
// from view; recover from version control before editing.
876 std::vector<ImportedInputId> preImportedInputIds,
877 std::vector<ImportedOutputId> preImportedOutputIds)
879 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
884 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
889 WorkloadData workloadData(inputTensors, outputTensors);
// Supplied + pre-imported inputs must cover every graph input exactly.
893 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
901 m_InputQueue.clear();
904 unsigned int inputIndex = 0;
905 unsigned int importedInputIdIndex = 0;
906 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
909 if (importedInputIdIndex < preImportedInputIds.size() &&
910 inputIndex == preImportedInputIds[importedInputIdIndex])
// First use of this pre-imported input: swap the handle into every
// consuming workload slot once, then remember it was imported.
913 if (!m_IsInputImported[inputIndex])
915 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
917 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
919 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
920 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
922 m_IsInputImported[inputIndex] =
true;
924 importedInputIdIndex++;
// Previously imported but not this time: restore the original handler data.
928 if (m_IsInputImported[inputIndex])
932 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
934 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
935 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
938 m_IsInputImported[inputIndex] =
false;
942 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
943 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
951 m_OutputQueue.clear();
959 unsigned int outputIndex = 0;
960 unsigned int importedOutputIdIndex = 0;
961 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
964 if (importedOutputIdIndex < preImportedOutputIds.size() &&
965 outputIndex == preImportedOutputIds[importedOutputIdIndex])
968 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
970 if (!m_IsOutputImported[outputIndex])
972 const auto bindingId = outputLayer->GetBindingId();
973 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
975 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
977 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
978 indices.m_OutputSlotIndices.m_SlotIndex);
980 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
982 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
983 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
985 m_IsOutputImported[outputIndex] =
true;
988 if (!inputTensorHandle)
// Pre-imported outputs still need a memory-sync workload in the queue.
994 syncDesc.
m_Inputs.push_back(inputTensorHandle);
996 info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());
998 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
1004 m_OutputQueue.push_back(std::move(syncWorkload));
1005 importedOutputIdIndex++;
1009 if (m_IsOutputImported[outputIndex])
1011 const auto bindingId = outputLayer->GetBindingId();
1012 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
1014 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
1016 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
1018 outputWorkload->ReplaceOutputTensorHandle(
1019 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
1021 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
1023 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
1024 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
1026 m_IsOutputImported[outputIndex] =
false;
1029 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
1031 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Profiling: tie this inference to the network entity and record start/end
// events around Execute.
1037 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1038 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1039 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1043 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1044 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1045 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1048 LabelsAndEventClasses::EXECUTION_OF_GUID);
1049 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1052 bool executionSucceeded =
true;
1055 if (m_ProfilingService->IsProfilingEnabled())
1057 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1061 executionSucceeded =
Execute(timelineUtils, inferenceGuid);
1067 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1068 timelineUtils->Commit();
// EnqueueInput fragment (signature elided): validates the handle, fills the
// input queue descriptor, attempts zero-copy import of the caller's memory
// into the layer's output handle when conditions allow (throwing
// MemoryImportException on failure), and otherwise enqueues a
// CopyMemGenericWorkload, registering it with the profiling timeline.
// NOTE(review): corrupted/elided fragment — braces and the import-eligibility
// conditions are missing from view.
1081 if (tensorHandle ==
nullptr)
1083 throw InvalidArgumentException(
"EnqueueInput: tensorHandle must not be NULL");
1089 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1090 info.m_InputTensorInfos.push_back(tensorInfo);
1098 const TensorInfo& outputTensorInfo = handler.
GetTensorInfo();
1099 ITensorHandle* outputTensorHandle = handler.GetData();
1101 if (!outputTensorHandle)
1106 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1107 info.m_OutputTensorInfos.push_back(outputTensorInfo);
1110 bool needMemCopy =
true;
1115 needMemCopy =
false;
// Import path: map the caller's handle and try a zero-copy import.
1117 void* mem = tensorHandle->
Map(
false);
1118 if (outputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1120 tensorHandle->
Unmap();
// Import refused: unmap before reporting the failure.
1123 tensorHandle->
Unmap();
1124 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1130 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1137 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1138 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1142 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1143 timelineUtils->Commit();
1146 m_InputQueue.push_back(std::move(inputWorkload));
// EnqueueOutput: validates the layer/handle, attempts zero-copy export of the
// caller's memory from the producing layer's handle (enqueuing a
// SyncMemGeneric workload on success, throwing MemoryExportException on
// failure), and otherwise enqueues a CopyMemGenericWorkload, registering it
// with the profiling timeline.
// NOTE(review): corrupted/elided fragment — braces and several conditions are
// missing from view; recover from version control before editing.
1150 void LoadedNetwork::EnqueueOutput(
const BindableLayer& layer, ITensorHandle* tensorHandle,
const TensorInfo& tensorInfo)
1154 throw InvalidArgumentException(
"EnqueueOutput: given layer not an OutputLayer");
1157 if (tensorHandle ==
nullptr)
1159 throw InvalidArgumentException(
"EnqueueOutput: tensorHandle must not be NULL");
1165 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1166 info.m_OutputTensorInfos.push_back(tensorInfo);
1168 if (layer.GetNumInputSlots() != 1)
1174 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1176 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1177 ITensorHandle* inputTensorHandle = outputHandler.GetData();
1178 if (!inputTensorHandle)
1190 bool needMemCopy =
true;
// Export only when the producing slot has a single consumer and its owner
// is not an Input layer (condition partially elided).
1192 (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1194 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() !=
LayerType::Input)
1199 needMemCopy =
false;
1200 void *mem = tensorHandle->Map(
false);
1201 bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1202 tensorHandle->Unmap();
// Exported successfully: only a memory sync is needed at output time.
1207 MemSyncQueueDescriptor syncDesc;
1208 syncDesc.m_Inputs.push_back(inputTensorHandle);
1209 info.m_InputTensorInfos.push_back(inputTensorInfo);
1210 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1215 m_OutputQueue.push_back(std::move(syncWorkload));
1219 throw MemoryExportException(
"EnqueueOutput: Memory Export failed");
1227 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1228 info.m_InputTensorInfos.push_back(inputTensorInfo);
1230 std::unique_ptr<IWorkload> outputWorkload =
1231 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1232 if (!outputWorkload)
1237 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1238 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1242 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1243 timelineUtils->Commit();
1246 m_OutputQueue.push_back(std::move(outputWorkload));
// AllocateWorkingMemory: idempotently acquires working memory — via the
// external memory manager (allocating then importing each tensor memory
// block into its handle) and/or each backend memory manager — guarded by a
// caller-held lock when threading is enabled.
// NOTE(review): corrupted/elided fragment — a preprocessor directive is split
// across two lines (original 1251) and braces are missing; recover from
// version control before editing. No comments are inserted inside the split
// directive below.
1250 void LoadedNetwork::AllocateWorkingMemory(
1251 #
if !defined(ARMNN_DISABLE_THREADS)
1252 std::lock_guard<std::mutex>& lock
1258 #if !defined(ARMNN_DISABLE_THREADS)
1262 if (m_IsWorkingMemAllocated)
1267 if (m_ExternalMemoryManager)
1269 m_ExternalMemoryManager->Allocate();
1271 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1273 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1277 for (
auto&& memoryManager : m_BackendMemoryMangers)
1281 memoryManager->Acquire();
1285 m_IsWorkingMemAllocated =
true;
// FreeWorkingMemory fragment (signature elided): under its own mutex (when
// threading is enabled), releases external and backend memory managers and
// clears the allocated flag; no-op when memory was never allocated.
1290 #if !defined(ARMNN_DISABLE_THREADS)
1291 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1294 if (!m_IsWorkingMemAllocated)
1299 if (m_ExternalMemoryManager)
1301 m_ExternalMemoryManager->Deallocate();
1305 for (
auto&& memoryManager : m_BackendMemoryMangers)
1309 memoryManager->Release();
1313 m_IsWorkingMemAllocated =
false;
// Execute fragment (signature start elided): allocates working memory, then
// runs the input, main and output workload queues in order, recording
// per-workload inference start/end-of-life profiling events; caught
// RuntimeException/std::runtime_error are logged via the Fail lambda and
// reported through the boolean result.
// NOTE(review): corrupted/elided fragment — braces, `success = false` in Fail
// and catch bodies are missing from view.
1317 ProfilingGuid inferenceGuid)
1319 bool success =
true;
1321 auto Fail = [&](
const std::exception&
error)
1323 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1329 #if !defined(ARMNN_DISABLE_THREADS)
1330 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1331 AllocateWorkingMemory(lockGuard);
1333 AllocateWorkingMemory();
1336 ProfilingDynamicGuid workloadInferenceID(0);
// Shared runner for all three queues; records timeline events per workload.
1337 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1339 for (
auto& workload : queue)
1343 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1348 workload->Execute();
1352 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1358 ExecuteQueue(m_InputQueue);
1359 ExecuteQueue(m_WorkloadQueue);
1360 ExecuteQueue(m_OutputQueue);
1363 catch (
const RuntimeException& error)
1367 catch (
const std::runtime_error& error)
// EnqueueInput (ConstTensor overload): when import is supported, wraps the
// caller's memory in a ConstPassthroughTensorHandle, maps it and imports it
// into the workload-side handle (throwing MemoryImportException on failure or
// when the backend lacks Import support); the fallback copy path (via
// copyFunc/CopyTensorContentsGeneric — tail elided) copies the data instead.
// NOTE(review): corrupted/elided fragment — braces and the capability check
// are missing from view.
1375 void LoadedNetwork::EnqueueInput(
const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1382 std::unique_ptr<ITensorHandle> tensorHandle =
1383 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1384 inputTensor.GetMemoryArea());
1385 void* mem = tensorHandle->Map(
false);
1387 if (inputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1389 tensorHandle->Unmap();
1392 tensorHandle->Unmap();
1393 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1397 throw MemoryImportException(
"EnqueueInput: Memory Import failed, backend does not support Import");
1403 std::unique_ptr<ITensorHandle> tensorHandle =
1404 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1406 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1408 memcpy(dst, src, size);
// ImportOutputTensor: wraps the caller's output memory in a
// PassthroughTensorHandle, maps it and imports it into the network-side
// handle, throwing MemoryExportException when the import fails or when the
// target is an Input layer; a memcpy-based fallback path follows (tail
// elided).
// NOTE(review): corrupted/elided fragment — braces and some conditions are
// missing from view.
1421 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1423 if (!outputTensorHandle)
1431 std::unique_ptr<ITensorHandle> tensorHandle =
1432 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1433 outputTensor.GetMemoryArea())>
1435 void* mem = tensorHandle->Map(
false);
1436 bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1437 tensorHandle->Unmap();
1441 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed");
1446 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1454 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1456 memcpy(dst, src, size);
1459 std::unique_ptr<ITensorHandle> tensorHandle =
1460 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
// GetInputTensor/GetOutputTensor fragments (signatures and the id-match
// conditions elided): linear scans over the supplied tensor collections
// returning the tensor paired with the requested binding id.
1469 for (
auto inputTensorPair : inputTensors)
1474 return inputTensorPair.second;
1482 for (
auto outputTensorPair : outputTensors)
1487 return outputTensorPair.second;
// ImportInputs fragment: two paths. First (no force-import source): walks
// the graph's input layers in order, matches each supplied tensor by binding
// id, and attempts CanBeImported+Import into the corresponding pre-imported
// handle, collecting successful input indices. Second (forced import source):
// per supplied tensor, finds the input layer by binding id, checks the
// backend's PreImportIOTensors capability and the factory's import support,
// creates a dedicated imported handle and imports into it, assigning fresh
// ImportedInputId values.
// NOTE(review): heavily corrupted/elided fragment — braces, the path-selection
// condition, try/catch structure and some declarations are missing from view.
1501 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1504 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1506 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
1509 std::vector<ImportedInputId> importedInputs;
1510 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1511 unsigned int inputIndex = 0;
1514 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
// A null pre-imported handle means this binding cannot accept imports.
1516 if (!outputTensorHandle)
1522 auto layerBindingId = inputLayer->GetBindingId();
1523 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1525 return inputTensor.first == layerBindingId;
1528 if (it == inputTensors.end())
1534 const auto& inputTensor = *it;
1535 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1536 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1537 inputTensor.second.GetMemoryArea());
1541 if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1542 && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1544 importedInputs.push_back(inputIndex);
1546 passThroughTensorHandle->Unmap();
1550 ARMNN_LOG(
error) <<
"An error occurred attempting to import input_"
1551 << inputIndex <<
" : " << exception.
what();
1552 passThroughTensorHandle->Unmap();
1557 return importedInputs;
// Forced-import path begins here (enclosing condition elided).
1562 std::vector<ImportedInputId> importedInputs;
1563 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1565 for (
auto inputTensor : inputTensors)
1567 auto layerBindingId = inputTensor.first;
1570 return layer->GetBindingId() == layerBindingId;
1576 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1579 const Layer* layer = *it;
1587 backend->GetCapabilities()))
1589 std::string er = backend->GetId();
1590 er +=
" does not have PreImportIOTensors capability";
1605 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1608 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1613 fmt::format(
"ImportInputs: Memory Import failed, backend: "
1614 "{} does not support importing from source {}"
1618 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1619 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1620 inputTensor.second.GetMemoryArea());
1622 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1624 importedInputs.push_back(m_CurImportedInputId++);
1625 passThroughTensorHandle->Unmap();
1629 passThroughTensorHandle->Unmap();
1633 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1635 return importedInputs;
// ImportOutputs fragment: mirror of ImportInputs for output bindings — the
// first path imports caller memory into pre-created output handles by output
// index; the forced-source path finds the output layer by binding id, checks
// the backend's PreImportIOTensors capability, creates a dedicated imported
// handle and imports into it, assigning fresh ImportedOutputId values.
// NOTE(review): heavily corrupted/elided fragment — braces, path selection
// and try/catch structure are missing from view; recover from version control.
1647 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1650 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1654 std::vector<ImportedOutputId> importedOutputs;
1655 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1657 unsigned int outputIndex = 0;
1660 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1661 if (!inputTensorHandle)
1667 auto layerBindingId = outputLayer->GetBindingId();
1668 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1670 return outputTensor.first == layerBindingId;
1673 if (it == outputTensors.end())
1679 const auto outputTensor = *it;
1683 if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1684 && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1686 importedOutputs.push_back(outputIndex);
1691 ARMNN_LOG(
error) <<
"An error occurred attempting to import output_"
1692 << outputIndex <<
" : " << exception.
what();
1696 return importedOutputs;
// Forced-import path begins here (enclosing condition elided).
1699 std::vector<ImportedOutputId> importedOutputs;
1700 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1702 for (
const auto& outputTensor : outputTensors)
1704 auto layerBindingId = outputTensor.first;
1707 return layer->GetBindingId() == layerBindingId;
1712 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1716 const Layer* layer = *it;
1724 backend->GetCapabilities()))
1726 std::string er = backend->GetId();
1727 er +=
" does not have PreImportIOTensors capability";
1741 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1744 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1749 "{} does not support importing from source {}"
1750 , factoryId, forceImportMemorySource));
1753 if (tensorHandle->
Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1755 importedOutputs.push_back(m_CurImportedOutputId++);
1762 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1765 return importedOutputs;
// LoadedNetwork::ClearImportedInputs -- fragment. For each requested id,
// unimport the backing memory and drop the handle (reset to empty), leaving the
// slot tombstoned so double-clears are reported.
1770 for (
auto id : inputIds)
// NOTE(review): bounds check uses '>' -- id == size() passes the check yet the
// indexing at line 1777 would then be out of range. Looks like it should be
// '>=' -- confirm against the container's invariants before changing.
1772 if (
id > m_PreImportedInputHandles.size())
1777 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
// A null handle means this id was already cleared; error text below.
1778 if (!importedTensorHandle)
1781 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
1784 importedTensorHandle->Unimport();
1785 importedTensorHandle = {};
// LoadedNetwork::ClearImportedOutputs -- fragment; mirrors ClearImportedInputs
// for the pre-imported output handles.
1791 for (
auto id : outputIds)
// NOTE(review): same suspected off-by-one as the inputs variant -- '>' lets
// id == size() through to the indexing at line 1798; likely should be '>='.
1793 if (
id > m_PreImportedOutputHandles.size())
1798 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1799 if (!importedTensorHandle)
1802 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
1805 importedTensorHandle->Unimport();
1806 importedTensorHandle = {};
// LoadedNetwork::Execute(... WorkingMemHandle&, preImportedInputs,
// preImportedOutputs) -- fragment. NOTE(review): many lines (braces, else
// branches, some statements) are missing from this extraction; comments
// describe only the visible flow.
// Visible flow: validate input/output counts, collect binding ids, set up a
// cleanup lambda that restores the working-mem handles, emit profiling
// timeline events, bind caller tensors and pre-imported handles into the
// working memory, then run the workload queue.
1813 std::vector<ImportedInputId> preImportedInputs,
1814 std::vector<ImportedOutputId> preImportedOutputs)
1816 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Caller-supplied tensors plus pre-imported ids must together cover every
// network input; the error message differs depending on whether any
// pre-imported ids were passed at all.
1818 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1820 if (preImportedInputs.empty())
1827 "Number of inputs + preImportedInputs provided does not match network.");
// Same coverage check for outputs.
1831 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1833 if (preImportedOutputs.empty())
1836 "Number of outputs provided does not match network.");
1841 "Number of outputs + preImportedOutputs provided does not match network.");
// Flatten all binding ids (direct tensors first, then validated pre-imported
// ids) into one array -- presumably used for duplicate detection downstream.
1848 unsigned int index = 0;
1849 for (
auto pair : inputTensors)
1851 bindingIds[index++] = pair.first;
1855 bindingIds[index++] = ValidateImportedInputID(
id);
1857 for (
auto pair : outputTensors)
1859 bindingIds[index++] = pair.first;
1863 bindingIds[index++] = ValidateImportedOutputID(
id);
// Cleanup lambda: rebind the working-mem handle's own input/output handles
// over the pre-imported ones that get swapped in further down.
1868 auto resetMemHandle = [&]()
1872 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1874 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1876 for (
auto it : inputConnections)
1884 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1889 for (
auto it : outputConnections)
// Profiling: create an inference entity tied to the network guid and record
// start-of-life / end-of-life events around the execution.
1896 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1897 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1898 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1902 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1903 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1904 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1907 LabelsAndEventClasses::EXECUTION_OF_GUID);
1908 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1911 bool executionSucceeded =
true;
1916 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1917 timelineUtils->Commit();
// Copy each caller input into the working memory's input handle.
1927 for (
auto pair : inputTensors)
1929 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
// Pre-imported inputs: point every recorded input connection directly at the
// previously imported handle (zero copy).
1935 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1936 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1937 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1940 for (
auto it : inputConnections)
1942 *it = preimportedHandle.get();
// Outputs: bind caller tensors, then swap pre-imported output handles in the
// same way as inputs.
1950 for (
auto pair: outputTensors)
1952 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1958 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1959 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1960 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1963 for (
auto it : outputConnections)
1965 *it = preimportedHandle.get();
// Shared failure handler: log and flag -- execution result is reported via
// executionSucceeded rather than rethrowing here. (ARMNN_LOG(error) is the
// log level; it is distinct from the lambda's 'error' parameter.)
1970 auto Fail = [&](
const std::exception&
error)
1972 ARMNN_LOG(
error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1973 executionSucceeded =
false;
1975 ProfilingDynamicGuid workloadInferenceID(0);
// Run every workload in queue order, wrapping each in per-workload
// inference/start-of-life and end-of-life profiling events.
1979 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1981 auto& workload = m_WorkloadQueue[i];
1984 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1992 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
2001 catch (
const std::runtime_error&
error)
2014 for (
auto pair: outputTensors)
// LoadedNetwork::CreateWorkingMemHandle -- fragment. NOTE(review): signature,
// braces and a number of interior lines are missing from this extraction.
// Visible flow: walk the graph once, creating one tensor handle per output
// slot (memory-managed or not), recording per-layer WorkingMemDescriptors and
// the coordinates of handles touched by Input/Output layers, then package
// everything (plus externally managed memory) into a WorkingMemHandle.
2034 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Handles whose memory the backend's memory manager owns vs. handles backed
// by the external memory manager built below.
2037 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
2039 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
2041 std::vector<WorkingMemDescriptor> workingMemDescriptors;
2042 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
// Factory: create a (non-managed) handle for a slot via the owning layer's
// workload factory.
2044 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
2047 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
2053 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
// HandleInfo fields (struct header missing from view): flags telling whether
// a handle feeds an Input/Output layer, plus its descriptor coordinates.
2071 bool m_IsInputLayerHandle =
false;
2072 bool m_IsOutputLayerHandle =
false;
2078 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
2080 unsigned int layerIndex = 0;
2081 for (
auto&& layer : order)
// Per-layer classification; defaults assume managed memory.
2091 bool isMemoryManaged =
true;
2092 bool isInputLayer =
false;
2093 bool isOutputLayer =
false;
2094 bool isConnectedToOutputLayer =
false;
2100 isInputLayer =
true;
2105 isOutputLayer =
true;
2108 unsigned int slotIndex = 0;
// Scan each output slot's connections to see whether it feeds an Output layer.
2113 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2117 if (!isConnectedToOutputLayer)
2119 isConnectedToOutputLayer =
true;
// A slot feeding two or more Output layers cannot be imported on; warn and
// fall back to managed memory.
2127 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2128 "This will prevent importing on the connected OutputLayers.",
2130 isMemoryManaged =
true;
// Create the slot's handle in the appropriate pool.
2136 if (isMemoryManaged)
2138 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2139 tensorHandle = managedTensorHandles.back().get();
2143 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2144 tensorHandle = unmanagedTensorHandles.back().get();
2147 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
2149 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2150 handleInfo.m_TensorHandle = tensorHandle;
// Record where Output/Input layers touch this handle so Execute can swap in
// imported memory later.
2153 if (isConnectedToOutputLayer)
2155 handleInfo.m_IsOutputLayerHandle =
true;
2156 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2161 handleInfo.m_IsInputLayerHandle =
true;
2163 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
// Input-slot pass: wire each input to the handle of its connected output slot.
2173 if (!slot.GetConnection())
2178 auto outputSlot = slot.GetConnectedOutputSlot();
2179 auto key = outputSlot->GetOwningLayer().GetGuid();
// Constant tensors get their pre-created handle, optionally decorated when
// the slot overrides the tensor info.
2182 auto found = m_ConstantTensorHandles.find(key);
2183 if (found != m_ConstantTensorHandles.end())
2186 if (slot.IsTensorInfoOverridden())
2191 tensorHandle = decorated;
2194 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
// (Branch with missing condition lines) -- registers the connected output
// slot's handle as an output-layer handle for this binding id.
2202 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2203 handleInfo.m_TensorHandle = tensorHandle;
2204 handleInfo.m_IsOutputLayerHandle =
true;
2205 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2206 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
// Normal case: reuse the producer's handle (decorated if the tensor info is
// overridden on this input slot).
2211 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2213 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2214 if (slot.IsTensorInfoOverridden())
2219 inputTensorHandle = decorated;
2222 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2228 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2229 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2233 else if (handleInfo.m_IsOutputLayerHandle)
2235 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2240 if (handleInfo.m_IsInputLayerHandle)
2242 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2243 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
// One descriptor + execution-data slot per layer.
2253 std::pair<BackendId, ExecutionData> dataPair;
2256 executionDataVec.push_back(dataPair);
2257 workingMemDescriptors.push_back(workingMemDescriptor);
// Build the external memory plan and keep tensorMemory ordered by output-slot
// id so later lookups by slot index are valid.
2263 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2265 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2268 std::sort(tensorMemory.begin(), tensorMemory.end(),
2269 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2270 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2272 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
// Flatten the per-handle coordinate records for the WorkingMemHandle ctor.
2275 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2276 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2278 for (
const auto& handleInfo: outputToHandleInfoMap)
2280 if (handleInfo.second.m_IsOutputLayerHandle)
2282 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2285 if (handleInfo.second.m_IsInputLayerHandle)
2287 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2291 return std::make_unique<WorkingMemHandle>(networkId,
2292 inputConnectionsInfo,
2293 outputConnectionsInfo,
2294 workingMemDescriptors,
2295 std::move(externalMemoryManager),
2296 std::move(tensorMemory),
2297 std::move(managedTensorHandles),
2298 std::move(unmanagedTensorHandles),
// LoadedNetwork::RegisterDebugCallback (body fragment; signature not in view):
// forward the debug callback to every queued workload.
2305 for (
auto&& workloadPtr: m_WorkloadQueue)
2307 workloadPtr.get()->RegisterDebugCallback(func);
// Builds the per-backend memory-block profile used by the async execution
// path. Tracks a PartialBlock per OutputSlot: birth timestep, remaining
// lifetime (= number of consumers), aligned size, and an output index; when a
// block's lifetime is exhausted it is emitted into m_MemBlockMap.
// NOTE(review): struct/enum headers, condition lines and braces are missing
// from this extraction; comments cover only the visible lines.
2312 void LoadedNetwork::CreateMemoryProfileAsync()
// PartialBlock fields (struct header missing from view).
2316 unsigned int m_StartOfLife;
2317 unsigned int m_Lifetime;
2320 unsigned int m_Index;
// Round a byte count up to the next multiple of sizeof(float).
2325 auto align = [](
size_t numToAlign)
2327 const size_t alignment =
sizeof(float);
2328 return ((numToAlign + alignment - 1) / alignment) * alignment;
2331 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2336 unsigned int timestep = 0;
2337 unsigned int outputIndex = 0;
2338 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2340 for (
auto&& layer : order)
2342 const LayerType& layerType = layer->GetType();
// Part of a special-case condition (its first lines are missing from view)
// gated on a single-connection input.
2350 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2361 BackendId backendId = layer->GetBackendId();
// Start a block for every output slot of a backend that supports externally
// managed memory.
2362 for (
auto& outputSlot : layer->GetOutputSlots())
2364 if (!m_SupportsExternallyManagedMemory[backendId])
2369 PartialBlock partialBlock;
2371 partialBlock.m_StartOfLife = timestep;
2373 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2374 partialBlock.m_MemSize = alignedSize;
2375 partialBlock.m_Index = outputIndex++;
2376 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2377 partialBlock.m_BackendId = backendId;
// Unconsumed output: emit immediately with a zero-length lifetime.
2379 if (partialBlock.m_Lifetime == 0)
2381 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2382 partialBlock.m_StartOfLife,
2383 partialBlock.m_MemSize,
2385 partialBlock.m_Index);
2389 memBlockTrackerMap[&outputSlot] = partialBlock;
// Consume one lifetime unit per input connection; emit the block when its
// last consumer (condition lines missing from view) is reached.
2393 for (
auto& inputSlot : layer->GetInputSlots())
2395 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2396 const LayerType& owningLayerType = connectedInputLayer.GetType();
2407 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2409 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2411 auto& lifetime = partialBlock.m_Lifetime;
2416 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2418 partialBlock.m_MemSize,
2420 partialBlock.m_Index);
// Synchronous-path twin of CreateMemoryProfileAsync: same PartialBlock
// lifetime accounting, but keyed by root ITensorHandle rather than OutputSlot,
// so sub-tensors sharing a parent allocation are merged into one block.
// NOTE(review): struct headers, condition lines and braces are missing from
// this extraction; comments cover only the visible lines.
2427 void LoadedNetwork::CreateMemoryProfile()
// Follow GetParent() links so a sub-tensor handle resolves to the handle that
// actually owns the memory.
2431 auto TraceSubTensorHandleAncestry = [](ITensorHandle*
const subTensorHandle)
2433 ITensorHandle* ancestor = subTensorHandle;
2434 while (ancestor && ancestor->GetParent())
2436 ancestor = ancestor->GetParent();
// PartialBlock fields (struct header missing from view).
2443 unsigned int m_StartOfLife;
2444 unsigned int m_Lifetime;
2447 unsigned int m_Index;
2449 BackendId m_BackendId;
// Round a byte count up to the next multiple of sizeof(float).
2452 auto align = [](
size_t numToAlign)
2454 const size_t alignment =
sizeof(float);
2455 return ((numToAlign + alignment - 1) / alignment) * alignment;
2458 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2463 unsigned int timestep = 0;
2464 unsigned int outputIndex = 0;
2465 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2467 for (
auto&& layer : order)
2469 const LayerType& layerType = layer->GetType();
// Part of a special-case condition (first lines missing from view).
2477 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2488 BackendId backendId = layer->GetBackendId();
2489 for (
auto& outputSlot : layer->GetOutputSlots())
2491 if (!m_SupportsExternallyManagedMemory[backendId])
2496 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2497 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
// First sighting of this root handle -> open a new block.
2499 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2501 PartialBlock partialBlock;
2503 partialBlock.m_StartOfLife = timestep;
2505 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2506 partialBlock.m_MemSize = alignedSize;
2507 partialBlock.m_Index = outputIndex++;
2508 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2509 partialBlock.m_BackendId = backendId;
// Unconsumed output: emit immediately.
2511 if (partialBlock.m_Lifetime == 0)
2513 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2514 partialBlock.m_StartOfLife,
2515 partialBlock.m_MemSize,
2517 partialBlock.m_Index);
2521 memBlockTrackerMap[tensorHandle] = partialBlock;
2523 m_Tensorhandles.push_back(tensorHandle);
// Handle already tracked (e.g. shared by a sub-tensor): extend its lifetime
// by this slot's consumer count instead of opening a new block.
2528 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
// Consumption pass over input slots, mirroring the async variant.
2532 for (
auto& inputSlot : layer->GetInputSlots())
2534 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2535 const LayerType& owningLayerType = connectedInputLayer.GetType();
2545 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2550 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2552 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2553 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2555 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2557 auto& lifetime = partialBlock.m_Lifetime;
2562 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2564 partialBlock.m_MemSize,
2566 partialBlock.m_Index);
// Builds the external MemoryManager from the per-backend memory bins in
// m_MemBinMap. For each bin: one BufferStorage sized to the bin, one
// TensorMemory per block (offset + output-slot index), each also appended to
// the caller's tensorMemoryVec tagged with the allocator's memory source.
// NOTE(review): the declaration of 'allocatorMap' (used at line 2585) and the
// 'else' keyword before line 2591 fall in gaps of this extraction.
2574 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2575 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2577 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2580 for (
auto& backend : m_MemBinMap)
2582 std::vector<BufferStorage> bufferStorageVec;
// Prefer a registered custom allocator for this backend; otherwise (line
// 2591) fall back to the backend's default allocator.
2584 std::shared_ptr<ICustomAllocator> backendAllocator;
2585 if (allocatorMap.find(backend.first) != allocatorMap.end())
2587 backendAllocator = allocatorMap[backend.first];
2591 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2594 for (
auto& memBin : backend.second)
2596 BufferStorage bufferStorage;
2597 bufferStorage.m_BufferSize = memBin.m_MemSize;
2598 bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2600 for (
auto& memBlock : memBin.m_MemBlocks)
2602 auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2604 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2605 bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2608 bufferStorageVec.emplace_back(std::move(bufferStorage));
// NOTE(review): the literal 4 is an undocumented magic constant (presumably
// an alignment/word-size argument to StoreMemToAllocate) -- confirm and name it.
2611 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2614 return memoryManager;
// LoadedNetwork::ValidateImportedInputID -- fragment (signature and the 'try'
// line are in gaps of this extraction). Resolves an ImportedInputId to its
// LayerBindingId; .at() throws std::out_of_range for unknown ids, which is
// translated into an InvalidArgumentException below.
2621 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
// A null handle means the id was valid once but has since been cleared.
2622 if (!importedTensorHandlePin.m_TensorHandle)
// NOTE(review): "LoadedNetwork::Execute:" here lacks the trailing space that
// the output variant (line 2642) has, so the message renders as
// "Execute:PreImportedInput" -- confirm and align the two strings.
2624 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute:"
2625 "PreImportedInput: {} has been deleted",
id));
2627 return importedTensorHandlePin.m_LayerBindingId;
2629 catch (
const std::out_of_range&)
2631 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedInputId: {}",
id));
// LoadedNetwork::ValidateImportedOutputID -- fragment (signature/'try' and the
// closing lines run outside this view). Mirrors ValidateImportedInputID for
// ImportedOutputIds: resolve to the LayerBindingId, rejecting deleted or
// unknown ids with InvalidArgumentException.
2639 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2640 if (!importedTensorHandlePin.m_TensorHandle)
2642 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: "
2643 "PreImportedOutput: {} has been deleted",
id));
2645 return importedTensorHandlePin.m_LayerBindingId;
// std::map/vector ::at() signals an unknown id via out_of_range; convert it
// to the public exception type callers expect.
2647 catch (
const std::out_of_range&)
2649 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedOutputId: {}",
id));