29 #include <common/include/Processes.hpp>
31 #include <fmt/format.h>
// Compose a human-readable error message from a fixed prefix and an
// exception's what() text, separated by a single space.
//
// The fragment visible in this file used a stream `ss` without declaring it
// and never returned a value; this is the completed, self-contained form.
//
// @param prefix  Message prefix; must be a valid (non-null) C string.
// @param error   Any exception-like object exposing what().
// @return        "<prefix> <error.what()>".
template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
// Register a layer with the profiling timeline: create a named, typed child
// entity under the network and record a connection for each input slot.
// NOTE(review): the layer parameter line and several call-argument lines are
// elided from this view of the file.
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52 ProfilingGuid networkGuid)
// Unnamed layers are reported with a "<Unnamed>" placeholder.
55 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
56 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
59 LabelsAndEventClasses::LAYER_GUID);
// One retention-link connection per connected input slot.
60 for (
auto&& input : layer.GetInputSlots())
62 const IOutputSlot* source = input.GetConnectedOutputSlot();
67 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
68 source->GetOwningLayerGuid(),
// Register a workload with the profiling timeline: create a typed entity,
// label it with its layer's backend id, and relate it to the layer.
// NOTE(review): some lines (closing braces, relationship arguments) are
// elided from this view of the file.
73 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
74 std::unique_ptr<IWorkload>& workload,
78 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
79 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
80 layer.GetBackendId().Get(),
81 LabelsAndEventClasses::BACKENDID_GUID);
// Link the workload to its owning layer as a child.
84 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
87 LabelsAndEventClasses::CHILD_GUID);
// NOTE(review): fragment of a validation helper — its signature and several
// interior lines (lambda tail, braces, guard conditions) are elided from this
// view. It checks the runtime memory sources against the import/export flags
// recorded when the network was optimized.
// Locate the options entry whose backend id is "Global" — it carries the
// network-wide optimization-time settings.
106 const vector<BackendOptions>::iterator& backendItr =
107 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
108 if (backend.GetBackendId().Get() ==
"Global")
// Default: neither import nor export was enabled at optimization time.
117 bool importEnabled =
false;
118 bool exportEnabled =
false;
119 if (backendItr != optimizedOptions.end())
// Scan the "Global" options for the ImportEnabled / ExportEnabled flags.
122 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
124 const BackendOptions::BackendOption& option = backendItr->GetOption(i);
125 if (option.GetName() ==
"ImportEnabled")
127 importEnabled = option.GetValue().AsBool();
129 if (option.GetName() ==
"ExportEnabled")
131 exportEnabled = option.GetValue().AsBool();
// Reject an input memory source that contradicts the optimization-time import
// setting (the branch conditions selecting each message are elided here).
141 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.m_InputSource);
144 message.append(
" requires that memory import be enabled. However, "
145 "it was disabled when this network was optimized.");
149 message.append(
" requires that memory import be disabled. However, "
150 "it was enabled when this network was optimized.");
152 throw InvalidArgumentException(message);
// Same consistency check for the output memory source against export.
158 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.m_OutputSource);
161 message.append(
" requires that memory export be enabled. However, "
162 "it was disabled when this network was optimized.");
166 message.append(
" requires that memory export be disabled. However, "
167 "it was enabled when this network was optimized.");
169 throw InvalidArgumentException(message);
// NOTE(review): tail of a factory function (its return type and leading
// parameters are elided). It constructs a LoadedNetwork, converting any
// exception into a message written to errorMessage.
174 std::string& errorMessage,
176 arm::pipe::IProfilingService* profilingService)
178 std::unique_ptr<LoadedNetwork> loadedNetwork;
// On failure: record a readable message and yield an empty pointer.
180 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
182 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ",
error);
185 return std::unique_ptr<LoadedNetwork>();
// Build the LoadedNetwork; the surrounding try and additional catch
// handlers are partially elided from this view.
190 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
200 catch (
const std::runtime_error&
error)
205 return loadedNetwork;
// LoadedNetwork constructor: takes ownership of the optimized network and
// prepares everything needed to execute it — backends, workload factories,
// tensor handles, workloads, pre-imported I/O handles and memory strategies.
// NOTE(review): many interior lines (braces, guard conditions, profiling
// macros) are elided from this view of the file.
208 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
210 arm::pipe::IProfilingService* profilingService) :
211 m_OptimizedNetwork(
std::move(net)),
212 m_NetworkProperties(networkProperties),
213 m_TensorHandleFactoryRegistry(),
214 m_ProfilingService(profilingService)
218 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
228 m_NetworkProperties);
// Track which memory-management scheme the participating backends need.
235 bool useExternalMemoryManager =
false;
236 bool useInternalMemoryManager =
false;
237 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Per-binding flags recording whether an input/output currently uses an
// imported (caller-owned) tensor handle; all start as not-imported.
244 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
245 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
// First pass over the graph: create one backend (and workload factory)
// per distinct backend id encountered.
248 for (
auto&& layer : order)
250 auto const& backendId = layer->GetBackendId();
251 if (m_Backends.count(backendId) == 0)
254 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
256 IBackendInternal* backend = it.first->second.get();
// Capability checks — messages indicate these guard async execution and
// externally managed memory support (conditions elided here).
262 backend->GetCapabilities()))
264 std::string er = backend->GetId();
265 er +=
" does not support AsyncExecution";
266 throw BackendCapabilityException(er);
269 backend->GetCapabilities()))
271 std::string er = backend->GetId();
272 er +=
" does not support ExternallyManagedMemory\n";
273 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
274 throw BackendCapabilityException(er);
276 m_SupportsExternallyManagedMemory[backend->GetId()] =
true;
277 useExternalMemoryManager =
true;
281 m_SupportsExternallyManagedMemory[backend->GetId()] =
false;
282 useInternalMemoryManager =
true;
// Tensor-allocator-capable backends get the handle-factory registry;
// others get a dedicated backend memory manager instance.
286 if (backend->SupportsTensorAllocatorAPI())
288 workloadFactory = backend->CreateWorkloadFactory(
289 m_TensorHandleFactoryRegistry,
290 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
296 m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
297 workloadFactory = backend->CreateWorkloadFactory(
298 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
300 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
// Second pass: create output tensor handles for every layer; the flags
// passed depend on layer type and backend external-memory support.
306 for (
auto&& layer : order)
308 auto& workloadFactory = GetWorkloadFactory(*layer);
309 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
311 switch (layer->GetType())
318 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
320 !supportsExternalManager &&
326 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
// Special case: a layer whose single output feeds exactly one Output layer.
333 if ((layer->GetNumOutputSlots() == 1) &&
334 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
335 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
337 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
339 !supportsExternalManager &&
344 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
346 !supportsExternalManager);
// Profiling timeline: register the network entity and its process id label.
353 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
354 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
355 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
358 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
360 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
362 int processID = arm::pipe::GetCurrentProcessId();
363 std::stringstream ss;
365 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
// Third pass: create the actual workloads in graph order.
368 std::vector<IWorkload*> ConstWorkloads;
373 for (
auto&& layer: order)
378 AddLayerStructure(timelineUtils, *layer, networkGuid);
381 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
383 switch (layer->GetType())
393 auto workload = layer->CreateWorkload(workloadFactory);
// Failure to create a workload is a hard error — report name/type/backend.
397 const char*
const layerName =
398 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
399 throw InvalidArgumentException(
400 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
401 layerName,
static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
408 AddWorkloadStructure(timelineUtils, workload, *layer);
// Constant layers' workloads and handles are stashed separately when the
// network is async or uses the external memory manager.
413 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
416 m_ConstantTensorHandles[layer->GetGuid()] =
417 layer->GetOutputSlot(0).GetOutputHandler().GetData();
418 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
422 m_WorkloadQueue.push_back(std::move(workload));
427 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
// CPU-side constant data may be released once the workload holds it.
431 layer->ReleaseConstantData();
// Synchronous path: pre-compute workload/slot indices for every bound input
// and output so tensor handles can be swapped at EnqueueWorkload time.
439 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
441 const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
445 for (
const BindableLayer* layer: order.GetInputLayers())
447 const auto bindingId = layer->GetBindingId();
449 bool supportsReplacement =
true;
451 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
// Workload indices are relative to the queue, which excludes input layers.
453 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
454 workloadIndex -= noOfInputs;
456 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
457 armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
462 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
463 supportsReplacement &= workload->SupportsTensorHandleReplacement();
472 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
// Only pre-create an import handle when replacement and an import factory
// are available; otherwise push a null placeholder to keep indices aligned.
474 if (supportsReplacement && importFactory)
476 m_PreImportedInputHandles.emplace_back(
477 bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
481 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
// Mirror of the above for output layers.
487 for (
const BindableLayer* layer: order.GetOutputLayers())
489 const auto bindingId = layer->GetBindingId();
491 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
492 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
497 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
498 workloadIndex -= noOfInputs;
500 indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
501 outputSlot->CalculateIndexOnOwner()};
503 bool supportsReplacement =
true;
504 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
505 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
// Also track every consumer of the producing slot, since replacing the
// output handle must update those inputs too.
507 for (
auto &inputSlot: outputSlot->GetConnections())
511 auto inWorkloadIndex = std::distance(order.begin(),
512 order.GetPosInGraph(inputSlot->GetOwningLayer()));
513 inWorkloadIndex -= noOfInputs;
514 indices.m_InputSlotIndices.emplace_back(
515 WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
516 inputSlot->GetSlotIndex()});
517 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
518 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
526 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
528 if (supportsReplacement && importFactory)
530 m_PreImportedOutputHandles.emplace_back(
531 bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(),
false));
535 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
// Let every workload factory finish any deferred post-creation setup.
541 for (
auto&& workloadFactory : m_WorkloadFactories)
543 workloadFactory.second->AfterWorkloadsCreated();
549 timelineUtils->Commit();
// External memory management: build a memory profile, then run each
// backend's optimization strategy (or the constant strategy) over it.
552 if (useExternalMemoryManager)
556 CreateMemoryProfileAsync();
560 CreateMemoryProfile();
564 for (
auto& backendMemoryProfile : m_MemBlockMap)
566 const BackendId& backendId = backendMemoryProfile.first;
567 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
569 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
573 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
579 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
// Keep tensor memory sorted by output-slot id so assignment is deterministic.
582 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
583 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
584 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
586 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
595 if (useInternalMemoryManager)
598 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
601 for (
auto &workload : m_WorkloadQueue)
603 workload->PostAllocationConfigure();
// Finally run the constant workloads once so their outputs are populated.
607 if (useExternalMemoryManager)
611 AllocateAndExecuteConstantWorkloads();
615 AllocateAndExecuteConstantWorkloadsAsync();
621 for (
auto workload: ConstWorkloads)
628 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
// Allocate each constant layer's output tensor handle and run its workload
// once so the constant data is materialised before inference.
// (Braces and some interior lines are elided from this view.)
631 for (
auto& pair : m_ConstantWorkloads)
633 auto tensorHandle = m_ConstantTensorHandles[pair.first];
634 tensorHandle->Allocate();
635 pair.second->Execute();
639 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
// Async variant: create and allocate handles for (presumably constant)
// layers, then execute their workloads through backend ExecutionData.
// (Layer-type guard conditions are elided from this view.)
642 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
643 for (
auto&& layer : order)
647 const auto& outSlot = layer->GetOutputSlots()[0];
648 const auto factoryId = outSlot.GetTensorHandleFactoryId();
653 auto& workloadFactory = GetWorkloadFactory(*layer);
655 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
656 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
// Keep the handle so the constant data stays alive with the network.
658 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
659 tensorHandle->Allocate();
661 auto& backend = m_Backends.at(layer->GetBackendId());
663 WorkingMemDescriptor memDesc;
664 memDesc.m_Outputs.push_back(tensorHandle);
666 ExecutionData executionData = backend->CreateExecutionData(memDesc);
667 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
// NOTE(review): body of a structure-reporting function (signature elided);
// it replays the network topology and queued workloads to the profiling
// timeline using the supplied profilingService.
675 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
676 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
678 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
679 TimelineUtilityMethods::GetTimelineUtils(profilingService);
681 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
683 for (
auto&& layer : order)
686 AddLayerStructure(timelineUtils, *layer, networkGuid);
// The switch presumably filters layer kinds before attaching workloads —
// its cases are elided from this view.
687 switch (layer->GetType())
697 for (
auto& workload : m_WorkloadQueue)
700 AddWorkloadStructure(timelineUtils, workload, *layer);
707 timelineUtils->Commit();
// NOTE(review): orphaned return — likely the body of a guid accessor whose
// signature is elided from this view.
712 return m_OptimizedNetwork->GetGuid();
// NOTE(review): body fragment of an input-info lookup (signature elided):
// scan the graph's input layers for the given binding id and return the
// tensor info of its single output slot.
717 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
// Input layers are expected to have exactly one output slot.
719 if (inputLayer->GetNumOutputSlots() != 1)
724 if (inputLayer->GetBindingId() == layerId)
726 return inputLayer->GetOutputSlot(0).GetTensorInfo();
// NOTE(review): body fragment of an output-info lookup (signature elided):
// scan the graph's output layers for the given binding id and return the
// tensor info of its single, connected input slot.
735 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
// Output layers are expected to have exactly one connected input slot.
737 if (outputLayer->GetNumInputSlots() != 1)
742 if (!outputLayer->GetInputSlot(0).GetConnection())
747 if (outputLayer->GetBindingId() == layerId)
749 return outputLayer->GetInputSlot(0).GetTensorInfo();
// NOTE(review): fragment of a workload-factory lookup (signature elided):
// find the factory registered for the layer's backend, failing loudly when
// none exists or the stored pointer is null.
760 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
761 if (it == m_WorkloadFactories.end())
763 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
769 workloadFactory = it->second.get();
771 if (!workloadFactory)
776 return *workloadFactory;
// TensorPin: bundles an owned ITensorHandle with its TensorInfo and binding
// id. (Class header, binding-id accessor and parts of the initializer list
// are elided from this view.)
785 TensorPin(std::unique_ptr<ITensorHandle> handle,
const TensorInfo& info,
LayerBindingId id)
786 : m_TensorHandle(
std::move(handle))
// Non-owning accessors for the stored handle and info.
792 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
793 const TensorInfo&
GetTensorInfo()
const {
return m_TensorInfo; }
797 std::unique_ptr<ITensorHandle> m_TensorHandle;
798 TensorInfo m_TensorInfo;
// NOTE(review): fragment of a pin-lookup helper (leading signature elided):
// find the TensorPin whose binding id matches, otherwise throw with the
// human-readable binding-point description.
803 const std::vector<TensorPin>& pins,
804 char const* bindingPointDesc)
806 auto it = std::find_if(pins.begin(), pins.end(),
807 [
id](
const TensorPin& pin)
809 return pin.GetBindingId() == id;
812 if (it != pins.end())
818 throw InvalidArgumentException(fmt::format(
"No tensor supplied for {0} {1}", bindingPointDesc,
id));
// NOTE(review): interior of a WorkloadData-style helper class (class header
// and constructor signature elided): wraps the caller-supplied input/output
// tensors in passthrough tensor handles keyed by binding id.
828 m_InputTensorPins.reserve(inputTensors.size());
829 m_OutputTensorPins.reserve(outputTensors.size());
831 for (
auto inputTensorPair : inputTensors)
833 auto inputTensor = inputTensorPair.second;
// Const passthrough: the handle borrows the caller's memory, never owns it.
835 std::unique_ptr<ITensorHandle> tensorHandle =
836 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
839 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
842 for (
auto outputTensorPair : outputTensors)
844 auto outputTensor = outputTensorPair.second;
846 std::unique_ptr<ITensorHandle> tensorHandle =
847 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
850 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
// Lookup helpers delegate to GetTensorPin with a descriptive label.
856 return GetTensorPin(
id, m_InputTensorPins,
"input");
861 return GetTensorPin(
id, m_OutputTensorPins,
"output");
866 std::vector<TensorPin> m_InputTensorPins;
867 std::vector<TensorPin> m_OutputTensorPins;
// NOTE(review): fragment of EnqueueWorkload (leading parameters elided):
// binds caller tensors / pre-imported handles to the workload queues and
// runs one inference, reporting to the profiling timeline.
874 std::vector<ImportedInputId> preImportedInputIds,
875 std::vector<ImportedOutputId> preImportedOutputIds)
877 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Degenerate graphs are only warned about, not rejected.
882 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
887 WorkloadData workloadData(inputTensors, outputTensors);
// Caller must account for every input, supplied either directly or
// via a pre-imported id.
891 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
// --- inputs ---
899 m_InputQueue.clear();
902 unsigned int inputIndex = 0;
903 unsigned int importedInputIdIndex = 0;
904 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
// Pre-imported input: swap the workloads' input handles to the imported
// handle the first time, then reuse without further work.
907 if (importedInputIdIndex < preImportedInputIds.size() &&
908 inputIndex == preImportedInputIds[importedInputIdIndex])
911 if (!m_IsInputImported[inputIndex])
913 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
915 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
917 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
918 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
920 m_IsInputImported[inputIndex] =
true;
922 importedInputIdIndex++;
// Previously imported input now supplied normally: restore the original
// handler-owned handle before enqueueing.
926 if (m_IsInputImported[inputIndex])
930 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
932 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
933 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
936 m_IsInputImported[inputIndex] =
false;
940 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
941 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// --- outputs ---
949 m_OutputQueue.clear();
957 unsigned int outputIndex = 0;
958 unsigned int importedOutputIdIndex = 0;
959 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
962 if (importedOutputIdIndex < preImportedOutputIds.size() &&
963 outputIndex == preImportedOutputIds[importedOutputIdIndex])
966 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
// First use of a pre-imported output: point both the producing workload
// and all consuming workloads at the imported handle.
968 if (!m_IsOutputImported[outputIndex])
970 const auto bindingId = outputLayer->GetBindingId();
971 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
973 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
975 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
976 indices.m_OutputSlotIndices.m_SlotIndex);
978 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
980 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
981 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
983 m_IsOutputImported[outputIndex] =
true;
986 if (!inputTensorHandle)
// A sync workload makes the imported output visible to the caller.
992 syncDesc.
m_Inputs.push_back(inputTensorHandle);
994 info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());
996 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
1002 m_OutputQueue.push_back(std::move(syncWorkload));
1003 importedOutputIdIndex++;
// Previously imported output now supplied normally: restore the
// handler-owned handle on producer and consumers.
1007 if (m_IsOutputImported[outputIndex])
1009 const auto bindingId = outputLayer->GetBindingId();
1010 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
1012 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
1014 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
1016 outputWorkload->ReplaceOutputTensorHandle(
1017 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
1019 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
1021 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
1022 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
1024 m_IsOutputImported[outputIndex] =
false;
1027 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
1029 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Record the inference on the profiling timeline and execute.
1035 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1036 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1037 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1041 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1042 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1043 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1046 LabelsAndEventClasses::EXECUTION_OF_GUID);
1047 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1050 bool executionSucceeded =
true;
1053 if (m_ProfilingService->IsProfilingEnabled())
1055 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1059 executionSucceeded =
Execute(timelineUtils, inferenceGuid);
1065 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1066 timelineUtils->Commit();
// NOTE(review): fragment of EnqueueInput (signature elided): validate the
// caller's handle, try zero-copy import into the layer's output handle when
// permitted, else queue a generic memcpy workload.
1079 if (tensorHandle ==
nullptr)
1081 throw InvalidArgumentException(
"EnqueueInput: tensorHandle must not be NULL");
1087 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1088 info.m_InputTensorInfos.push_back(tensorInfo);
1096 const TensorInfo& outputTensorInfo = handler.
GetTensorInfo();
1097 ITensorHandle* outputTensorHandle = handler.GetData();
1099 if (!outputTensorHandle)
1104 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1105 info.m_OutputTensorInfos.push_back(outputTensorInfo);
// Import path: map the caller's memory and import it directly; a failed
// import is fatal (the guard conditions are elided from this view).
1108 bool needMemCopy =
true;
1113 needMemCopy =
false;
1115 void* mem = tensorHandle->
Map(
false);
1116 if (outputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1118 tensorHandle->
Unmap();
1121 tensorHandle->
Unmap();
1122 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
// Copy path: a CopyMemGenericWorkload moves the data at execution time.
1128 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1135 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1136 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1140 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1141 timelineUtils->Commit();
1144 m_InputQueue.push_back(std::move(inputWorkload));
// Queue the work needed to surface one output tensor: validate arguments,
// try a zero-copy export into the caller's memory (sync workload), else
// queue a generic memcpy workload. (Interior guard lines are elided.)
1148 void LoadedNetwork::EnqueueOutput(
const BindableLayer& layer, ITensorHandle* tensorHandle,
const TensorInfo& tensorInfo)
1152 throw InvalidArgumentException(
"EnqueueOutput: given layer not an OutputLayer");
1155 if (tensorHandle ==
nullptr)
1157 throw InvalidArgumentException(
"EnqueueOutput: tensorHandle must not be NULL");
1163 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1164 info.m_OutputTensorInfos.push_back(tensorInfo);
// Output layers are expected to have exactly one input slot.
1166 if (layer.GetNumInputSlots() != 1)
1172 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1174 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1175 ITensorHandle* inputTensorHandle = outputHandler.GetData();
1176 if (!inputTensorHandle)
// Export path: only when the producing slot has a single consumer and is
// not fed directly by an Input layer.
1188 bool needMemCopy =
true;
1190 (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1192 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() !=
LayerType::Input)
1197 needMemCopy =
false;
1198 void *mem = tensorHandle->Map(
false);
1199 bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1200 tensorHandle->Unmap();
// Successful export only needs a sync to make results visible.
1205 MemSyncQueueDescriptor syncDesc;
1206 syncDesc.m_Inputs.push_back(inputTensorHandle);
1207 info.m_InputTensorInfos.push_back(inputTensorInfo);
1208 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1213 m_OutputQueue.push_back(std::move(syncWorkload));
1217 throw MemoryExportException(
"EnqueueOutput: Memory Export failed");
// Copy path: fall back to an explicit memcpy workload.
1225 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1226 info.m_InputTensorInfos.push_back(inputTensorInfo);
1228 std::unique_ptr<IWorkload> outputWorkload =
1229 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1230 if (!outputWorkload)
1235 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1236 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1240 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1241 timelineUtils->Commit();
1244 m_OutputQueue.push_back(std::move(outputWorkload));
// Acquire the working memory for one inference, at most once: imports
// externally managed tensor memory or acquires each backend memory manager.
// The lock parameter exists only when threading is enabled; the caller is
// expected to hold the working-memory mutex.
1248 void LoadedNetwork::AllocateWorkingMemory(
1249 #
if !defined(ARMNN_DISABLE_THREADS)
1250 std::lock_guard<std::mutex>& lock
1256 #if !defined(ARMNN_DISABLE_THREADS)
// Idempotent: bail out if memory is already allocated.
1260 if (m_IsWorkingMemAllocated)
1265 if (m_ExternalMemoryManager)
1267 m_ExternalMemoryManager->Allocate();
// Bind each pre-planned tensor memory chunk to its handle.
1269 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1271 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1275 for (
auto&& memoryManager : m_BackendMemoryMangers)
1279 memoryManager->Acquire();
1283 m_IsWorkingMemAllocated =
true;
// NOTE(review): body fragment of the working-memory release path (signature
// elided): under the working-memory mutex, deallocate external memory and
// release every backend memory manager.
1288 #if !defined(ARMNN_DISABLE_THREADS)
1289 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
// Nothing to do when memory was never allocated.
1292 if (!m_IsWorkingMemAllocated)
1297 if (m_ExternalMemoryManager)
1299 m_ExternalMemoryManager->Deallocate();
1303 for (
auto&& memoryManager : m_BackendMemoryMangers)
1307 memoryManager->Release();
1311 m_IsWorkingMemAllocated =
false;
// NOTE(review): fragment of Execute (leading parameters elided): allocate
// working memory, then run the input, workload and output queues in order,
// logging failures and recording per-workload timeline events.
1315 ProfilingGuid inferenceGuid)
1317 bool success =
true;
// Shared failure handler: log and (presumably) flip success — tail elided.
1319 auto Fail = [&](
const std::exception&
error)
1321 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1327 #if !defined(ARMNN_DISABLE_THREADS)
1328 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1329 AllocateWorkingMemory(lockGuard);
1331 AllocateWorkingMemory();
1334 ProfilingDynamicGuid workloadInferenceID(0);
// Run a queue, bracketing each workload with start/end-of-life events.
1335 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1337 for (
auto& workload : queue)
1341 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1344 workload->Execute();
1347 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1352 ExecuteQueue(m_InputQueue);
1353 ExecuteQueue(m_WorkloadQueue);
1354 ExecuteQueue(m_OutputQueue);
1356 catch (
const RuntimeException& error)
1360 catch (
const std::runtime_error& error)
// Push one input tensor into an already-created input handle: import the
// caller's memory zero-copy when the backend supports it, otherwise fall
// back to a memcpy. (Guard conditions and the copy call are elided.)
1368 void LoadedNetwork::EnqueueInput(
const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1375 std::unique_ptr<ITensorHandle> tensorHandle =
1376 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1377 inputTensor.GetMemoryArea());
1378 void* mem = tensorHandle->Map(
false);
1380 if (inputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1382 tensorHandle->Unmap();
// Import failed: unmap before throwing so the mapping is not leaked.
1385 tensorHandle->Unmap();
1386 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1390 throw MemoryImportException(
"EnqueueInput: Memory Import failed, backend does not support Import");
// Copy path: wrap the caller memory and memcpy it into the handle.
1396 std::unique_ptr<ITensorHandle> tensorHandle =
1397 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1399 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1401 memcpy(dst, src, size);
// Bind the caller's output memory to the network's output handle: export
// (import into the handle) when supported, otherwise fall back to a memcpy.
// (Guard conditions and the final copy call are elided from this view.)
1414 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1416 if (!outputTensorHandle)
1424 std::unique_ptr<ITensorHandle> tensorHandle =
1425 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1426 outputTensor.GetMemoryArea());
// Map / import / unmap — a failed import is fatal.
1428 void* mem = tensorHandle->Map(
false);
1429 bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1430 tensorHandle->Unmap();
1434 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed");
1439 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
// Copy path: memcpy out of the network-owned handle.
1447 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1449 memcpy(dst, src, size);
1452 std::unique_ptr<ITensorHandle> tensorHandle =
1453 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
// NOTE(review): fragment of an input-tensor lookup (signature and matching
// condition elided): scans inputTensors and returns the matching entry.
1462 for (
auto inputTensorPair : inputTensors)
1467 return inputTensorPair.second;
// NOTE(review): fragment of an output-tensor lookup (signature and matching
// condition elided): scans outputTensors and returns the matching entry.
1475 for (
auto outputTensorPair : outputTensors)
1480 return outputTensorPair.second;
// NOTE(review): fragment of ImportInputs (signature and several guard lines
// elided). Two paths are visible: reuse of pre-created input handles, and a
// forced-import path that builds new ImportedTensorHandlePins.
1494 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1497 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1499 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
// Path 1: import into the handles pre-created by the constructor.
1502 std::vector<ImportedInputId> importedInputs;
1503 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1504 unsigned int inputIndex = 0;
1507 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
// Null placeholder means this input cannot take an imported handle.
1509 if (!outputTensorHandle)
1515 auto layerBindingId = inputLayer->GetBindingId();
1516 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1518 return inputTensor.first == layerBindingId;
1521 if (it == inputTensors.end())
1527 const auto& inputTensor = *it;
1528 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1529 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1530 inputTensor.second.GetMemoryArea());
// Import only after the handle confirms the pointer is importable.
1534 if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1535 && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1537 importedInputs.push_back(inputIndex);
1539 passThroughTensorHandle->Unmap();
1543 ARMNN_LOG(
error) <<
"An error occurred attempting to import input_"
1544 << inputIndex <<
" : " << exception.
what();
1545 passThroughTensorHandle->Unmap();
1550 return importedInputs;
// Path 2: forced import — create a fresh handle per supplied tensor.
1555 std::vector<ImportedInputId> importedInputs;
1556 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1558 for (
auto inputTensor : inputTensors)
1560 auto layerBindingId = inputTensor.first;
1563 return layer->GetBindingId() == layerBindingId;
1569 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1572 const Layer* layer = *it;
// The layer's backend must advertise PreImportIOTensors.
1580 backend->GetCapabilities()))
1582 std::string er = backend->GetId();
1583 er +=
" does not have PreImportIOTensors capability";
1598 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1601 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1606 fmt::format(
"ImportInputs: Memory Import failed, backend: "
1607 "{} does not support importing from source {}"
1611 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1612 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1613 inputTensor.second.GetMemoryArea());
1615 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
// New ids are sequential; the pin is retained below for later clearing.
1617 importedInputs.push_back(m_CurImportedInputId++);
1618 passThroughTensorHandle->Unmap();
1622 passThroughTensorHandle->Unmap();
1626 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1628 return importedInputs;
// NOTE(review): fragment of ImportOutputs (signature and guards elided).
// Mirrors ImportInputs: reuse pre-created output handles, or build fresh
// pins on the forced-import path.
1640 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1643 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
// Path 1: import into the handles pre-created by the constructor.
1647 std::vector<ImportedOutputId> importedOutputs;
1648 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1650 unsigned int outputIndex = 0;
1653 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
// Null placeholder means this output cannot take an imported handle.
1654 if (!inputTensorHandle)
1660 auto layerBindingId = outputLayer->GetBindingId();
1661 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1663 return outputTensor.first == layerBindingId;
1666 if (it == outputTensors.end())
1672 const auto outputTensor = *it;
// Import the caller memory only when the handle confirms it can be.
1676 if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1677 && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1679 importedOutputs.push_back(outputIndex);
1684 ARMNN_LOG(
error) <<
"An error occurred attempting to import output_"
1685 << outputIndex <<
" : " << exception.
what();
1689 return importedOutputs;
// Path 2: forced import — create a fresh handle per supplied tensor.
1692 std::vector<ImportedOutputId> importedOutputs;
1693 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1695 for (
const auto& outputTensor : outputTensors)
1697 auto layerBindingId = outputTensor.first;
1700 return layer->GetBindingId() == layerBindingId;
1705 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1709 const Layer* layer = *it;
// The layer's backend must advertise PreImportIOTensors.
1717 backend->GetCapabilities()))
1719 std::string er = backend->GetId();
1720 er +=
" does not have PreImportIOTensors capability";
1734 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1737 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1742 "{} does not support importing from source {}"
1743 , factoryId, forceImportMemorySource));
1746 if (tensorHandle->
Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
// New ids are sequential; the pin is retained below for later clearing.
1748 importedOutputs.push_back(m_CurImportedOutputId++);
1755 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1758 return importedOutputs;
// ---- LoadedNetwork::ClearImportedInputs (fragment) -------------------------
// Releases previously imported input handles by id: Unimport() then reset the
// unique_ptr so the slot reads as "deleted" on later calls.
// NOTE(review): the visible bounds check uses `id > size()`, which admits
// id == size() and would then index m_PreImportedInputHandles out of range —
// looks off-by-one (should presumably be `>=`); confirm against the full
// function, since the throw on the out-of-range branch is not visible here.
1763 for (
auto id : inputIds)
1765 if (
id > m_PreImportedInputHandles.size())
1770 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
// A null handle means this id was already cleared — report it as such.
1771 if (!importedTensorHandle)
1774 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
// Release the imported memory, then null the handle to mark the slot free.
1777 importedTensorHandle->Unimport();
1778 importedTensorHandle = {};
// ---- LoadedNetwork::ClearImportedOutputs (fragment) ------------------------
// Mirror of ClearImportedInputs for output handles: Unimport() each id and
// reset its slot so a second clear is detected as "already deleted".
// NOTE(review): same apparent off-by-one as the inputs variant — `id > size()`
// lets id == size() through to an out-of-range index; confirm upstream.
1784 for (
auto id : outputIds)
1786 if (
id > m_PreImportedOutputHandles.size())
1791 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1792 if (!importedTensorHandle)
1795 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
// Release imported memory and mark the slot as deleted.
1798 importedTensorHandle->Unimport();
1799 importedTensorHandle = {};
// ---- LoadedNetwork::Execute (fragment) -------------------------------------
// NOTE(review): heavily decimated fragment — the function name/first params,
// try/catch framing, and many interior lines are missing; comments are limited
// to what the visible lines show. Visible parameters suggest the signature
// also takes inputTensors/outputTensors and a WorkingMemHandle — confirm.
1806 std::vector<ImportedInputId> preImportedInputs,
1807 std::vector<ImportedOutputId> preImportedOutputs)
1809 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Validate counts: direct tensors plus pre-imported ids must cover every
// graph input/output; the error message differs when no pre-imports exist.
1811 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1813 if (preImportedInputs.empty())
1820 "Number of inputs + preImportedInputs provided does not match network.");
1824 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1826 if (preImportedOutputs.empty())
1829 "Number of outputs provided does not match network.");
1834 "Number of outputs + preImportedOutputs provided does not match network.");
// Collect every binding id in play: direct tensors contribute pair.first,
// pre-imported ids are translated (and validated) via ValidateImported*ID.
1841 unsigned int index = 0;
1842 for (
auto pair : inputTensors)
1844 bindingIds[index++] = pair.first;
1848 bindingIds[index++] = ValidateImportedInputID(
id);
1850 for (
auto pair : outputTensors)
1852 bindingIds[index++] = pair.first;
1856 bindingIds[index++] = ValidateImportedOutputID(
id);
// Cleanup lambda: restores the working-memory handle's connection slots that
// were redirected at pre-imported handles (body partially elided).
1861 auto resetMemHandle = [&]()
1865 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1867 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1869 for (
auto it : inputConnections)
1877 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1882 for (
auto it : outputConnections)
// Profiling timeline: create an inference entity tied to the network guid and
// record a start-of-life event (guarded by profiling being enabled, per the
// surrounding code not visible here).
1889 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1890 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1891 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1895 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1896 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1897 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1900 LabelsAndEventClasses::EXECUTION_OF_GUID);
1901 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1904 bool executionSucceeded =
true;
// End-of-life event + commit for the timeline packet.
1909 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1910 timelineUtils->Commit();
// Bind direct input tensors into the working-memory handle.
1920 for (
auto pair : inputTensors)
1922 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
// Redirect input connections at the pre-imported handles (raw pointer is
// non-owning; ownership stays with the pin).
1928 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1929 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1930 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1933 for (
auto it : inputConnections)
1935 *it = preimportedHandle.get();
// Same for outputs: direct tensors are imported into the handle, pre-imported
// ones are spliced into the output connection slots.
1943 for (
auto pair: outputTensors)
1945 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1951 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1952 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1953 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1956 for (
auto it : outputConnections)
1958 *it = preimportedHandle.get();
// Shared failure handler: log and flag, so cleanup below still runs.
1963 auto Fail = [&](
const std::exception&
error)
1965 ARMNN_LOG(
error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1966 executionSucceeded =
false;
// Run each queued workload, bracketing it with workload-inference timeline
// events when profiling is active (the actual Execute call is elided here).
1968 ProfilingDynamicGuid workloadInferenceID(0);
1972 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1974 auto& workload = m_WorkloadQueue[i];
1977 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1985 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1994 catch (
const std::runtime_error&
error)
// Post-run: visible loop over outputTensors (its body is elided — presumably
// copying results back out; confirm against full source).
2007 for (
auto pair: outputTensors)
// ---- LoadedNetwork::CreateWorkingMemHandle (fragment) ----------------------
// NOTE(review): decimated fragment — the signature (taking a NetworkId, per
// the networkId used in the return) and many interior lines are missing.
// Builds per-layer WorkingMemDescriptors plus the bookkeeping needed for
// externally managed memory, then packages everything into a WorkingMemHandle.
2027 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Handles whose lifetime the memory manager controls vs. those it does not.
2030 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
2032 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
2034 std::vector<WorkingMemDescriptor> workingMemDescriptors;
2035 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
// Helper: create a (non-memory-managed, per the `false` arg) tensor handle
// from the owning layer's workload factory.
2037 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
2040 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
2046 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
// HandleInfo (local struct, declaration partially elided): tracks whether a
// produced handle feeds an Input/Output layer and its descriptor coordinates.
2064 bool m_IsInputLayerHandle =
false;
2065 bool m_IsOutputLayerHandle =
false;
2071 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
2073 unsigned int layerIndex = 0;
2074 for (
auto&& layer : order)
// Per-layer flags deciding managed vs. unmanaged handle creation.
2084 bool isMemoryManaged =
true;
2085 bool isInputLayer =
false;
2086 bool isOutputLayer =
false;
2087 bool isConnectedToOutputLayer =
false;
2093 isInputLayer =
true;
2098 isOutputLayer =
true;
2101 unsigned int slotIndex = 0;
2106 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
// First Output-layer consumer flips the flag; a second one forces managed
// memory and warns that importing on those outputs is prevented.
2110 if (!isConnectedToOutputLayer)
2112 isConnectedToOutputLayer =
true;
2120 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2121 "This will prevent importing on the connected OutputLayers.",
2123 isMemoryManaged =
true;
// Create the output handle in the appropriate ownership bucket; keep a raw
// non-owning pointer for the descriptor.
2129 if (isMemoryManaged)
2131 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2132 tensorHandle = managedTensorHandles.back().get();
2136 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2137 tensorHandle = unmanagedTensorHandles.back().get();
2140 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
// Record handle + coordinates so input/output layer bindings can later find
// this slot's handle.
2142 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2143 handleInfo.m_TensorHandle = tensorHandle;
2146 if (isConnectedToOutputLayer)
2148 handleInfo.m_IsOutputLayerHandle =
true;
2149 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2154 handleInfo.m_IsInputLayerHandle =
true;
2156 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
// Input-slot pass: wire each consumed connection to its producer's handle.
2166 if (!slot.GetConnection())
2171 auto outputSlot = slot.GetConnectedOutputSlot();
2172 auto key = outputSlot->GetOwningLayer().GetGuid();
// Constant-tensor producers use the pre-built constant handle; an overridden
// TensorInfo substitutes a decorated handle (decoration elided here).
2175 auto found = m_ConstantTensorHandles.find(key);
2176 if (found != m_ConstantTensorHandles.end())
2179 if (slot.IsTensorInfoOverridden())
2184 tensorHandle = decorated;
2187 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
2195 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2196 handleInfo.m_TensorHandle = tensorHandle;
2197 handleInfo.m_IsOutputLayerHandle =
true;
2198 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2199 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
// Normal (non-constant) producers: reuse the handle recorded in the first
// pass, decorating if this slot overrides the tensor info.
2204 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2206 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2207 if (slot.IsTensorInfoOverridden())
2212 inputTensorHandle = decorated;
2215 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2221 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2222 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2226 else if (handleInfo.m_IsOutputLayerHandle)
2228 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2233 if (handleInfo.m_IsInputLayerHandle)
2235 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2236 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
// One descriptor + backend execution-data pair per layer.
2246 std::pair<BackendId, ExecutionData> dataPair;
2249 executionDataVec.push_back(dataPair);
2250 workingMemDescriptors.push_back(workingMemDescriptor);
// External memory: build the manager, then sort the tensor-memory records by
// output-slot id so lookup order is deterministic.
2256 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2258 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2261 std::sort(tensorMemory.begin(), tensorMemory.end(),
2262 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2263 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2265 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
// Split the collected handle info into the input/output coordinate lists the
// WorkingMemHandle constructor expects.
2268 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2269 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2271 for (
const auto& handleInfo: outputToHandleInfoMap)
2273 if (handleInfo.second.m_IsOutputLayerHandle)
2275 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2278 if (handleInfo.second.m_IsInputLayerHandle)
2280 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
// Hand everything (ownership of handles and manager included) to the
// WorkingMemHandle; trailing constructor arguments are elided in this view.
2284 return std::make_unique<WorkingMemHandle>(networkId,
2285 inputConnectionsInfo,
2286 outputConnectionsInfo,
2287 workingMemDescriptors,
2288 std::move(externalMemoryManager),
2289 std::move(tensorMemory),
2290 std::move(managedTensorHandles),
2291 std::move(unmanagedTensorHandles),
// ---- LoadedNetwork::RegisterDebugCallback (fragment) -----------------------
// Propagates the debug callback `func` to every queued workload.
2298 for (
auto&& workloadPtr: m_WorkloadQueue)
2300 workloadPtr.get()->RegisterDebugCallback(func);
// ---- LoadedNetwork::CreateMemoryProfileAsync -------------------------------
// NOTE(review): body is decimated — the PartialBlock struct header, several
// control-flow lines and braces are missing from this view.
// Builds per-backend memory blocks (m_MemBlockMap) describing the lifetime
// [start, end] and size of each output slot's tensor, keyed per OutputSlot.
2305 void LoadedNetwork::CreateMemoryProfileAsync()
// Fields of the local lifetime-tracking struct (name elided; presumably
// PartialBlock, matching its use below).
2309 unsigned int m_StartOfLife;
2310 unsigned int m_Lifetime;
2313 unsigned int m_Index;
// Round sizes up to a float-sized boundary.
2318 auto align = [](
size_t numToAlign)
2320 const size_t alignment =
sizeof(float);
2321 return ((numToAlign + alignment - 1) / alignment) * alignment;
2324 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2329 unsigned int timestep = 0;
2330 unsigned int outputIndex = 0;
2331 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2333 for (
auto&& layer : order)
2335 const LayerType& layerType = layer->GetType();
// Special-case condition (partially elided) involving a single-connection
// producer feeding slot 0 of this layer.
2343 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2354 BackendId backendId = layer->GetBackendId();
2355 for (
auto& outputSlot : layer->GetOutputSlots())
// Only backends supporting externally managed memory participate.
2357 if (!m_SupportsExternallyManagedMemory[backendId])
// Open a new partial block at the current timestep; its lifetime counts the
// slot's consumers.
2362 PartialBlock partialBlock;
2364 partialBlock.m_StartOfLife = timestep;
2366 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2367 partialBlock.m_MemSize = alignedSize;
2368 partialBlock.m_Index = outputIndex++;
2369 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2370 partialBlock.m_BackendId = backendId;
// No consumers: the block lives and dies at the same timestep — emit it now.
2372 if (partialBlock.m_Lifetime == 0)
2374 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2375 partialBlock.m_StartOfLife,
2376 partialBlock.m_MemSize,
2378 partialBlock.m_Index);
// Otherwise track the block until its last consumer is seen below.
2382 memBlockTrackerMap[&outputSlot] = partialBlock;
// Consumer pass: decrement each producer block's remaining lifetime and emit
// it once (emission condition elided) the last consumer is reached.
2386 for (
auto& inputSlot : layer->GetInputSlots())
2388 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2389 const LayerType& owningLayerType = connectedInputLayer.GetType();
2400 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2402 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2404 auto& lifetime = partialBlock.m_Lifetime;
2409 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2411 partialBlock.m_MemSize,
2413 partialBlock.m_Index);
// ---- LoadedNetwork::CreateMemoryProfile ------------------------------------
// NOTE(review): body is decimated — struct header, some conditions and braces
// are missing from this view. Synchronous counterpart of
// CreateMemoryProfileAsync: tracks blocks per ITensorHandle (after resolving
// sub-tensors to their root ancestor) instead of per OutputSlot.
2420 void LoadedNetwork::CreateMemoryProfile()
// Walk a sub-tensor chain up to the root handle that actually owns memory.
2424 auto TraceSubTensorHandleAncestry = [](ITensorHandle*
const subTensorHandle)
2426 ITensorHandle* ancestor = subTensorHandle;
2427 while (ancestor && ancestor->GetParent())
2429 ancestor = ancestor->GetParent();
// Fields of the local lifetime-tracking struct (header elided; presumably
// PartialBlock, matching its use below).
2436 unsigned int m_StartOfLife;
2437 unsigned int m_Lifetime;
2440 unsigned int m_Index;
2442 BackendId m_BackendId;
// Round sizes up to a float-sized boundary.
2445 auto align = [](
size_t numToAlign)
2447 const size_t alignment =
sizeof(float);
2448 return ((numToAlign + alignment - 1) / alignment) * alignment;
2451 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2456 unsigned int timestep = 0;
2457 unsigned int outputIndex = 0;
2458 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2460 for (
auto&& layer : order)
2462 const LayerType& layerType = layer->GetType();
// Special-case condition (partially elided) on a single-connection producer.
2470 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2481 BackendId backendId = layer->GetBackendId();
2482 for (
auto& outputSlot : layer->GetOutputSlots())
2484 if (!m_SupportsExternallyManagedMemory[backendId])
// Resolve to the root handle; sub-tensors share their parent's block.
2489 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2490 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
// First time this root handle is seen: open a new block for it.
2492 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2494 PartialBlock partialBlock;
2496 partialBlock.m_StartOfLife = timestep;
2498 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2499 partialBlock.m_MemSize = alignedSize;
2500 partialBlock.m_Index = outputIndex++;
2501 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2502 partialBlock.m_BackendId = backendId;
// Zero consumers: block starts and ends at the same timestep.
2504 if (partialBlock.m_Lifetime == 0)
2506 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2507 partialBlock.m_StartOfLife,
2508 partialBlock.m_MemSize,
2510 partialBlock.m_Index);
2514 memBlockTrackerMap[tensorHandle] = partialBlock;
2516 m_Tensorhandles.push_back(tensorHandle);
// Handle already tracked (shared by multiple slots): extend its lifetime by
// this slot's consumer count.
2521 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
// Consumer pass: decrement lifetimes and emit a block (emission condition
// elided) once its last consumer has been visited.
2525 for (
auto& inputSlot : layer->GetInputSlots())
2527 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2528 const LayerType& owningLayerType = connectedInputLayer.GetType();
2538 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2543 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2545 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2546 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2548 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2550 auto& lifetime = partialBlock.m_Lifetime;
2555 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2557 partialBlock.m_MemSize,
2559 partialBlock.m_Index);
// ---- LoadedNetwork::CreateExternalMemoryManger -----------------------------
// Builds a MemoryManager from the per-backend memory bins in m_MemBinMap.
// Side effect: appends one (TensorMemory, MemorySource) record per memory
// block to the caller's tensorMemoryVec (out-parameter).
// NOTE(review): "Manger" [sic] matches the declared name — do not "fix" here
// without renaming the declaration.
2567 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2568 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2570 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2573 for (
auto& backend : m_MemBinMap)
2575 std::vector<BufferStorage> bufferStorageVec;
// Prefer a custom allocator registered for this backend; otherwise fall back
// to the backend's default allocator.
2577 std::shared_ptr<ICustomAllocator> backendAllocator;
2578 if (allocatorMap.find(backend.first) != allocatorMap.end())
2580 backendAllocator = allocatorMap[backend.first];
2584 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2587 for (
auto& memBin : backend.second)
// One BufferStorage per bin; each block in the bin becomes a shared
// TensorMemory (offset + slot index) referenced from both the storage and
// the caller's vector.
2589 BufferStorage bufferStorage;
2590 bufferStorage.m_BufferSize = memBin.m_MemSize;
2591 bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2593 for (
auto& memBlock : memBin.m_MemBlocks)
2595 auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2597 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2598 bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2601 bufferStorageVec.emplace_back(std::move(bufferStorage));
// Register the bin's storage with the manager. NOTE(review): the magic
// constant 4 (presumably an alignment or pool parameter of
// StoreMemToAllocate) is undocumented here — confirm and name it.
2604 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2607 return memoryManager;
// ---- LoadedNetwork::ValidateImportedInputID (fragment) ---------------------
// NOTE(review): signature and the enclosing try are not visible here.
// Resolves an ImportedInputId to its LayerBindingId, throwing
// InvalidArgumentException for deleted or unknown ids.
// .at() throws std::out_of_range for an id outside the vector; the catch
// below translates that into an "Unknown ImportedInputId" error.
2614 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
// A null handle marks an id that was cleared via ClearImportedInputs.
2615 if (!importedTensorHandlePin.m_TensorHandle)
2617 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute:"
2618 "PreImportedInput: {} has been deleted",
id));
2620 return importedTensorHandlePin.m_LayerBindingId;
2622 catch (
const std::out_of_range&)
2624 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedInputId: {}",
id));
2632 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2633 if (!importedTensorHandlePin.m_TensorHandle)
2635 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: "
2636 "PreImportedOutput: {} has been deleted",
id));
2638 return importedTensorHandlePin.m_LayerBindingId;
2640 catch (
const std::out_of_range&)
2642 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedOutputId: {}",
id));