29 #include <common/include/Processes.hpp>
31 #include <fmt/format.h>
/// Builds a human-readable error message of the form "<prefix> <what()>".
/// @param prefix  message prefix (not owned, must be non-null)
/// @param error   any exception-like object exposing what()
/// @return the concatenated message
template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    // Plain concatenation; the single separating space matches the
    // historical stream-based formatting (prefix << " " << what()).
    return std::string(prefix) + " " + error.what();
}
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52 ProfilingGuid networkGuid)
55 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
56 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
59 LabelsAndEventClasses::LAYER_GUID);
60 for (
auto&& input : layer.GetInputSlots())
62 const IOutputSlot* source = input.GetConnectedOutputSlot();
64 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65 source->GetOwningLayerGuid(),
70 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71 std::unique_ptr<IWorkload>& workload,
75 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77 layer.GetBackendId().Get(),
78 LabelsAndEventClasses::BACKENDID_GUID);
81 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
84 LabelsAndEventClasses::CHILD_GUID);
103 const vector<BackendOptions>::iterator& backendItr =
104 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
105 if (backend.GetBackendId().Get() ==
"Global")
114 bool importEnabled =
false;
115 bool exportEnabled =
false;
116 if (backendItr != optimizedOptions.end())
119 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
121 const BackendOptions::BackendOption& option = backendItr->GetOption(i);
122 if (option.GetName() ==
"ImportEnabled")
124 importEnabled = option.GetValue().AsBool();
126 if (option.GetName() ==
"ExportEnabled")
128 exportEnabled = option.GetValue().AsBool();
138 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.m_InputSource);
141 message.append(
" requires that memory import be enabled. However, "
142 "it was disabled when this network was optimized.");
146 message.append(
" requires that memory import be disabled. However, "
147 "it was enabled when this network was optimized.");
149 throw InvalidArgumentException(message);
155 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.m_OutputSource);
158 message.append(
" requires that memory export be enabled. However, "
159 "it was disabled when this network was optimized.");
163 message.append(
" requires that memory export be disabled. However, "
164 "it was enabled when this network was optimized.");
166 throw InvalidArgumentException(message);
171 std::string& errorMessage,
173 arm::pipe::IProfilingService* profilingService)
175 std::unique_ptr<LoadedNetwork> loadedNetwork;
177 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
179 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ",
error);
182 return std::unique_ptr<LoadedNetwork>();
187 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
197 catch (
const std::runtime_error&
error)
202 return loadedNetwork;
205 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
207 arm::pipe::IProfilingService* profilingService) :
208 m_OptimizedNetwork(
std::move(net)),
209 m_NetworkProperties(networkProperties),
210 m_TensorHandleFactoryRegistry(),
211 m_ProfilingService(profilingService)
215 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
225 m_NetworkProperties);
232 bool useExternalMemoryManager =
false;
233 bool useInternalMemoryManager =
false;
234 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
241 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
242 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
245 for (
auto&& layer : order)
247 auto const& backendId = layer->GetBackendId();
248 if (m_Backends.count(backendId) == 0)
251 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
253 IBackendInternal* backend = it.first->second.get();
259 backend->GetCapabilities()))
261 std::string er = backend->GetId();
262 er +=
" does not support AsyncExecution";
263 throw BackendCapabilityException(er);
266 backend->GetCapabilities()))
268 std::string er = backend->GetId();
269 er +=
" does not support ExternallyManagedMemory\n";
270 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
271 throw BackendCapabilityException(er);
273 m_SupportsExternallyManagedMemory[backend->GetId()] =
true;
274 useExternalMemoryManager =
true;
278 m_SupportsExternallyManagedMemory[backend->GetId()] =
false;
279 useInternalMemoryManager =
true;
283 if (backend->SupportsTensorAllocatorAPI())
285 workloadFactory = backend->CreateWorkloadFactory(
286 m_TensorHandleFactoryRegistry,
287 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
293 m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
294 workloadFactory = backend->CreateWorkloadFactory(
295 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
297 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
303 for (
auto&& layer : order)
305 auto& workloadFactory = GetWorkloadFactory(*layer);
306 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
308 switch (layer->GetType())
315 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
317 !supportsExternalManager &&
323 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
330 if ((layer->GetNumOutputSlots() == 1) &&
331 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
332 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
334 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
336 !supportsExternalManager &&
341 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
343 !supportsExternalManager);
350 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
351 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
352 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
355 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
357 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
359 int processID = arm::pipe::GetCurrentProcessId();
360 std::stringstream ss;
362 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
365 std::vector<IWorkload*> ConstWorkloads;
370 for (
auto&& layer: order)
375 AddLayerStructure(timelineUtils, *layer, networkGuid);
378 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
380 switch (layer->GetType())
390 auto workload = layer->CreateWorkload(workloadFactory);
394 const char*
const layerName =
395 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
396 throw InvalidArgumentException(
397 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
398 layerName,
static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
405 AddWorkloadStructure(timelineUtils, workload, *layer);
410 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
413 m_ConstantTensorHandles[layer->GetGuid()] =
414 layer->GetOutputSlot(0).GetOutputHandler().GetData();
415 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
419 m_WorkloadQueue.push_back(std::move(workload));
424 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
428 layer->ReleaseConstantData();
436 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
438 const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
442 for (
const BindableLayer* layer: order.GetInputLayers())
444 const auto bindingId = layer->GetBindingId();
446 bool supportsReplacement =
true;
448 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
450 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
451 workloadIndex -= noOfInputs;
453 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
454 armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
459 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
460 supportsReplacement &= workload->SupportsTensorHandleReplacement();
469 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
471 if (supportsReplacement && importFactory)
473 m_PreImportedInputHandles.emplace_back(
474 bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
478 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
484 for (
const BindableLayer* layer: order.GetOutputLayers())
486 const auto bindingId = layer->GetBindingId();
488 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
489 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
494 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
495 workloadIndex -= noOfInputs;
497 indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
498 outputSlot->CalculateIndexOnOwner()};
500 bool supportsReplacement =
true;
501 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
502 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
504 for (
auto &inputSlot: outputSlot->GetConnections())
508 auto inWorkloadIndex = std::distance(order.begin(),
509 order.GetPosInGraph(inputSlot->GetOwningLayer()));
510 inWorkloadIndex -= noOfInputs;
511 indices.m_InputSlotIndices.emplace_back(
512 WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
513 inputSlot->GetSlotIndex()});
514 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
515 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
523 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
525 if (supportsReplacement && importFactory)
527 m_PreImportedOutputHandles.emplace_back(
528 bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(),
false));
532 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
538 for (
auto&& workloadFactory : m_WorkloadFactories)
540 workloadFactory.second->AfterWorkloadsCreated();
546 timelineUtils->Commit();
549 if (useExternalMemoryManager)
553 CreateMemoryProfileAsync();
557 CreateMemoryProfile();
561 for (
auto& backendMemoryProfile : m_MemBlockMap)
563 const BackendId& backendId = backendMemoryProfile.first;
564 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
566 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
570 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
576 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
579 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
580 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
581 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
583 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
592 if (useInternalMemoryManager)
595 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
598 for (
auto &workload : m_WorkloadQueue)
600 workload->PostAllocationConfigure();
604 if (useExternalMemoryManager)
608 AllocateAndExecuteConstantWorkloads();
612 AllocateAndExecuteConstantWorkloadsAsync();
618 for (
auto workload: ConstWorkloads)
625 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
628 for (
auto& pair : m_ConstantWorkloads)
630 auto tensorHandle = m_ConstantTensorHandles[pair.first];
631 tensorHandle->Allocate();
632 pair.second->Execute();
636 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
639 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
640 for (
auto&& layer : order)
644 const auto& outSlot = layer->GetOutputSlots()[0];
645 const auto factoryId = outSlot.GetTensorHandleFactoryId();
647 auto& workloadFactory = GetWorkloadFactory(*layer);
649 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
650 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
652 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
653 tensorHandle->Allocate();
655 auto& backend = m_Backends.at(layer->GetBackendId());
657 WorkingMemDescriptor memDesc;
658 memDesc.m_Outputs.push_back(tensorHandle);
660 ExecutionData executionData = backend->CreateExecutionData(memDesc);
661 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
669 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
670 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
672 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
673 TimelineUtilityMethods::GetTimelineUtils(profilingService);
675 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
677 for (
auto&& layer : order)
680 AddLayerStructure(timelineUtils, *layer, networkGuid);
681 switch (layer->GetType())
691 for (
auto& workload : m_WorkloadQueue)
694 AddWorkloadStructure(timelineUtils, workload, *layer);
701 timelineUtils->Commit();
706 return m_OptimizedNetwork->GetGuid();
711 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
713 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1,
"Input layer should have exactly 1 output slot");
714 if (inputLayer->GetBindingId() == layerId)
716 return inputLayer->GetOutputSlot(0).GetTensorInfo();
725 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
727 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1,
"Output layer should have exactly 1 input slot");
728 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(),
"Input slot on Output layer must be connected");
729 if (outputLayer->GetBindingId() == layerId)
731 return outputLayer->GetInputSlot(0).GetTensorInfo();
742 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
743 if (it == m_WorkloadFactories.end())
745 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
751 workloadFactory = it->second.get();
755 return *workloadFactory;
764 TensorPin(std::unique_ptr<ITensorHandle> handle,
const TensorInfo& info,
LayerBindingId id)
765 : m_TensorHandle(
std::move(handle))
771 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
772 const TensorInfo&
GetTensorInfo()
const {
return m_TensorInfo; }
776 std::unique_ptr<ITensorHandle> m_TensorHandle;
777 TensorInfo m_TensorInfo;
782 const std::vector<TensorPin>& pins,
783 char const* bindingPointDesc)
785 auto it = std::find_if(pins.begin(), pins.end(),
786 [
id](
const TensorPin& pin)
788 return pin.GetBindingId() == id;
791 if (it != pins.end())
797 throw InvalidArgumentException(fmt::format(
"No tensor supplied for {0} {1}", bindingPointDesc,
id));
807 m_InputTensorPins.reserve(inputTensors.size());
808 m_OutputTensorPins.reserve(outputTensors.size());
810 for (
auto inputTensorPair : inputTensors)
812 auto inputTensor = inputTensorPair.second;
814 std::unique_ptr<ITensorHandle> tensorHandle =
815 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
818 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
821 for (
auto outputTensorPair : outputTensors)
823 auto outputTensor = outputTensorPair.second;
825 std::unique_ptr<ITensorHandle> tensorHandle =
826 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
829 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
835 return GetTensorPin(
id, m_InputTensorPins,
"input");
840 return GetTensorPin(
id, m_OutputTensorPins,
"output");
845 std::vector<TensorPin> m_InputTensorPins;
846 std::vector<TensorPin> m_OutputTensorPins;
853 std::vector<ImportedInputId> preImportedInputIds,
854 std::vector<ImportedOutputId> preImportedOutputIds)
856 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
861 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
866 WorkloadData workloadData(inputTensors, outputTensors);
870 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
878 m_InputQueue.clear();
881 unsigned int inputIndex = 0;
882 unsigned int importedInputIdIndex = 0;
883 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
886 if (importedInputIdIndex < preImportedInputIds.size() &&
887 inputIndex == preImportedInputIds[importedInputIdIndex])
890 if (!m_IsInputImported[inputIndex])
892 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
894 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
896 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
897 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
899 m_IsInputImported[inputIndex] =
true;
901 importedInputIdIndex++;
905 if (m_IsInputImported[inputIndex])
909 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
911 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
912 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
915 m_IsInputImported[inputIndex] =
false;
919 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
920 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
928 m_OutputQueue.clear();
936 unsigned int outputIndex = 0;
937 unsigned int importedOutputIdIndex = 0;
938 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
941 if (importedOutputIdIndex < preImportedOutputIds.size() &&
942 outputIndex == preImportedOutputIds[importedOutputIdIndex])
945 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
947 if (!m_IsOutputImported[outputIndex])
949 const auto bindingId = outputLayer->GetBindingId();
950 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
952 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
954 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
955 indices.m_OutputSlotIndices.m_SlotIndex);
957 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
959 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
960 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
962 m_IsOutputImported[outputIndex] =
true;
965 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
967 syncDesc.
m_Inputs.push_back(inputTensorHandle);
969 info.m_InputTensorInfos.push_back(
970 outputLayer->GetInputSlot(0).GetTensorInfo());
971 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
973 m_OutputQueue.push_back(std::move(syncWorkload));
974 importedOutputIdIndex++;
978 if (m_IsOutputImported[outputIndex])
980 const auto bindingId = outputLayer->GetBindingId();
981 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
983 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
985 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
987 outputWorkload->ReplaceOutputTensorHandle(
988 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
990 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
992 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
993 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
995 m_IsOutputImported[outputIndex] =
false;
998 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
1000 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
1006 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1007 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1008 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1012 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1013 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1014 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1017 LabelsAndEventClasses::EXECUTION_OF_GUID);
1018 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1021 bool executionSucceeded =
true;
1024 if (m_ProfilingService->IsProfilingEnabled())
1026 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1030 executionSucceeded =
Execute(timelineUtils, inferenceGuid);
1036 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1037 timelineUtils->Commit();
1050 if (tensorHandle ==
nullptr)
1052 throw InvalidArgumentException(
"EnqueueInput: tensorHandle must not be NULL");
1058 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1059 info.m_InputTensorInfos.push_back(tensorInfo);
1063 const TensorInfo& outputTensorInfo = handler.
GetTensorInfo();
1064 ITensorHandle* outputTensorHandle = handler.GetData();
1066 "Data should have been allocated.");
1067 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1068 info.m_OutputTensorInfos.push_back(outputTensorInfo);
1071 bool needMemCopy =
true;
1076 needMemCopy =
false;
1078 void* mem = tensorHandle->
Map(
false);
1079 if (outputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1081 tensorHandle->
Unmap();
1084 tensorHandle->
Unmap();
1085 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1091 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1095 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1096 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1100 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1101 timelineUtils->Commit();
1104 m_InputQueue.push_back(std::move(inputWorkload));
1108 void LoadedNetwork::EnqueueOutput(
const BindableLayer& layer, ITensorHandle* tensorHandle,
const TensorInfo& tensorInfo)
1112 throw InvalidArgumentException(
"EnqueueOutput: given layer not an OutputLayer");
1115 if (tensorHandle ==
nullptr)
1117 throw InvalidArgumentException(
"EnqueueOutput: tensorHandle must not be NULL");
1123 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1124 info.m_OutputTensorInfos.push_back(tensorInfo);
1126 ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1,
"Output Layer should have exactly one input.");
1129 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1131 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1132 ITensorHandle* inputTensorHandle = outputHandler.GetData();
1133 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
1142 bool needMemCopy =
true;
1144 (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1146 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() !=
LayerType::Input)
1151 needMemCopy =
false;
1152 void *mem = tensorHandle->Map(
false);
1153 bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1154 tensorHandle->Unmap();
1159 MemSyncQueueDescriptor syncDesc;
1160 syncDesc.m_Inputs.push_back(inputTensorHandle);
1161 info.m_InputTensorInfos.push_back(inputTensorInfo);
1162 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1164 m_OutputQueue.push_back(std::move(syncWorkload));
1168 throw MemoryExportException(
"EnqueueOutput: Memory Export failed");
1176 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1177 info.m_InputTensorInfos.push_back(inputTensorInfo);
1179 std::unique_ptr<IWorkload> outputWorkload =
1180 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1183 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1184 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1188 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1189 timelineUtils->Commit();
1192 m_OutputQueue.push_back(std::move(outputWorkload));
1196 void LoadedNetwork::AllocateWorkingMemory(
1197 #
if !defined(ARMNN_DISABLE_THREADS)
1198 std::lock_guard<std::mutex>& lock
1204 #if !defined(ARMNN_DISABLE_THREADS)
1208 if (m_IsWorkingMemAllocated)
1213 if (m_ExternalMemoryManager)
1215 m_ExternalMemoryManager->Allocate();
1217 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1219 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1223 for (
auto&& memoryManager : m_BackendMemoryMangers)
1227 memoryManager->Acquire();
1231 m_IsWorkingMemAllocated =
true;
1236 #if !defined(ARMNN_DISABLE_THREADS)
1237 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1240 if (!m_IsWorkingMemAllocated)
1245 if (m_ExternalMemoryManager)
1247 m_ExternalMemoryManager->Deallocate();
1251 for (
auto&& memoryManager : m_BackendMemoryMangers)
1255 memoryManager->Release();
1259 m_IsWorkingMemAllocated =
false;
1263 ProfilingGuid inferenceGuid)
1265 bool success =
true;
1267 auto Fail = [&](
const std::exception&
error)
1269 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1275 #if !defined(ARMNN_DISABLE_THREADS)
1276 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1277 AllocateWorkingMemory(lockGuard);
1279 AllocateWorkingMemory();
1282 ProfilingDynamicGuid workloadInferenceID(0);
1283 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1285 for (
auto& workload : queue)
1289 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1292 workload->Execute();
1295 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1300 ExecuteQueue(m_InputQueue);
1301 ExecuteQueue(m_WorkloadQueue);
1302 ExecuteQueue(m_OutputQueue);
1304 catch (
const RuntimeException& error)
1308 catch (
const std::runtime_error& error)
1316 void LoadedNetwork::EnqueueInput(
const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1323 std::unique_ptr<ITensorHandle> tensorHandle =
1324 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1325 inputTensor.GetMemoryArea());
1326 void* mem = tensorHandle->Map(
false);
1328 if (inputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1330 tensorHandle->Unmap();
1333 tensorHandle->Unmap();
1334 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1338 throw MemoryImportException(
"EnqueueInput: Memory Import failed, backend does not support Import");
1344 std::unique_ptr<ITensorHandle> tensorHandle =
1345 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1347 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1349 memcpy(dst, src, size);
1362 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1364 ARMNN_ASSERT_MSG(outputTensorHandle !=
nullptr,
"Data should have been allocated.");
1368 std::unique_ptr<ITensorHandle> tensorHandle =
1369 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1370 outputTensor.GetMemoryArea());
1372 void* mem = tensorHandle->Map(
false);
1373 bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1374 tensorHandle->Unmap();
1378 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed");
1383 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1391 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1393 memcpy(dst, src, size);
1396 std::unique_ptr<ITensorHandle> tensorHandle =
1397 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
1406 for (
auto inputTensorPair : inputTensors)
1411 return inputTensorPair.second;
1419 for (
auto outputTensorPair : outputTensors)
1424 return outputTensorPair.second;
1438 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1441 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1443 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
1446 std::vector<ImportedInputId> importedInputs;
1447 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1448 unsigned int inputIndex = 0;
1451 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1453 if (!outputTensorHandle)
1459 auto layerBindingId = inputLayer->GetBindingId();
1460 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1462 return inputTensor.first == layerBindingId;
1465 if (it == inputTensors.end())
1471 const auto& inputTensor = *it;
1472 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1473 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1474 inputTensor.second.GetMemoryArea());
1478 if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1479 && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1481 importedInputs.push_back(inputIndex);
1483 passThroughTensorHandle->Unmap();
1487 ARMNN_LOG(
error) <<
"An error occurred attempting to import input_"
1488 << inputIndex <<
" : " << exception.
what();
1489 passThroughTensorHandle->Unmap();
1494 return importedInputs;
1499 std::vector<ImportedInputId> importedInputs;
1500 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1502 for (
auto inputTensor : inputTensors)
1504 auto layerBindingId = inputTensor.first;
1507 return layer->GetBindingId() == layerBindingId;
1513 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1516 const Layer* layer = *it;
1524 backend->GetCapabilities()))
1526 std::string er = backend->GetId();
1527 er +=
" does not have PreImportIOTensors capability";
1539 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1542 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1547 fmt::format(
"ImportInputs: Memory Import failed, backend: "
1548 "{} does not support importing from source {}"
1552 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1553 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1554 inputTensor.second.GetMemoryArea());
1556 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1558 importedInputs.push_back(m_CurImportedInputId++);
1559 passThroughTensorHandle->Unmap();
1563 passThroughTensorHandle->Unmap();
1567 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1569 return importedInputs;
// NOTE(review): partial residue of LoadedNetwork::ImportOutputs. The embedded
// original line numbers (1581, 1584, ...) jump, so many statements, braces and
// conditions are missing from this extraction; do not treat the visible flow
// as complete. Recover the full body from version control before editing.
//
// First path — presumably taken when no forced MemorySource is given (the
// guarding condition is not visible here): outputs must have been pre-imported.
1581 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
// Sanity check: caller must supply exactly one tensor per network output.
1584 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1588 std::vector<ImportedOutputId> importedOutputs;
1589 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1591 unsigned int outputIndex = 0;
// Walks pre-imported output handles by index; a null handle presumably means
// the slot was cleared/deleted earlier (handling not visible here).
1594 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1595 if (!inputTensorHandle)
1601 auto layerBindingId = outputLayer->GetBindingId();
// Match the user-supplied output tensor to this output layer's binding id.
1602 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1604 return outputTensor.first == layerBindingId;
1607 if (it == outputTensors.end())
1613 const auto outputTensor = *it;
// Import only if the handle both supports and accepts the user memory area.
1617 if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1618 && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1620 importedOutputs.push_back(outputIndex);
// Failed imports are logged, not thrown — best-effort per output.
1625 ARMNN_LOG(
error) <<
"An error occurred attempting to import output_"
1626 << outputIndex <<
" : " << exception.
what();
1630 return importedOutputs;
// Second path — forced-import variant (forceImportMemorySource set).
1633 std::vector<ImportedOutputId> importedOutputs;
1634 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1636 for (
const auto& outputTensor : outputTensors)
1638 auto layerBindingId = outputTensor.first;
1641 return layer->GetBindingId() == layerBindingId;
// Unknown binding id is a hard error here, unlike the logged path above.
1646 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1650 const Layer* layer = *it;
// Backend must advertise the PreImportIOTensors capability (check truncated).
1658 backend->GetCapabilities()))
1660 std::string er = backend->GetId();
1661 er +=
" does not have PreImportIOTensors capability";
1672 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1675 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1680 "{} does not support importing from source {}"
1681 , factoryId, forceImportMemorySource));
1684 if (tensorHandle->
Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
// Forced path hands out monotonically increasing ImportedOutputIds and
// retains ownership of the pin for later ClearImportedOutputs.
1686 importedOutputs.push_back(m_CurImportedOutputId++);
1693 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1696 return importedOutputs;
// NOTE(review): body residue of LoadedNetwork::ClearImportedInputs — the
// function header and several lines are missing from this extraction.
// Releases previously imported input handles so their ids cannot be reused.
1701 for (
auto id : inputIds)
// Range check on the imported id (the thrown exception line is not visible).
// NOTE(review): `id > size()` rather than `>=` looks like a possible
// off-by-one; confirm against the full source before relying on it.
1703 if (
id > m_PreImportedInputHandles.size())
1708 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
// A null handle means this id was already cleared — double-clear is an error.
1709 if (!importedTensorHandle)
1712 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
// Unimport the external memory, then drop our reference to the handle.
1715 importedTensorHandle->Unimport();
1716 importedTensorHandle = {};
// NOTE(review): body residue of LoadedNetwork::ClearImportedOutputs — mirror
// of ClearImportedInputs for the output side; header lines are missing.
1722 for (
auto id : outputIds)
// Range check (same possible `>` vs `>=` concern as the input variant —
// verify against the complete source).
1724 if (
id > m_PreImportedOutputHandles.size())
1729 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
// Null handle: id already cleared once before.
1730 if (!importedTensorHandle)
1733 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
// Release the imported memory and null the stored handle.
1736 importedTensorHandle->Unimport();
1737 importedTensorHandle = {};
// NOTE(review): heavily truncated residue of LoadedNetwork::Execute (the
// async path driven by a WorkingMemHandle). The signature's first parameters
// and large parts of the body (braces, try/catch frames, loop bodies) are
// missing from this extraction — recover the full function before editing.
1744 std::vector<ImportedInputId> preImportedInputs,
1745 std::vector<ImportedOutputId> preImportedOutputs)
1747 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Validate that supplied + pre-imported inputs/outputs cover the network
// exactly; distinct messages depending on whether pre-imported ids were used.
1749 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1751 if (preImportedInputs.empty())
1758 "Number of inputs + preImportedInputs provided does not match network.");
1762 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1764 if (preImportedOutputs.empty())
1767 "Number of outputs provided does not match network.");
1772 "Number of outputs + preImportedOutputs provided does not match network.");
// Collect every binding id in play (direct tensors + validated imported ids).
1779 unsigned int index = 0;
1780 for (
auto pair : inputTensors)
1782 bindingIds[index++] = pair.first;
1786 bindingIds[index++] = ValidateImportedInputID(
id);
1788 for (
auto pair : outputTensors)
1790 bindingIds[index++] = pair.first;
1794 bindingIds[index++] = ValidateImportedOutputID(
id);
// Cleanup lambda: restores the working-mem handle's own tensor handles after
// pre-imported handles were spliced into its connection slots (body truncated).
1799 auto resetMemHandle = [&]()
1803 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1805 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1807 for (
auto it : inputConnections)
1815 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1820 for (
auto it : outputConnections)
// Profiling: record one inference entity, link it to the network, and emit
// start-of-life / end-of-life events around execution.
1827 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1828 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1829 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1833 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1834 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1835 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1838 LabelsAndEventClasses::EXECUTION_OF_GUID);
1839 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1842 bool executionSucceeded =
true;
1847 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1848 timelineUtils->Commit();
// Stage direct input tensors into the working-mem handle...
1858 for (
auto pair : inputTensors)
1860 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
// ...and splice pre-imported input handles straight into the input
// connections, bypassing the copy (resetMemHandle undoes this later).
1866 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1867 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1868 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1871 for (
auto it : inputConnections)
1873 *it = preimportedHandle.get();
// Same staging/splicing pattern for outputs.
1881 for (
auto pair: outputTensors)
1883 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1889 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1890 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1891 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1894 for (
auto it : outputConnections)
1896 *it = preimportedHandle.get();
// Shared failure handler: log and mark the run failed (no rethrow here).
1901 auto Fail = [&](
const std::exception&
error)
1903 ARMNN_LOG(
error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1904 executionSucceeded =
false;
// Execute the workload queue in order; per-workload inference events are
// recorded when the timeline utils are active.
1906 ProfilingDynamicGuid workloadInferenceID(0);
1910 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1912 auto& workload = m_WorkloadQueue[i];
1915 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1923 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1932 catch (
const std::runtime_error&
error)
// Post-execution output handling loop (body truncated in this extraction).
1945 for (
auto pair: outputTensors)
// NOTE(review): residue of LoadedNetwork::CreateWorkingMemHandle (header and
// many interior lines missing). Builds per-inference working memory: one
// WorkingMemDescriptor per layer plus coordinate maps that let Execute splice
// imported tensor handles into input/output connections.
1965 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1968 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1970 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1972 std::vector<WorkingMemDescriptor> workingMemDescriptors;
1973 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
// Factory lambda: create an unmanaged tensor handle for an output slot via
// the owning layer's workload factory (surrounding logic truncated).
1975 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
1978 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1984 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
// HandleInfo flags (struct definition partially visible): whether a handle
// feeds an InputLayer or is consumed by an OutputLayer.
1999 bool m_IsInputLayerHandle =
false;
2000 bool m_IsOutputLayerHandle =
false;
2006 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
2008 unsigned int layerIndex = 0;
2009 for (
auto&& layer : order)
// Per-layer classification flags, recomputed each iteration.
2019 bool isMemoryManaged =
true;
2020 bool isInputLayer =
false;
2021 bool isOutputLayer =
false;
2022 bool isConnectedToOutputLayer =
false;
2028 isInputLayer =
true;
2033 isOutputLayer =
true;
2036 unsigned int slotIndex = 0;
// Scan this slot's connections for OutputLayer consumers; a second
// OutputLayer on the same slot forces managed memory (import impossible).
2041 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2045 if (!isConnectedToOutputLayer)
2047 isConnectedToOutputLayer =
true;
2055 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2056 "This will prevent importing on the connected OutputLayers.",
2058 isMemoryManaged =
true;
// Allocate the handle into the managed or unmanaged pool accordingly.
2064 if (isMemoryManaged)
2066 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2067 tensorHandle = managedTensorHandles.back().get();
2071 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2072 tensorHandle = unmanagedTensorHandles.back().get();
2075 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
// Record the handle and its coordinates for later splicing by Execute.
2077 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2078 handleInfo.m_TensorHandle = tensorHandle;
2081 if (isConnectedToOutputLayer)
2083 handleInfo.m_IsOutputLayerHandle =
true;
2084 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2089 handleInfo.m_IsInputLayerHandle =
true;
2091 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
// Input side: constant-tensor sources are looked up in the prebuilt cache;
// an overridden TensorInfo wraps the handle in a decorator (lines truncated).
2102 auto outputSlot = slot.GetConnectedOutputSlot();
2103 auto key = outputSlot->GetOwningLayer().GetGuid();
2106 auto found = m_ConstantTensorHandles.find(key);
2107 if (found != m_ConstantTensorHandles.end())
2110 if (slot.IsTensorInfoOverridden())
2115 tensorHandle = decorated;
2118 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
// Presumably the InputLayer-fed-directly-to-OutputLayer case: register the
// handle as an output-layer handle with slot coords {layerIndex, 0}.
2126 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2127 handleInfo.m_TensorHandle = tensorHandle;
2128 handleInfo.m_IsOutputLayerHandle =
true;
2129 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2130 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
// Normal case: reuse the producing slot's handle, decorating if overridden.
2135 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2137 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2138 if (slot.IsTensorInfoOverridden())
2143 inputTensorHandle = decorated;
2146 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2152 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2153 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2157 else if (handleInfo.m_IsOutputLayerHandle)
2159 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2164 if (handleInfo.m_IsInputLayerHandle)
2166 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2167 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
// One (backend, ExecutionData) entry and one descriptor per layer.
2177 std::pair<BackendId, ExecutionData> dataPair;
2180 executionDataVec.push_back(dataPair);
2181 workingMemDescriptors.push_back(workingMemDescriptor);
// External (backend-allocated) memory: build the memory manager, then sort
// tensor memory by output-slot id so indices line up deterministically.
2187 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2189 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2192 std::sort(tensorMemory.begin(), tensorMemory.end(),
2193 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2194 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2196 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
// Flatten the per-slot coordinate records into the two vectors the
// WorkingMemHandle constructor consumes.
2199 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2200 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2202 for (
const auto& handleInfo: outputToHandleInfoMap)
2204 if (handleInfo.second.m_IsOutputLayerHandle)
2206 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2209 if (handleInfo.second.m_IsInputLayerHandle)
2211 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
// Hand everything (including ownership of the handle pools) to the new
// WorkingMemHandle (trailing constructor arguments truncated).
2215 return std::make_unique<WorkingMemHandle>(networkId,
2216 inputConnectionsInfo,
2217 outputConnectionsInfo,
2218 workingMemDescriptors,
2219 std::move(externalMemoryManager),
2220 std::move(tensorMemory),
2221 std::move(managedTensorHandles),
2222 std::move(unmanagedTensorHandles),
// NOTE(review): body residue of LoadedNetwork::RegisterDebugCallback (header
// missing). Forwards the debug callback to every queued workload.
2229 for (
auto&& workloadPtr: m_WorkloadQueue)
2231 workloadPtr.get()->RegisterDebugCallback(func);
// Builds the per-backend memory-lifetime profile (m_MemBlockMap) for the
// async execution path, keyed by OutputSlot.
// NOTE(review): extraction has dropped many interior lines (struct/brace
// framing, layer-type filters, lifetime decrement); treat as partial residue.
2236 void LoadedNetwork::CreateMemoryProfileAsync()
// PartialBlock fields (struct framing truncated): birth timestep, remaining
// consumer count, and an ordinal index for the block.
2240 unsigned int m_StartOfLife;
2241 unsigned int m_Lifetime;
2244 unsigned int m_Index;
// Round a byte count up to the nearest multiple of sizeof(float).
2249 auto align = [](
size_t numToAlign)
2251 const size_t alignment =
sizeof(float);
2252 return ((numToAlign + alignment - 1) / alignment) * alignment;
2255 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
// Walk the graph in topological order, one timestep per layer (the timestep
// increment itself is not visible in this extraction).
2260 unsigned int timestep = 0;
2261 unsigned int outputIndex = 0;
2262 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2264 for (
auto&& layer : order)
2266 const LayerType& layerType = layer->GetType();
// Part of a special-case condition (its first half is missing here).
2274 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2285 BackendId backendId = layer->GetBackendId();
// Open a PartialBlock for each output slot of externally-manageable layers.
2286 for (
auto& outputSlot : layer->GetOutputSlots())
2288 if (!m_SupportsExternallyManagedMemory[backendId])
2293 PartialBlock partialBlock;
2295 partialBlock.m_StartOfLife = timestep;
2297 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2298 partialBlock.m_MemSize = alignedSize;
2299 partialBlock.m_Index = outputIndex++;
2300 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2301 partialBlock.m_BackendId = backendId;
// Zero consumers: the block lives and dies in its own timestep — finalize
// immediately instead of tracking it.
2303 if (partialBlock.m_Lifetime == 0)
2305 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2306 partialBlock.m_StartOfLife,
2307 partialBlock.m_MemSize,
2309 partialBlock.m_Index);
2313 memBlockTrackerMap[&outputSlot] = partialBlock;
// Consume inputs: presumably decrements the producer block's lifetime and
// finalizes it when it reaches zero (the decrement line is not visible).
2317 for (
auto& inputSlot : layer->GetInputSlots())
2319 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2320 const LayerType& owningLayerType = connectedInputLayer.GetType();
2331 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2333 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2335 auto& lifetime = partialBlock.m_Lifetime;
2340 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2342 partialBlock.m_MemSize,
2344 partialBlock.m_Index);
// Builds the per-backend memory-lifetime profile for the synchronous path.
// Unlike the async variant this is keyed by root ITensorHandle, so slots that
// share storage via sub-tensors accumulate into one block.
// NOTE(review): partial residue — interior lines (struct framing, filters,
// lifetime decrement) were dropped by the extraction.
2351 void LoadedNetwork::CreateMemoryProfile()
// Resolve a (possibly sub-)tensor handle to its root ancestor so shared
// storage maps to a single PartialBlock.
2355 auto TraceSubTensorHandleAncestry = [](ITensorHandle*
const subTensorHandle)
2357 ITensorHandle* ancestor = subTensorHandle;
2358 while (ancestor && ancestor->GetParent())
2360 ancestor = ancestor->GetParent();
// PartialBlock fields (struct framing truncated).
2367 unsigned int m_StartOfLife;
2368 unsigned int m_Lifetime;
2371 unsigned int m_Index;
2373 BackendId m_BackendId;
// Round a byte count up to the nearest multiple of sizeof(float).
2376 auto align = [](
size_t numToAlign)
2378 const size_t alignment =
sizeof(float);
2379 return ((numToAlign + alignment - 1) / alignment) * alignment;
2382 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2387 unsigned int timestep = 0;
2388 unsigned int outputIndex = 0;
2389 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2391 for (
auto&& layer : order)
2393 const LayerType& layerType = layer->GetType();
// Part of a special-case condition (its first half is missing here).
2401 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2412 BackendId backendId = layer->GetBackendId();
2413 for (
auto& outputSlot : layer->GetOutputSlots())
2415 if (!m_SupportsExternallyManagedMemory[backendId])
// Key by the root handle of this slot's storage.
2420 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2421 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2423 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2425 PartialBlock partialBlock;
2427 partialBlock.m_StartOfLife = timestep;
2429 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2430 partialBlock.m_MemSize = alignedSize;
2431 partialBlock.m_Index = outputIndex++;
2432 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2433 partialBlock.m_BackendId = backendId;
// No consumers: finalize immediately (single-timestep lifetime).
2435 if (partialBlock.m_Lifetime == 0)
2437 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2438 partialBlock.m_StartOfLife,
2439 partialBlock.m_MemSize,
2441 partialBlock.m_Index);
2445 memBlockTrackerMap[tensorHandle] = partialBlock;
2447 m_Tensorhandles.push_back(tensorHandle);
// Root handle already tracked (sub-tensor alias): extend its lifetime by
// this slot's consumer count instead of opening a new block.
2452 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
// Consume inputs: presumably decrements the producing block's lifetime and
// finalizes it at zero (decrement line not visible in this extraction).
2456 for (
auto& inputSlot : layer->GetInputSlots())
2458 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2459 const LayerType& owningLayerType = connectedInputLayer.GetType();
2469 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2474 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2476 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2477 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2479 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2481 auto& lifetime = partialBlock.m_Lifetime;
2486 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2488 partialBlock.m_MemSize,
2490 partialBlock.m_Index);
// Builds a MemoryManager over the binned memory plan (m_MemBinMap), choosing
// a custom allocator per backend when one is registered and the backend's
// default allocator otherwise. Out-param tensorMemoryVec collects every
// TensorMemory with its MemorySource for the caller (CreateWorkingMemHandle).
// NOTE(review): partial residue — some framing lines are missing; the name
// "CreateExternalMemoryManger" (sic) matches the declaration used elsewhere.
2498 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2499 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2501 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2504 for (
auto& backend : m_MemBinMap)
2506 std::vector<BufferStorage> bufferStorageVec;
// Prefer a registered custom allocator for this backend, else its default.
2508 std::shared_ptr<ICustomAllocator> backendAllocator;
2509 if (allocatorMap.find(backend.first) != allocatorMap.end())
2511 backendAllocator = allocatorMap[backend.first];
2515 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
// One BufferStorage per memory bin; each bin's blocks share the buffer at
// their planned offsets.
2518 for (
auto& memBin : backend.second)
2520 BufferStorage bufferStorage;
2521 bufferStorage.m_BufferSize = memBin.m_MemSize;
2522 bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2524 for (
auto& memBlock : memBin.m_MemBlocks)
2526 auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
// Shared ownership: both the out-param vector and the buffer keep the entry.
2528 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2529 bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2532 bufferStorageVec.emplace_back(std::move(bufferStorage));
// NOTE(review): the literal 4 is an undocumented alignment/argument to
// StoreMemToAllocate — confirm its meaning against MemoryManager's API.
2535 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2538 return memoryManager;
// NOTE(review): body residue of LoadedNetwork::ValidateImportedInputID
// (header and try-frame missing). Maps an ImportedInputId to its
// LayerBindingId, rejecting unknown or already-deleted ids.
2545 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
// Null handle: the import existed but was cleared via ClearImportedInputs.
2546 if (!importedTensorHandlePin.m_TensorHandle)
2548 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute:"
2549 "PreImportedInput: {} has been deleted",
id));
2551 return importedTensorHandlePin.m_LayerBindingId;
// .at() throws out_of_range for ids never issued — translate to the
// project's InvalidArgumentException.
2553 catch (
const std::out_of_range&)
2555 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedInputId: {}",
id));
2563 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2564 if (!importedTensorHandlePin.m_TensorHandle)
2566 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: "
2567 "PreImportedOutput: {} has been deleted",
id));
2569 return importedTensorHandlePin.m_LayerBindingId;
2571 catch (
const std::out_of_range&)
2573 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedOutputId: {}",
id));