#include <common/include/Processes.hpp>

#include <fmt/format.h>
template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure.
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer.
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}
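// ValidateSourcesMatchOptimizedNetwork performs a sanity check: the input and output
// memory sources requested at load time must be consistent with the import/export flags
// the network was optimized with.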
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options, which hold the import/export flags.
    const std::vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            return backend.GetBackendId().Get() == "Global";
        });

    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }
    // Any MemorySource other than Undefined means the caller intends to import, which must
    // match the ImportEnabled flag the network was optimized with.
    if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
        (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
    if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
        (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}
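// Note: the "Global" ImportEnabled/ExportEnabled options tested above are recorded when
// the network is optimized. As an illustrative sketch (assuming the OptimizerOptions API),
// a caller who passes non-Undefined memory sources here must have opted in earlier:
//   armnn::OptimizerOptions options;
//   options.m_ImportEnabled = true;  // pairs with a non-Undefined m_InputSource at load time
//   options.m_ExportEnabled = true;  // pairs with a non-Undefined m_OutputSource at load time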
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
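// Illustrative call site (assumed, not part of this file): the runtime obtains a
// LoadedNetwork through this factory rather than the constructor, e.g.
//   std::string errorMessage;
//   std::unique_ptr<LoadedNetwork> loaded =
//       LoadedNetwork::MakeLoadedNetwork(std::move(optimizedNet), errorMessage,
//                                        networkProperties, &profilingService);
//   if (!loaded) { /* errorMessage describes why workload preparation failed */ }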
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    // The memory sources must match the import/export values specified when the network
    // was optimized; if they do not, this throws.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);
    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure topological order before walking the graph.
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

    m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
    m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    // First pass: create the backends and a workload factory per backend.
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            m_SupportsExternallyManagedMemory[backend->GetId()] = false;
            useInternalMemoryManager = true;

            if (HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                      backend->GetCapabilities())
                && m_NetworkProperties.m_ExternalMemoryManagementEnabled)
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
                useInternalMemoryManager = false;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }
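    // Second pass over the graph: create the tensor handles for every layer, deciding per
    // layer type whether the memory is internally managed or left free for import/export.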
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);
        bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::MemImport:
                // When import is enabled the handles must not be memory managed.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !supportsExternalManager &&
                                           (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
                break;
            case LayerType::Constant:
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                break;
            default:
                // A layer whose single output feeds an Output layer: when export is enabled,
                // disable memory management so the result can be exported; otherwise copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager &&
                                               (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager);
                }
                break;
        }
    }
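    // With handles in place, record the network structure with the profiling service
    // (when profiling is enabled) before creating the workloads themselves.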
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start-of-life event.
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // And label it with the process ID.
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }
    std::vector<IWorkload*> ConstWorkloads;

    // Now create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
                // Inputs and outputs are treated specially; see EnqueueInput() and EnqueueOutput().
                break;
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);
                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(
                        fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                    layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()));
                }

                if (timelineUtils)
                {
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.emplace_back(std::move(workload));
                if (layer->GetType() == LayerType::Constant)
                {
                    // Queue constant workloads so they can be executed first.
                    ConstWorkloads.emplace_back(m_WorkloadQueue.back().get());
                }

                // The constant data has been consumed by the workload; release it.
                layer->ReleaseConstantData();
                break;
            }
        }
    }
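    // Pre-create importable tensor handles for every input and output binding, and remember
    // which workload slots touch them, so EnqueueWorkload() can later swap a workload's
    // tensor handle for a user-supplied imported buffer instead of copying.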
    if (m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();
            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get the matching import factory id.
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }
        // Likewise for outputs: record the workloads and slots each output binding touches.
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();
            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
                                        inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit the recorded network structure to the profiling service.
        timelineUtils->Commit();
    }
    if (useExternalMemoryManager)
    {
        CreateMemoryProfile();

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

        // Sort m_TensorMemory so its order matches the output slot ids.
        std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                     const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                  {
                      return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                  });
    }
    if (useInternalMemoryManager)
    {
        // Set up memory.
        m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
    }

    if (useExternalMemoryManager)
    {
        AllocateAndExecuteConstantWorkloads();
    }

    // Now that the intermediate tensor memory has been set up, do any post-allocation
    // configuration for each workload.
    for (const auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
    for (auto workload : ConstWorkloads)
    {
        workload->PostAllocationConfigure();
    }
}
void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}
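// Note: constant workloads run only once, at load time; their outputs never change between
// inferences, so they are kept out of the per-inference workload queue.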
void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add the layer to the post-optimisation network structure.
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
                // Inputs and outputs are treated specially; see EnqueueInput() and EnqueueOutput().
                break;
            default:
                for (auto& workload : m_WorkloadQueue)
                {
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
        }
    }
    // Commit to send the post-optimisation network structure.
    timelineUtils->Commit();
}
arm::pipe::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        if (inputLayer->GetNumOutputSlots() != 1)
        {
            throw armnn::GraphValidationException("Input layer should have exactly 1 output slot");
        }
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}
TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        if (outputLayer->GetNumInputSlots() != 1)
        {
            throw armnn::GraphValidationException("Output layer should have exactly 1 input slot");
        }
        if (!outputLayer->GetInputSlot(0).GetConnection())
        {
            throw armnn::GraphValidationException("Input slot on Output layer must be connected");
        }
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(), layer.GetNameStr()));
    }

    workloadFactory = it->second.get();
    if (!workloadFactory)
    {
        throw armnn::NullPointerException("No workload factory");
    }

    return *workloadFactory;
}
// A TensorPin owns a CPU-side tensor handle together with the TensorInfo and binding id
// it was supplied for.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};
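// Find the pin for a given binding id in the supplied collection; bindingPointDesc
// ("input" or "output") is used only to build the error message.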
static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}
// Stores data (and the tensor pins wrapping it) that must stay accessible for the whole
// execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;
            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;
            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk the graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }
    // Data that must be kept alive while executing the network.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Inputs can be provided directly or pre-imported; together they must cover every input.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }
    // For each input to the network, call EnqueueInput() with the data passed by the user.
    m_InputQueue.clear();
    m_InputQueue.reserve(graph.GetNumInputs());

    unsigned int inputIndex = 0;
    unsigned int importedInputIdIndex = 0;
    std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        if (importedInputIdIndex < preImportedInputIds.size() &&
            inputIndex == preImportedInputIds[importedInputIdIndex])
        {
            // Only replace tensor handles if they have not already been replaced.
            if (!m_IsInputImported[inputIndex])
            {
                auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                {
                    auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                    workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                }
                m_IsInputImported[inputIndex] = true;
            }
            importedInputIdIndex++;
        }
        else
        {
            if (m_IsInputImported[inputIndex])
            {
                OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                {
                    auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                    workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                }
                m_IsInputImported[inputIndex] = false;
            }

            // The input tensor handle is not imported; enqueue a copy of the user's input data.
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
        inputIndex++;
    }
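    // For each output of the network, call EnqueueOutput() with the data passed by the user.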
    m_OutputQueue.clear();
    m_OutputQueue.reserve(graph.GetNumOutputs());

    unsigned int outputIndex = 0;
    unsigned int importedOutputIdIndex = 0;
    std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        if (importedOutputIdIndex < preImportedOutputIds.size() &&
            outputIndex == preImportedOutputIds[importedOutputIdIndex])
        {
            // Replace the workloads' tensor handles with the pre-imported ones; this also
            // serves as a sanity check that the pre-imported id is valid.
            ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

            if (!m_IsOutputImported[outputIndex])
            {
                const auto bindingId = outputLayer->GetBindingId();
                const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                          indices.m_OutputSlotIndices.m_SlotIndex);

                for (const auto& workloadInfo : indices.m_InputSlotIndices)
                {
                    auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                    inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                }
                m_IsOutputImported[outputIndex] = true;
            }

            if (!inputTensorHandle)
            {
                throw armnn::NullPointerException("Data should have been allocated.");
            }

            MemSyncQueueDescriptor syncDesc;
            syncDesc.m_Inputs.push_back(inputTensorHandle);
            WorkloadInfo info;
            info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());

            auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
            m_OutputQueue.push_back(std::move(syncWorkload));
            importedOutputIdIndex++;
        }
        else
        {
            if (m_IsOutputImported[outputIndex])
            {
                const auto bindingId = outputLayer->GetBindingId();
                const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                const OutputHandler& outputHandler =
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                outputWorkload->ReplaceOutputTensorHandle(outputHandler.GetData(),
                                                          indices.m_OutputSlotIndices.m_SlotIndex);

                for (const auto& workloadInfo : indices.m_InputSlotIndices)
                {
                    auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                    inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                }
                m_IsOutputImported[outputIndex] = false;
            }

            // The output tensor handle is not imported; enqueue a copy back to the user's buffer.
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
        outputIndex++;
    }
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference and commit if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle,
                                 const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    if (!outputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined)  // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem-copy workload for the input since we did not import it.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle,
                                  const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);
    if (layer.GetNumInputSlots() != 1)
    {
        throw armnn::GraphValidationException("Output Layer should have exactly one input.");
    }

    // Get the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    if (!inputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    // Try to export the output tensor: only possible when export is enabled and the
    // producing output slot has a single connection, to this Output layer.
    bool needMemCopy = true;
    if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload in place of a copy.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // We did not export the memory, so add an output workload that performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        if (!outputWorkload)
        {
            throw armnn::NullPointerException("No output workload created");
        }

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
#if !defined(ARMNN_DISABLE_THREADS)
    // The otherwise-unused lock parameter ensures this function can only be called while
    // the working-memory mutex is held.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}
void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Tell the memory managers to release the memory in their respective memory groups.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif
        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined)  // Try to import the input tensor.
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // Import is not enabled; copy the input data into the tensor handle instead.
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    if (!outputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}
void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        if (inputTensorPair.first == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        if (outputTensorPair.first == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}
std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    // Cannot import unless a force-import memory source was supplied.
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }

    // The number of pre-imported tensors must not exceed the number of inputs.
    if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
    {
        throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
    }

    std::vector<ImportedInputId> importedInputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    unsigned int inputIndex = 0;
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
        if (!outputTensorHandle)
        {
            inputIndex++;
            continue;
        }

        auto layerBindingId = inputLayer->GetBindingId();
        auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
        {
            return inputTensor.first == layerBindingId;
        });

        if (it == inputTensors.end())
        {
            inputIndex++;
            continue;
        }

        const auto& inputTensor = *it;
        std::unique_ptr<ITensorHandle> passThroughTensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                           inputTensor.second.GetMemoryArea());

        try
        {
            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import input_"
                             << inputIndex << " : " << exception.what();
            passThroughTensorHandle->Unmap();
        }
        inputIndex++;
    }

    return importedInputs;
}
std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }

    // Import is only attempted when the correct number of output tensors was supplied.
    if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
    {
        throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    unsigned int outputIndex = 0;
    for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
    {
        auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
        if (!inputTensorHandle)
        {
            outputIndex++;
            continue;
        }

        auto layerBindingId = outputLayer->GetBindingId();
        auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
        {
            return outputTensor.first == layerBindingId;
        });

        if (it == outputTensors.end())
        {
            outputIndex++;
            continue;
        }

        const auto outputTensor = *it;
        try
        {
            // Check whether the output memory can be imported before importing it.
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import output_"
                             << outputIndex << " : " << exception.what();
        }
        outputIndex++;
    }
    return importedOutputs;
}
void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id > m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}
void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id > m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
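// Illustrative usage (assumed caller code): a DebugCallbackFunction receives the producing
// layer's guid, the output slot index and the tensor handle, e.g.
//   loadedNetwork->RegisterDebugCallback(
//       [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* /*tensorHandle*/)
//       {
//           ARMNN_LOG(info) << "layer " << guid << " produced output slot " << slotIndex;
//       });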
void LoadedNetwork::CreateMemoryProfile()
{
    // Find the first TensorHandle ancestor of a SubTensorHandle. If the handle provided is
    // already a TensorHandle, it is returned unchanged.
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };
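    // For example, with the 4-byte (sizeof(float)) alignment above, align(10) == 12 and
    // align(8) == 8.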
    std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;

    const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
    const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    for (auto&& layer : order)
    {
        const LayerType& layerType = layer->GetType();
        // Don't profile memory for tensors that will be imported rather than managed.
        if (layerType == LayerType::Input && inputImportingEnabled)
        {
            continue;
        }
        if (layerType == LayerType::Output && outputImportingEnabled
            && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
        {
            continue;
        }
        // Constant layer memory cannot be shared; it persists for the lifetime of execution
        // and is managed separately.
        if (layerType == LayerType::Constant)
        {
            continue;
        }
        BackendId backendId = layer->GetBackendId();
        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
            {
                PartialBlock partialBlock;
                partialBlock.m_StartOfLife = timestep;

                size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
                partialBlock.m_MemSize = alignedSize;
                partialBlock.m_Index = outputIndex++;
                partialBlock.m_Lifetime = outputSlot.GetNumConnections();
                partialBlock.m_BackendId = backendId;

                if (partialBlock.m_Lifetime == 0)
                {
                    m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                         partialBlock.m_StartOfLife,
                                                                         partialBlock.m_MemSize,
                                                                         0,
                                                                         partialBlock.m_Index);
                }
                else
                {
                    memBlockTrackerMap[tensorHandle] = partialBlock;
                }
                m_Tensorhandles.push_back(tensorHandle);
            }
            else
            {
                memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
            }
        }
        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            const LayerType& owningLayerType = connectedInputLayer.GetType();

            if (owningLayerType == LayerType::Constant)
            {
                continue;
            }
            if (inputImportingEnabled && owningLayerType == LayerType::Input)
            {
                continue;
            }
            if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}
std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
{
    std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
    auto allocatorMap = BackendRegistryInstance().GetAllocators();

    for (auto& backend : m_MemBinMap)
    {
        std::vector<BufferStorage> bufferStorageVec;

        std::shared_ptr<ICustomAllocator> backendAllocator;
        if (allocatorMap.find(backend.first) != allocatorMap.end())
        {
            backendAllocator = allocatorMap[backend.first];
        }
        else
        {
            backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
        }

        for (auto& memBin : backend.second)
        {
            BufferStorage bufferStorage;
            bufferStorage.m_BufferSize = memBin.m_MemSize;
            bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());

            for (auto& memBlock : memBin.m_MemBlocks)
            {
                auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});

                tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
                bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
            }

            bufferStorageVec.emplace_back(std::move(bufferStorage));
        }

        memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
    }

    return memoryManager;
}
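// The MemoryManager returned above is stored as m_ExternalMemoryManager; its buffers are
// allocated in AllocateWorkingMemory(), after which each entry of tensorMemoryVec is
// imported into the matching tensor handle (see m_TensorMemory / m_Tensorhandles).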
LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
                                                       "PreImportedInput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
    }
}
LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedOutput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
    }
}