#include <common/include/Processes.hpp>

#include <fmt/format.h>

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure.
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer that created it.
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options; the optimize phase records the import/export
    // settings there so they can be checked against the network properties here.
    const vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            if (backend.GetBackendId().Get() == "Global")
            {
                return true;
            }
            return false;
        });
    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }

    // The input memory source must agree with the import setting used at optimize time.
    if ((networkProperties.m_InputSource != MemorySource::Undefined) != importEnabled)
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }

    // Likewise the output memory source must agree with the export setting.
    if ((networkProperties.m_OutputSource != MemorySource::Undefined) != exportEnabled)
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}
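// Illustrative only: a minimal sketch (not part of this file) of how the "Global"
// options checked above are produced at optimize time. The member names follow the
// public armnn::OptimizerOptions API of this era and should be treated as an
// assumption, not canonical code.
//
//     armnn::OptimizerOptions optOptions;
//     optOptions.m_ImportEnabled = true;   // recorded as "ImportEnabled" under "Global"
//     optOptions.m_ExportEnabled = true;   // recorded as "ExportEnabled" under "Global"
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
//     // Loading optNet with INetworkProperties whose m_InputSource/m_OutputSource
//     // are not MemorySource::Undefined will then pass this validation.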
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;
        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
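// Illustrative only: MakeLoadedNetwork is the factory entry point for constructing a
// LoadedNetwork, and failures surface through the errorMessage out-parameter rather
// than propagating exceptions. Hypothetical caller sketch:
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loaded = LoadedNetwork::MakeLoadedNetwork(
//         std::move(optNet), errorMessage, networkProperties, &profilingService);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << errorMessage;
//     }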
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    // Check that the memory sources match the import/export values recorded at
    // optimize time; this throws if they do not.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
    m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // Default to the backend's own memory manager; switch to externally
            // managed memory when the backend advertises support for it.
            m_SupportsExternallyManagedMemory[backend->GetId()] = false;
            useInternalMemoryManager = true;

            if (HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                      backend->GetCapabilities()))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
                useInternalMemoryManager = false;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);
        bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::MemImport:
            // If import is enabled, disable memory management so the handle can be imported.
            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                       workloadFactory,
                                       !supportsExternalManager &&
                                       (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
            break;
        case LayerType::Constant:
            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
            break;
        default:
            // If this layer's single output feeds only an Output layer and export is
            // enabled, disable memory management so the memory can be exported.
            if ((layer->GetNumOutputSlots() == 1) &&
                (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !supportsExternalManager &&
                                           (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
            }
            else
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !supportsExternalManager);
            }
        }
    }
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start-of-life event...
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // ...and label it with the current process ID.
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }
    std::vector<IWorkload*> ConstWorkloads;

    // Create a workload for each layer.
    for (auto&& layer: order)
    {
        if (timelineUtils)
        {
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            // Inputs and outputs are handled separately; see EnqueueInput() and EnqueueOutput().
            break;
        default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);
                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(
                        fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                    layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                        ));
                }

                if (timelineUtils)
                {
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.emplace_back(std::move(workload));

                if (layer->GetType() == LayerType::Constant)
                {
                    // Constant workloads are executed once up front, so keep a raw pointer to them.
                    ConstWorkloads.emplace_back(m_WorkloadQueue.back().get());
                }

                // The constant data held by the layer is no longer needed.
                layer->ReleaseConstantData();
                break;
            }
        }
    }
    // Gather, for each input, the workloads connected to it and whether their tensor
    // handles can later be replaced with pre-imported ones.
    if (m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        for (const BindableLayer* layer: order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(
                    layer->GetOutputSlot(0).GetTensorHandleFactoryId());
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }
        // Do the same for the workloads connected to each output.
        for (const BindableLayer* layer: order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto &inputSlot: outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
                                        inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(outputSlot->GetTensorHandleFactoryId());
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }
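    // Worked example of the workload-index arithmetic above (illustrative, not from
    // the original source): input layers occupy the first positions of the sorted
    // graph but have no entries in m_WorkloadQueue, so a layer at graph position p
    // maps to workload index p - noOfInputs. With noOfInputs == 2, the layer at
    // graph position 5 owns m_WorkloadQueue[3].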
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit the post-optimisation network structure to the profiling service.
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        CreateMemoryProfile();

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }
        m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

        // Sort m_TensorMemory so that its order matches the outputSlot order.
        std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                     const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                  {
                      return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                  });
    }

    if (useInternalMemoryManager)
    {
        // Set up the memory for the intermediate tensors.
        m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
    }

    if (useExternalMemoryManager)
    {
        AllocateAndExecuteConstantWorkloads();
    }

    for (const auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }

    // Execute all constant-layer workloads once, up front.
    for (auto workload: ConstWorkloads)
    {
        workload->Execute();
    }
}
void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}
void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            // Inputs and outputs are handled separately; see EnqueueInput() and EnqueueOutput().
            break;
        default:
            for (auto& workload : m_WorkloadQueue)
            {
                AddWorkloadStructure(timelineUtils, workload, *layer);
            }
            break;
        }
    }
    // Commit to send the post-optimisation network structure.
    timelineUtils->Commit();
}
arm::pipe::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        if (inputLayer->GetNumOutputSlots() != 1)
        {
            throw armnn::Exception("Input layer should have exactly 1 output slot");
        }
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        if (outputLayer->GetNumInputSlots() != 1)
        {
            throw armnn::Exception("Output layer should have exactly 1 input slot");
        }
        if (!outputLayer->GetInputSlot(0).GetConnection())
        {
            throw armnn::Exception("Input slot on Output layer must be connected");
        }
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()));
    }

    workloadFactory = it->second.get();

    if (!workloadFactory)
    {
        throw RuntimeException("No workload factory");
    }

    return *workloadFactory;
}
// Holds a tensor handle together with its TensorInfo and layer binding id.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};
static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }

    throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
}
// Bundles the caller-supplied input and output tensors into TensorPins that stay
// alive for the duration of the workload's execution.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Input tensors can be provided directly or pre-imported; together they must
    // cover every input of the network.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }
    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Swap in the pre-imported handle, unless a previous call already did so.
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                // If a previous call imported this input, restore the original handle.
                if (m_IsInputImported[inputIndex])
                {
                    const OutputHandler& handler = inputLayer->GetOutputHandler(0);

                    for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = false;
                }

                // The input is not imported, so enqueue a copy/import workload for it.
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo: indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                if (!inputTensorHandle)
                {
                    throw MemoryImportException("EnqueueWorkload: ImportedOutputId has no tensor handle");
                }

                // Synchronize the imported memory once the preceding workloads have run.
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());

                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                // If a previous call imported this output, restore the original handle.
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo: indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                // The output is not imported, so enqueue a copy/export workload for it.
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
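// Illustrative only: how a client typically drives EnqueueWorkload through the
// public IRuntime API. A minimal sketch assuming the standard ArmNN usage pattern;
// 'runtime', 'networkId', and the binding ids are hypothetical.
//
//     std::vector<float> inputData(inputInfo.GetNumElements());
//     std::vector<float> outputData(outputInfo.GetNumElements());
//     armnn::InputTensors inputTensors
//     {
//         {0, armnn::ConstTensor(runtime->GetInputTensorInfo(networkId, 0), inputData.data())}
//     };
//     armnn::OutputTensors outputTensors
//     {
//         {0, armnn::Tensor(runtime->GetOutputTensorInfo(networkId, 0), outputData.data())}
//     };
//     armnn::Status status = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);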
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle,
                                 const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    if (!outputTensorHandle)
    {
        throw armnn::Exception("Data should have been allocated.");
    }

    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    // Try to import the input tensor directly; fall back to a copy workload.
    bool needMemCopy = true;
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined &&
        CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
    {
        needMemCopy = false;
        void* mem = tensorHandle->Map(false);
        if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
        {
            tensorHandle->Unmap();
            return; // No workload is needed; the import has been done.
        }
        tensorHandle->Unmap();
        throw MemoryImportException("EnqueueInput: Memory Import failed");
    }
    if (needMemCopy)
    {
        // No import was possible, so create a memcopy workload for the input.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor,
                                                                                            info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle,
                                  const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    if (layer.GetNumInputSlots() != 1)
    {
        throw armnn::Exception("Output Layer should have exactly one input.");
    }

    // Get the output handler of the layer feeding this output.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    if (!inputTensorHandle)
    {
        throw armnn::Exception("Data should have been allocated.");
    }

    // Try to export the output tensor directly; fall back to a copy workload.
    bool needMemCopy = true;
    if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void *mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload in place of the copy.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // The memory was not exported, so add an output workload that performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        if (!outputWorkload)
        {
            throw armnn::Exception("No output workload created");
        }

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
#if !defined(ARMNN_DISABLE_THREADS)
    // The unused lock parameter ensures callers hold m_WorkingMemMutex.
    IgnoreUnused(lock);
#endif

    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Tell each memory manager to release the memory in its memory group.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined)  // Try to import the input tensor.
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
    }

    // Create a tensor handle and copy the input into the backend's memory.
    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
}
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    if (!outputTensorHandle)
    {
        throw armnn::Exception("Data should have been allocated.");
    }

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        if (inputTensorPair.first == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        if (outputTensorPair.first == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}
std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    // Import is only possible when a memory source is forced for this call.
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }
    // The number of pre-imported tensors must not exceed the number of inputs.
    if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
    {
        throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
    }

    std::vector<ImportedInputId> importedInputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    unsigned int inputIndex = 0;
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

        if (!outputTensorHandle)
        {
            inputIndex++;
            continue;
        }

        auto layerBindingId = inputLayer->GetBindingId();
        auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
        {
            return inputTensor.first == layerBindingId;
        });

        if (it == inputTensors.end())
        {
            inputIndex++;
            continue;
        }

        const auto& inputTensor = *it;
        std::unique_ptr<ITensorHandle> passThroughTensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                           inputTensor.second.GetMemoryArea());

        try
        {
            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import input_"
                             << inputIndex << " : " << exception.what();
            passThroughTensorHandle->Unmap();
        }
        inputIndex++;
    }

    return importedInputs;
}
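// Illustrative only: the pre-import flow this function enables, sketched under the
// assumption that the caller forces a memory source; names are hypothetical.
//
//     std::vector<armnn::ImportedInputId> importedIds =
//         loadedNetwork->ImportInputs(inputTensors, armnn::MemorySource::Malloc);
//     // Subsequent inferences can skip the per-call copy/import for those inputs:
//     loadedNetwork->EnqueueWorkload({}, outputTensors, importedIds, {});
//     // ...and the imported handles are released with:
//     loadedNetwork->ClearImportedInputs(importedIds);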
std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    // Import is only possible when a memory source is forced for this call.
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }
    // All outputs must be supplied when force-importing.
    if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
    {
        throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    unsigned int outputIndex = 0;
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
        if (!inputTensorHandle)
        {
            outputIndex++;
            continue;
        }

        auto layerBindingId = outputLayer->GetBindingId();
        auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
        {
            return outputTensor.first == layerBindingId;
        });

        if (it == outputTensors.end())
        {
            outputIndex++;
            continue;
        }

        const auto outputTensor = *it;
        try
        {
            // Check that the output memory can be imported before importing it.
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import output_"
                             << outputIndex << " : " << exception.what();
        }
        outputIndex++;
    }
    return importedOutputs;
}
void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id >= m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id >= m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr: m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
void LoadedNetwork::CreateMemoryProfile()
{
    // Finds the first TensorHandle ancestor of a SubTensorHandle; if the handle
    // provided is not a SubTensorHandle it is returned unchanged.
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;
        size_t m_MemSize;
        unsigned int m_Index;
        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };

    std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;

    const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
    const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        const LayerType& layerType = layer->GetType();
        // Don't manage memory if importing.
        if (layerType == LayerType::Input && inputImportingEnabled)
        {
            continue;
        }
        // Don't manage memory if importing.
        if (layerType == LayerType::Output && outputImportingEnabled
            && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
        {
            continue;
        }
        // Constant layer memory cannot be shared and must persist for the lifetime
        // of execution, so it is managed separately.
        if (layerType == LayerType::Constant)
        {
            continue;
        }

        BackendId backendId = layer->GetBackendId();
        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
            {
                PartialBlock partialBlock;
                partialBlock.m_StartOfLife = timestep;

                size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
                partialBlock.m_MemSize = alignedSize;
                partialBlock.m_Index = outputIndex++;
                partialBlock.m_Lifetime = outputSlot.GetNumConnections();
                partialBlock.m_BackendId = backendId;

                if (partialBlock.m_Lifetime == 0)
                {
                    m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                         partialBlock.m_StartOfLife,
                                                                         partialBlock.m_MemSize,
                                                                         0,
                                                                         partialBlock.m_Index);
                }
                else
                {
                    memBlockTrackerMap[tensorHandle] = partialBlock;
                }
                m_Tensorhandles.push_back(tensorHandle);
            }
            else
            {
                memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            const LayerType& owningLayerType = connectedInputLayer.GetType();

            if (owningLayerType == LayerType::Constant)
            {
                continue;
            }
            if (inputImportingEnabled && owningLayerType == LayerType::Input)
            {
                continue;
            }
            if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}
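// Worked example for CreateMemoryProfile() above (illustrative, not from the
// original source). align() rounds sizes up to sizeof(float) == 4 bytes:
// align(10) == ((10 + 3) / 4) * 4 == 12, while align(8) stays 8. For a tensor
// produced at timestep 2 whose output slot has 3 connections, m_Lifetime starts
// at 3 and is decremented once per consuming input slot; when it reaches 0 at,
// say, timestep 5, the block {start 2, end 5, size, offset 0, index} is recorded
// in m_MemBlockMap for the producing backend.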
std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
{
    std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
    auto allocatorMap = BackendRegistryInstance().GetAllocators();

    for (auto& backend : m_MemBinMap)
    {
        std::vector<BufferStorage> bufferStorageVec;

        std::shared_ptr<ICustomAllocator> backendAllocator;
        if (allocatorMap.find(backend.first) != allocatorMap.end())
        {
            backendAllocator = allocatorMap[backend.first];
        }
        else
        {
            backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
        }

        for (auto& memBin : backend.second)
        {
            BufferStorage bufferStorage;
            bufferStorage.m_BufferSize = memBin.m_MemSize;
            bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());

            for (auto& memBlock : memBin.m_MemBlocks)
            {
                auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});

                tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
                bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
            }

            bufferStorageVec.emplace_back(std::move(bufferStorage));
        }

        memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
    }

    return memoryManager;
}
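// Illustrative only (not from the original source): for a backend whose bin map
// contains one bin of m_MemSize 1024 holding two blocks at offsets 0 and 512,
// the loop above produces a single BufferStorage with two TensorMemory entries
// {m_Offset 0, m_OutputSlotId a} and {m_Offset 512, m_OutputSlotId b}. The
// MemoryManager then allocates the 1024-byte buffer once via the backend's
// allocator (the trailing argument 4 is taken here to be the buffer alignment)
// and each tensor is later imported at its offset into that buffer.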
// Resolve an ImportedInputId passed to Execute() back to its layer binding id,
// rejecting ids that are unknown or whose handles have already been cleared.
// ...
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedInput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
    }

// Likewise for an ImportedOutputId.
// ...
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedOutput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
    }