#include <fmt/format.h>
#include <sstream> // for std::stringstream used below; other includes elided

namespace armnn
{

using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure.
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer.
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService,
                                                                const NetworkId networkIdOut)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService, networkIdOut));
    }
    // ... (armnn exception types are handled the same way)
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
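// Editor's note: a minimal caller sketch for MakeLoadedNetwork(), assuming an already
// optimised network 'optNet' and runtime-provided 'networkProperties', 'profilingService'
// and 'networkId' (these names are illustrative, not from this file):
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loaded = LoadedNetwork::MakeLoadedNetwork(
//         std::move(optNet), errorMessage, networkProperties, profilingService, networkId);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << errorMessage; // construction errors are captured by Fail above
//     }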
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService,
                             const NetworkId networkId) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_NetworkId(networkId),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<IProfiler>();
    ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());

    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify some
    // of the handlers (for example the splitter and concat layers).
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                auto workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));

                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
                auto workloadFactory = backend->CreateWorkloadFactory(
                    memoryManager, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());

                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }

    // Then create the thread pool if requested.
    if (m_NetworkProperties.m_NumThreads > 0 && networkProperties.m_AsyncEnabled)
    {
        CreateThreadPool(m_NetworkProperties.m_NumThreads);
    }
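    // Editor's note: after the loop above, each distinct BackendId in the graph owns
    // exactly one IBackendInternal instance plus one workload factory. Backends
    // implementing the tensor allocator API register handle factories with
    // m_TensorHandleFactoryRegistry and store nullptr as the paired memory manager;
    // legacy backends instead keep the shared IMemoryManager alongside the factory so
    // it can be Acquire()d and Release()d around execution.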
    // Now create the tensor handles for the intermediate outputs of each layer.
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::MemImport:
            {
                // If import is enabled, the handles must not be memory managed.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !m_NetworkProperties.m_ImportEnabled);
                break;
            }
        default:
            {
                // Look for a layer with 1 OutputSlot which has 1 connection, where that
                // connection is to an Output layer. If export is enabled, disable memory
                // management so we can export; otherwise we do a copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !m_NetworkProperties.m_ExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }
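    // Editor's note: the !m_ImportEnabled / !m_ExportEnabled arguments above toggle the
    // "isMemoryManaged" flag on the created tensor handles. When user memory is to be
    // imported at the inputs or exported at the outputs, those handles must stay outside
    // the backend's internal memory manager, otherwise their storage could be pooled or
    // relocated and the zero-copy mapping would be invalid.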
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        // Create the network entity, record a start-of-life event and label it with the
        // current process ID (formatted through a std::stringstream).
        // ...
    }

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add the layer to the post-optimisation network structure.
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput()/EnqueueOutput().
                break;
            }
        default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);

                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(
                        fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                    layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                        ));
                }

                if (timelineUtils)
                {
                    // Add the workload to the post-optimisation network structure.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                // For async networks, constant workloads are managed exclusively by the
                // LoadedNetwork and are separated out from the other workloads.
                if (networkProperties.m_AsyncEnabled && layer->GetType() == LayerType::Constant)
                {
                    m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                }
                else
                {
                    m_WorkloadQueue.push_back(std::move(workload));
                }

                // Release the constant data in the layer.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second.first->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure.
        timelineUtils->Commit();
    }

    if (!networkProperties.m_AsyncEnabled)
    {
        // Set up memory for the intermediate tensors.
        m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();

        // Now that the intermediate tensor memory has been set up, do the
        // post-allocation configuration for each workload.
        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }
    else
    {
        AllocateAndExecuteConstantWorkloads();
    }
}
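// Editor's note: the constructor's tail diverges on m_AsyncEnabled. The synchronous path
// sizes intermediate buffers up front (AllocateDynamicBuffers) and lets each workload
// finalise itself via PostAllocationConfigure(); the asynchronous path instead
// pre-executes the constant workloads once (below), since per-inference working memory
// is created later through CreateWorkingMemHandle().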
void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
        }
    }
}
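// Editor's note: constants are evaluated once here rather than per inference; the
// handles stored in m_ConstantTensorHandles are wired directly into every
// WorkingMemDescriptor built by CreateWorkingMemHandle(), so concurrent executions share
// a single read-only copy of each constant tensor.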
void LoadedNetwork::SendNetworkStructure()
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add the layer to the post-optimisation network structure.
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput()/EnqueueOutput().
                break;
            }
        default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add the workload to the post-optimisation network structure.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure.
    timelineUtils->Commit();
}

profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}
TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(), layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.first.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}
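// Editor's note: ARMNN_ASSERT_MSG is armnn's assert macro, so the two checks above are
// debug-build guards; presumably in release builds an unsupported layer would instead
// surface later as a workload creation failure.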
namespace
{

// A pin owning the ITensorHandle wrapped around a user-supplied input or output buffer.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};
static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}
// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace
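// Editor's note: a minimal sketch of the caller-side containers WorkloadData consumes.
// InputTensors/OutputTensors are vectors of (LayerBindingId, tensor) pairs; the binding
// id 0 and the 'inputInfo'/'outputInfo' TensorInfo objects below are assumed placeholders:
//
//     std::vector<float> inData(inputInfo.GetNumElements());
//     std::vector<float> outData(outputInfo.GetNumElements());
//     InputTensors  inputs  = { { 0, ConstTensor(inputInfo, inData.data()) } };
//     OutputTensors outputs = { { 0, Tensor(outputInfo, outData.data()) } };
//
//     WorkloadData workloadData(inputs, outputs);
//     const TensorPin& pin = workloadData.GetInputTensorPin(0); // throws for unknown ids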
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk the graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive while executing the network.
    WorkloadData workloadData(inputTensors, outputTensors);

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          m_OptimizedNetwork->GetGuid(), inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        // ...
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Commit to send the inference timeline events.
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
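// Editor's note: a synchronous inference is therefore three queues executed in order by
// Execute() below (m_InputQueue, m_WorkloadQueue, m_OutputQueue). The input and output
// queues are rebuilt on every call, while m_WorkloadQueue is fixed at network load time.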
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for the input since we did not import.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
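// Editor's note: EnqueueInput has two outcomes. If import is enabled and the destination
// handle advertises a compatible import source, the user buffer is mapped and adopted
// directly (zero copy) and no workload is queued; otherwise a CopyMemGenericWorkload is
// queued to memcpy the data in. Note that a supported-but-failed import throws rather
// than silently falling back to the copy path.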
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the output tensor: only possible if the pointer is sufficiently
    // aligned, the tensor has zero padding, the slot's only connection is to an Output
    // layer, the pointer can be imported by the backend, and export is enabled.
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronisation workload in place of the copy.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here we did not export the memory, so add an output workload that performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
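// Editor's note: on the successful export path above no copy is queued; a
// SyncMemGenericWorkload is queued instead, so the backend synchronises the exported
// buffer at the position in the output queue where the copy would otherwise have run.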
void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // This unused parameter makes sure the function can only be called while holding a valid lock.
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
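// Editor's note: Acquire()/AquireMemory() and Release()/ReleaseMemory() bracket the
// lifetime of inter-layer working memory ("AquireMemory" is the registry's actual
// spelling). Allocation is lazy, happens on first execution, and persists across
// inferences thanks to the m_IsWorkingMemAllocated flag; FreeWorkingMemory() must be
// called explicitly to hand the memory back between inferences.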
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    // ... (armnn exception types are handled the same way)
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
void LoadedNetwork::CreateThreadPool(std::size_t numThreads)
{
    for (auto i = 0u; i < numThreads; ++i)
    {
        std::unique_ptr<IWorkingMemHandle> workingMemHandle = CreateWorkingMemHandle(m_NetworkId);
        m_Threads.emplace_back(
            std::make_unique<std::thread>(
                &LoadedNetwork::ProcessExecPriorities,
                this,
                std::move(workingMemHandle)
            )
        );
    }
}
void LoadedNetwork::TerminateThreadPool() noexcept
{
    {
        std::unique_lock<std::mutex> threadPoolLock(m_ThreadPoolMutex);
        m_TerminatePool = true;
    }

    m_ThreadPoolEvent.notify_all();

    for (auto& thread : m_Threads)
    {
        thread->join();
    }
}
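// Editor's note: shutdown follows the usual condition-variable protocol: set
// m_TerminatePool while holding the mutex, notify_all(), then join every thread. The
// wait predicate in ProcessExecPriorities() also wakes on non-empty queues, so workers
// drain any outstanding work and only exit once the terminate flag is set and all three
// queues are empty.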
void LoadedNetwork::Schedule(const InputTensors& inputTensors,
                             const OutputTensors& outputTensors,
                             const QosExecPriority priority,
                             std::shared_ptr<IAsyncExecutionCallback> cb)
{
    // Group the execution parameters so that they can be easily added to the queue.
    ExecutionTuple groupExecParams = std::make_tuple(inputTensors, outputTensors, cb);
    std::shared_ptr<ExecutionTuple> operation = std::make_shared<ExecutionTuple>(groupExecParams);

    // Add a message to the queue and notify the request thread.
    std::unique_lock<std::mutex> lock(m_ThreadPoolMutex);
    switch (priority)
    {
        case QosExecPriority::High:
            m_HighPriorityQueue.push(operation);
            break;
        case QosExecPriority::Low:
            m_LowPriorityQueue.push(operation);
            break;
        case QosExecPriority::Medium:
        default:
            m_MediumPriorityQueue.push(operation);
    }
    m_ThreadPoolEvent.notify_one();
}
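// Editor's note: a minimal caller sketch for Schedule(); 'loadedNetwork', 'inputs',
// 'outputs' and 'myCallback' are illustrative names, not part of this file:
//
//     std::shared_ptr<IAsyncExecutionCallback> myCallback = /* user-provided callback */;
//     loadedNetwork->Schedule(inputs, outputs, QosExecPriority::Medium, myCallback);
//     // Schedule() returns immediately; a pool thread created in CreateThreadPool()
//     // later pops the tuple, runs Execute(), and reports status and timings through
//     // myCallback->Notify().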
void LoadedNetwork::ProcessExecPriorities(std::unique_ptr<IWorkingMemHandle> workingMemHandle)
{
    int expireRate = EXPIRE_RATE;
    int highPriorityCount = 0;
    int mediumPriorityCount = 0;

    IWorkingMemHandle& workingMemHandleRef = *workingMemHandle.get();

    while (true)
    {
        std::shared_ptr<ExecutionTuple> currentExecInProgress(nullptr);
        {
            // Wait for a message to be added to the queue. This is in a separate scope
            // to minimise the lifetime of the lock.
            std::unique_lock<std::mutex> lock(m_ThreadPoolMutex);

            m_ThreadPoolEvent.wait(lock,
                                   [=]
                                   {
                                       return m_TerminatePool || !m_HighPriorityQueue.empty() ||
                                              !m_MediumPriorityQueue.empty() || !m_LowPriorityQueue.empty();
                                   });

            if (m_TerminatePool && m_HighPriorityQueue.empty() && m_MediumPriorityQueue.empty() &&
                m_LowPriorityQueue.empty())
            {
                break;
            }

            // Service the queues from high to low priority, but only until each band's
            // expire rate is reached.
            if (!m_HighPriorityQueue.empty() && highPriorityCount < expireRate)
            {
                currentExecInProgress = m_HighPriorityQueue.front();
                m_HighPriorityQueue.pop();
                highPriorityCount += 1;
            }
            else if (!m_MediumPriorityQueue.empty() && mediumPriorityCount < expireRate)
            {
                currentExecInProgress = m_MediumPriorityQueue.front();
                m_MediumPriorityQueue.pop();
                mediumPriorityCount += 1;
                // Reset the high priority count.
                highPriorityCount = 0;
            }
            else if (!m_LowPriorityQueue.empty())
            {
                currentExecInProgress = m_LowPriorityQueue.front();
                m_LowPriorityQueue.pop();
                // Reset the high and medium priority counts.
                highPriorityCount = 0;
                mediumPriorityCount = 0;
            }
            else
            {
                // All queues are empty: reset the counts and wait again.
                highPriorityCount = 0;
                mediumPriorityCount = 0;
                continue;
            }
        }

        // Invoke the asynchronous execution method.
        auto inputTensors  = std::get<0>(*currentExecInProgress);
        auto outputTensors = std::get<1>(*currentExecInProgress);
        auto cb            = std::get<2>(*currentExecInProgress);

        // Get the time at the start of inference.
        HighResolutionClock startTime = armnn::GetTimeNow();

        try // Execute the inference.
        {
            // Execute, then report the status and timings through the callback.
            Execute(inputTensors, outputTensors, workingMemHandleRef) == Status::Success ?
                cb->Notify(Status::Success, std::make_pair(startTime, armnn::GetTimeNow())) :
                cb->Notify(Status::Failure, std::make_pair(startTime, armnn::GetTimeNow()));
        }
        catch (const RuntimeException&)
        {
            cb->Notify(Status::Failure, std::make_pair(startTime, armnn::GetTimeNow()));
        }
    }
}
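// Editor's note: the expire rate caps how many consecutive high (or medium) priority
// items a worker may take before it must service the next band down, preventing a steady
// stream of high priority work from starving the lower queues. For example, with an
// expire rate of 3 a worker takes at most three high priority jobs in a row, then one
// medium priority job (which resets the high counter); taking a low priority job resets
// both counters.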
void LoadedNetwork::EnqueueInput(const BindableLayer& layer,
                                 const ConstTensor& inputTensor,
                                 WorkingMemHandle& context)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }
    LayerGuid id = layer.GetGuid();
    WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id);

    MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags();
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);
            if (descriptor.m_Outputs[0]->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        for (const auto& input : descriptor.m_Outputs)
        {
            CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc);
        }
    }
}
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }
    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    LayerGuid id = layer.GetGuid();
    WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id);

    ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0];
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the output tensor (see the preconditions listed in the synchronous
    // EnqueueOutput above).
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                std::unique_ptr<ITensorHandle> tensorHandle =
                    std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                              outputTensor.GetMemoryArea());

                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Synchronise the exported memory instead of copying it.
                    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric");
                    inputTensorHandle->Map(true);
                    inputTensorHandle->Unmap();
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
            }
        }
        else
        {
            throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
        }
    }
    else
    {
        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        CopyTensorContentsGeneric(inputTensorHandle, tensorHandle.get(), copyFunc);
    }
}
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        if (inputTensorPair.first == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        if (outputTensorPair.first == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}
Status LoadedNetwork::Execute(const InputTensors& inputTensors,
                              const OutputTensors& outputTensors,
                              IWorkingMemHandle& iWorkingMemHandle)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk the graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
        profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();

    bool executionSucceeded = true;

    if (timelineUtils)
    {
        // Add an inference timeline trace if profiling is enabled.
        // ...
        timelineUtils->Commit();
    }

    WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
    std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex());

    if (!workingMemHandle.IsAllocated())
    {
        workingMemHandle.Allocate();
    }

    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            EnqueueInput(*inputLayer, GetInputTensor(inputLayer->GetBindingId(), inputTensors), workingMemHandle);
        }
    }

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        executionSucceeded = false;
    };
    ProfilingDynamicGuid workloadInferenceID(0);

    try
    {
        for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
        {
            auto& workload = m_WorkloadQueue[i];
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                inferenceGuid);
            }
            workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));

            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    // ... (armnn exception types are handled the same way)
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            EnqueueOutput(*outputLayer, GetOutputTensor(outputLayer->GetBindingId(), outputTensors), workingMemHandle);
        }
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
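// Editor's note: thread safety in this overload comes from the WorkingMemHandle rather
// than the network: the handle's own mutex serialises use of its intermediate tensors,
// so inferences can run concurrently provided each uses a distinct WorkingMemHandle
// (as the thread pool does, one handle per worker).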
std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    std::unordered_map<LayerGuid, std::vector<std::unique_ptr<ITensorHandle>>> tensorHandleMap;
    std::vector<WorkingMemDescriptor> workingMemDescriptors;
    std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
    TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
    WorkloadFactoryMap workloadFactoryMap;

    std::vector<std::shared_ptr<IMemoryManager>> memoryManagers;

    for (auto const& backend : m_Backends)
    {
        if (backend.second->SupportsTensorAllocatorAPI())
        {
            backend.second->RegisterTensorHandleFactories(
                tensorHandleFactoryRegistry,
                static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            memoryManagers.emplace_back(tensorHandleFactoryRegistry.GetMemoryManagers().back());
        }
        else
        {
            std::shared_ptr<IMemoryManager> memoryManager = backend.second->CreateMemoryManager();
            auto workloadFactory = backend.second->CreateWorkloadFactory(
                memoryManager, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());

            workloadFactoryMap.emplace(
                std::make_pair(backend.first, std::make_pair(std::move(workloadFactory), memoryManager)));
            memoryManagers.emplace_back(memoryManager);
        }
    }

    auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot, bool isMemoryManaged)
    {
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
        {
            BackendId id = layer->GetBackendId();
            ARMNN_NO_DEPRECATE_WARN_BEGIN
            return workloadFactoryMap.at(id).first->CreateTensorHandle(tensorInfo, isMemoryManaged);
            ARMNN_NO_DEPRECATE_WARN_END
        }
        else
        {
            ITensorHandleFactory* handleFactory = tensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);
            return handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
        }
    };

    std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
    for (auto&& layer : order)
    {
        WorkingMemDescriptor workingMemDescriptor;

        // Constant layers execution and management is handled during loaded network construction.
        if (layer->GetType() == LayerType::Constant)
        {
            continue;
        }
        bool isMemoryManaged = true;
        bool isInputLayer = true;
        // Look for a layer with 1 OutputSlot which has 1 connection, where that connection
        // is to an Output layer. If export is enabled, disable memory management so we can export.
        if ((layer->GetNumOutputSlots() == 1) &&
            (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
            (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
        {
            isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
        }
        else if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
        {
            // Input layers/workloads will not be executed, so the descriptor is not added to
            // workingMemDescriptors; however we still need to manage the tensor handles.
            isInputLayer = false;
            isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
        }

        // Create a tensor handle for each output slot of a layer.
        // Once created, its lifetime starts being managed.
        for (auto& slot : layer->GetOutputSlots())
        {
            tensorHandleMap[layer->GetGuid()].emplace_back(GetTensorHandle(layer, slot, isMemoryManaged));
            ITensorHandle* tensorHandle = tensorHandleMap[layer->GetGuid()].back().get();

            workingMemDescriptor.m_Outputs.push_back(tensorHandle);
            tensorHandle->Manage();
            unsigned int numConnections = slot.GetNumConnections();
            ARMNN_ASSERT(numConnections != 0);

            handleReferenceCounts[tensorHandle] = numConnections;
        }

        // Loop through the input slots of the same layer and decrement the reference counter
        // associated with each tensor handle we encounter. Once it reaches zero, the tensor
        // handle's lifetime has ended, and its memory is marked as available for reuse by the
        // next tensor handle with a non-overlapping lifetime.
        for (auto& slot : layer->GetInputSlots())
        {
            ARMNN_ASSERT(slot.GetConnection());
            auto outputSlot = slot.GetConnectedOutputSlot();
            auto key = outputSlot->GetOwningLayer().GetGuid();

            // Constant layers execution and management is handled during loaded network construction.
            auto found = m_ConstantTensorHandles.find(key);
            if (found != m_ConstantTensorHandles.end())
            {
                workingMemDescriptor.m_Inputs.push_back(found->second);
                continue;
            }

            auto search = tensorHandleMap.find(key);
            unsigned int index = outputSlot->CalculateIndexOnOwner();
            ITensorHandle* inputTensorHandle = search->second[index].get();
            workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
            --handleReferenceCounts.at(inputTensorHandle);
            if (handleReferenceCounts.at(inputTensorHandle) == 0u)
            {
                // Stop managing the lifetime of this tensor handle.
                inputTensorHandle->Allocate();
                handleReferenceCounts.erase(inputTensorHandle);
            }
        }
        workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});

        // Input layers/workloads are not executed, so only add the descriptor for other layers.
        if (isInputLayer)
        {
            workingMemDescriptors.push_back(workingMemDescriptor);
        }
    }

    return std::make_unique<WorkingMemHandle>(networkId,
                                              workingMemDescriptors,
                                              workingMemDescriptorMap,
                                              memoryManagers,
                                              std::move(tensorHandleMap));
}
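// Editor's note: the reference counting above implements lifetime-based memory sharing
// within one WorkingMemHandle. Manage() marks the start of a tensor handle's lifetime
// with its factory's memory manager; each consuming input slot decrements the count, and
// on the last use Allocate() ends the managed lifetime so a later handle with a
// non-overlapping lifetime can reuse the same backing storage.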
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}

} // namespace armnn