ArmNN
 24.08
LoadedNetwork.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Profiling.hpp"
10 #include "HeapProfiling.hpp"
11 #include "WorkingMemHandle.hpp"
12 #include "ExecutionData.hpp"
13 
14 #include <armnn/BackendHelper.hpp>
16 #include <armnn/Logging.hpp>
17 
22 
24 
25 #include <armnn/utility/Assert.hpp>
26 
28 
29 #include <common/include/Processes.hpp>
30 
31 #include <fmt/format.h>
32 
33 namespace armnn
34 {
35 
36 using namespace std;
37 using namespace arm::pipe;
38 
39 namespace
40 {
41 
// Builds a human-readable message of the form "<prefix> <error.what()>".
// Works for any exception-like type exposing a what() member.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::ostringstream message;
    message << prefix << " " << error.what();
    return message.str();
}
49 
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
51  const Layer& layer,
52  ProfilingGuid networkGuid)
53 {
54  // Add layer to the post-optimisation network structure
55  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
56  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
57  networkGuid,
58  layerName,
59  LabelsAndEventClasses::LAYER_GUID);
60  for (auto&& input : layer.GetInputSlots())
61  {
62  const IOutputSlot* source = input.GetConnectedOutputSlot();
63  if (!source)
64  {
65  throw armnn::NullPointerException("Null source found on input to layer \"" + layerName + "\".");
66  }
67  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
68  source->GetOwningLayerGuid(),
69  layer.GetGuid());
70  }
71 }
72 
73 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
74  std::unique_ptr<IWorkload>& workload,
75  const Layer& layer)
76 {
77  // Add workload to the post-optimisation network structure
78  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
79  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
80  layer.GetBackendId().Get(),
81  LabelsAndEventClasses::BACKENDID_GUID);
82 
83  // Link the workload to the layer
84  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
85  layer.GetGuid(),
86  workload->GetGuid(),
87  LabelsAndEventClasses::CHILD_GUID);
88 }
89 
90 } // anonymous
91 
92 /**
93  * This function performs a sanity check to ensure that the combination of input and output memory source matches the
94  * values for importEnabled and exportEnabled that were specified during optimization. During optimization the tensor
95  * handle factories are chosen based on whether import and export are enabled. If the user then specifies something
96  * incompatible here it can lead to problems.
97  *
98  * @param optimizedOptions
99  * @param networkProperties
100  */
101 void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
102  const INetworkProperties& networkProperties)
103 {
104  // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
105  // added as backend options.
106  const vector<BackendOptions>::iterator& backendItr =
107  find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
108  if (backend.GetBackendId().Get() == "Global")
109  {
110  return true;
111  }
112  else
113  {
114  return false;
115  }
116  });
117  bool importEnabled = false;
118  bool exportEnabled = false;
119  if (backendItr != optimizedOptions.end())
120  {
121  // Find the importEnabled and exportEnabled values.
122  for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
123  {
124  const BackendOptions::BackendOption& option = backendItr->GetOption(i);
125  if (option.GetName() == "ImportEnabled")
126  {
127  importEnabled = option.GetValue().AsBool();
128  }
129  if (option.GetName() == "ExportEnabled")
130  {
131  exportEnabled = option.GetValue().AsBool();
132  }
133  }
134  }
135 
136  // Now that we have values for import and export compare them to the MemorySource variables.
137  // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
138  if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
139  (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
140  {
141  auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
142  if (!importEnabled)
143  {
144  message.append(" requires that memory import be enabled. However, "
145  "it was disabled when this network was optimized.");
146  }
147  else
148  {
149  message.append(" requires that memory import be disabled. However, "
150  "it was enabled when this network was optimized.");
151  }
152  throw InvalidArgumentException(message);
153  }
154 
155  if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
156  (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
157  {
158  auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
159  if (!exportEnabled)
160  {
161  message.append(" requires that memory export be enabled. However, "
162  "it was disabled when this network was optimized.");
163  }
164  else
165  {
166  message.append(" requires that memory export be disabled. However, "
167  "it was enabled when this network was optimized.");
168  }
169  throw InvalidArgumentException(message);
170  }
171 } // anonymous
172 
173 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
174  std::string& errorMessage,
175  const INetworkProperties& networkProperties,
176  arm::pipe::IProfilingService* profilingService)
177 {
178  std::unique_ptr<LoadedNetwork> loadedNetwork;
179 
180  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
181  {
182  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
183  ARMNN_LOG(error) << errorMessage;
184 
185  return std::unique_ptr<LoadedNetwork>();
186  };
187 
188  try
189  {
190  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
191  }
192  catch (const armnn::RuntimeException& error)
193  {
194  return Fail(error);
195  }
196  catch (const armnn::Exception& error)
197  {
198  return Fail(error);
199  }
200  catch (const std::runtime_error& error)
201  {
202  return Fail(error);
203  }
204 
205  return loadedNetwork;
206 }
207 
// Constructor: builds everything needed to execute the optimized network —
// backends, workload factories, tensor handles, workloads, memory profiles —
// and pre-runs constant-layer workloads.
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    // NOTE(review): this listing appears to omit one or two lines here (doc lines 216/219,
    // presumably the profiler registration call) — confirm against the original file.

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // We need to check that the memory sources match up with the values of import and export specified during the
    // optimize phase. If they don't this will throw an exception.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    //First create tensor handlers, backends and workload factories.
    //Handlers are created before workloads are.
    //Because workload creation can modify some of the handlers,
    //(for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure Topological order
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

    // Synchronous path: track, per input/output, whether its tensor handle has been
    // replaced with a pre-imported handle (used later in EnqueueWorkload).
    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

    // Create one backend instance and one workload factory per distinct backend id in the graph.
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // If we're doing async execution verify that the backend supports it and ExternallyManagedMemory.
            if (networkProperties.m_AsyncEnabled)
            {
                if (!HasMatchingCapability(BackendOptions::BackendOption{"AsyncExecution", true},
                                           backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support AsyncExecution";
                    throw BackendCapabilityException(er);
                }
                if (!HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                           backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support ExternallyManagedMemory\n";
                    er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                    throw BackendCapabilityException(er);
                }
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            // NOTE(review): the declaration of 'workloadFactory' (doc line 285) is missing
            // from this listing — confirm against the original file.
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                // Backend manages memory itself: keep its memory manager alive for the
                // lifetime of this LoadedNetwork.
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

    // Synchronous path: create the tensor handles for every layer up front.
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                // NOTE(review): this listing appears to omit a case label here (doc line 314) — confirm.
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager &&
                                               (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
                    break;
                }
                case LayerType::Constant:
                {
                    // Constant outputs are always created memory-managed.
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
                    // If Export is enabled disable memory management so we can export, otherwise we do a copy
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager &&
                                                   (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

    // If profiling is active, emit the network entity, a start-of-life event and the process id.
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Constant workloads kept in the main queue that must execute before the first inference.
    std::vector<IWorkload*> ConstWorkloads;

    //Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer: order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                                        ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                       layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));

                        if (layer->GetType() == LayerType::Constant)
                        {
                            // Place the Constant Workloads into a queue so that they can be executed first
                            ConstWorkloads.push_back(m_WorkloadQueue.back().get());
                        }
                    }
                    // release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer: order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
            {
                // Workloads follow the input layers in topological order; offset by the input count
                // to map a graph position onto a m_WorkloadQueue index.
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                // Avoid if input is connected directly to an output
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                    supportsReplacement &= workload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            // A null handle marks an input that cannot accept a pre-imported tensor.
            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer: order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            // Avoid if output is connected directly to an input
            if (outputSlot->GetOwningLayer().GetType() != LayerType::Input)
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                              outputSlot->CalculateIndexOnOwner()};

                bool supportsReplacement = true;
                auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

                // Every other consumer of the same output slot must also support replacement.
                for (auto &inputSlot: outputSlot->GetConnections())
                {
                    if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                    {
                        auto inWorkloadIndex = std::distance(order.begin(),
                                                             order.GetPosInGraph(inputSlot->GetOwningLayer()));
                        inWorkloadIndex -= noOfInputs;
                        indices.m_InputSlotIndices.emplace_back(
                            WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
                                            inputSlot->GetSlotIndex()});
                        auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                        supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                    }
                }

                ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
                // Get matching import factory Id
                ITensorHandleFactory::FactoryId importFactoryId =
                    m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
                ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

                if (supportsReplacement && importFactory)
                {
                    m_PreImportedOutputHandles.emplace_back(
                        bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
                }
                else
                {
                    m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
                }
            }
        }
    }

    // Give each factory a chance to finalise now that all workloads have been created.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        // Run each backend's registered memory optimizer strategy over its memory blocks,
        // falling back to the constant strategy where none is registered.
        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory, so it's order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set-up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto &workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
    // If synchronous, execute all constant layer workloads
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto workload: ConstWorkloads)
        {
            workload->Execute();
        }
    }
}
627 
628 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
629 {
630  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
631  for (auto& pair : m_ConstantWorkloads)
632  {
633  auto tensorHandle = m_ConstantTensorHandles[pair.first];
634  tensorHandle->Allocate();
635  pair.second->Execute();
636  }
637 }
638 
639 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
640 {
641  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
642  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
643  for (auto&& layer : order)
644  {
645  if (layer->GetType() == LayerType::Constant)
646  {
647  const auto& outSlot = layer->GetOutputSlots()[0];
648  const auto factoryId = outSlot.GetTensorHandleFactoryId();
649  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
650  {
651  throw armnn::Exception("factoryId must not be of type \"Legacy\".");
652  }
653  auto& workloadFactory = GetWorkloadFactory(*layer);
654 
655  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
656  ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
657 
658  m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
659  tensorHandle->Allocate();
660 
661  auto& backend = m_Backends.at(layer->GetBackendId());
662 
663  WorkingMemDescriptor memDesc;
664  memDesc.m_Outputs.push_back(tensorHandle);
665 
666  ExecutionData executionData = backend->CreateExecutionData(memDesc);
667  m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
668  }
669  }
670 }
671 
// Re-sends the post-optimisation network structure (layers, connections,
// workloads) to the given profiling service and commits it.
void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                // NOTE(review): this links EVERY workload in m_WorkloadQueue to each non-I/O
                // layer, not just the layer's own workload — confirm this is intended.
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}
709 
// Returns the profiling GUID of the wrapped optimized network.
// NOTE(review): the function signature line is missing from this listing (doc line 710) —
// confirm against the original file.
{
    return m_OptimizedNetwork->GetGuid();
}
714 
// Returns the TensorInfo of the input layer whose binding id equals 'layerId'.
// Throws GraphValidationException for malformed input layers and
// InvalidArgumentException when no input layer carries the id.
// NOTE(review): the function signature line is missing from this listing (doc line 715) —
// confirm against the original file.
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        // Every input layer must expose exactly one output slot.
        if (inputLayer->GetNumOutputSlots() != 1)
        {
            throw armnn::GraphValidationException("Input layer should have exactly 1 output slot");
        }

        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}
732 
// Returns the TensorInfo of the output layer whose binding id equals 'layerId'.
// Throws GraphValidationException for malformed output layers and
// InvalidArgumentException when no output layer carries the id.
// NOTE(review): the function signature line is missing from this listing (doc line 733) —
// confirm against the original file.
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        // Every output layer must have exactly one, connected, input slot.
        if (outputLayer->GetNumInputSlots() != 1)
        {
            throw armnn::GraphValidationException("Output layer should have exactly 1 input slot");
        }

        if (!outputLayer->GetInputSlot(0).GetConnection())
        {
            throw armnn::GraphValidationException("Input slot on Output layer must be connected");
        }

        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
755 
756 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
757 {
758  const IWorkloadFactory* workloadFactory = nullptr;
759 
760  auto it = m_WorkloadFactories.find(layer.GetBackendId());
761  if (it == m_WorkloadFactories.end())
762  {
763  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
764  layer.GetBackendId().Get(),
765  layer.GetNameStr()),
766  CHECK_LOCATION());
767  }
768 
769  workloadFactory = it->second.get();
770 
771  if (!workloadFactory)
772  {
773  throw armnn::NullPointerException("No workload factory");
774  }
775 
776  return *workloadFactory;
777 }
778 
779 namespace {
780 
781 // Non-copyable class owning accelerator-specific tensor data.
782 class TensorPin
783 {
784 public:
785  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
786  : m_TensorHandle(std::move(handle))
787  , m_TensorInfo(info)
788  , m_Id(id)
789  {
790  }
791 
792  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
793  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
794  LayerBindingId GetBindingId() const { return m_Id; }
795 
796 private:
797  std::unique_ptr<ITensorHandle> m_TensorHandle;
798  TensorInfo m_TensorInfo;
799  LayerBindingId m_Id;
800 };
801 
802 static const TensorPin& GetTensorPin(LayerBindingId id,
803  const std::vector<TensorPin>& pins,
804  char const* bindingPointDesc)
805 {
806  auto it = std::find_if(pins.begin(), pins.end(),
807  [id](const TensorPin& pin)
808  {
809  return pin.GetBindingId() == id;
810  });
811 
812  if (it != pins.end())
813  {
814  return *it;
815  }
816  else
817  {
818  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
819  }
820 }
821 
822 // Stores data that needs to be kept accessible for the entire execution of a workload.
823 class WorkloadData
824 {
825 public:
826  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
827  {
828  m_InputTensorPins.reserve(inputTensors.size());
829  m_OutputTensorPins.reserve(outputTensors.size());
830 
831  for (auto inputTensorPair : inputTensors)
832  {
833  auto inputTensor = inputTensorPair.second;
834 
835  std::unique_ptr<ITensorHandle> tensorHandle =
836  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
837  LayerBindingId layerId = inputTensorPair.first;
838 
839  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
840  }
841 
842  for (auto outputTensorPair : outputTensors)
843  {
844  auto outputTensor = outputTensorPair.second;
845 
846  std::unique_ptr<ITensorHandle> tensorHandle =
847  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
848  LayerBindingId layerId = outputTensorPair.first;
849 
850  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
851  }
852  }
853 
854  const TensorPin& GetInputTensorPin(LayerBindingId id) const
855  {
856  return GetTensorPin(id, m_InputTensorPins, "input");
857  }
858 
859  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
860  {
861  return GetTensorPin(id, m_OutputTensorPins, "output");
862  }
863 
864 private:
865 
866  std::vector<TensorPin> m_InputTensorPins;
867  std::vector<TensorPin> m_OutputTensorPins;
868 };
869 
870 }
871 
873  const OutputTensors& outputTensors,
874  std::vector<ImportedInputId> preImportedInputIds,
875  std::vector<ImportedOutputId> preImportedOutputIds)
876 {
877  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
878 
879  // Walk graph to determine the order of execution.
880  if (graph.GetNumLayers() < 2)
881  {
882  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
883  return Status::Failure;
884  }
885 
886  // Data that must be kept alive for the entire execution of the workload.
887  WorkloadData workloadData(inputTensors, outputTensors);
888 
889  // Input tensors can be provided as parameters or pre imported. Either way the number of
890  // tensors should match the number of inputs.
891  if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
892  {
893  throw InvalidArgumentException("Number of inputs provided does not match network.");
894  }
895 
896  // For each input to the network, call EnqueueInput with the data passed by the user.
897  {
899  m_InputQueue.clear();
900  m_InputQueue.reserve(graph.GetNumInputs());
901 
902  unsigned int inputIndex = 0;
903  unsigned int importedInputIdIndex = 0;
904  std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
905  for (const BindableLayer* inputLayer : graph.GetInputLayers())
906  {
907  if (importedInputIdIndex < preImportedInputIds.size() &&
908  inputIndex == preImportedInputIds[importedInputIdIndex])
909  {
910  // Only replace tensorhandles if they have not already been replaced
911  if (!m_IsInputImported[inputIndex])
912  {
913  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
914 
915  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
916  {
917  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
918  workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
919  }
920  m_IsInputImported[inputIndex] = true;
921  }
922  importedInputIdIndex++;
923  }
924  else
925  {
926  if (m_IsInputImported[inputIndex])
927  {
928  OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));
929 
930  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
931  {
932  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
933  workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
934  }
935 
936  m_IsInputImported[inputIndex] = false;
937  }
938 
939  // InputTensorHandle is not imported yet, process to enqueue input
940  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
941  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
942  }
943  inputIndex++;
944  }
945  }
946  // For each output to the network, call EnqueueOutput with the data passed by the user.
947  {
949  m_OutputQueue.clear();
950  m_OutputQueue.reserve(graph.GetNumOutputs());
951 
952  if (preImportedOutputIds.size() > graph.GetNumOutputs())
953  {
954  throw InvalidArgumentException("Invalid number of preImportedOutputIds");
955  }
956 
957  unsigned int outputIndex = 0;
958  unsigned int importedOutputIdIndex = 0;
959  std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
960  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
961  {
962  if (importedOutputIdIndex < preImportedOutputIds.size() &&
963  outputIndex == preImportedOutputIds[importedOutputIdIndex])
964  {
965  // Only replace tensorhandles if they have not already been replaced
966  ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
967 
968  if (!m_IsOutputImported[outputIndex])
969  {
970  const auto bindingId = outputLayer->GetBindingId();
971  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
972 
973  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
974 
975  outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
976  indices.m_OutputSlotIndices.m_SlotIndex);
977 
978  for (const auto& workloadInfo: indices.m_InputSlotIndices)
979  {
980  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
981  inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
982  }
983  m_IsOutputImported[outputIndex] = true;
984  }
985 
986  if (!inputTensorHandle)
987  {
988  throw armnn::NullPointerException("Data should have been allocated.");
989  }
990 
991  MemSyncQueueDescriptor syncDesc;
992  syncDesc.m_Inputs.push_back(inputTensorHandle);
994  info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());
995 
996  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
997  if (!syncWorkload)
998  {
999  throw armnn::NullPointerException("No sync workload created");
1000  }
1001 
1002  m_OutputQueue.push_back(std::move(syncWorkload));
1003  importedOutputIdIndex++;
1004  }
1005  else
1006  {
1007  if (m_IsOutputImported[outputIndex])
1008  {
1009  const auto bindingId = outputLayer->GetBindingId();
1010  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
1011 
1012  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
1013  const OutputHandler& outputHandler =
1014  outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
1015 
1016  outputWorkload->ReplaceOutputTensorHandle(
1017  outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
1018 
1019  for (const auto& workloadInfo: indices.m_InputSlotIndices)
1020  {
1021  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
1022  inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
1023  }
1024  m_IsOutputImported[outputIndex] = false;
1025  }
1026 
1027  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
1028  // OutputTensorHandle is not imported yet, process to enqueue Output
1029  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
1030  }
1031  outputIndex++;
1032  }
1033  }
1034 
1035  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1036  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1037  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1038  if (timelineUtils)
1039  {
1040  // Add inference timeline trace if profiling is enabled.
1041  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1042  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1043  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1044  networkGuid,
1045  inferenceGuid,
1046  LabelsAndEventClasses::EXECUTION_OF_GUID);
1047  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1048  }
1049 
1050  bool executionSucceeded = true;
1051 
1052  {
1053  if (m_ProfilingService->IsProfilingEnabled())
1054  {
1055  m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1056  }
1058  ARMNN_SCOPED_HEAP_PROFILING("Executing");
1059  executionSucceeded = Execute(timelineUtils, inferenceGuid);
1060  }
1061 
1062  if (timelineUtils)
1063  {
1064  // Add end of life of the inference timeline if profiling is enabled.
1065  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1066  timelineUtils->Commit();
1067  }
1068 
1069  return executionSucceeded ? Status::Success : Status::Failure;
1070 }
1071 
1072 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1073 {
1074  if (layer.GetType() != LayerType::Input)
1075  {
1076  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
1077  }
1078 
1079  if (tensorHandle == nullptr)
1080  {
1081  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
1082  }
1083 
1084  InputQueueDescriptor inputQueueDescriptor;
1085  WorkloadInfo info;
1086 
1087  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1088  info.m_InputTensorInfos.push_back(tensorInfo);
1089 
1090  if (layer.GetNumOutputSlots() != 1)
1091  {
1092  throw armnn::GraphValidationException("Can only handle Input Layer with one output");
1093  }
1094 
1095  const OutputHandler& handler = layer.GetOutputHandler();
1096  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1097  ITensorHandle* outputTensorHandle = handler.GetData();
1098 
1099  if (!outputTensorHandle)
1100  {
1101  throw armnn::NullPointerException("Data should have been allocated.");
1102  }
1103 
1104  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1105  info.m_OutputTensorInfos.push_back(outputTensorInfo);
1106 
1107  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1108  bool needMemCopy = true;
1109  if ((m_NetworkProperties.m_InputSource != MemorySource::Undefined)) // Try import the input tensor
1110  {
1111  if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1112  {
1113  needMemCopy = false;
1114  // This assumes a CPU Tensor handle
1115  void* mem = tensorHandle->Map(false);
1116  if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1117  {
1118  tensorHandle->Unmap();
1119  return; // No need for a workload since the import has been done.
1120  }
1121  tensorHandle->Unmap();
1122  throw MemoryImportException("EnqueueInput: Memory Import failed");
1123  }
1124  }
1125  if (needMemCopy)
1126  {
1127  // Create a mem copy workload for input since we did not import
1128  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1129 
1130  if (!inputWorkload)
1131  {
1132  throw armnn::NullPointerException("No input workload created");
1133  }
1134 
1135  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1136  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1137  if (timelineUtils)
1138  {
1139  // Add Input Workload to the post-optimisation network structure
1140  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1141  timelineUtils->Commit();
1142  }
1143 
1144  m_InputQueue.push_back(std::move(inputWorkload));
1145  }
1146 }
1147 
1148 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1149 {
1150  if (layer.GetType() != LayerType::Output)
1151  {
1152  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
1153  }
1154 
1155  if (tensorHandle == nullptr)
1156  {
1157  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
1158  }
1159 
1160  OutputQueueDescriptor outputQueueDescriptor;
1161  WorkloadInfo info;
1162 
1163  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1164  info.m_OutputTensorInfos.push_back(tensorInfo);
1165 
1166  if (layer.GetNumInputSlots() != 1)
1167  {
1168  throw armnn::GraphValidationException("Output Layer should have exactly one input.");
1169  }
1170 
1171  // Gets the output handler from the previous node.
1172  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1173 
1174  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1175  ITensorHandle* inputTensorHandle = outputHandler.GetData();
1176  if (!inputTensorHandle)
1177  {
1178  throw armnn::NullPointerException("Data should have been allocated.");
1179  }
1180 
1181  // Try import the output tensor.
1182  // Note: We can only import the output pointer if all of the following hold true:
1183  // a) The imported pointer is aligned sufficiently
1184  // b) The tensor has zero padding
1185  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1186  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1187  // e) m_NetworkProperties.m_OutputSource != MemorySource::Undefined
1188  bool needMemCopy = true;
1189  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
1190  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1191  {
1192  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
1193  {
1194  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1195  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1196  {
1197  needMemCopy = false;
1198  void *mem = tensorHandle->Map(false);
1199  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1200  tensorHandle->Unmap();
1201 
1202  if (importOk)
1203  {
1204  // Insert synchronization workload
1205  MemSyncQueueDescriptor syncDesc;
1206  syncDesc.m_Inputs.push_back(inputTensorHandle);
1207  info.m_InputTensorInfos.push_back(inputTensorInfo);
1208  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1209  if (!syncWorkload)
1210  {
1211  throw armnn::NullPointerException("No sync workload created");
1212  }
1213  m_OutputQueue.push_back(std::move(syncWorkload));
1214  }
1215  else
1216  {
1217  throw MemoryExportException("EnqueueOutput: Memory Export failed");
1218  }
1219  }
1220  }
1221  }
1222  if (needMemCopy)
1223  {
1224  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
1225  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1226  info.m_InputTensorInfos.push_back(inputTensorInfo);
1227 
1228  std::unique_ptr<IWorkload> outputWorkload =
1229  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1230  if (!outputWorkload)
1231  {
1232  throw armnn::NullPointerException("No output workload created");
1233  }
1234 
1235  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1236  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1237  if (timelineUtils)
1238  {
1239  // Add Output Workload to the post-optimisation network structure
1240  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1241  timelineUtils->Commit();
1242  }
1243 
1244  m_OutputQueue.push_back(std::move(outputWorkload));
1245  }
1246 }
1247 
1248 void LoadedNetwork::AllocateWorkingMemory(
1249 #if !defined(ARMNN_DISABLE_THREADS)
1250  std::lock_guard<std::mutex>& lock
1251 #endif
1252  )
1253 {
1254  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
1255 
1256 #if !defined(ARMNN_DISABLE_THREADS)
1257  // this unused parameter makes sure we can only call this function with a valid lock
1258  IgnoreUnused(lock);
1259 #endif
1260  if (m_IsWorkingMemAllocated)
1261  {
1262  return;
1263  }
1264 
1265  if (m_ExternalMemoryManager)
1266  {
1267  m_ExternalMemoryManager->Allocate();
1268 
1269  for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1270  {
1271  m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1272  }
1273  }
1274 
1275  for (auto&& memoryManager : m_BackendMemoryMangers)
1276  {
1277  if (memoryManager)
1278  {
1279  memoryManager->Acquire();
1280  }
1281  }
1282  m_TensorHandleFactoryRegistry.AquireMemory();
1283  m_IsWorkingMemAllocated = true;
1284 }
1285 
{
    // Releases all working (intermediate-tensor) memory acquired by
    // AllocateWorkingMemory. Safe to call repeatedly: returns early when working
    // memory is not currently allocated.
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    // Release the externally-managed pool first, mirroring AllocateWorkingMemory.
    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in its respective memory group
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
1313 
1314 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
1315  ProfilingGuid inferenceGuid)
1316 {
1317  bool success = true;
1318 
1319  auto Fail = [&](const std::exception& error)
1320  {
1321  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1322  success = false;
1323  };
1324 
1325  try
1326  {
1327 #if !defined(ARMNN_DISABLE_THREADS)
1328  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1329  AllocateWorkingMemory(lockGuard);
1330 #else
1331  AllocateWorkingMemory();
1332 #endif
1333 
1334  ProfilingDynamicGuid workloadInferenceID(0);
1335  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
1336  {
1337  for (auto& workload : queue)
1338  {
1339  if(timelineUtils)
1340  {
1341  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1342  inferenceGuid);
1343  }
1344  workload->Execute();
1345  if(timelineUtils)
1346  {
1347  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1348  }
1349  }
1350  };
1351 
1352  ExecuteQueue(m_InputQueue);
1353  ExecuteQueue(m_WorkloadQueue);
1354  ExecuteQueue(m_OutputQueue);
1355  }
1356  catch (const RuntimeException& error)
1357  {
1358  Fail(error);
1359  }
1360  catch (const std::runtime_error& error)
1361  {
1362  Fail(error);
1363  }
1364 
1365  return success;
1366 }
1367 
void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    // Hands the user's input data to the backend tensor handle: zero-copy import
    // when the network was configured with an input MemorySource, otherwise a
    // plain memcopy. Throws MemoryImportException if import was requested but is
    // unsupported by the backend or fails at runtime.
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
        {
            // Wrap the user's (const) buffer so it can be mapped like any tensor handle.
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                // Import succeeded: the backend now reads the user's memory directly.
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // No import requested: copy the user's buffer into the backend tensor.
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}
1407 
1408 // Note: We can only import the output pointer if all of the following hold true:
1409 // a) The imported pointer is aligned sufficiently
1410 // b) The tensor has zero padding
1411 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1412 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1413 // e) m_IsExportEnabled must be set to true
1414 void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1415 {
1416  if (!outputTensorHandle)
1417  {
1418  throw armnn::NullPointerException("Data should have been allocated.");
1419  }
1420 
1421  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1422  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1423  {
1424  std::unique_ptr<ITensorHandle> tensorHandle =
1425  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1426  outputTensor.GetMemoryArea());
1427 
1428  void* mem = tensorHandle->Map(false);
1429  bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1430  tensorHandle->Unmap();
1431 
1432  if (!importOk)
1433  {
1434  throw MemoryExportException("ImportOutputTensor: Memory Export failed");
1435  }
1436  }
1437  else
1438  {
1439  throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1440  }
1441 
1442 }
1443 
void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    // Copies the contents of the backend-owned output tensor handle into the
    // user-supplied output buffer (the fallback when export was not possible).
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    // Wrap the user's buffer in a non-owning passthrough handle so the generic
    // copy helper can treat it like any other tensor handle.
    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}
1458 
1459 
1460 const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
1461 {
1462  for (auto inputTensorPair : inputTensors)
1463  {
1464  LayerBindingId id = inputTensorPair.first;
1465  if (id == layerId)
1466  {
1467  return inputTensorPair.second;
1468  }
1469  }
1470  throw InvalidArgumentException("Input does not exist.");
1471 }
1472 
1473 const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
1474 {
1475  for (auto outputTensorPair : outputTensors)
1476  {
1477  LayerBindingId id = outputTensorPair.first;
1478  if (id == layerId)
1479  {
1480  return outputTensorPair.second;
1481  }
1482  }
1483  throw InvalidArgumentException("Output does not exist.");
1484 }
1485 
std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    // Pre-imports user input buffers into backend tensor handles so later
    // executions can skip the input copy. Two modes:
    //  - async disabled: best-effort force-import into the existing
    //    m_PreImportedInputHandles slots; returns the indices that imported.
    //  - async enabled: creates a fresh imported handle per supplied tensor and
    //    returns newly allocated ImportedInputIds.
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        // The number of pre imported tensors should not exceed the number of inputs.
        if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            // Slot was never created (or already cleared): nothing to import into.
            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            // Find the user tensor bound to this input layer, if any was supplied.
            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            try
            {
                // Best effort: only record the index if the import actually succeeds.
                if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                    && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
                {
                    importedInputs.push_back(inputIndex);
                }
                passThroughTensorHandle->Unmap();
            }
            catch(const MemoryImportException& exception)
            {
                // Import failure is not fatal here: log and fall back to copying later.
                ARMNN_LOG(error) << "An error occurred attempting to import input_"
                                 << inputIndex << " : " << exception.what();
                passThroughTensorHandle->Unmap();
            }
            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // Import when the import of network properties is enabled
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            // Resolve the input layer carrying this binding id.
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            // The layer's backend must support pre-imported IO tensors.
            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
                                       backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            // factoryId selects the tensor-handle factory for this slot
            // (presumably derived from the output slot — declaration not visible
            // in this view; TODO confirm).
            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            if (!handleFactory)
            {
                throw armnn::NullPointerException("handleFactory must not be null.");
            }

            // Create a dedicated (unmanaged) handle to hold the imported memory.
            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
            {
                // NOTE(review): the check uses forceImportMemorySource but the message
                // reports m_NetworkProperties.m_InputSource — confirm which is intended.
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}"
                                , factoryId, m_NetworkProperties.m_InputSource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
            {
                // Hand back a new id; it indexes m_PreImportedInputHandles below.
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}
1631 
std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    // Pre-imports user output buffers into backend tensor handles so later
    // executions can write results directly into user memory. Two modes, mirroring
    // ImportInputs:
    //  - async disabled: best-effort force-import into the existing
    //    m_PreImportedOutputHandles slots; returns the indices that imported.
    //  - async enabled: creates a fresh imported handle per supplied tensor and
    //    returns newly allocated ImportedOutputIds.
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        // If forceImportMemorySource is defined, try import if memory is aligned
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }
        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
            // Slot was never created (or already cleared): nothing to import into.
            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            // Find the user tensor bound to this output layer, if any was supplied.
            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            try
            {
                // Check if the output memory can be imported
                if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                    && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
                {
                    importedOutputs.push_back(outputIndex);
                }
            }
            catch(const MemoryImportException& exception)
            {
                // Import failure is not fatal here: log and fall back to copying later.
                ARMNN_LOG(error) << "An error occurred attempting to import output_"
                                 << outputIndex << " : " << exception.what();
            }
            outputIndex++;
        }
        return importedOutputs;
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (const auto& outputTensor : outputTensors)
    {
        // Resolve the output layer carrying this binding id.
        auto layerBindingId = outputTensor.first;
        auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
        {
            return layer->GetBindingId() == layerBindingId;
        });

        if (it == graph.GetOutputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        if (layer->GetType() != LayerType::Output)
        {
            throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
        }

        // The layer's backend must support pre-imported IO tensors.
        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
                                   backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const InputSlot& inputSlot = layer->GetInputSlots()[0];
        const TensorInfo& tensorInfo = inputSlot.GetTensorInfo();

        // factoryId selects the tensor-handle factory for this slot
        // (presumably derived from the connected output slot — declaration not
        // visible in this view; TODO confirm).
        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        if (!handleFactory)
        {
            throw armnn::NullPointerException("handleFactory must not be null.");
        }

        // Create a dedicated (unmanaged) handle to hold the imported memory.
        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};

        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
        {
            // NOTE(review): message says "ImportInputs" but this is ImportOutputs —
            // looks like a copy/paste slip in the error text; confirm before changing.
            throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}"
                                                    , factoryId, forceImportMemorySource));
        }

        if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
        {
            // Hand back a new id; it indexes m_PreImportedOutputHandles below.
            importedOutputs.push_back(m_CurImportedOutputId++);
        }
        else
        {
            // NOTE(review): message says "ImportInputs" but this is ImportOutputs —
            // confirm before changing.
            throw MemoryImportException("ImportInputs: Memory Import failed");
        }

        m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedOutputs;
}
1760 
1761 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1762 {
1763  for (auto id : inputIds)
1764  {
1765  if (id > m_PreImportedInputHandles.size())
1766  {
1767  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1768  }
1769 
1770  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1771  if (!importedTensorHandle)
1772  {
1774  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1775  }
1776  // Call Unimport then destroy the tensorHandle
1777  importedTensorHandle->Unimport();
1778  importedTensorHandle = {};
1779  }
1780 }
1781 
1782 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1783 {
1784  for (auto id : outputIds)
1785  {
1786  if (id > m_PreImportedOutputHandles.size())
1787  {
1788  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1789  }
1790 
1791  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1792  if (!importedTensorHandle)
1793  {
1795  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1796  }
1797  // Call Unimport then destroy the tensorHandle
1798  importedTensorHandle->Unimport();
1799  importedTensorHandle = {};
1800  }
1801 }
1802 
1804  const OutputTensors& outputTensors,
1805  IWorkingMemHandle& iWorkingMemHandle,
1806  std::vector<ImportedInputId> preImportedInputs,
1807  std::vector<ImportedOutputId> preImportedOutputs)
1808 {
1809  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1810 
1811  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1812  {
1813  if (preImportedInputs.empty())
1814  {
1815  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1816  }
1817  else
1818  {
1819  throw InvalidArgumentException("LoadedNetwork::Execute: "
1820  "Number of inputs + preImportedInputs provided does not match network.");
1821  }
1822  }
1823 
1824  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1825  {
1826  if (preImportedOutputs.empty())
1827  {
1828  throw InvalidArgumentException("LoadedNetwork::Execute: "
1829  "Number of outputs provided does not match network.");
1830  }
1831  else
1832  {
1833  throw InvalidArgumentException("LoadedNetwork::Execute: "
1834  "Number of outputs + preImportedOutputs provided does not match network.");
1835  }
1836  }
1837 
1838  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1839  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1840  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1841  unsigned int index = 0;
1842  for (auto pair : inputTensors)
1843  {
1844  bindingIds[index++] = pair.first;
1845  }
1846  for (ImportedInputId id : preImportedInputs)
1847  {
1848  bindingIds[index++] = ValidateImportedInputID(id);
1849  }
1850  for (auto pair : outputTensors)
1851  {
1852  bindingIds[index++] = pair.first;
1853  }
1854  for (ImportedOutputId id : preImportedOutputs)
1855  {
1856  bindingIds[index++] = ValidateImportedOutputID(id);
1857  }
1858 
1859  workingMemHandle.ValidateBindingIds();
1860 
1861  auto resetMemHandle = [&]()
1862  {
1863  for (ImportedInputId id: preImportedInputs)
1864  {
1865  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1866 
1867  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1868  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1869  for (auto it : inputConnections)
1870  {
1871  *it = inputHandle;
1872  }
1873  }
1874 
1875  for (ImportedOutputId id: preImportedOutputs)
1876  {
1877  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1878 
1879  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1880  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1881 
1882  for (auto it : outputConnections)
1883  {
1884  *it = outputHandle;
1885  }
1886  }
1887  };
1888 
1889  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1890  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1891  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1892  if (timelineUtils)
1893  {
1894  // Add inference timeline trace if profiling is enabled.
1895  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1896  timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1897  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1898  networkGuid,
1899  inferenceGuid,
1900  LabelsAndEventClasses::EXECUTION_OF_GUID);
1901  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1902  }
1903 
1904  bool executionSucceeded = true;
1905 
1906  if (timelineUtils)
1907  {
1908  // Add end of life of the inference timeline if profiling is enabled.
1909  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1910  timelineUtils->Commit();
1911  }
1912 
1913  if (!workingMemHandle.IsAllocated())
1914  {
1915  workingMemHandle.Allocate();
1916  }
1917 
1918  {
1920  for (auto pair : inputTensors)
1921  {
1922  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1923  }
1924 
1925  // Swap in the pre-imported inputs if any
1926  for (ImportedInputId id : preImportedInputs)
1927  {
1928  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1929  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1930  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1931 
1932  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1933  for (auto it : inputConnections)
1934  {
1935  *it = preimportedHandle.get();
1936  }
1937  }
1938  }
1939  {
1941  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined)
1942  {
1943  for (auto pair: outputTensors)
1944  {
1945  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1946  }
1947  }
1948 
1949  for (ImportedOutputId id : preImportedOutputs)
1950  {
1951  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1952  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1953  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1954 
1955  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1956  for (auto it : outputConnections)
1957  {
1958  *it = preimportedHandle.get();
1959  }
1960  }
1961  }
1962 
1963  auto Fail = [&](const std::exception& error)
1964  {
1965  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1966  executionSucceeded = false;
1967  };
1968  ProfilingDynamicGuid workloadInferenceID(0);
1969 
1970  try
1971  {
1972  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1973  {
1974  auto& workload = m_WorkloadQueue[i];
1975  if (timelineUtils)
1976  {
1977  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1978  inferenceGuid);
1979  }
1980 
1981  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1982 
1983  if (timelineUtils)
1984  {
1985  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1986  }
1987  }
1988  }
1989  catch (const RuntimeException& error)
1990  {
1991  resetMemHandle();
1992  Fail(error);
1993  }
1994  catch (const std::runtime_error& error)
1995  {
1996  resetMemHandle();
1997  Fail(error);
1998  }
1999  catch (...)
2000  {
2001  resetMemHandle();
2002  throw;
2003  }
2004 
2005  if (m_NetworkProperties.m_OutputSource == MemorySource::Undefined)
2006  {
2007  for (auto pair: outputTensors)
2008  {
2009  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
2010  }
2011  }
2012  else
2013  {
2014  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
2015  workingMemHandle.MemSyncOutputs();
2016  }
2017 
2018  resetMemHandle();
2019 
2020  return executionSucceeded ? Status::Success : Status::Failure;
2021 }
2022 
/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
/// overlapped Execution by calling this function from different threads.
std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Tensors that will need to be allocated internally within armnn
    std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
    // Tensors that will be allocated externally by the user
    std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;

    std::vector<WorkingMemDescriptor> workingMemDescriptors;
    std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;

    // Creates a tensor handle for the given output slot: uses the slot's assigned
    // handle factory when one was chosen during optimization, otherwise falls back
    // to the owning layer's workload factory (legacy path).
    auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
    {
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
        {
            BackendId id = layer->GetBackendId();
            return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
        }
        else
        {
            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            if (!handleFactory)
            {
                throw armnn::NullPointerException("handleFactory must not be null.");
            }
            return handleFactory->CreateTensorHandle(tensorInfo, false);
        }
    };

    // Per-OutputSlot bookkeeping: the created tensor handle plus the layer/slot
    // coordinates at which the handle must be swapped when tensors are imported.
    struct HandleInfo
    {
        ITensorHandle* m_TensorHandle;

        bool m_IsInputLayerHandle = false;
        bool m_IsOutputLayerHandle = false;

        WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
        WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
    };

    std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;

    // Index into workingMemDescriptors (only incremented for executed layers).
    unsigned int layerIndex = 0;
    for (auto&& layer : order)
    {
        // Constant layers execution and management is handled during loaded network construction
        if (layer->GetType() == LayerType::Constant)
        {
            continue;
        }

        WorkingMemDescriptor workingMemDescriptor;

        bool isMemoryManaged = true;
        bool isInputLayer = false;
        bool isOutputLayer = false;
        bool isConnectedToOutputLayer = false;

        if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
        {
            // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
            // However we will still need to manage the tensorHandle
            isInputLayer = true;
            // If input importing is enabled the user supplies the memory, so armnn must not manage it.
            isMemoryManaged = m_NetworkProperties.m_InputSource == MemorySource::Undefined;
        }
        else if (layer->GetType() == LayerType::Output)
        {
            isOutputLayer = true;
        }

        unsigned int slotIndex = 0;
        // Create a tensor handle for each output slot of a layer
        // Once we create it, we start managing its lifetime
        for (auto& slot : layer->GetOutputSlots())
        {
            for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
            {
                if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    if (!isConnectedToOutputLayer)
                    {
                        isConnectedToOutputLayer = true;
                        // If Export is enabled disable memory management, so we can export, otherwise we do a copy
                        isMemoryManaged = m_NetworkProperties.m_OutputSource == MemorySource::Undefined;
                    }
                    else
                    {
                        // Importing in this case would likely cause unexpected behaviour, so we disallow it.
                        ARMNN_LOG(warning) <<
                           fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
                                       "This will prevent importing on the connected OutputLayers.",
                                       layer->GetName(), layer->GetGuid());
                        isMemoryManaged = true;
                    }
                }
            }

            ITensorHandle* tensorHandle;
            if (isMemoryManaged)
            {
                managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = managedTensorHandles.back().get();
            }
            else
            {
                unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = unmanagedTensorHandles.back().get();
            }

            workingMemDescriptor.m_Outputs.push_back(tensorHandle);

            HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
            handleInfo.m_TensorHandle = tensorHandle;

            // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
            if (isConnectedToOutputLayer)
            {
                handleInfo.m_IsOutputLayerHandle = true;
                handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
            }
            // Store the LayerBindingId of the InputLayer
            if (isInputLayer)
            {
                handleInfo.m_IsInputLayerHandle = true;
                LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
                handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
            }
            slotIndex++;
        }
        // Loop through the input slots in the same layer and decrement the reference counter associated
        // to each tensor handle we encounter.
        // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
        // so that the next tensor handle with a non overlapping lifetime can share its memory.
        for (auto& slot : layer->GetInputSlots())
        {
            if (!slot.GetConnection())
            {
                throw armnn::GraphValidationException("slot must be a valid input slot.");
            }

            auto outputSlot = slot.GetConnectedOutputSlot();
            auto key = outputSlot->GetOwningLayer().GetGuid();

            // Constant layers execution and management is handled during loaded network construction
            auto found = m_ConstantTensorHandles.find(key);
            if (found != m_ConstantTensorHandles.end())
            {
                ITensorHandle* tensorHandle = found->second;
                // Honour a per-input TensorInfo override by decorating the handle.
                if (slot.IsTensorInfoOverridden())
                {
                    ITensorHandle* decorated = tensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
                    if (decorated)
                    {
                        tensorHandle = decorated;
                    }
                }
                workingMemDescriptor.m_Inputs.push_back(tensorHandle);

                // Odd case where a constant layer is connected to an output layer
                // We will need to create a HandleInfo to track it
                if (isOutputLayer)
                {
                    LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();

                    HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
                    handleInfo.m_TensorHandle = tensorHandle;
                    handleInfo.m_IsOutputLayerHandle = true;
                    handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
                    handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
                }
                continue;
            }

            // The producing slot must already be in the map: producers precede
            // consumers in graph order, so .at() is expected to succeed here.
            HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);

            ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
            if (slot.IsTensorInfoOverridden())
            {
                ITensorHandle* decorated = inputTensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
                if (decorated)
                {
                    inputTensorHandle = decorated;
                }
            }
            workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);

            // Store the LayerBindingId of the OutputLayer
            if (isOutputLayer)
            {
                LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
                handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
                handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
            }
            // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
            // It will need to be updated as well, if we swap out the tensorhandle
            else if (handleInfo.m_IsOutputLayerHandle)
            {
                handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
            }

            // Store the coordinates of the InputSlots connected to the InputLayer
            // There can be more than one InputSlot connected to an InputLayer, so we use a vector
            if (handleInfo.m_IsInputLayerHandle)
            {
                std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
                handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
            }
        }

        // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
        // However we will still need to manage the tensorHandle
        if (!isInputLayer)
        {
            // Simply auto initialise ExecutionData here, so it's added only for the layer that require execution.
            // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
            std::pair<BackendId, ExecutionData> dataPair;
            dataPair.first = layer->GetBackendId();

            executionDataVec.push_back(dataPair);
            workingMemDescriptors.push_back(workingMemDescriptor);

            layerIndex++;
        }
    }

    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;

    auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);

    // Sort m_TensorMemory, so it's order matches the outputSlot order
    std::sort(tensorMemory.begin(), tensorMemory.end(),
              [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                 const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
              {
                  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
              });

    std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
    std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;

    // Split the collected handle info into the input/output coordinate lists the
    // WorkingMemHandle needs in order to rebind imported tensors at execution time.
    for (const auto& handleInfo: outputToHandleInfoMap)
    {
        if (handleInfo.second.m_IsOutputLayerHandle)
        {
            outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
        }

        if (handleInfo.second.m_IsInputLayerHandle)
        {
            inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
        }
    }

    return std::make_unique<WorkingMemHandle>(networkId,
                                              inputConnectionsInfo,
                                              outputConnectionsInfo,
                                              workingMemDescriptors,
                                              std::move(externalMemoryManager),
                                              std::move(tensorMemory),
                                              std::move(managedTensorHandles),
                                              std::move(unmanagedTensorHandles),
                                              executionDataVec,
                                              &m_Backends);
}
2295 
{
    // Forward the user-supplied debug callback to every workload in the queue so
    // that intermediate tensor data can be inspected during execution.
    for (auto&& workloadPtr: m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
2303 
2304 
2305 void LoadedNetwork::CreateMemoryProfileAsync()
2306 {
2307  struct PartialBlock
2308  {
2309  unsigned int m_StartOfLife;
2310  unsigned int m_Lifetime;
2311 
2312  size_t m_MemSize;
2313  unsigned int m_Index;
2314 
2315  BackendId m_BackendId;
2316  };
2317 
2318  auto align = [](size_t numToAlign)
2319  {
2320  const size_t alignment = sizeof(float);
2321  return ((numToAlign + alignment - 1) / alignment) * alignment;
2322  };
2323 
2324  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2325 
2326  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2327  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2328 
2329  unsigned int timestep = 0;
2330  unsigned int outputIndex = 0;
2331  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2332 
2333  for (auto&& layer : order)
2334  {
2335  const LayerType& layerType = layer->GetType();
2336  // Don't manage memory if importing.
2337  if (layerType == LayerType::Input && inputImportingEnabled)
2338  {
2339  continue;
2340  }
2341  // Don't manage memory if importing.
2342  if (layerType == LayerType::Output && outputImportingEnabled
2343  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2344  {
2345  continue;
2346  }
2347  // Because Constant Layer memory can not be shared, the memory must persist for the lifetime of execution,
2348  // management is done separately.
2349  if (layerType == LayerType::Constant)
2350  {
2351  continue;
2352  }
2353 
2354  BackendId backendId = layer->GetBackendId();
2355  for (auto& outputSlot : layer->GetOutputSlots())
2356  {
2357  if (!m_SupportsExternallyManagedMemory[backendId])
2358  {
2359  continue;
2360  }
2361 
2362  PartialBlock partialBlock;
2363 
2364  partialBlock.m_StartOfLife = timestep;
2365 
2366  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2367  partialBlock.m_MemSize = alignedSize;
2368  partialBlock.m_Index = outputIndex++;
2369  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2370  partialBlock.m_BackendId = backendId;
2371 
2372  if (partialBlock.m_Lifetime == 0)
2373  {
2374  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2375  partialBlock.m_StartOfLife,
2376  partialBlock.m_MemSize,
2377  0,
2378  partialBlock.m_Index);
2379  }
2380  else
2381  {
2382  memBlockTrackerMap[&outputSlot] = partialBlock;
2383  }
2384  }
2385 
2386  for (auto& inputSlot : layer->GetInputSlots())
2387  {
2388  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2389  const LayerType& owningLayerType = connectedInputLayer.GetType();
2390 
2391  if (owningLayerType == LayerType::Constant)
2392  {
2393  continue;
2394  }
2395  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2396  {
2397  continue;
2398  }
2399 
2400  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2401 
2402  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2403 
2404  auto& lifetime = partialBlock.m_Lifetime;
2405  --lifetime;
2406 
2407  if (lifetime == 0)
2408  {
2409  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2410  timestep,
2411  partialBlock.m_MemSize,
2412  0,
2413  partialBlock.m_Index);
2414  }
2415  }
2416  ++timestep;
2417  }
2418 }
2419 
/// Builds m_MemBlockMap for the synchronous execution path: walks the graph in
/// topological order and records, per backend, a memory block (start timestep,
/// end timestep, aligned size, index) for every armnn-managed tensor handle.
/// Sub-tensors are resolved to their root TensorHandle so aliasing slots share
/// one block whose lifetime covers all consumers.
void LoadedNetwork::CreateMemoryProfile()
{
    // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
    // is a TensorHandle, the function just returns it
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    // Tracks a tensor handle's memory requirement while its lifetime (number of
    // still-unvisited consumers) is being counted down.
    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    // Round a byte count up to a float-sized (4 byte) boundary.
    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };

    std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;

    const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
    const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        const LayerType& layerType = layer->GetType();
        // Don't manage memory if importing.
        if (layerType == LayerType::Input && inputImportingEnabled)
        {
            continue;
        }
        // Don't manage memory if importing.
        if (layerType == LayerType::Output && outputImportingEnabled
            && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
        {
            continue;
        }
        // Because Constant Layer memory can not be shared, the memory must persist for the lifetime of execution,
        // management is done separately.
        if (layerType == LayerType::Constant)
        {
            continue;
        }

        BackendId backendId = layer->GetBackendId();
        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            // Resolve sub-tensors to their root handle so aliases share a block.
            ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
            {
                PartialBlock partialBlock;

                partialBlock.m_StartOfLife = timestep;

                size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
                partialBlock.m_MemSize = alignedSize;
                partialBlock.m_Index = outputIndex++;
                partialBlock.m_Lifetime = outputSlot.GetNumConnections();
                partialBlock.m_BackendId = backendId;

                if (partialBlock.m_Lifetime == 0)
                {
                    // No consumers: the block begins and ends on the same timestep.
                    m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                        partialBlock.m_StartOfLife,
                                                                        partialBlock.m_MemSize,
                                                                        0,
                                                                        partialBlock.m_Index);
                }
                else
                {
                    memBlockTrackerMap[tensorHandle] = partialBlock;
                }
                m_Tensorhandles.push_back(tensorHandle);

            }
            else
            {
                // Several output slots can resolve to the same root handle;
                // extend the tracked lifetime by this slot's consumer count.
                memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            const LayerType& owningLayerType = connectedInputLayer.GetType();

            if (owningLayerType == LayerType::Constant)
            {
                continue;
            }
            if (inputImportingEnabled && owningLayerType == LayerType::Input)
            {
                continue;
            }
            // Producers on backends without externally managed memory were never
            // added to the tracker map, so they must be skipped here too.
            if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                // Last consumer visited: emit the finished block for this backend.
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                    timestep,
                                                                    partialBlock.m_MemSize,
                                                                    0,
                                                                    partialBlock.m_Index);
            }
        }
        ++timestep;
    }

}
2566 
2567 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2568  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2569 {
2570  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2571  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2572 
2573  for (auto& backend : m_MemBinMap)
2574  {
2575  std::vector<BufferStorage> bufferStorageVec;
2576 
2577  std::shared_ptr<ICustomAllocator> backendAllocator;
2578  if (allocatorMap.find(backend.first) != allocatorMap.end())
2579  {
2580  backendAllocator = allocatorMap[backend.first];
2581  }
2582  else
2583  {
2584  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2585  }
2586 
2587  for (auto& memBin : backend.second)
2588  {
2589  BufferStorage bufferStorage;
2590  bufferStorage.m_BufferSize = memBin.m_MemSize;
2591  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2592 
2593  for (auto& memBlock : memBin.m_MemBlocks)
2594  {
2595  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2596 
2597  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2598  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2599  }
2600 
2601  bufferStorageVec.emplace_back(std::move(bufferStorage));
2602  }
2603 
2604  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2605  }
2606 
2607  return memoryManager;
2608 }
2609 
2610 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2611 {
2612  try
2613  {
2614  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2615  if (!importedTensorHandlePin.m_TensorHandle)
2616  {
2617  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2618  "PreImportedInput: {} has been deleted", id));
2619  }
2620  return importedTensorHandlePin.m_LayerBindingId;
2621  }
2622  catch (const std::out_of_range&)
2623  {
2624  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2625  }
2626 }
2627 
2628 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2629 {
2630  try
2631  {
2632  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2633  if (!importedTensorHandlePin.m_TensorHandle)
2634  {
2635  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2636  "PreImportedOutput: {} has been deleted", id));
2637  }
2638  return importedTensorHandlePin.m_LayerBindingId;
2639  }
2640  catch (const std::out_of_range&)
2641  {
2642  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2643  }
2644 }
2645 
2646 }
armnn::BindableLayer
Definition: Layer.hpp:470
BackendHelper.hpp
armnn::ImportedInputId
unsigned int ImportedInputId
Definition: Types.hpp:310
armnn::Graph::SetLayersOutOfOrder
void SetLayersOutOfOrder()
Definition: Graph.cpp:738
armnn::Compute::Undefined
@ Undefined
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:321
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:59
armnn::BaseTensor::GetMemoryArea
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:307
armnn::Graph::OutputLayersAccessor::begin
ConstIteratorOutputs begin() const
Definition: Graph.hpp:84
arm::pipe
Definition: BackendRegistry.hpp:17
armnn::LoadedNetwork::GetOutputTensorInfo
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:733
armnn::INetworkProperties::m_InputSource
const MemorySource m_InputSource
Definition: IRuntime.hpp:65
armnn::ProfilerManager::RegisterProfiler
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:609
armnn::experimental::WorkingMemHandle::GetExecutionDataAt
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
Definition: WorkingMemHandle.hpp:92
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:394
armnn::LoadedNetwork::EnqueueWorkload
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
Definition: LoadedNetwork.cpp:872
armnn::LoadedNetwork::RegisterDebugCallback
void RegisterDebugCallback(const DebugCallbackFunction &func)
Definition: LoadedNetwork.cpp:2296
ExecutionData.hpp
armnn::TensorHandleFactoryRegistry::ReleaseMemory
void ReleaseMemory()
Release memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:86
armnn::ValidateSourcesMatchOptimizedNetwork
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
Definition: LoadedNetwork.cpp:101
LoadedNetwork.hpp
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::TensorHandleFactoryRegistry::GetFactory
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id Returns nullptr if not found.
Definition: TensorHandleFactoryRegistry.cpp:39
armnn::TensorInfo
Definition: Tensor.hpp:152
MemSyncWorkload.hpp
Graph.hpp
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
Profiling.hpp
armnn::INetworkProperties::m_ProfilingEnabled
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:61
armnn::LoadedNetwork
Definition: LoadedNetwork.hpp:42
armnn::ITensorHandle
Definition: ITensorHandle.hpp:16
armnn::BackendOptions::BackendOption
Definition: BackendOptions.hpp:215
armnn::experimental::WorkingMemHandle::GetOutputConnection
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:112
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::experimental::WorkingMemHandle::IsAllocated
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
Definition: WorkingMemHandle.hpp:77
armnn::ITensorHandle::GetImportFlags
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Definition: ITensorHandle.hpp:70
armnn::LoadedNetwork::SendNetworkStructure
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
Definition: LoadedNetwork.cpp:672
BackendRegistry.hpp
armnn::experimental::WorkingMemHandle::OutputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:40
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:395
armnn::Layer::GetInputSlots
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:258
armnn::Graph::InputLayersAccessor::end
ConstIteratorInputs end() const
Definition: Graph.hpp:70
armnn::experimental::IWorkingMemHandle
Definition: IWorkingMemHandle.hpp:20
armnn::BoostLogSeverityMapping::error
@ error
armnn::LoadedNetwork::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: LoadedNetwork.hpp:45
armnn::BackendRegistry::GetMemoryOptimizerStrategies
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
Definition: BackendRegistry.cpp:150
armnn::LoadedNetwork::ImportInputs
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1486
armnn::OutputHandler::GetData
ITensorHandle * GetData() const
Gets the allocated tensor memory.
Definition: OutputHandler.hpp:46
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::ITensorHandleFactory::LegacyFactoryId
static const FactoryId LegacyFactoryId
Definition: ITensorHandleFactory.hpp:50
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
armnn::Layer
Definition: Layer.hpp:230
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::InputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition: Layer.cpp:614
Assert.hpp
armnn::ITensorHandle::DecorateTensorHandle
virtual std::shared_ptr< ITensorHandle > DecorateTensorHandle(const TensorInfo &tensorInfo)
Returns a decorated version of this TensorHandle allowing us to override the TensorInfo for it.
Definition: ITensorHandle.hpp:98
armnn::ITensorHandle::Import
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
Definition: ITensorHandle.hpp:76
armnn::experimental::WorkingMemHandle::GetBindingIdVector
std::vector< LayerBindingId > & GetBindingIdVector()
Definition: WorkingMemHandle.hpp:119
armnn::INetworkProperties::m_OutputNetworkDetailsMethod
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:63
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::OutputQueueDescriptor
MemCopyQueueDescriptor OutputQueueDescriptor
Definition: WorkloadData.hpp:92
Logging.hpp
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
armnn::MemorySource::Undefined
@ Undefined
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
armnn::experimental::WorkingMemHandle::GetInputHandle
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:97
IBackendInternal.hpp
armnn::LoadedNetwork::ImportOutputs
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1632
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::experimental::WorkingMemHandle::GetInputConnections
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:107
armnn::Graph::GetOutputLayers
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:203
armnn::IWorkloadFactory
Definition: WorkloadFactory.hpp:22
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:309
armnn::BackendId::Get
const std::string & Get() const
Definition: BackendId.hpp:138
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343
armnn::Layer::GetOutputHandler
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:245
armnn::CopyToOutputTensor
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
Definition: LoadedNetwork.cpp:1444
armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:335
armnn::GetTensorInfo
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
Definition: RefWorkloadUtils.hpp:33
armnn::Graph::GetNumLayers
size_t GetNumLayers() const
Definition: Graph.hpp:205
armnn::ITensorHandleFactory
Definition: ITensorHandleFactory.hpp:46
armnn::TensorHandleFactoryRegistry::GetMatchingImportFactoryId
ITensorHandleFactory::FactoryId GetMatchingImportFactoryId(ITensorHandleFactory::FactoryId copyFactoryId)
Get a matching TensorHandleFatory Id for Memory Import given TensorHandleFactory Id for Memory Copy.
Definition: TensorHandleFactoryRegistry.cpp:72
ARMNN_SCOPED_HEAP_PROFILING
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
Definition: HeapProfiling.hpp:45
armnn::GetOutputTensor
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
Definition: LoadedNetwork.cpp:1473
armnn::CheckFlag
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Definition: MemorySources.hpp:41
armnn::Status::Success
@ Success
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::Layer::GetOutputSlots
const std::vector< OutputSlot > & GetOutputSlots() const
Definition: Layer.hpp:259
armnn::Graph::GetNumInputs
size_t GetNumInputs() const
Definition: Graph.hpp:194
armnn::experimental::WorkingMemHandle::MemSyncOutputs
void MemSyncOutputs()
Definition: WorkingMemHandle.cpp:136
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::ITensorHandle::Unmap
virtual void Unmap() const =0
Unmap the tensor data.
armnn::experimental::WorkingMemHandle
Definition: WorkingMemHandle.hpp:29
armnn::RuntimeException
Definition: Exceptions.hpp:120
armnn::experimental::WorkingMemHandle::InputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:33
armnn::LoadedNetwork::GetNetworkGuid
arm::pipe::ProfilingGuid GetNetworkGuid()
Definition: LoadedNetwork.cpp:710
armnn::BaseTensor::GetInfo
const TensorInfo & GetInfo() const
Definition: Tensor.hpp:297
armnn::OutputHandler
Definition: OutputHandler.hpp:28
armnn::BoostLogSeverityMapping::info
@ info
armnn::LoadedNetwork::GetInputTensorInfo
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:715
armnn::LayerType::MemImport
@ MemImport
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:240
armnn::InputQueueDescriptor
MemCopyQueueDescriptor InputQueueDescriptor
Definition: WorkloadData.hpp:91
armnn::TensorHandleFactoryRegistry::AquireMemory
void AquireMemory()
Aquire memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:78
armnn::InputSlot
Definition: Layer.hpp:42
ArmNNProfiling.hpp
armnn::BackendRegistry::GetFactory
FactoryFunction GetFactory(const BackendId &id) const
Definition: BackendRegistry.cpp:57
HeapProfiling.hpp
armnn::ImportedOutputId
unsigned int ImportedOutputId
Definition: Types.hpp:311
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
WorkingMemHandle.hpp
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Graph::GetInputLayers
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:199
armnn::Graph::InputLayersAccessor::begin
ConstIteratorInputs begin() const
Definition: Graph.hpp:65
TensorHandle.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::ITensorHandleFactory::CreateTensorHandle
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
armnn::LoadedNetwork::MakeLoadedNetwork
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
Definition: LoadedNetwork.cpp:173
armnn::LoadedNetwork::CreateWorkingMemHandle
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
Definition: LoadedNetwork.cpp:2025
armnn::ProfilerManager::GetInstance
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:602
armnn::experimental::WorkingMemHandle::GetOutputHandle
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:102
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::MemoryImportException
Definition: Exceptions.hpp:125
std
Definition: BackendId.hpp:149
MemCopyWorkload.hpp
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn::Graph::TopologicalSort
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:191
armnn::LoadedNetwork::Execute
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
Definition: LoadedNetwork.cpp:1803
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:398
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn::BackendRegistry::GetAllocators
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
Definition: BackendRegistry.cpp:128
armnn::Layer::GetBackendId
const BackendId & GetBackendId() const
Definition: Layer.hpp:290
armnn::BackendId
Definition: BackendId.hpp:75
armnn::LoadedNetwork::ClearImportedOutputs
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
Definition: LoadedNetwork.cpp:1782
armnn::experimental::WorkingMemHandle::ValidateBindingIds
void ValidateBindingIds()
Definition: WorkingMemHandle.cpp:145
armnn::OutputSlot::GetTensorHandleFactoryId
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
Definition: Layer.cpp:218
armnn::Graph::OutputLayersAccessor::end
ConstIteratorOutputs end() const
Definition: Graph.hpp:90
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::experimental::WorkingMemDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkingMemDescriptor.hpp:20
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::experimental::WorkingMemDescriptor
Definition: WorkingMemDescriptor.hpp:18
armnn::GraphValidationException
Definition: Exceptions.hpp:110
Layer.hpp
armnn::LoadedNetwork::FreeWorkingMemory
void FreeWorkingMemory()
Definition: LoadedNetwork.cpp:1286
armnn::MemSyncQueueDescriptor
Definition: WorkloadData.hpp:99
armnn::ITensorHandleFactory::FactoryId
std::string FactoryId
Definition: ITensorHandleFactory.hpp:49
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::LayerType::Input
@ Input
armnn::experimental::WorkingMemHandle::Allocate
void Allocate() override
Allocate the backing memory required for execution.
Definition: WorkingMemHandle.cpp:100
armnn::OutputHandler::GetTensorInfo
const TensorInfo & GetTensorInfo() const
Gets the matching TensorInfo for the output.
Definition: OutputHandler.hpp:42
armnn::LoadedNetwork::ClearImportedInputs
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
Definition: LoadedNetwork.cpp:1761
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::Graph::GetNumOutputs
size_t GetNumOutputs() const
Definition: Graph.hpp:195
armnn::NullPointerException
Definition: Exceptions.hpp:146
armnn::LayerType
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:491
armnn::Graph
Definition: Graph.hpp:30
IMemoryManager.hpp
armnn::INetworkProperties::m_OutputSource
const MemorySource m_OutputSource
Definition: IRuntime.hpp:66
armnn::Status::Failure
@ Failure
armnn::experimental::WorkingMemDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkingMemDescriptor.hpp:21
armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26
armnn::LayerType::Output
@ Output
armnn::LayerType::Constant
@ Constant
armnn::ITensorHandle::Map
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
armnn::GetInputTensor
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
Definition: LoadedNetwork.cpp:1460
armnn::HasMatchingCapability
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities str...
Definition: BackendHelper.cpp:85