ArmNN 24.11
LoadedNetwork.cpp
1 //
2 // Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Markers.hpp"
10 #include "Profiling.hpp"
11 #include "HeapProfiling.hpp"
12 #include "WorkingMemHandle.hpp"
13 #include "ExecutionData.hpp"
14 
15 #include <armnn/BackendHelper.hpp>
17 #include <armnn/Logging.hpp>
18 
23 
25 
26 #include <armnn/utility/Assert.hpp>
27 
29 
30 #include <common/include/Processes.hpp>
31 
32 #include <fmt/format.h>
33 
34 namespace armnn
35 {
36 
37 using namespace std;
38 using namespace arm::pipe;
39 
40 namespace
41 {
42 
43 template <typename ExceptionType>
44 std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
45 {
46  std::stringstream ss;
47  ss << prefix << " " << error.what();
48  return ss.str();
49 }
50 
51 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52  const Layer& layer,
53  ProfilingGuid networkGuid)
54 {
55  // Add layer to the post-optimisation network structure
56  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
57  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
58  networkGuid,
59  layerName,
60  LabelsAndEventClasses::LAYER_GUID);
61  for (auto&& input : layer.GetInputSlots())
62  {
63  const IOutputSlot* source = input.GetConnectedOutputSlot();
64  if (!source)
65  {
66  throw armnn::NullPointerException("Null source found on input to layer \"" + layerName + "\".");
67  }
68  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
69  source->GetOwningLayerGuid(),
70  layer.GetGuid());
71  }
72 }
73 
74 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
75  std::unique_ptr<IWorkload>& workload,
76  const Layer& layer)
77 {
78  // Add workload to the post-optimisation network structure
79  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
80  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
81  layer.GetBackendId().Get(),
82  LabelsAndEventClasses::BACKENDID_GUID);
83 
84  // Link the workload to the layer
85  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
86  layer.GetGuid(),
87  workload->GetGuid(),
88  LabelsAndEventClasses::CHILD_GUID);
89 }
90 
91 } // anonymous
92 
93 /**
94  * This function performs a sanity check to ensure that the combination of input and output memory source matches the
95  * values for importEnabled and exportEnabled that were specified during optimization. During optimization the tensor
96  * handle factories are chosen based on whether import and export are enabled. If the user then specifies something
97  * incompatible here it can lead to problems.
98  *
99  * @param optimizedOptions The BackendOptions recorded on the optimized network (the "Global" ImportEnabled/ExportEnabled values).
100  * @param networkProperties The INetworkProperties supplied when loading the network.
101  */
102 void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
103  const INetworkProperties& networkProperties)
104 {
105  // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
106  // added as backend options.
107  const vector<BackendOptions>::iterator& backendItr =
108  find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
109  if (backend.GetBackendId().Get() == "Global")
110  {
111  return true;
112  }
113  else
114  {
115  return false;
116  }
117  });
118  bool importEnabled = false;
119  bool exportEnabled = false;
120  if (backendItr != optimizedOptions.end())
121  {
122  // Find the importEnabled and exportEnabled values.
123  for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
124  {
125  const BackendOptions::BackendOption& option = backendItr->GetOption(i);
126  if (option.GetName() == "ImportEnabled")
127  {
128  importEnabled = option.GetValue().AsBool();
129  }
130  if (option.GetName() == "ExportEnabled")
131  {
132  exportEnabled = option.GetValue().AsBool();
133  }
134  }
135  }
136 
137  // Now that we have values for import and export compare them to the MemorySource variables.
138  // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
139  if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
140  (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
141  {
142  auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
143  if (!importEnabled)
144  {
145  message.append(" requires that memory import be enabled. However, "
146  "it was disabled when this network was optimized.");
147  }
148  else
149  {
150  message.append(" requires that memory import be disabled. However, "
151  "it was enabled when this network was optimized.");
152  }
153  throw InvalidArgumentException(message);
154  }
155 
156  if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
157  (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
158  {
159  auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
160  if (!exportEnabled)
161  {
162  message.append(" requires that memory export be enabled. However, "
163  "it was disabled when this network was optimized.");
164  }
165  else
166  {
167  message.append(" requires that memory export be disabled. However, "
168  "it was enabled when this network was optimized.");
169  }
170  throw InvalidArgumentException(message);
171  }
172 }
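// Illustrative sketch (added for documentation, not part of the original file): the memory
// sources given at load time must mirror the ImportEnabled/ExportEnabled options chosen at
// optimize time, otherwise the check above throws InvalidArgumentException. The
// OptimizerOptionsOpaque setters and the INetworkProperties constructor arguments below are
// assumptions; their exact signatures vary between ArmNN releases.
#if 0 // documentation-only example
static void ExampleMatchingMemorySources(armnn::IRuntime& runtime, armnn::INetwork& network)
{
    armnn::OptimizerOptionsOpaque optOptions;
    optOptions.SetImportEnabled(true);   // input tensors will be imported rather than copied
    optOptions.SetExportEnabled(false);  // output tensors will be copied back

    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(network, {armnn::Compute::CpuAcc}, runtime.GetDeviceSpec(), optOptions);

    // Load-time sources mirror the optimize-time choices: a defined input source for
    // ImportEnabled == true, an Undefined output source for ExportEnabled == false.
    armnn::INetworkProperties props(/*asyncEnabled=*/false,
                                    armnn::MemorySource::Malloc,
                                    armnn::MemorySource::Undefined);

    std::string errorMessage;
    armnn::NetworkId netId{};
    runtime.LoadNetwork(netId, std::move(optNet), errorMessage, props);
}
#endif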
173 
174 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
175  std::string& errorMessage,
176  const INetworkProperties& networkProperties,
177  arm::pipe::IProfilingService* profilingService)
178 {
179  std::unique_ptr<LoadedNetwork> loadedNetwork;
180 
181  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
182  {
183  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
184  ARMNN_LOG(error) << errorMessage;
185 
186  return std::unique_ptr<LoadedNetwork>();
187  };
188 
189  try
190  {
191  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
192  }
193  catch (const armnn::RuntimeException& error)
194  {
195  return Fail(error);
196  }
197  catch (const armnn::Exception& error)
198  {
199  return Fail(error);
200  }
201  catch (const std::runtime_error& error)
202  {
203  return Fail(error);
204  }
205 
206  return loadedNetwork;
207 }
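// Illustrative sketch (added for documentation, not part of the original file): MakeLoadedNetwork
// reports failure by returning a null pointer and filling errorMessage instead of letting
// exceptions escape. Assumes optNet, networkProperties and profilingService are in scope.
#if 0 // documentation-only example
std::string errorMessage;
std::unique_ptr<armnn::LoadedNetwork> loaded = armnn::LoadedNetwork::MakeLoadedNetwork(
    std::move(optNet), errorMessage, networkProperties, &profilingService);
if (!loaded)
{
    ARMNN_LOG(error) << "LoadNetwork failed: " << errorMessage;
}
#endif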
208 
209 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
210  const INetworkProperties& networkProperties,
211  arm::pipe::IProfilingService* profilingService) :
212  m_OptimizedNetwork(std::move(net)),
213  m_NetworkProperties(networkProperties),
214  m_TensorHandleFactoryRegistry(),
215  m_ProfilingService(profilingService)
216 {
218  // Get the profiler and register it for the current thread.
219  const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
220  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
221 
222  profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
223 
224  profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);
225 
226  // We need to check that the memory sources match up with the values of import and export specified during the
227  // optimize phase. If they don't this will throw an exception.
228  ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
229  m_NetworkProperties);
230 
231  // First create tensor handlers, backends and workload factories.
232  // Handlers are created before workloads because workload creation
233  // can modify some of the handlers (for example, the splitter and
234  // concat layers).
235 
236  bool useExternalMemoryManager = false;
237  bool useInternalMemoryManager = false;
238  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
239  // Ensure Topological order
240  order.SetLayersOutOfOrder();
241  order.TopologicalSort();
242 
243  if (!networkProperties.m_AsyncEnabled)
244  {
245  m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
246  m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
247  }
248 
249  for (auto&& layer : order)
250  {
251  auto const& backendId = layer->GetBackendId();
252  if (m_Backends.count(backendId) == 0)
253  {
254  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
255  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
256 
257  IBackendInternal* backend = it.first->second.get();
258 
259  // If we're doing async execution verify that the backend supports it and ExternallyManagedMemory.
260  if (networkProperties.m_AsyncEnabled)
261  {
262  if (!HasMatchingCapability(BackendOptions::BackendOption{"AsyncExecution", true},
263  backend->GetCapabilities()))
264  {
265  std::string er = backend->GetId();
266  er += " does not support AsyncExecution";
267  throw BackendCapabilityException(er);
268  }
269  if (!HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
270  backend->GetCapabilities()))
271  {
272  std::string er = backend->GetId();
273  er += " does not support ExternallyManagedMemory\n";
274  er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
275  throw BackendCapabilityException(er);
276  }
277  m_SupportsExternallyManagedMemory[backend->GetId()] = true;
278  useExternalMemoryManager = true;
279  }
280  else
281  {
282  m_SupportsExternallyManagedMemory[backend->GetId()] = false;
283  useInternalMemoryManager = true;
284  }
285 
286  IBackendInternal::IWorkloadFactoryPtr workloadFactory;
287  if (backend->SupportsTensorAllocatorAPI())
288  {
289  workloadFactory = backend->CreateWorkloadFactory(
290  m_TensorHandleFactoryRegistry,
291  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
292  static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
293  static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
294  }
295  else
296  {
297  m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
298  workloadFactory = backend->CreateWorkloadFactory(
299  m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
300  }
301  m_WorkloadFactories[backendId] = std::move(workloadFactory);
302  }
303  }
304 
305  if (!networkProperties.m_AsyncEnabled)
306  {
307  for (auto&& layer : order)
308  {
309  auto& workloadFactory = GetWorkloadFactory(*layer);
310  bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
311 
312  switch (layer->GetType())
313  {
314  case LayerType::Input:
316  {
317  // If IsImportEnabled is true then we need to set IsMemoryManaged
318  // to false when creating TensorHandles
319  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
320  workloadFactory,
321  !supportsExternalManager &&
322  (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
323  break;
324  }
325  case LayerType::Constant:
326  {
327  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
328  break;
329  }
330  default:
331  {
332  // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
333  // If Export is enabled disable memory management so we can export, otherwise we do a copy
334  if ((layer->GetNumOutputSlots() == 1) &&
335  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
336  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
337  {
338  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
339  workloadFactory,
340  !supportsExternalManager &&
341  (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
342  }
343  else
344  {
345  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
346  workloadFactory,
347  !supportsExternalManager);
348  }
349  }
350  }
351  }
352  }
353 
354  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
355  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
356  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
357  if (timelineUtils)
358  {
359  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
360  // Mark the network with a start of life event
361  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
362  // and with the process ID
363  int processID = arm::pipe::GetCurrentProcessId();
364  std::stringstream ss;
365  ss << processID;
366  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
367  }
368 
369  std::vector<IWorkload*> ConstWorkloads;
370 
371  //Then create workloads.
372  {
373  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
374  for (auto&& layer: order)
375  {
376  if (timelineUtils)
377  {
378  // Add layer to the post-optimisation network structure
379  AddLayerStructure(timelineUtils, *layer, networkGuid);
380  }
381 
382  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
383 
384  switch (layer->GetType())
385  {
386  case LayerType::Input:
387  case LayerType::Output:
388  {
389  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
390  break;
391  }
392  default:
393  {
394  auto workload = layer->CreateWorkload(workloadFactory);
395 
396  if (!workload)
397  {
398  const char* const layerName =
399  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
400  throw InvalidArgumentException(
401  fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
402  layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
403  ));
404  }
405 
406  if (timelineUtils)
407  {
408  // Add workload to the post-optimisation network structure
409  AddWorkloadStructure(timelineUtils, workload, *layer);
410  }
411 
412  // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
413  // and are separated out from the other workloads
414  if((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
415  layer->GetType() == LayerType::Constant)
416  {
417  m_ConstantTensorHandles[layer->GetGuid()] =
418  layer->GetOutputSlot(0).GetOutputHandler().GetData();
419  m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
420  }
421  else
422  {
423  m_WorkloadQueue.push_back(std::move(workload));
424 
425  if (layer->GetType() == LayerType::Constant)
426  {
427  // Place the Constant Workloads into a queue so that they can be executed first
428  ConstWorkloads.push_back(m_WorkloadQueue.back().get());
429  }
430  }
431  // release the constant data in the layer.
432  layer->ReleaseConstantData();
433  break;
434  }
435  }
436  }
437  }
438 
439  // Gather information about workloads for inputs & outputs
440  if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
441  {
442  const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
443 
444  // Get indices of all workloads connected to each input and
445  // check if they support tensor handle replacement
446  for (const BindableLayer* layer: order.GetInputLayers())
447  {
448  const auto bindingId = layer->GetBindingId();
449 
450  bool supportsReplacement = true;
451 
452  for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
453  {
454  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
455  workloadIndex -= noOfInputs;
456 
457  m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
458  armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
459 
460  // Avoid if input is connected directly to an output
461  if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
462  {
463  auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
464  supportsReplacement &= workload->SupportsTensorHandleReplacement();
465  }
466  }
467 
468  ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
469  // Get matching import factory Id
470  ITensorHandleFactory::FactoryId importFactoryId =
471  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
472 
473  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
474 
475  if (supportsReplacement && importFactory)
476  {
477  m_PreImportedInputHandles.emplace_back(
478  bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
479  }
480  else
481  {
482  m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
483  }
484  }
485 
486  // Get indices of all workloads connected to each output and
487  // check if they support tensor handle replacement
488  for (const BindableLayer* layer: order.GetOutputLayers())
489  {
490  const auto bindingId = layer->GetBindingId();
491 
492  const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
493  auto& indices = m_OutputWorkloadSlotPairs[bindingId];
494 
495  // Avoid if output is connected directly to an input
496  if (outputSlot->GetOwningLayer().GetType() != LayerType::Input)
497  {
498  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
499  workloadIndex -= noOfInputs;
500 
501  indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
502  outputSlot->CalculateIndexOnOwner()};
503 
504  bool supportsReplacement = true;
505  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
506  supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
507 
508  for (auto &inputSlot: outputSlot->GetConnections())
509  {
510  if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
511  {
512  auto inWorkloadIndex = std::distance(order.begin(),
513  order.GetPosInGraph(inputSlot->GetOwningLayer()));
514  inWorkloadIndex -= noOfInputs;
515  indices.m_InputSlotIndices.emplace_back(
516  WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
517  inputSlot->GetSlotIndex()});
518  auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
519  supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
520  }
521  }
522 
523  ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
524  // Get matching import factory Id
525  ITensorHandleFactory::FactoryId importFactoryId =
526  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
527  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
528 
529  if (supportsReplacement && importFactory)
530  {
531  m_PreImportedOutputHandles.emplace_back(
532  bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
533  }
534  else
535  {
536  m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
537  }
538  }
539  }
540  }
541 
542  for (auto&& workloadFactory : m_WorkloadFactories)
543  {
544  workloadFactory.second->AfterWorkloadsCreated();
545  }
546 
547  if (timelineUtils)
548  {
549  // Commit to send the post-optimisation network structure
550  timelineUtils->Commit();
551  }
552 
553  if (useExternalMemoryManager)
554  {
555  if (networkProperties.m_AsyncEnabled)
556  {
557  CreateMemoryProfileAsync();
558  }
559  else
560  {
561  CreateMemoryProfile();
562  }
563 
564  auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
565  for (auto& backendMemoryProfile : m_MemBlockMap)
566  {
567  const BackendId& backendId = backendMemoryProfile.first;
568  if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
569  {
570  m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
571  }
572  else
573  {
574  m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
575  }
576  }
577 
578  if (!networkProperties.m_AsyncEnabled)
579  {
580  m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
581 
582  // Sort m_TensorMemory so that its order matches m_Tensorhandles
583  std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
584  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
585  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
586  {
587  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
588  });
589  }
590  }
591 
592  // Now that the intermediate tensor memory has been set-up,
593  // do any post allocation configuration for each workload.
594  if (!networkProperties.m_AsyncEnabled)
595  {
596  if (useInternalMemoryManager)
597  {
598  // Set up memory.
599  m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
600  }
601 
602  for (auto &workload : m_WorkloadQueue)
603  {
604  workload->PostAllocationConfigure();
605  }
606  }
607 
608  if (useExternalMemoryManager)
609  {
610  if (!networkProperties.m_AsyncEnabled)
611  {
612  AllocateAndExecuteConstantWorkloads();
613  }
614  else
615  {
616  AllocateAndExecuteConstantWorkloadsAsync();
617  }
618  }
619  // If synchronous, execute all constant layer workloads
620  if (!networkProperties.m_AsyncEnabled)
621  {
622  for (auto workload: ConstWorkloads)
623  {
624  workload->Execute();
625  }
626  }
628 }
629 
630 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
631 {
632  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
633  for (auto& pair : m_ConstantWorkloads)
634  {
635  auto tensorHandle = m_ConstantTensorHandles[pair.first];
636  tensorHandle->Allocate();
637  pair.second->Execute();
638  }
639 }
640 
641 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
642 {
643  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
644  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
645  for (auto&& layer : order)
646  {
647  if (layer->GetType() == LayerType::Constant)
648  {
649  const auto& outSlot = layer->GetOutputSlots()[0];
650  const auto factoryId = outSlot.GetTensorHandleFactoryId();
651  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
652  {
653  throw armnn::Exception("factoryId must not be of type \"Legacy\".");
654  }
655  auto& workloadFactory = GetWorkloadFactory(*layer);
656 
657  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
658  ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
659 
660  m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
661  tensorHandle->Allocate();
662 
663  auto& backend = m_Backends.at(layer->GetBackendId());
664 
665  WorkingMemDescriptor memDesc;
666  memDesc.m_Outputs.push_back(tensorHandle);
667 
668  ExecutionData executionData = backend->CreateExecutionData(memDesc);
669  m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
670  }
671  }
672 }
673 
674 void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
675 {
676  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
677  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
678  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
679 
680  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
681  TimelineUtilityMethods::GetTimelineUtils(profilingService);
682 
683  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
684 
685  for (auto&& layer : order)
686  {
687  // Add layer to the post-optimisation network structure
688  AddLayerStructure(timelineUtils, *layer, networkGuid);
689  switch (layer->GetType())
690  {
691  case LayerType::Input:
692  case LayerType::Output:
693  {
694  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
695  break;
696  }
697  default:
698  {
699  for (auto& workload : m_WorkloadQueue)
700  {
701  // Add workload to the post-optimisation network structure
702  AddWorkloadStructure(timelineUtils, workload, *layer);
703  }
704  break;
705  }
706  }
707  }
708  // Commit to send the post-optimisation network structure
709  timelineUtils->Commit();
710 }
711 
712 ProfilingGuid LoadedNetwork::GetNetworkGuid()
713 {
714  return m_OptimizedNetwork->GetGuid();
715 }
716 
717 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
718 {
719  for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
720  {
721  if (inputLayer->GetNumOutputSlots() != 1)
722  {
723  throw armnn::GraphValidationException("Input layer should have exactly 1 output slot");
724  }
725 
726  if (inputLayer->GetBindingId() == layerId)
727  {
728  return inputLayer->GetOutputSlot(0).GetTensorInfo();
729  }
730  }
731 
732  throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
733 }
734 
735 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
736 {
737  for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
738  {
739  if (outputLayer->GetNumInputSlots() != 1)
740  {
741  throw armnn::GraphValidationException("Output layer should have exactly 1 input slot");
742  }
743 
744  if (!outputLayer->GetInputSlot(0).GetConnection())
745  {
746  throw armnn::GraphValidationException("Input slot on Output layer must be connected");
747  }
748 
749  if (outputLayer->GetBindingId() == layerId)
750  {
751  return outputLayer->GetInputSlot(0).GetTensorInfo();
752  }
753  }
754 
755  throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
756 }
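// Illustrative sketch (added for documentation, not part of the original file): applications
// normally reach these lookups through IRuntime, which forwards to the LoadedNetwork held for
// the given NetworkId. Assumes runtime and netId are in scope and binding id 0 exists.
#if 0 // documentation-only example
armnn::TensorInfo inputInfo  = runtime->GetInputTensorInfo(netId, 0);
armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(netId, 0);
ARMNN_LOG(info) << "input elements: "   << inputInfo.GetNumElements()
                << ", output elements: " << outputInfo.GetNumElements();
#endif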
757 
758 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
759 {
760  const IWorkloadFactory* workloadFactory = nullptr;
761 
762  auto it = m_WorkloadFactories.find(layer.GetBackendId());
763  if (it == m_WorkloadFactories.end())
764  {
765  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
766  layer.GetBackendId().Get(),
767  layer.GetNameStr()),
768  CHECK_LOCATION());
769  }
770 
771  workloadFactory = it->second.get();
772 
773  if (!workloadFactory)
774  {
775  throw armnn::NullPointerException("No workload factory");
776  }
777 
778  return *workloadFactory;
779 }
780 
781 namespace {
782 
783 // Non-copyable class owning accelerator-specific tensor data.
784 class TensorPin
785 {
786 public:
787  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
788  : m_TensorHandle(std::move(handle))
789  , m_TensorInfo(info)
790  , m_Id(id)
791  {
792  }
793 
794  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
795  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
796  LayerBindingId GetBindingId() const { return m_Id; }
797 
798 private:
799  std::unique_ptr<ITensorHandle> m_TensorHandle;
800  TensorInfo m_TensorInfo;
801  LayerBindingId m_Id;
802 };
803 
804 static const TensorPin& GetTensorPin(LayerBindingId id,
805  const std::vector<TensorPin>& pins,
806  char const* bindingPointDesc)
807 {
808  auto it = std::find_if(pins.begin(), pins.end(),
809  [id](const TensorPin& pin)
810  {
811  return pin.GetBindingId() == id;
812  });
813 
814  if (it != pins.end())
815  {
816  return *it;
817  }
818  else
819  {
820  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
821  }
822 }
823 
824 // Stores data that needs to be kept accessible for the entire execution of a workload.
825 class WorkloadData
826 {
827 public:
828  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
829  {
830  m_InputTensorPins.reserve(inputTensors.size());
831  m_OutputTensorPins.reserve(outputTensors.size());
832 
833  for (auto inputTensorPair : inputTensors)
834  {
835  auto inputTensor = inputTensorPair.second;
836 
837  std::unique_ptr<ITensorHandle> tensorHandle =
838  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
839  LayerBindingId layerId = inputTensorPair.first;
840 
841  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
842  }
843 
844  for (auto outputTensorPair : outputTensors)
845  {
846  auto outputTensor = outputTensorPair.second;
847 
848  std::unique_ptr<ITensorHandle> tensorHandle =
849  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
850  LayerBindingId layerId = outputTensorPair.first;
851 
852  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
853  }
854  }
855 
856  const TensorPin& GetInputTensorPin(LayerBindingId id) const
857  {
858  return GetTensorPin(id, m_InputTensorPins, "input");
859  }
860 
861  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
862  {
863  return GetTensorPin(id, m_OutputTensorPins, "output");
864  }
865 
866 private:
867 
868  std::vector<TensorPin> m_InputTensorPins;
869  std::vector<TensorPin> m_OutputTensorPins;
870 };
871 
872 }
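// Illustrative sketch (added for documentation, not part of the original file): WorkloadData
// wraps the user-supplied InputTensors/OutputTensors in passthrough tensor handles keyed by
// LayerBindingId. The containers themselves are plain vectors of {binding id, tensor} pairs,
// e.g. (assumes runtime and netId are in scope and both input and output use binding id 0):
#if 0 // documentation-only example
armnn::TensorInfo inputInfo  = runtime->GetInputTensorInfo(netId, 0);
armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(netId, 0);
inputInfo.SetConstant(true); // ConstTensor expects a constant TensorInfo in recent releases

std::vector<float> inputData(inputInfo.GetNumElements(), 0.0f);
std::vector<float> outputData(outputInfo.GetNumElements());

armnn::InputTensors  inputTensors  {{0, armnn::ConstTensor(inputInfo, inputData.data())}};
armnn::OutputTensors outputTensors {{0, armnn::Tensor(outputInfo, outputData.data())}};

runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
#endif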
873 
874 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
875  const OutputTensors& outputTensors,
876  std::vector<ImportedInputId> preImportedInputIds,
877  std::vector<ImportedOutputId> preImportedOutputIds)
878 {
879  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
880 
881  // Walk graph to determine the order of execution.
882  if (graph.GetNumLayers() < 2)
883  {
884  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
885  return Status::Failure;
886  }
887 
888  // Data that must be kept alive for the entire execution of the workload.
889  WorkloadData workloadData(inputTensors, outputTensors);
890 
891  // Input tensors can be provided as parameters or pre imported. Either way the number of
892  // tensors should match the number of inputs.
893  if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
894  {
895  throw InvalidArgumentException("Number of inputs provided does not match network.");
896  }
897 
898  // For each input to the network, call EnqueueInput with the data passed by the user.
899  {
901  m_InputQueue.clear();
902  m_InputQueue.reserve(graph.GetNumInputs());
903 
904  unsigned int inputIndex = 0;
905  unsigned int importedInputIdIndex = 0;
906  std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
907  for (const BindableLayer* inputLayer : graph.GetInputLayers())
908  {
909  if (importedInputIdIndex < preImportedInputIds.size() &&
910  inputIndex == preImportedInputIds[importedInputIdIndex])
911  {
912  // Only replace tensorhandles if they have not already been replaced
913  if (!m_IsInputImported[inputIndex])
914  {
915  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
916 
917  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
918  {
919  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
920  workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
921  }
922  m_IsInputImported[inputIndex] = true;
923  }
924  importedInputIdIndex++;
925  }
926  else
927  {
928  if (m_IsInputImported[inputIndex])
929  {
930  OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));
931 
932  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
933  {
934  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
935  workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
936  }
937 
938  m_IsInputImported[inputIndex] = false;
939  }
940 
941  // InputTensorHandle is not imported yet, so proceed to enqueue the input
942  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
943  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
944  }
945  inputIndex++;
946  }
947  }
948  // For each output to the network, call EnqueueOutput with the data passed by the user.
949  {
951  m_OutputQueue.clear();
952  m_OutputQueue.reserve(graph.GetNumOutputs());
953 
954  if (preImportedOutputIds.size() > graph.GetNumOutputs())
955  {
956  throw InvalidArgumentException("Invalid number of preImportedOutputIds");
957  }
958 
959  unsigned int outputIndex = 0;
960  unsigned int importedOutputIdIndex = 0;
961  std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
962  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
963  {
964  if (importedOutputIdIndex < preImportedOutputIds.size() &&
965  outputIndex == preImportedOutputIds[importedOutputIdIndex])
966  {
967  // Only replace tensorhandles if they have not already been replaced
968  ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
969 
970  if (!m_IsOutputImported[outputIndex])
971  {
972  const auto bindingId = outputLayer->GetBindingId();
973  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
974 
975  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
976 
977  outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
978  indices.m_OutputSlotIndices.m_SlotIndex);
979 
980  for (const auto& workloadInfo: indices.m_InputSlotIndices)
981  {
982  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
983  inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
984  }
985  m_IsOutputImported[outputIndex] = true;
986  }
987 
988  if (!inputTensorHandle)
989  {
990  throw armnn::NullPointerException("Data should have been allocated.");
991  }
992 
993  MemSyncQueueDescriptor syncDesc;
994  syncDesc.m_Inputs.push_back(inputTensorHandle);
995  WorkloadInfo info;
996  info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());
997 
998  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
999  if (!syncWorkload)
1000  {
1001  throw armnn::NullPointerException("No sync workload created");
1002  }
1003 
1004  m_OutputQueue.push_back(std::move(syncWorkload));
1005  importedOutputIdIndex++;
1006  }
1007  else
1008  {
1009  if (m_IsOutputImported[outputIndex])
1010  {
1011  const auto bindingId = outputLayer->GetBindingId();
1012  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
1013 
1014  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
1015  const OutputHandler& outputHandler =
1016  outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
1017 
1018  outputWorkload->ReplaceOutputTensorHandle(
1019  outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
1020 
1021  for (const auto& workloadInfo: indices.m_InputSlotIndices)
1022  {
1023  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
1024  inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
1025  }
1026  m_IsOutputImported[outputIndex] = false;
1027  }
1028 
1029  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
1030  // OutputTensorHandle is not imported yet, so proceed to enqueue the output
1031  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
1032  }
1033  outputIndex++;
1034  }
1035  }
1036 
1037  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1038  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1039  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1040  if (timelineUtils)
1041  {
1042  // Add inference timeline trace if profiling is enabled.
1043  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1044  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1045  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1046  networkGuid,
1047  inferenceGuid,
1048  LabelsAndEventClasses::EXECUTION_OF_GUID);
1049  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1050  }
1051 
1052  bool executionSucceeded = true;
1053 
1054  {
1055  if (m_ProfilingService->IsProfilingEnabled())
1056  {
1057  m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1058  }
1060  ARMNN_SCOPED_HEAP_PROFILING("Executing");
1061  executionSucceeded = Execute(timelineUtils, inferenceGuid);
1062  }
1063 
1064  if (timelineUtils)
1065  {
1066  // Add end of life of the inference timeline if profiling is enabled.
1067  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1068  timelineUtils->Commit();
1069  }
1070 
1071  return executionSucceeded ? Status::Success : Status::Failure;
1072 }
1073 
1074 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1075 {
1076  if (layer.GetType() != LayerType::Input)
1077  {
1078  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
1079  }
1080 
1081  if (tensorHandle == nullptr)
1082  {
1083  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
1084  }
1085 
1086  InputQueueDescriptor inputQueueDescriptor;
1087  WorkloadInfo info;
1088 
1089  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1090  info.m_InputTensorInfos.push_back(tensorInfo);
1091 
1092  if (layer.GetNumOutputSlots() != 1)
1093  {
1094  throw armnn::GraphValidationException("Can only handle Input Layer with one output");
1095  }
1096 
1097  const OutputHandler& handler = layer.GetOutputHandler();
1098  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1099  ITensorHandle* outputTensorHandle = handler.GetData();
1100 
1101  if (!outputTensorHandle)
1102  {
1103  throw armnn::NullPointerException("Data should have been allocated.");
1104  }
1105 
1106  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1107  info.m_OutputTensorInfos.push_back(outputTensorInfo);
1108 
1109  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1110  bool needMemCopy = true;
1111  if ((m_NetworkProperties.m_InputSource != MemorySource::Undefined)) // Try import the input tensor
1112  {
1113  if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1114  {
1115  needMemCopy = false;
1116  // This assumes a CPU Tensor handle
1117  void* mem = tensorHandle->Map(false);
1118  if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1119  {
1120  tensorHandle->Unmap();
1121  return; // No need for a workload since the import has been done.
1122  }
1123  tensorHandle->Unmap();
1124  throw MemoryImportException("EnqueueInput: Memory Import failed");
1125  }
1126  }
1127  if (needMemCopy)
1128  {
1129  // Create a mem copy workload for input since we did not import
1130  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1131 
1132  if (!inputWorkload)
1133  {
1134  throw armnn::NullPointerException("No input workload created");
1135  }
1136 
1137  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1138  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1139  if (timelineUtils)
1140  {
1141  // Add Input Workload to the post-optimisation network structure
1142  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1143  timelineUtils->Commit();
1144  }
1145 
1146  m_InputQueue.push_back(std::move(inputWorkload));
1147  }
1148 }
1149 
1150 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1151 {
1152  if (layer.GetType() != LayerType::Output)
1153  {
1154  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
1155  }
1156 
1157  if (tensorHandle == nullptr)
1158  {
1159  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
1160  }
1161 
1162  OutputQueueDescriptor outputQueueDescriptor;
1163  WorkloadInfo info;
1164 
1165  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1166  info.m_OutputTensorInfos.push_back(tensorInfo);
1167 
1168  if (layer.GetNumInputSlots() != 1)
1169  {
1170  throw armnn::GraphValidationException("Output Layer should have exactly one input.");
1171  }
1172 
1173  // Gets the output handler from the previous node.
1174  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1175 
1176  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1177  ITensorHandle* inputTensorHandle = outputHandler.GetData();
1178  if (!inputTensorHandle)
1179  {
1180  throw armnn::NullPointerException("Data should have been allocated.");
1181  }
1182 
1183  // Try import the output tensor.
1184  // Note: We can only import the output pointer if all of the following hold true:
1185  // a) The imported pointer is aligned sufficiently
1186  // b) The tensor has zero padding
1187  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1188  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1189  // e) m_NetworkProperties.m_OutputSource != MemorySource::Undefined
1190  bool needMemCopy = true;
1191  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
1192  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1193  {
1194  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
1195  {
1196  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1197  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1198  {
1199  needMemCopy = false;
1200  void *mem = tensorHandle->Map(false);
1201  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1202  tensorHandle->Unmap();
1203 
1204  if (importOk)
1205  {
1206  // Insert synchronization workload
1207  MemSyncQueueDescriptor syncDesc;
1208  syncDesc.m_Inputs.push_back(inputTensorHandle);
1209  info.m_InputTensorInfos.push_back(inputTensorInfo);
1210  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1211  if (!syncWorkload)
1212  {
1213  throw armnn::NullPointerException("No sync workload created");
1214  }
1215  m_OutputQueue.push_back(std::move(syncWorkload));
1216  }
1217  else
1218  {
1219  throw MemoryExportException("EnqueueOutput: Memory Export failed");
1220  }
1221  }
1222  }
1223  }
1224  if (needMemCopy)
1225  {
1226  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
1227  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1228  info.m_InputTensorInfos.push_back(inputTensorInfo);
1229 
1230  std::unique_ptr<IWorkload> outputWorkload =
1231  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1232  if (!outputWorkload)
1233  {
1234  throw armnn::NullPointerException("No output workload created");
1235  }
1236 
1237  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1238  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1239  if (timelineUtils)
1240  {
1241  // Add Output Workload to the post-optimisation network structure
1242  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1243  timelineUtils->Commit();
1244  }
1245 
1246  m_OutputQueue.push_back(std::move(outputWorkload));
1247  }
1248 }
1249 
1250 void LoadedNetwork::AllocateWorkingMemory(
1251 #if !defined(ARMNN_DISABLE_THREADS)
1252  std::lock_guard<std::mutex>& lock
1253 #endif
1254  )
1255 {
1256  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
1257 
1258 #if !defined(ARMNN_DISABLE_THREADS)
1259  // this unused parameter makes sure we can only call this function with a valid lock
1260  IgnoreUnused(lock);
1261 #endif
1262  if (m_IsWorkingMemAllocated)
1263  {
1264  return;
1265  }
1266 
1267  if (m_ExternalMemoryManager)
1268  {
1269  m_ExternalMemoryManager->Allocate();
1270 
1271  for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1272  {
1273  m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1274  }
1275  }
1276 
1277  for (auto&& memoryManager : m_BackendMemoryMangers)
1278  {
1279  if (memoryManager)
1280  {
1281  memoryManager->Acquire();
1282  }
1283  }
1284  m_TensorHandleFactoryRegistry.AquireMemory();
1285  m_IsWorkingMemAllocated = true;
1286 }
1287 
1288 void LoadedNetwork::FreeWorkingMemory()
1289 {
1290 #if !defined(ARMNN_DISABLE_THREADS)
1291  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1292 #endif
1293 
1294  if (!m_IsWorkingMemAllocated)
1295  {
1296  return;
1297  }
1298 
1299  if (m_ExternalMemoryManager)
1300  {
1301  m_ExternalMemoryManager->Deallocate();
1302  }
1303 
1304  // Inform the memory managers to release memory in their respective memory groups
1305  for (auto&& memoryManager : m_BackendMemoryMangers)
1306  {
1307  if (memoryManager)
1308  {
1309  memoryManager->Release();
1310  }
1311  }
1312  m_TensorHandleFactoryRegistry.ReleaseMemory();
1313  m_IsWorkingMemAllocated = false;
1314 }
1315 
1316 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
1317  ProfilingGuid inferenceGuid)
1318 {
1319  bool success = true;
1320 
1321  auto Fail = [&](const std::exception& error)
1322  {
1323  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1324  success = false;
1325  };
1326 
1327  try
1328  {
1329 #if !defined(ARMNN_DISABLE_THREADS)
1330  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1331  AllocateWorkingMemory(lockGuard);
1332 #else
1333  AllocateWorkingMemory();
1334 #endif
1335 
1336  ProfilingDynamicGuid workloadInferenceID(0);
1337  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
1338  {
1339  for (auto& workload : queue)
1340  {
1341  if(timelineUtils)
1342  {
1343  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1344  inferenceGuid);
1345  }
1346 
1348  workload->Execute();
1350  if(timelineUtils)
1351  {
1352  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1353  }
1354  }
1355  };
1356 
1358  ExecuteQueue(m_InputQueue);
1359  ExecuteQueue(m_WorkloadQueue);
1360  ExecuteQueue(m_OutputQueue);
1362  }
1363  catch (const RuntimeException& error)
1364  {
1365  Fail(error);
1366  }
1367  catch (const std::runtime_error& error)
1368  {
1369  Fail(error);
1370  }
1371 
1372  return success;
1373 }
1374 
1375 void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1376 {
1377  if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try import the input tensor
1378  {
1379  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1380  if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1381  {
1382  std::unique_ptr<ITensorHandle> tensorHandle =
1383  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1384  inputTensor.GetMemoryArea());
1385  void* mem = tensorHandle->Map(false);
1386 
1387  if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1388  {
1389  tensorHandle->Unmap();
1390  return;
1391  }
1392  tensorHandle->Unmap();
1393  throw MemoryImportException("EnqueueInput: Memory Import failed");
1394  }
1395  else
1396  {
1397  throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
1398  }
1399  }
1400  else
1401  {
1403  std::unique_ptr<ITensorHandle> tensorHandle =
1404  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1405 
1406  auto copyFunc = [](void* dst, const void* src, size_t size)
1407  {
1408  memcpy(dst, src, size);
1409  };
1410 
1411  CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
1412  }
1413 }
1414 
1415 // Note: We can only import the output pointer if all of the following hold true:
1416 // a) The imported pointer is aligned sufficiently
1417 // b) The tensor has zero padding
1418 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1419 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1420 // e) m_IsExportEnabled must be set to true
1421 void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1422 {
1423  if (!outputTensorHandle)
1424  {
1425  throw armnn::NullPointerException("Data should have been allocated.");
1426  }
1427 
1428  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1429  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1430  {
1431  std::unique_ptr<ITensorHandle> tensorHandle =
1432  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1433  outputTensor.GetMemoryArea());
1434 
1435  void* mem = tensorHandle->Map(false);
1436  bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1437  tensorHandle->Unmap();
1438 
1439  if (!importOk)
1440  {
1441  throw MemoryExportException("ImportOutputTensor: Memory Export failed");
1442  }
1443  }
1444  else
1445  {
1446  throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1447  }
1448 
1449 }
1450 
1451 void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1452 {
1454  auto copyFunc = [](void* dst, const void* src, size_t size)
1455  {
1456  memcpy(dst, src, size);
1457  };
1458 
1459  std::unique_ptr<ITensorHandle> tensorHandle =
1460  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1461  outputTensor.GetMemoryArea());
1462 
1463  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1464 }
1465 
1466 
1467 const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
1468 {
1469  for (auto inputTensorPair : inputTensors)
1470  {
1471  LayerBindingId id = inputTensorPair.first;
1472  if (id == layerId)
1473  {
1474  return inputTensorPair.second;
1475  }
1476  }
1477  throw InvalidArgumentException("Input does not exist.");
1478 }
1479 
1480 const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
1481 {
1482  for (auto outputTensorPair : outputTensors)
1483  {
1484  LayerBindingId id = outputTensorPair.first;
1485  if (id == layerId)
1486  {
1487  return outputTensorPair.second;
1488  }
1489  }
1490  throw InvalidArgumentException("Output does not exist.");
1491 }
1492 
1493 std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
1494  MemorySource forceImportMemorySource)
1495 {
1496  if (!m_NetworkProperties.m_AsyncEnabled)
1497  {
1498  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1499  if (forceImportMemorySource == MemorySource::Undefined)
1500  {
1501  throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1502  }
1503  // The number of pre imported tensors should not exceed the number of inputs.
1504  if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1505  {
1506  throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
1507  }
1508 
1509  std::vector<ImportedInputId> importedInputs;
1510  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1511  unsigned int inputIndex = 0;
1512  for (const BindableLayer* inputLayer : graph.GetInputLayers())
1513  {
1514  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1515 
1516  if (!outputTensorHandle)
1517  {
1518  inputIndex++;
1519  continue;
1520  }
1521 
1522  auto layerBindingId = inputLayer->GetBindingId();
1523  auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
1524  {
1525  return inputTensor.first == layerBindingId;
1526  });
1527 
1528  if (it == inputTensors.end())
1529  {
1530  inputIndex++;
1531  continue;
1532  }
1533 
1534  const auto& inputTensor = *it;
1535  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1536  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1537  inputTensor.second.GetMemoryArea());
1538 
1539  try
1540  {
1541  if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1542  && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1543  {
1544  importedInputs.push_back(inputIndex);
1545  }
1546  passThroughTensorHandle->Unmap();
1547  }
1548  catch(const MemoryImportException& exception)
1549  {
1550  ARMNN_LOG(error) << "An error occurred attempting to import input_"
1551  << inputIndex << " : " << exception.what();
1552  passThroughTensorHandle->Unmap();
1553  }
1554  inputIndex++;
1555  }
1556 
1557  return importedInputs;
1558  }
1559  else
1560  {
1561  // Import when the import of network properties is enabled
1562  std::vector<ImportedInputId> importedInputs;
1563  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1564 
1565  for (auto inputTensor : inputTensors)
1566  {
1567  auto layerBindingId = inputTensor.first;
1568  auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
1569  {
1570  return layer->GetBindingId() == layerBindingId;
1571  });
1572 
1573  if (it == graph.GetInputLayers().end())
1574  {
1575  throw MemoryImportException(fmt::format(
1576  "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1577  }
1578 
1579  const Layer* layer = *it;
1580  if (layer->GetType() != LayerType::Input)
1581  {
1582  throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
1583  }
1584 
1585  auto& backend = m_Backends.at(layer->GetBackendId());
1586  if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
1587  backend->GetCapabilities()))
1588  {
1589  std::string er = backend->GetId();
1590  er += " does not have PreImportIOTensors capability";
1591  throw BackendCapabilityException(er);
1592  }
1593 
1594  const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
1595 
1596  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1597  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1598 
1599  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1600  if (!handleFactory)
1601  {
1602  throw armnn::NullPointerException("handleFactory must not be null.");
1603  }
1604 
1605  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1606  handleFactory->CreateTensorHandle(tensorInfo, false)};
1607 
1608  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1609 
1610  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1611  {
1612  throw MemoryImportException(
1613  fmt::format("ImportInputs: Memory Import failed, backend: "
1614  "{} does not support importing from source {}"
1615  , factoryId, m_NetworkProperties.m_InputSource));
1616  }
1617 
1618  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1619  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1620  inputTensor.second.GetMemoryArea());
1621 
1622  if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1623  {
1624  importedInputs.push_back(m_CurImportedInputId++);
1625  passThroughTensorHandle->Unmap();
1626  }
1627  else
1628  {
1629  passThroughTensorHandle->Unmap();
1630  throw MemoryImportException("ImportInputs: Memory Import failed");
1631  }
1632 
1633  m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1634  }
1635  return importedInputs;
1636  }
1637 }
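// Illustrative sketch (added for documentation, not part of the original file): even when the
// network was loaded without import enabled, callers can force a one-off import through the
// forceImportMemorySource argument and hand the returned ids back to EnqueueWorkload so the
// input copy workloads are skipped. Assumes runtime, netId, inputTensors and outputTensors are
// in scope, all inputs import successfully, and the backend accepts Malloc imports.
#if 0 // documentation-only example
std::vector<armnn::ImportedInputId> importedIds =
    runtime->ImportInputs(netId, inputTensors, armnn::MemorySource::Malloc);

// Pre-imported inputs are no longer passed by value; only the ids are supplied.
runtime->EnqueueWorkload(netId, {}, outputTensors, importedIds);
#endif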
1638 
1639 std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
1640  MemorySource forceImportMemorySource)
1641 {
1642  if (!m_NetworkProperties.m_AsyncEnabled)
1643  {
1644  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1645  if (forceImportMemorySource == MemorySource::Undefined)
1646  {
1647  throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1648  }
1649  // If forceImportMemorySource is defined, try import if memory is aligned
1650  if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1651  {
1652  throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
1653  }
1654  std::vector<ImportedOutputId> importedOutputs;
1655  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1656 
1657  unsigned int outputIndex = 0;
1658  for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
1659  {
1660  auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1661  if (!inputTensorHandle)
1662  {
1663  outputIndex++;
1664  continue;
1665  }
1666 
1667  auto layerBindingId = outputLayer->GetBindingId();
1668  auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
1669  {
1670  return outputTensor.first == layerBindingId;
1671  });
1672 
1673  if (it == outputTensors.end())
1674  {
1675  outputIndex++;
1676  continue;
1677  }
1678 
1679  const auto outputTensor = *it;
1680  try
1681  {
1682  // Check if the output memory can be imported
1683  if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1684  && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1685  {
1686  importedOutputs.push_back(outputIndex);
1687  }
1688  }
1689  catch(const MemoryImportException& exception)
1690  {
1691  ARMNN_LOG(error) << "An error occurred attempting to import output_"
1692  << outputIndex << " : " << exception.what();
1693  }
1694  outputIndex++;
1695  }
1696  return importedOutputs;
1697  }
1698 
1699  std::vector<ImportedOutputId> importedOutputs;
1700  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1701 
1702  for (const auto& outputTensor : outputTensors)
1703  {
1704  auto layerBindingId = outputTensor.first;
1705  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1706  {
1707  return layer->GetBindingId() == layerBindingId;
1708  });
1709 
1710  if (it == graph.GetOutputLayers().end())
1711  {
1712  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1713  layerBindingId));
1714  }
1715 
1716  const Layer* layer = *it;
1717  if (layer->GetType() != LayerType::Output)
1718  {
1719  throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
1720  }
1721 
1722  auto& backend = m_Backends.at(layer->GetBackendId());
1723  if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
1724  backend->GetCapabilities()))
1725  {
1726  std::string er = backend->GetId();
1727  er += " does not have PreImportIOTensors capability";
1728  throw BackendCapabilityException(er);
1729  }
1730 
1731  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1732  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1733  const TensorInfo& tensorInfo = inputSlot.GetTensorInfo();
1734 
1735  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1736  if (!handleFactory)
1737  {
1738  throw armnn::NullPointerException("handleFactory must not be null.");
1739  }
1740 
1741  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1742  handleFactory->CreateTensorHandle(tensorInfo, false)};
1743 
1744  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1745 
1746  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1747  {
1748  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
1749  "{} does not support importing from source {}"
1750  , factoryId, forceImportMemorySource));
1751  }
1752 
1753  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1754  {
1755  importedOutputs.push_back(m_CurImportedOutputId++);
1756  }
1757  else
1758  {
1759  throw MemoryImportException("ImportOutputs: Memory Import failed");
1760  }
1761 
1762  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1763  }
1764 
1765  return importedOutputs;
1766 }
1767 
1768 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1769 {
1770  for (auto id : inputIds)
1771  {
1772  if (id > m_PreImportedInputHandles.size())
1773  {
1774  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1775  }
1776 
1777  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1778  if (!importedTensorHandle)
1779  {
1780  throw InvalidArgumentException(
1781  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1782  }
1783  // Call Unimport then destroy the tensorHandle
1784  importedTensorHandle->Unimport();
1785  importedTensorHandle = {};
1786  }
1787 }
1788 
1789 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1790 {
1791  for (auto id : outputIds)
1792  {
1793  if (id > m_PreImportedOutputHandles.size())
1794  {
1795  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1796  }
1797 
1798  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1799  if (!importedTensorHandle)
1800  {
1801  throw InvalidArgumentException(
1802  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1803  }
1804  // Call Unimport then destroy the tensorHandle
1805  importedTensorHandle->Unimport();
1806  importedTensorHandle = {};
1807  }
1808 }
1809 
1810 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1811  const OutputTensors& outputTensors,
1812  IWorkingMemHandle& iWorkingMemHandle,
1813  std::vector<ImportedInputId> preImportedInputs,
1814  std::vector<ImportedOutputId> preImportedOutputs)
1815 {
1816  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1817 
1818  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1819  {
1820  if (preImportedInputs.empty())
1821  {
1822  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1823  }
1824  else
1825  {
1826  throw InvalidArgumentException("LoadedNetwork::Execute: "
1827  "Number of inputs + preImportedInputs provided does not match network.");
1828  }
1829  }
1830 
1831  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1832  {
1833  if (preImportedOutputs.empty())
1834  {
1835  throw InvalidArgumentException("LoadedNetwork::Execute: "
1836  "Number of outputs provided does not match network.");
1837  }
1838  else
1839  {
1840  throw InvalidArgumentException("LoadedNetwork::Execute: "
1841  "Number of outputs + preImportedOutputs provided does not match network.");
1842  }
1843  }
1844 
1845  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1846  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1847  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1848  unsigned int index = 0;
1849  for (auto pair : inputTensors)
1850  {
1851  bindingIds[index++] = pair.first;
1852  }
1853  for (ImportedInputId id : preImportedInputs)
1854  {
1855  bindingIds[index++] = ValidateImportedInputID(id);
1856  }
1857  for (auto pair : outputTensors)
1858  {
1859  bindingIds[index++] = pair.first;
1860  }
1861  for (ImportedOutputId id : preImportedOutputs)
1862  {
1863  bindingIds[index++] = ValidateImportedOutputID(id);
1864  }
1865 
1866  workingMemHandle.ValidateBindingIds();
1867 
1868  auto resetMemHandle = [&]()
1869  {
1870  for (ImportedInputId id: preImportedInputs)
1871  {
1872  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1873 
1874  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1875  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1876  for (auto it : inputConnections)
1877  {
1878  *it = inputHandle;
1879  }
1880  }
1881 
1882  for (ImportedOutputId id: preImportedOutputs)
1883  {
1884  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1885 
1886  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1887  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1888 
1889  for (auto it : outputConnections)
1890  {
1891  *it = outputHandle;
1892  }
1893  }
1894  };
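 // resetMemHandle restores the WorkingMemHandle's own input/output handles after the pre-imported
 // handles have been swapped in further below; it runs on the normal path and whenever a workload
 // throws, so the handle can be safely reused by subsequent calls.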
1895 
1896  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1897  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1898  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1899  if (timelineUtils)
1900  {
1901  // Add inference timeline trace if profiling is enabled.
1902  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1903  timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1904  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1905  networkGuid,
1906  inferenceGuid,
1907  LabelsAndEventClasses::EXECUTION_OF_GUID);
1908  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1909  }
1910 
1911  bool executionSucceeded = true;
1912 
1913  if (timelineUtils)
1914  {
1915  // Add end of life of the inference timeline if profiling is enabled.
1916  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1917  timelineUtils->Commit();
1918  }
1919 
1920  if (!workingMemHandle.IsAllocated())
1921  {
1922  workingMemHandle.Allocate();
1923  }
1924 
1925  {
1927  for (auto pair : inputTensors)
1928  {
1929  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1930  }
1931 
1932  // Swap in the pre-imported inputs if any
1933  for (ImportedInputId id : preImportedInputs)
1934  {
1935  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1936  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1937  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1938 
1939  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1940  for (auto it : inputConnections)
1941  {
1942  *it = preimportedHandle.get();
1943  }
1944  }
1945  }
1946  {
1948  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined)
1949  {
1950  for (auto pair: outputTensors)
1951  {
1952  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1953  }
1954  }
1955 
1956  for (ImportedOutputId id : preImportedOutputs)
1957  {
1958  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1959  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1960  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1961 
1962  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1963  for (auto it : outputConnections)
1964  {
1965  *it = preimportedHandle.get();
1966  }
1967  }
1968  }
1969 
1970  auto Fail = [&](const std::exception& error)
1971  {
1972  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1973  executionSucceeded = false;
1974  };
1975  ProfilingDynamicGuid workloadInferenceID(0);
1976 
1977  try
1978  {
1979  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1980  {
1981  auto& workload = m_WorkloadQueue[i];
1982  if (timelineUtils)
1983  {
1984  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1985  inferenceGuid);
1986  }
1987 
1988  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1989 
1990  if (timelineUtils)
1991  {
1992  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1993  }
1994  }
1995  }
1996  catch (const RuntimeException& error)
1997  {
1998  resetMemHandle();
1999  Fail(error);
2000  }
2001  catch (const std::runtime_error& error)
2002  {
2003  resetMemHandle();
2004  Fail(error);
2005  }
2006  catch (...)
2007  {
2008  resetMemHandle();
2009  throw;
2010  }
2011 
2012  if (m_NetworkProperties.m_OutputSource == MemorySource::Undefined)
2013  {
2014  for (auto pair: outputTensors)
2015  {
2016  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
2017  }
2018  }
2019  else
2020  {
2021  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
2022  workingMemHandle.MemSyncOutputs();
2023  }
2024 
2025  resetMemHandle();
2026 
2027  return executionSucceeded ? Status::Success : Status::Failure;
2028 }
2029 
2030 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
2031 /// overlapped Execution by calling this function from different threads.
2032 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
2033 {
2034  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
2035 
2036  // Tensors that will need to be allocated internally within armnn
2037  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
2038  // Tensors that will be allocated externally by the user
2039  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
2040 
2041  std::vector<WorkingMemDescriptor> workingMemDescriptors;
2042  std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
2043 
2044  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
2045  {
2046  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
2047  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
2048 
2049  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
2050  {
2051  BackendId id = layer->GetBackendId();
2052 ARMNN_NO_DEPRECATE_WARN_BEGIN
2053  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
2054 ARMNN_NO_DEPRECATE_WARN_END
2055  }
2056  else
2057  {
2058  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
2059  if (!handleFactory)
2060  {
2061  throw armnn::NullPointerException("handleFactory must not be null.");
2062  }
2063  return handleFactory->CreateTensorHandle(tensorInfo, false);
2064  }
2065  };
2066 
2067  struct HandleInfo
2068  {
2069  ITensorHandle* m_TensorHandle;
2070 
2071  bool m_IsInputLayerHandle = false;
2072  bool m_IsOutputLayerHandle = false;
2073 
2074  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
2075  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
2076  };
2077 
2078  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
2079 
2080  unsigned int layerIndex = 0;
2081  for (auto&& layer : order)
2082  {
2083  // Constant layers' execution and management are handled during loaded network construction
2084  if (layer->GetType() == LayerType::Constant)
2085  {
2086  continue;
2087  }
2088 
2089  WorkingMemDescriptor workingMemDescriptor;
2090 
2091  bool isMemoryManaged = true;
2092  bool isInputLayer = false;
2093  bool isOutputLayer = false;
2094  bool isConnectedToOutputLayer = false;
2095 
2096  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
2097  {
2098  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
2099  // However we will still need to manage the tensorHandle
2100  isInputLayer = true;
2101  isMemoryManaged = m_NetworkProperties.m_InputSource == MemorySource::Undefined;
2102  }
2103  else if (layer->GetType() == LayerType::Output)
2104  {
2105  isOutputLayer = true;
2106  }
2107 
2108  unsigned int slotIndex = 0;
2109  // Create a tensor handle for each output slot of a layer
2110  // Once we create it, we start managing its lifetime
2111  for (auto& slot : layer->GetOutputSlots())
2112  {
2113  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2114  {
2115  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
2116  {
2117  if (!isConnectedToOutputLayer)
2118  {
2119  isConnectedToOutputLayer = true;
2120  // If Export is enabled disable memory management, so we can export, otherwise we do a copy
2121  isMemoryManaged = m_NetworkProperties.m_OutputSource == MemorySource::Undefined;
2122  }
2123  else
2124  {
2125  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
2126  ARMNN_LOG(warning) <<
2127  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2128  "This will prevent importing on the connected OutputLayers.",
2129  layer->GetName(), layer->GetGuid());
2130  isMemoryManaged = true;
2131  }
2132  }
2133  }
2134 
2135  ITensorHandle* tensorHandle;
2136  if (isMemoryManaged)
2137  {
2138  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2139  tensorHandle = managedTensorHandles.back().get();
2140  }
2141  else
2142  {
2143  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2144  tensorHandle = unmanagedTensorHandles.back().get();
2145  }
2146 
2147  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
2148 
2149  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2150  handleInfo.m_TensorHandle = tensorHandle;
2151 
2152  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
2153  if (isConnectedToOutputLayer)
2154  {
2155  handleInfo.m_IsOutputLayerHandle = true;
2156  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2157  }
2158  // Store the LayerBindingId of the InputLayer
2159  if (isInputLayer)
2160  {
2161  handleInfo.m_IsInputLayerHandle = true;
2162  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2163  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2164  }
2165  slotIndex++;
2166  }
2167  // Loop through the input slots in the same layer and decrement the reference counter associated
2168  // with each tensor handle we encounter.
2169  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
2170  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
2171  for (auto& slot : layer->GetInputSlots())
2172  {
2173  if (!slot.GetConnection())
2174  {
2175  throw armnn::GraphValidationException("slot must be a valid input slot.");
2176  }
2177 
2178  auto outputSlot = slot.GetConnectedOutputSlot();
2179  auto key = outputSlot->GetOwningLayer().GetGuid();
2180 
2181  // Constant layers' execution and management are handled during loaded network construction
2182  auto found = m_ConstantTensorHandles.find(key);
2183  if (found != m_ConstantTensorHandles.end())
2184  {
2185  ITensorHandle* tensorHandle = found->second;
2186  if (slot.IsTensorInfoOverridden())
2187  {
2188  ITensorHandle* decorated = tensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
2189  if (decorated)
2190  {
2191  tensorHandle = decorated;
2192  }
2193  }
2194  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
2195 
2196  // Odd case where a constant layer is connected to an output layer
2197  // We will need to create a HandleInfo to track it
2198  if (isOutputLayer)
2199  {
2200  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2201 
2202  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2203  handleInfo.m_TensorHandle = tensorHandle;
2204  handleInfo.m_IsOutputLayerHandle = true;
2205  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2206  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2207  }
2208  continue;
2209  }
2210 
2211  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2212 
2213  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2214  if (slot.IsTensorInfoOverridden())
2215  {
2216  ITensorHandle* decorated = inputTensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
2217  if (decorated)
2218  {
2219  inputTensorHandle = decorated;
2220  }
2221  }
2222  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2223 
2224  // Store the LayerBindingId of the OutputLayer
2225  if (isOutputLayer)
2226  {
2227  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2228  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2229  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2230  }
2231  // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
2232  // It will need to be updated as well, if we swap out the tensorhandle
2233  else if (handleInfo.m_IsOutputLayerHandle)
2234  {
2235  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2236  }
2237 
2238  // Store the coordinates of the InputSlots connected to the InputLayer
2239  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2240  if (handleInfo.m_IsInputLayerHandle)
2241  {
2242  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2243  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2244  }
2245  }
2246 
2247  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2248  // However we will still need to manage the tensorHandle
2249  if (!isInputLayer)
2250  {
2251  // Simply auto initialise ExecutionData here, so it's added only for the layers that require execution.
2252  // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
2253  std::pair<BackendId, ExecutionData> dataPair;
2254  dataPair.first = layer->GetBackendId();
2255 
2256  executionDataVec.push_back(dataPair);
2257  workingMemDescriptors.push_back(workingMemDescriptor);
2258 
2259  layerIndex++;
2260  }
2261  }
2262 
2263  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2264 
2265  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2266 
2267  // Sort m_TensorMemory, so its order matches the outputSlot order
2268  std::sort(tensorMemory.begin(), tensorMemory.end(),
2269  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2270  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2271  {
2272  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2273  });
2274 
2275  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2276  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2277 
2278  for (const auto& handleInfo: outputToHandleInfoMap)
2279  {
2280  if (handleInfo.second.m_IsOutputLayerHandle)
2281  {
2282  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2283  }
2284 
2285  if (handleInfo.second.m_IsInputLayerHandle)
2286  {
2287  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2288  }
2289  }
2290 
2291  return std::make_unique<WorkingMemHandle>(networkId,
2292  inputConnectionsInfo,
2293  outputConnectionsInfo,
2294  workingMemDescriptors,
2295  std::move(externalMemoryManager),
2296  std::move(tensorMemory),
2297  std::move(managedTensorHandles),
2298  std::move(unmanagedTensorHandles),
2299  executionDataVec,
2300  &m_Backends);
2301 }
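// Illustrative usage sketch (hypothetical caller code, not part of this file): Execute() is thread
// safe as long as each thread uses its own IWorkingMemHandle, so overlapped execution can be
// achieved by creating one handle per thread. Names below are assumptions for illustration only.
//
//   auto memHandle0 = loadedNetwork.CreateWorkingMemHandle(networkId);
//   auto memHandle1 = loadedNetwork.CreateWorkingMemHandle(networkId);
//   std::thread t0([&] { loadedNetwork.Execute(inputs0, outputs0, *memHandle0); });
//   std::thread t1([&] { loadedNetwork.Execute(inputs1, outputs1, *memHandle1); });
//   t0.join();
//   t1.join();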
2302 
2303 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2304 {
2305  for (auto&& workloadPtr: m_WorkloadQueue)
2306  {
2307  workloadPtr.get()->RegisterDebugCallback(func);
2308  }
2309 }
2310 
2311 
2312 void LoadedNetwork::CreateMemoryProfileAsync()
2313 {
2314  struct PartialBlock
2315  {
2316  unsigned int m_StartOfLife;
2317  unsigned int m_Lifetime;
2318 
2319  size_t m_MemSize;
2320  unsigned int m_Index;
2321 
2322  BackendId m_BackendId;
2323  };
2324 
2325  auto align = [](size_t numToAlign)
2326  {
2327  const size_t alignment = sizeof(float);
2328  return ((numToAlign + alignment - 1) / alignment) * alignment;
2329  };
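 // For example: with alignment = sizeof(float) = 4, align(10) = ((10 + 3) / 4) * 4 = 12, while an
 // already aligned size such as align(8) stays at 8.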
2330 
2331  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2332 
2333  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2334  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2335 
2336  unsigned int timestep = 0;
2337  unsigned int outputIndex = 0;
2338  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2339 
2340  for (auto&& layer : order)
2341  {
2342  const LayerType& layerType = layer->GetType();
2343  // Don't manage memory if importing.
2344  if (layerType == LayerType::Input && inputImportingEnabled)
2345  {
2346  continue;
2347  }
2348  // Don't manage memory if importing.
2349  if (layerType == LayerType::Output && outputImportingEnabled
2350  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2351  {
2352  continue;
2353  }
2354  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2355  // so its management is done separately.
2356  if (layerType == LayerType::Constant)
2357  {
2358  continue;
2359  }
2360 
2361  BackendId backendId = layer->GetBackendId();
2362  for (auto& outputSlot : layer->GetOutputSlots())
2363  {
2364  if (!m_SupportsExternallyManagedMemory[backendId])
2365  {
2366  continue;
2367  }
2368 
2369  PartialBlock partialBlock;
2370 
2371  partialBlock.m_StartOfLife = timestep;
2372 
2373  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2374  partialBlock.m_MemSize = alignedSize;
2375  partialBlock.m_Index = outputIndex++;
2376  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2377  partialBlock.m_BackendId = backendId;
2378 
2379  if (partialBlock.m_Lifetime == 0)
2380  {
2381  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2382  partialBlock.m_StartOfLife,
2383  partialBlock.m_MemSize,
2384  0,
2385  partialBlock.m_Index);
2386  }
2387  else
2388  {
2389  memBlockTrackerMap[&outputSlot] = partialBlock;
2390  }
2391  }
2392 
2393  for (auto& inputSlot : layer->GetInputSlots())
2394  {
2395  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2396  const LayerType& owningLayerType = connectedInputLayer.GetType();
2397 
2398  if (owningLayerType == LayerType::Constant)
2399  {
2400  continue;
2401  }
2402  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2403  {
2404  continue;
2405  }
2406 
2407  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2408 
2409  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2410 
2411  auto& lifetime = partialBlock.m_Lifetime;
2412  --lifetime;
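 // For example: an output slot with three consuming input slots starts with m_Lifetime == 3; when
 // the third consumer is visited the counter reaches zero and the block is closed at this timestep.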
2413 
2414  if (lifetime == 0)
2415  {
2416  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2417  timestep,
2418  partialBlock.m_MemSize,
2419  0,
2420  partialBlock.m_Index);
2421  }
2422  }
2423  ++timestep;
2424  }
2425 }
2426 
2427 void LoadedNetwork::CreateMemoryProfile()
2428 {
2429  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2430  // is a TensorHandle, the function just returns it
2431  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2432  {
2433  ITensorHandle* ancestor = subTensorHandle;
2434  while (ancestor && ancestor->GetParent())
2435  {
2436  ancestor = ancestor->GetParent();
2437  }
2438  return ancestor;
2439  };
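 // For example: given a chain subTensor -> parentSubTensor -> rootTensorHandle, the lambda walks
 // GetParent() until it returns nullptr and yields rootTensorHandle; a handle with no parent is
 // returned unchanged.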
2440 
2441  struct PartialBlock
2442  {
2443  unsigned int m_StartOfLife;
2444  unsigned int m_Lifetime;
2445 
2446  size_t m_MemSize;
2447  unsigned int m_Index;
2448 
2449  BackendId m_BackendId;
2450  };
2451 
2452  auto align = [](size_t numToAlign)
2453  {
2454  const size_t alignment = sizeof(float);
2455  return ((numToAlign + alignment - 1) / alignment) * alignment;
2456  };
2457 
2458  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2459 
2460  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2461  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2462 
2463  unsigned int timestep = 0;
2464  unsigned int outputIndex = 0;
2465  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2466 
2467  for (auto&& layer : order)
2468  {
2469  const LayerType& layerType = layer->GetType();
2470  // Don't manage memory if importing.
2471  if (layerType == LayerType::Input && inputImportingEnabled)
2472  {
2473  continue;
2474  }
2475  // Don't manage memory if importing.
2476  if (layerType == LayerType::Output && outputImportingEnabled
2477  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2478  {
2479  continue;
2480  }
2481  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2482  // so its management is done separately.
2483  if (layerType == LayerType::Constant)
2484  {
2485  continue;
2486  }
2487 
2488  BackendId backendId = layer->GetBackendId();
2489  for (auto& outputSlot : layer->GetOutputSlots())
2490  {
2491  if (!m_SupportsExternallyManagedMemory[backendId])
2492  {
2493  continue;
2494  }
2495 
2496  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2497  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2498 
2499  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2500  {
2501  PartialBlock partialBlock;
2502 
2503  partialBlock.m_StartOfLife = timestep;
2504 
2505  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2506  partialBlock.m_MemSize = alignedSize;
2507  partialBlock.m_Index = outputIndex++;
2508  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2509  partialBlock.m_BackendId = backendId;
2510 
2511  if (partialBlock.m_Lifetime == 0)
2512  {
2513  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2514  partialBlock.m_StartOfLife,
2515  partialBlock.m_MemSize,
2516  0,
2517  partialBlock.m_Index);
2518  }
2519  else
2520  {
2521  memBlockTrackerMap[tensorHandle] = partialBlock;
2522  }
2523  m_Tensorhandles.push_back(tensorHandle);
2524 
2525  }
2526  else
2527  {
2528  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2529  }
2530  }
2531 
2532  for (auto& inputSlot : layer->GetInputSlots())
2533  {
2534  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2535  const LayerType& owningLayerType = connectedInputLayer.GetType();
2536 
2537  if (owningLayerType == LayerType::Constant)
2538  {
2539  continue;
2540  }
2541  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2542  {
2543  continue;
2544  }
2545  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2546  {
2547  continue;
2548  }
2549 
2550  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2551 
2552  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2553  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2554 
2555  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2556 
2557  auto& lifetime = partialBlock.m_Lifetime;
2558  --lifetime;
2559 
2560  if (lifetime == 0)
2561  {
2562  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2563  timestep,
2564  partialBlock.m_MemSize,
2565  0,
2566  partialBlock.m_Index);
2567  }
2568  }
2569  ++timestep;
2570  }
2571 
2572 }
2573 
2574 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2575  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2576 {
2577  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2578  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2579 
2580  for (auto& backend : m_MemBinMap)
2581  {
2582  std::vector<BufferStorage> bufferStorageVec;
2583 
2584  std::shared_ptr<ICustomAllocator> backendAllocator;
2585  if (allocatorMap.find(backend.first) != allocatorMap.end())
2586  {
2587  backendAllocator = allocatorMap[backend.first];
2588  }
2589  else
2590  {
2591  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2592  }
2593 
2594  for (auto& memBin : backend.second)
2595  {
2596  BufferStorage bufferStorage;
2597  bufferStorage.m_BufferSize = memBin.m_MemSize;
2598  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2599 
2600  for (auto& memBlock : memBin.m_MemBlocks)
2601  {
2602  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2603 
2604  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2605  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2606  }
2607 
2608  bufferStorageVec.emplace_back(std::move(bufferStorage));
2609  }
2610 
2611  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2612  }
2613 
2614  return memoryManager;
2615 }
2616 
2617 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2618 {
2619  try
2620  {
2621  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2622  if (!importedTensorHandlePin.m_TensorHandle)
2623  {
2624  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2625  "PreImportedInput: {} has been deleted", id));
2626  }
2627  return importedTensorHandlePin.m_LayerBindingId;
2628  }
2629  catch (const std::out_of_range&)
2630  {
2631  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2632  }
2633 }
2634 
2635 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2636 {
2637  try
2638  {
2639  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2640  if (!importedTensorHandlePin.m_TensorHandle)
2641  {
2642  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2643  "PreImportedOutput: {} has been deleted", id));
2644  }
2645  return importedTensorHandlePin.m_LayerBindingId;
2646  }
2647  catch (const std::out_of_range&)
2648  {
2649  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2650  }
2651 }
2652 
2653 }
armnn::BindableLayer
Definition: Layer.hpp:470
BackendHelper.hpp
armnn::ImportedInputId
unsigned int ImportedInputId
Definition: Types.hpp:315
armnn::Graph::SetLayersOutOfOrder
void SetLayersOutOfOrder()
Definition: Graph.cpp:738
armnn::Compute::Undefined
@ Undefined
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:321
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:59
armnn::BaseTensor::GetMemoryArea
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:307
armnn::Graph::OutputLayersAccessor::begin
ConstIteratorOutputs begin() const
Definition: Graph.hpp:84
arm::pipe
Definition: BackendRegistry.hpp:17
MARK_WORKLOAD_EXECUTION_END
#define MARK_WORKLOAD_EXECUTION_END()
This empty macro has been inserted at LoadedNetwork::Execute.
Definition: Markers.hpp:41
armnn::LoadedNetwork::GetOutputTensorInfo
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:735
armnn::INetworkProperties::m_InputSource
const MemorySource m_InputSource
Definition: IRuntime.hpp:65
armnn::ProfilerManager::RegisterProfiler
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:609
armnn::experimental::WorkingMemHandle::GetExecutionDataAt
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
Definition: WorkingMemHandle.hpp:92
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:100
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:394
armnn::LoadedNetwork::EnqueueWorkload
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
Definition: LoadedNetwork.cpp:874
armnn::LoadedNetwork::RegisterDebugCallback
void RegisterDebugCallback(const DebugCallbackFunction &func)
Definition: LoadedNetwork.cpp:2303
ExecutionData.hpp
armnn::TensorHandleFactoryRegistry::ReleaseMemory
void ReleaseMemory()
Release memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:86
armnn::ValidateSourcesMatchOptimizedNetwork
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
Definition: LoadedNetwork.cpp:102
LoadedNetwork.hpp
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::TensorHandleFactoryRegistry::GetFactory
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id Returns nullptr if not found.
Definition: TensorHandleFactoryRegistry.cpp:39
armnn::TensorInfo
Definition: Tensor.hpp:152
MemSyncWorkload.hpp
Graph.hpp
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
Profiling.hpp
armnn::INetworkProperties::m_ProfilingEnabled
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:61
armnn::LoadedNetwork
Definition: LoadedNetwork.hpp:42
armnn::ITensorHandle
Definition: ITensorHandle.hpp:16
armnn::BackendOptions::BackendOption
Definition: BackendOptions.hpp:215
armnn::experimental::WorkingMemHandle::GetOutputConnection
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:112
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::experimental::WorkingMemHandle::IsAllocated
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
Definition: WorkingMemHandle.hpp:77
armnn::ITensorHandle::GetImportFlags
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Definition: ITensorHandle.hpp:70
armnn::LoadedNetwork::SendNetworkStructure
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
Definition: LoadedNetwork.cpp:674
BackendRegistry.hpp
armnn::experimental::WorkingMemHandle::OutputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:40
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:395
armnn::Layer::GetInputSlots
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:258
armnn::Graph::InputLayersAccessor::end
ConstIteratorInputs end() const
Definition: Graph.hpp:70
armnn::experimental::IWorkingMemHandle
Definition: IWorkingMemHandle.hpp:20
armnn::BoostLogSeverityMapping::error
@ error
armnn::LoadedNetwork::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: LoadedNetwork.hpp:45
armnn::BackendRegistry::GetMemoryOptimizerStrategies
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
Definition: BackendRegistry.cpp:150
armnn::LoadedNetwork::ImportInputs
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1493
armnn::OutputHandler::GetData
ITensorHandle * GetData() const
Gets the allocated tensor memory.
Definition: OutputHandler.hpp:46
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::ITensorHandleFactory::LegacyFactoryId
static const FactoryId LegacyFactoryId
Definition: ITensorHandleFactory.hpp:50
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
armnn::Layer
Definition: Layer.hpp:230
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::InputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition: Layer.cpp:614
Assert.hpp
armnn::ITensorHandle::DecorateTensorHandle
virtual std::shared_ptr< ITensorHandle > DecorateTensorHandle(const TensorInfo &tensorInfo)
Returns a decorated version of this TensorHandle allowing us to override the TensorInfo for it.
Definition: ITensorHandle.hpp:98
armnn::ITensorHandle::Import
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
Definition: ITensorHandle.hpp:76
armnn::experimental::WorkingMemHandle::GetBindingIdVector
std::vector< LayerBindingId > & GetBindingIdVector()
Definition: WorkingMemHandle.hpp:119
armnn::INetworkProperties::m_OutputNetworkDetailsMethod
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:63
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::OutputQueueDescriptor
MemCopyQueueDescriptor OutputQueueDescriptor
Definition: WorkloadData.hpp:92
Logging.hpp
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
armnn::MemorySource::Undefined
@ Undefined
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
armnn::experimental::WorkingMemHandle::GetInputHandle
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:97
IBackendInternal.hpp
armnn::LoadedNetwork::ImportOutputs
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1639
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::experimental::WorkingMemHandle::GetInputConnections
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:107
armnn::Graph::GetOutputLayers
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:203
armnn::IWorkloadFactory
Definition: WorkloadFactory.hpp:22
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:314
armnn::BackendId::Get
const std::string & Get() const
Definition: BackendId.hpp:138
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343
armnn::Layer::GetOutputHandler
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:245
armnn::CopyToOutputTensor
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
Definition: LoadedNetwork.cpp:1451
armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:335
MARK_WORKLOAD_EXECUTION_BEGIN
#define MARK_WORKLOAD_EXECUTION_BEGIN()
This empty macro has been inserted at LoadedNetwork::Execute.
Definition: Markers.hpp:35
armnn::GetTensorInfo
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
Definition: RefWorkloadUtils.hpp:33
armnn::Graph::GetNumLayers
size_t GetNumLayers() const
Definition: Graph.hpp:205
armnn::ITensorHandleFactory
Definition: ITensorHandleFactory.hpp:46
armnn::TensorHandleFactoryRegistry::GetMatchingImportFactoryId
ITensorHandleFactory::FactoryId GetMatchingImportFactoryId(ITensorHandleFactory::FactoryId copyFactoryId)
Get a matching TensorHandleFactory Id for Memory Import given TensorHandleFactory Id for Memory Copy.
Definition: TensorHandleFactoryRegistry.cpp:72
MARK_OPTIMIZED_NETWORK_LOADED
#define MARK_OPTIMIZED_NETWORK_LOADED()
This empty macro has been inserted at the end of LoadedNetwork constructor.
Definition: Markers.hpp:15
ARMNN_SCOPED_HEAP_PROFILING
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
Definition: HeapProfiling.hpp:45
MARK_INFERENCE_EXECUTION_END
#define MARK_INFERENCE_EXECUTION_END()
This empty macro has been inserted at LoadedNetwork::Execute.
Definition: Markers.hpp:29
armnn::GetOutputTensor
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
Definition: LoadedNetwork.cpp:1480
armnn::CheckFlag
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Definition: MemorySources.hpp:41
Markers.hpp
armnn::Status::Success
@ Success
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::Layer::GetOutputSlots
const std::vector< OutputSlot > & GetOutputSlots() const
Definition: Layer.hpp:259
armnn::Graph::GetNumInputs
size_t GetNumInputs() const
Definition: Graph.hpp:194
armnn::experimental::WorkingMemHandle::MemSyncOutputs
void MemSyncOutputs()
Definition: WorkingMemHandle.cpp:136
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::ITensorHandle::Unmap
virtual void Unmap() const =0
Unmap the tensor data.
armnn::experimental::WorkingMemHandle
Definition: WorkingMemHandle.hpp:29
armnn::RuntimeException
Definition: Exceptions.hpp:120
MARK_INFERENCE_EXECUTION_BEGIN
#define MARK_INFERENCE_EXECUTION_BEGIN()
This empty macro has been inserted at LoadedNetwork::Execute.
Definition: Markers.hpp:23
armnn::experimental::WorkingMemHandle::InputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:33
armnn::LoadedNetwork::GetNetworkGuid
arm::pipe::ProfilingGuid GetNetworkGuid()
Definition: LoadedNetwork.cpp:712
armnn::BaseTensor::GetInfo
const TensorInfo & GetInfo() const
Definition: Tensor.hpp:297
armnn::OutputHandler
Definition: OutputHandler.hpp:28
armnn::BoostLogSeverityMapping::info
@ info
armnn::LoadedNetwork::GetInputTensorInfo
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:717
armnn::LayerType::MemImport
@ MemImport
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:240
armnn::InputQueueDescriptor
MemCopyQueueDescriptor InputQueueDescriptor
Definition: WorkloadData.hpp:91
armnn::TensorHandleFactoryRegistry::AquireMemory
void AquireMemory()
Acquire memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:78
armnn::InputSlot
Definition: Layer.hpp:42
ArmNNProfiling.hpp
armnn::BackendRegistry::GetFactory
FactoryFunction GetFactory(const BackendId &id) const
Definition: BackendRegistry.cpp:57
HeapProfiling.hpp
armnn::ImportedOutputId
unsigned int ImportedOutputId
Definition: Types.hpp:316
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
WorkingMemHandle.hpp
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Graph::GetInputLayers
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:199
armnn::Graph::InputLayersAccessor::begin
ConstIteratorInputs begin() const
Definition: Graph.hpp:65
TensorHandle.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::ITensorHandleFactory::CreateTensorHandle
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
armnn::LoadedNetwork::MakeLoadedNetwork
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
Definition: LoadedNetwork.cpp:174
armnn::LoadedNetwork::CreateWorkingMemHandle
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
Definition: LoadedNetwork.cpp:2032
armnn::ProfilerManager::GetInstance
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:602
armnn::experimental::WorkingMemHandle::GetOutputHandle
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:102
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::MemoryImportException
Definition: Exceptions.hpp:125
std
Definition: BackendId.hpp:149
MemCopyWorkload.hpp
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn::Graph::TopologicalSort
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:191
armnn::LoadedNetwork::Execute
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
Definition: LoadedNetwork.cpp:1810
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:403
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:245
armnn::BackendRegistry::GetAllocators
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
Definition: BackendRegistry.cpp:128
armnn::Layer::GetBackendId
const BackendId & GetBackendId() const
Definition: Layer.hpp:290
armnn::BackendId
Definition: BackendId.hpp:75
armnn::LoadedNetwork::ClearImportedOutputs
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
Definition: LoadedNetwork.cpp:1789
armnn::experimental::WorkingMemHandle::ValidateBindingIds
void ValidateBindingIds()
Definition: WorkingMemHandle.cpp:145
armnn::OutputSlot::GetTensorHandleFactoryId
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
Definition: Layer.cpp:218
armnn::Graph::OutputLayersAccessor::end
ConstIteratorOutputs end() const
Definition: Graph.hpp:90
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::experimental::WorkingMemDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkingMemDescriptor.hpp:20
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::experimental::WorkingMemDescriptor
Definition: WorkingMemDescriptor.hpp:18
armnn::GraphValidationException
Definition: Exceptions.hpp:110
Layer.hpp
armnn::LoadedNetwork::FreeWorkingMemory
void FreeWorkingMemory()
Definition: LoadedNetwork.cpp:1288
armnn::MemSyncQueueDescriptor
Definition: WorkloadData.hpp:99
armnn::ITensorHandleFactory::FactoryId
std::string FactoryId
Definition: ITensorHandleFactory.hpp:49
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::LayerType::Input
@ Input
armnn::experimental::WorkingMemHandle::Allocate
void Allocate() override
Allocate the backing memory required for execution.
Definition: WorkingMemHandle.cpp:100
armnn::OutputHandler::GetTensorInfo
const TensorInfo & GetTensorInfo() const
Gets the matching TensorInfo for the output.
Definition: OutputHandler.hpp:42
armnn::LoadedNetwork::ClearImportedInputs
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
Definition: LoadedNetwork.cpp:1768
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::Graph::GetNumOutputs
size_t GetNumOutputs() const
Definition: Graph.hpp:195
armnn::NullPointerException
Definition: Exceptions.hpp:146
armnn::LayerType
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:496
armnn::Graph
Definition: Graph.hpp:30
IMemoryManager.hpp
armnn::INetworkProperties::m_OutputSource
const MemorySource m_OutputSource
Definition: IRuntime.hpp:66
armnn::Status::Failure
@ Failure
armnn::experimental::WorkingMemDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkingMemDescriptor.hpp:21
armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26
armnn::LayerType::Output
@ Output
armnn::LayerType::Constant
@ Constant
armnn::ITensorHandle::Map
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
armnn::GetInputTensor
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
Definition: LoadedNetwork.cpp:1467
armnn::HasMatchingCapability
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities str...
Definition: BackendHelper.cpp:85