ArmNN 25.02
LoadedNetwork.cpp

//
// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Markers.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnn/backends/TensorHandle.hpp>

#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>

#include <common/include/Processes.hpp>

#include <fmt/format.h>

namespace armnn
{

using namespace std;
using namespace arm::pipe;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        if (!source)
        {
            throw armnn::NullPointerException("Null source found on input to layer \"" + layerName + "\".");
        }
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

/**
 * Performs a sanity check to ensure that the combination of input and output memory sources matches the
 * values of importEnabled and exportEnabled that were specified during optimization. During optimization the
 * tensor handle factories are chosen based on whether import and export are enabled. If the user then specifies
 * something incompatible here, it can lead to problems.
 *
 * @param optimizedOptions
 * @param networkProperties
 */
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options. During the optimize phase the values of importEnabled and
    // exportEnabled are added as backend options.
    const vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            return backend.GetBackendId().Get() == "Global";
        });
    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }

    // Now that we have values for import and export, compare them to the MemorySource variables.
    // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
    if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
        (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }

    if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
        (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}
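
// [Editor's illustrative sketch - not part of LoadedNetwork.cpp.] The check above only
// passes when the load-time memory sources agree with the import/export flags recorded
// at optimization time. A minimal, hedged sketch of a caller that keeps the two in step
// ('runtime' and 'network' are assumed to exist; the INetworkProperties constructor
// takes further arguments that are elided here):
//
//     armnn::OptimizerOptionsOpaque optimizerOptions;
//     optimizerOptions.SetImportEnabled(true);   // inputs will be imported
//     optimizerOptions.SetExportEnabled(false);  // outputs will be copied
//     armnn::IOptimizedNetworkPtr optNet =
//         armnn::Optimize(*network, {armnn::Compute::CpuAcc},
//                         runtime->GetDeviceSpec(), optimizerOptions);
//
//     // Must mirror the options above, or loading throws InvalidArgumentException:
//     //   m_InputSource  != Undefined <=> ImportEnabled
//     //   m_OutputSource != Undefined <=> ExportEnabled
//     armnn::INetworkProperties properties(/*...,*/ armnn::MemorySource::Malloc,
//                                          armnn::MemorySource::Undefined /*, ...*/);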

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
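
// [Editor's note - illustrative, not part of the original source.] MakeLoadedNetwork
// reports failure by returning an empty pointer and filling 'errorMessage' rather than
// letting exceptions escape, so a caller is expected to check both:
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loaded =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage,
//                                          networkProperties, &profilingService);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << "Load failed: " << errorMessage;
//     }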

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
    m_OptimizedNetwork(std::move(net)),
    m_NetworkProperties(networkProperties),
    m_TensorHandleFactoryRegistry(),
    m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // We need to check that the memory sources match up with the values of import and export specified during
    // the optimize phase. If they don't, this will throw an exception.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example, the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure topological order
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

    m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
    m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            m_SupportsExternallyManagedMemory[backend->GetId()] = false;
            useInternalMemoryManager = true;

            if (HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                      backend->GetCapabilities())
                && (m_NetworkProperties.m_ExternalMemoryManagementEnabled))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
                useInternalMemoryManager = false;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }
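
    // [Editor's note - illustrative, not part of the original source.] The loop above
    // creates exactly one IBackendInternal and one IWorkloadFactory per distinct backend
    // id found in the graph. A condensed sketch of the capability probe it relies on:
    //
    //     auto backend = BackendRegistryInstance().GetFactory(backendId)();
    //     bool externallyManaged =
    //         HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
    //                               backend->GetCapabilities());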

    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);
        bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::MemImport:
            {
                // If import is enabled, IsMemoryManaged must be set to false
                // when creating TensorHandles.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !supportsExternalManager &&
                                           (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
                break;
            }
            case LayerType::Constant:
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                break;
            }
            default:
            {
                // Look for a layer with 1 OutputSlot which has 1 connection, where that connection is to an
                // Output layer. If export is enabled, disable memory management so we can export; otherwise
                // we do a copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager &&
                                               (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager);
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start-of-life event...
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // ...and with the process ID.
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    std::vector<IWorkload*> ConstWorkloads;

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                            ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    m_WorkloadQueue.emplace_back(std::move(workload));

                    if (layer->GetType() == LayerType::Constant)
                    {
                        // Place the constant workloads into a queue so that they can be executed first.
                        ConstWorkloads.emplace_back(m_WorkloadQueue.back().get());
                    }

                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get the indices of all workloads connected to each input, and
        // check whether they support tensor handle replacement.
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                // Skip the check if the input is connected directly to an output.
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                    supportsReplacement &= workload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get the matching import factory Id.
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get the indices of all workloads connected to each output, and
        // check whether they support tensor handle replacement.
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            // Skip if the output is connected directly to an input.
            if (outputSlot->GetOwningLayer().GetType() != LayerType::Input)
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                              outputSlot->CalculateIndexOnOwner()};

                bool supportsReplacement = true;
                auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

                for (auto& inputSlot : outputSlot->GetConnections())
                {
                    if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                    {
                        auto inWorkloadIndex = std::distance(order.begin(),
                                                             order.GetPosInGraph(inputSlot->GetOwningLayer()));
                        inWorkloadIndex -= noOfInputs;
                        indices.m_InputSlotIndices.emplace_back(
                            WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
                                            inputSlot->GetSlotIndex()});
                        auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                        supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                    }
                }

                ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
                // Get the matching import factory Id.
                ITensorHandleFactory::FactoryId importFactoryId =
                    m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
                ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

                if (supportsReplacement && importFactory)
                {
                    m_PreImportedOutputHandles.emplace_back(
                        bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
                }
                else
                {
                    m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
                }
            }
        }
    }
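
    // [Editor's note - illustrative, not part of the original source.] The workload
    // indices computed above are topological graph positions shifted down by the number
    // of input layers, because Input layers never get entries in m_WorkloadQueue. For
    // example, with two inputs, the layer at topological position 5 owns m_WorkloadQueue[3]:
    //
    //     auto pos = std::distance(order.begin(), order.GetPosInGraph(someLayer)); // e.g. 5
    //     auto workloadIndex = pos - noOfInputs;                                   // 5 - 2 = 3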

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        CreateMemoryProfile();
        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }
        m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

        // Sort m_TensorMemory so that its order matches m_Tensorhandles.
        std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                     const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                  {
                      return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                  });
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post-allocation configuration for each workload.

    if (useInternalMemoryManager)
    {
        // Set up memory.
        m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
    }

    if (useExternalMemoryManager)
    {
        AllocateAndExecuteConstantWorkloads();
    }

    for (const auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }

    // Execute all constant layer workloads.
    for (auto workload : ConstWorkloads)
    {
        workload->Execute();
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        if (inputLayer->GetNumOutputSlots() != 1)
        {
            throw armnn::GraphValidationException("Input layer should have exactly 1 output slot");
        }

        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        if (outputLayer->GetNumInputSlots() != 1)
        {
            throw armnn::GraphValidationException("Output layer should have exactly 1 input slot");
        }

        if (!outputLayer->GetInputSlot(0).GetConnection())
        {
            throw armnn::GraphValidationException("Input slot on Output layer must be connected");
        }

        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    if (!workloadFactory)
    {
        throw armnn::NullPointerException("No workload factory");
    }

    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace
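
// [Editor's note - illustrative, not part of the original source.] InputTensors and
// OutputTensors are vectors of {LayerBindingId, (Const)Tensor} pairs; WorkloadData
// above wraps each pair in a passthrough tensor handle. Building them typically looks
// like this (recent ArmNN versions require the input TensorInfo to be flagged constant
// before it is wrapped in a ConstTensor):
//
//     std::vector<float> inData(inputInfo.GetNumElements());
//     std::vector<float> outData(outputInfo.GetNumElements());
//     inputInfo.SetConstant(true);
//     armnn::InputTensors  inputs{{0, armnn::ConstTensor(inputInfo, inData.data())}};
//     armnn::OutputTensors outputs{{0, armnn::Tensor(outputInfo, outData.data())}};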

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Input tensors can be provided as parameters or pre-imported. Either way, the number of
    // tensors should match the number of inputs.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced.
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // The input tensor handle is not imported yet, so proceed to enqueue the input.
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced.
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                if (!inputTensorHandle)
                {
                    throw armnn::NullPointerException("Data should have been allocated.");
                }

                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());

                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                if (!syncWorkload)
                {
                    throw armnn::NullPointerException("No sync workload created");
                }

                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                // The output tensor handle is not imported yet, so proceed to enqueue the output.
                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
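
// [Editor's note - illustrative, not part of the original source.] From the public API
// this path is normally driven through IRuntime, e.g.:
//
//     armnn::Status status = runtime->EnqueueWorkload(netId, inputs, outputs);
//     if (status != armnn::Status::Success) { /* handle failure */ }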

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    if (layer.GetNumOutputSlots() != 1)
    {
        throw armnn::GraphValidationException("Can only handle Input Layer with one output");
    }

    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();

    if (!outputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a memcopy workload for the input, since we did not import it.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        if (!inputWorkload)
        {
            throw armnn::NullPointerException("No input workload created");
        }

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
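
// [Editor's note - illustrative, not part of the original source.] The import fast-path
// above hinges on CheckFlag: a handle advertises the MemorySources it can wrap via
// GetImportFlags(), and CheckFlag is a simple bit test, roughly:
//
//     bool canImport =
//         (handle->GetImportFlags() & static_cast<MemorySourceFlags>(MemorySource::Malloc)) != 0;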

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    if (layer.GetNumInputSlots() != 1)
    {
        throw armnn::GraphValidationException("Output Layer should have exactly one input.");
    }

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    if (!inputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer
    // d) The output pointer is allocated via malloc (other types will be supported in a later release)
    // e) m_NetworkProperties.m_OutputSource != MemorySource::Undefined
    bool needMemCopy = true;
    if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert a synchronization workload.
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    if (!syncWorkload)
                    {
                        throw armnn::NullPointerException("No sync workload created");
                    }
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        if (!outputWorkload)
        {
            throw armnn::NullPointerException("No output workload created");
        }

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}

void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

#if !defined(ARMNN_DISABLE_THREADS)
    // This unused parameter makes sure we can only call this function with a valid lock.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }

                workload->Execute();

                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer
// d) The output pointer is allocated via malloc (other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    if (!outputTensorHandle)
    {
        throw armnn::NullPointerException("Data should have been allocated.");
    }

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}
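
// [Editor's note - illustrative, not part of the original source.] Callers that cannot
// tolerate an exception can probe first with ITensorHandle::CanBeImported, the same
// pattern ImportInputs/ImportOutputs use below:
//
//     if (handle->CanBeImported(mem, armnn::MemorySource::Malloc))
//     {
//         handle->Import(mem, armnn::MemorySource::Malloc);
//     }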

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    // Cannot import if forceImportMemorySource is undefined.
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }
    // The number of pre-imported tensors should not exceed the number of inputs.
    if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
    {
        throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
    }

    std::vector<ImportedInputId> importedInputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    unsigned int inputIndex = 0;
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

        if (!outputTensorHandle)
        {
            inputIndex++;
            continue;
        }

        auto layerBindingId = inputLayer->GetBindingId();
        auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
        {
            return inputTensor.first == layerBindingId;
        });

        if (it == inputTensors.end())
        {
            inputIndex++;
            continue;
        }

        const auto& inputTensor = *it;
        std::unique_ptr<ITensorHandle> passThroughTensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                           inputTensor.second.GetMemoryArea());

        try
        {
            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import input_"
                             << inputIndex << " : " << exception.what();
            passThroughTensorHandle->Unmap();
        }
        inputIndex++;
    }

    return importedInputs;
}
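
// [Editor's note - illustrative, not part of the original source.] Pre-importing lets
// repeated inferences skip the per-call input copy. A hedged usage sketch through
// IRuntime (the returned ids are then passed to EnqueueWorkload in place of the
// corresponding input tensors):
//
//     std::vector<armnn::ImportedInputId> importedIds =
//         runtime->ImportInputs(netId, inputs, armnn::MemorySource::Malloc);
//     runtime->EnqueueWorkload(netId, {}, outputs, importedIds, {});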

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    // Cannot import if forceImportMemorySource is undefined.
    if (forceImportMemorySource == MemorySource::Undefined)
    {
        throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }
    // If forceImportMemorySource is defined, try to import if the memory is aligned.
    if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
    {
        throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
    }
    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    unsigned int outputIndex = 0;
    for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
    {
        auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
        if (!inputTensorHandle)
        {
            outputIndex++;
            continue;
        }

        auto layerBindingId = outputLayer->GetBindingId();
        auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
        {
            return outputTensor.first == layerBindingId;
        });

        if (it == outputTensors.end())
        {
            outputIndex++;
            continue;
        }

        const auto outputTensor = *it;
        try
        {
            // Check if the output memory can be imported.
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
        }
        catch (const MemoryImportException& exception)
        {
            ARMNN_LOG(error) << "An error occurred attempting to import output_"
                             << outputIndex << " : " << exception.what();
        }
        outputIndex++;
    }
    return importedOutputs;
}

void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id >= m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id >= m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr->RegisterDebugCallback(func);
    }
}
1574 
1575 void LoadedNetwork::CreateMemoryProfile()
1576 {
1577  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
1578  // is a TensorHandle, the function just returns it
1579  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
1580  {
1581  ITensorHandle* ancestor = subTensorHandle;
1582  while (ancestor && ancestor->GetParent())
1583  {
1584  ancestor = ancestor->GetParent();
1585  }
1586  return ancestor;
1587  };
1588 
1589  struct PartialBlock
1590  {
1591  unsigned int m_StartOfLife;
1592  unsigned int m_Lifetime;
1593 
1594  size_t m_MemSize;
1595  unsigned int m_Index;
1596 
1597  BackendId m_BackendId;
1598  };
1599 
1600  auto align = [](size_t numToAlign)
1601  {
1602  const size_t alignment = sizeof(float);
1603  return ((numToAlign + alignment - 1) / alignment) * alignment;
1604  };
1605 
1606  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
1607 
1608  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
1609  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
1610 
1611  unsigned int timestep = 0;
1612  unsigned int outputIndex = 0;
1613  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1614 
1615  for (auto&& layer : order)
1616  {
1617  const LayerType& layerType = layer->GetType();
1618  // Don't manage memory if importing.
1619  if (layerType == LayerType::Input && inputImportingEnabled)
1620  {
1621  continue;
1622  }
1623  // Don't manage memory if importing.
1624  if (layerType == LayerType::Output && outputImportingEnabled
1625  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
1626  {
1627  continue;
1628  }
1629  // Because Constant Layer memory can not be shared, the memory must persist for the lifetime of execution,
1630  // management is done separately.
1631  if (layerType == LayerType::Constant)
1632  {
1633  continue;
1634  }
1635 
1636  BackendId backendId = layer->GetBackendId();
1637  for (auto& outputSlot : layer->GetOutputSlots())
1638  {
1639  if (!m_SupportsExternallyManagedMemory[backendId])
1640  {
1641  continue;
1642  }
1643 
1644  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
1645  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1646 
1647  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
1648  {
1649  PartialBlock partialBlock;
1650 
1651  partialBlock.m_StartOfLife = timestep;
1652 
1653  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
1654  partialBlock.m_MemSize = alignedSize;
1655  partialBlock.m_Index = outputIndex++;
1656  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
1657  partialBlock.m_BackendId = backendId;
1658 
1659  if (partialBlock.m_Lifetime == 0)
1660  {
1661  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1662  partialBlock.m_StartOfLife,
1663  partialBlock.m_MemSize,
1664  0,
1665  partialBlock.m_Index);
1666  }
1667  else
1668  {
1669  memBlockTrackerMap[tensorHandle] = partialBlock;
1670  }
1671  m_Tensorhandles.push_back(tensorHandle);
1672 
1673  }
1674  else
1675  {
1676  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
1677  }
1678  }
1679 
1680  for (auto& inputSlot : layer->GetInputSlots())
1681  {
1682  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
1683  const LayerType& owningLayerType = connectedInputLayer.GetType();
1684 
1685  if (owningLayerType == LayerType::Constant)
1686  {
1687  continue;
1688  }
1689  if (inputImportingEnabled && owningLayerType == LayerType::Input)
1690  {
1691  continue;
1692  }
1693  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
1694  {
1695  continue;
1696  }
1697 
1698  auto outputSlot = inputSlot.GetConnectedOutputSlot();
1699 
1700  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
1701  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1702 
1703  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
1704 
1705  auto& lifetime = partialBlock.m_Lifetime;
1706  --lifetime;
1707 
1708  if (lifetime == 0)
1709  {
1710  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1711  timestep,
1712  partialBlock.m_MemSize,
1713  0,
1714  partialBlock.m_Index);
1715  }
1716  }
1717  ++timestep;
1718  }
1719 
1720 }
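// A worked example of the bookkeeping above, with illustrative values: a tensor
// produced by an output slot at timestep 2 with three consuming input slots
// starts as a PartialBlock with m_StartOfLife == 2 and m_Lifetime == 3. Each
// consuming layer visited decrements the lifetime, and when it reaches zero at,
// say, timestep 5, the finished block is recorded in m_MemBlockMap as live over
// timesteps [2, 5].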
1721 
1722 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
1723  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
1724 {
1725  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
1726  auto allocatorMap = BackendRegistryInstance().GetAllocators();
1727 
1728  for (auto& backend : m_MemBinMap)
1729  {
1730  std::vector<BufferStorage> bufferStorageVec;
1731 
1732  std::shared_ptr<ICustomAllocator> backendAllocator;
1733  if (allocatorMap.find(backend.first) != allocatorMap.end())
1734  {
1735  backendAllocator = allocatorMap[backend.first];
1736  }
1737  else
1738  {
1739  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
1740  }
1741 
1742  for (auto& memBin : backend.second)
1743  {
1744  BufferStorage bufferStorage;
1745  bufferStorage.m_BufferSize = memBin.m_MemSize;
1746  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
1747 
1748  for (auto& memBlock : memBin.m_MemBlocks)
1749  {
1750  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
1751 
1752  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
1753  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
1754  }
1755 
1756  bufferStorageVec.emplace_back(std::move(bufferStorage));
1757  }
1758 
1759  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
1760  }
1761 
1762  return memoryManager;
1763 }
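// Sketch of the resulting layout: each MemBin becomes one BufferStorage (a
// single allocation of m_BufferSize bytes) and each MemBlock inside it becomes
// a shared TensorMemory holding that block's byte offset into the allocation.
// The same TensorMemory is also returned to the caller via tensorMemoryVec,
// tagged with the allocator's memory source. The trailing 4 passed to
// StoreMemToAllocate is a type-alignment hint (assumed here to be float-sized
// alignment).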
1764 
1765 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
1766 {
1767  try
1768  {
1769  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
1770  if (!importedTensorHandlePin.m_TensorHandle)
1771  {
1772  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
1773  "PreImportedInput: {} has been deleted", id));
1774  }
1775  return importedTensorHandlePin.m_LayerBindingId;
1776  }
1777  catch (const std::out_of_range&)
1778  {
1779  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
1780  }
1781 }
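// Note: the .at() lookup throws std::out_of_range for an id that was never
// imported, while the null-handle check catches an id whose import was already
// cleared via ClearImportedInputs; both are reported as InvalidArgumentException.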
1782 
1783 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
1784 {
1785  try
1786  {
1787  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
1788  if (!importedTensorHandlePin.m_TensorHandle)
1789  {
1790  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
1791  "PreImportedOutput: {} has been deleted", id));
1792  }
1793  return importedTensorHandlePin.m_LayerBindingId;
1794  }
1795  catch (const std::out_of_range&)
1796  {
1797  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
1798  }
1799 }
1800 
1801 } // namespace armnn