ArmNN
 24.08
LoadedNetwork.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "Network.hpp"
8 #include "LayerFwd.hpp"
9 #include "Profiling.hpp"
10 
11 #include <armnn/Tensor.hpp>
12 
17 
22 
23 #include <client/include/IProfilingService.hpp>
24 #include <client/include/TimelineUtilityMethods.hpp>
25 
26 #include <common/include/LabelsAndEventClasses.hpp>
27 
28 #include <mutex>
29 #include <condition_variable>
30 #include <unordered_map>
31 
32 namespace cl
33 {
34 class Context;
35 class CommandQueue;
36 class Device;
37 }
38 
39 namespace armnn
40 {
41 
43 {
44 public:
45  using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;
46 
48  {
50  }
51 
52  /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
53  /// overlapped execution by calling this function from different threads.
54  std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
55 
58 
59  std::vector<ImportedInputId> ImportInputs(const InputTensors& inputTensors,
60  MemorySource forceImportMemorySource = MemorySource::Undefined);
61  std::vector<ImportedOutputId> ImportOutputs(const OutputTensors& outputTensors,
62  MemorySource forceImportMemorySource = MemorySource::Undefined);
63 
64  void ClearImportedInputs(const std::vector<ImportedInputId> inputIds);
65  void ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds);
66 
67  /// Single thread execution of the loaded network
68  Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors,
69  std::vector<ImportedInputId> preImportedInputIds = {},
70  std::vector<ImportedOutputId> preImportedOutputIds = {});
71 
72  /// Thread safe execution of the loaded network
73  Status Execute(const InputTensors& inputTensors,
74  const OutputTensors& outputTensors,
75  IWorkingMemHandle& workingMemHandle,
76  std::vector<ImportedInputId> preImportedInputs = {},
77  std::vector<ImportedOutputId> preImportedOutputs = {});
78 
79  static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
80  std::string& errorMessage,
81  const INetworkProperties& networkProperties,
82  arm::pipe::IProfilingService* profilingService);
83 
84  // NOTE we return by reference as the purpose of this method is only to provide
85  // access to the private m_Profiler and in theory we should not need to increment
86  // the shared_ptr's reference counter
/// @return the profiler held by the optimized network; returned by const reference so no shared_ptr copy (and no refcount traffic) occurs.
87  const std::shared_ptr<IProfiler>& GetProfiler() const { return m_OptimizedNetwork->GetProfiler(); }
88 
89  void FreeWorkingMemory();
90 
92 
93  void SendNetworkStructure(arm::pipe::IProfilingService& profilingService);
94 
96  {
97  return m_NetworkProperties.m_AsyncEnabled;
98  }
99 
100  arm::pipe::ProfilingGuid GetNetworkGuid();
101 
102 private:
103 
104 
105  void AllocateWorkingMemory(
106 #if !defined(ARMNN_DISABLE_THREADS)
107  std::lock_guard<std::mutex>& lock
108 #endif
109  );
110  void AllocateAndExecuteConstantWorkloads();
111  void AllocateAndExecuteConstantWorkloadsAsync();
112 
113  std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;
114  std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;
115 
116  std::unique_ptr<IMemoryOptimizerStrategy> m_ConstantStrategy = std::make_unique<SingleAxisPriorityList>();
117 
118  LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
119  const INetworkProperties& networkProperties,
120  arm::pipe::IProfilingService* profilingService);
121 
122  void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
123 
124  void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
125 
126  void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle);
127 
128  void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle);
129 
130  bool Execute(std::unique_ptr<arm::pipe::TimelineUtilityMethods>& timelineUtils,
131  arm::pipe::ProfilingGuid inferenceGuid);
132 
133  const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;
134 
135  inline LayerBindingId ValidateImportedInputID(ImportedInputId id);
136  inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id);
137 
138  void CreateMemoryProfile();
139  void CreateMemoryProfileAsync();
140 
141  std::unique_ptr<MemoryManager> CreateExternalMemoryManger(
142  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemory);
143 
144  using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;
145 
146  BackendPtrMap m_Backends;
147  std::vector<IBackendInternal::IMemoryManagerSharedPtr> m_BackendMemoryMangers;
148 
149  using WorkloadFactoryMap = std::unordered_map<BackendId, IBackendInternal::IWorkloadFactoryPtr>;
150  WorkloadFactoryMap m_WorkloadFactories;
151 
152  std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
153 
154  WorkloadQueue m_InputQueue;
155  WorkloadQueue m_WorkloadQueue;
156  WorkloadQueue m_OutputQueue;
157 
158 #if !defined(ARMNN_DISABLE_THREADS)
159  mutable std::mutex m_WorkingMemMutex;
160 #endif
161 
162  bool m_IsWorkingMemAllocated = false;
163 
164  INetworkProperties m_NetworkProperties;
165 
166  TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
167 
168  // NOTE: raw pointer because the profiling service is controlled by the Runtime
169  arm::pipe::IProfilingService* m_ProfilingService;
170 
// RAII pin for a pre-imported tensor: pairs a layer binding id with the imported
// ITensorHandle and calls Unimport() on the handle when the pin is destroyed.
171  struct ImportedTensorHandlePin
172  {
// NOTE(review): the default constructor leaves m_LayerBindingId uninitialized —
// confirm all call sites assign it before reading.
173  ImportedTensorHandlePin()
174  {}
175 
176  ImportedTensorHandlePin(LayerBindingId layerBindingId,
177  std::unique_ptr<ITensorHandle> tensorHandle)
178  : m_LayerBindingId(layerBindingId)
179  , m_TensorHandle(std::move(tensorHandle))
180  {}
181 
// Move-only: the std::unique_ptr member implicitly deletes copy operations.
182  ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default;
183 
// Undo the memory import (if a handle is held) before the handle is destroyed.
184  ~ImportedTensorHandlePin()
185  {
186  if (m_TensorHandle)
187  {
188  m_TensorHandle->Unimport();
189  }
190  }
191 
192  LayerBindingId m_LayerBindingId;
193  std::unique_ptr<ITensorHandle> m_TensorHandle;
194  };
195 
196  std::vector<ImportedTensorHandlePin> m_PreImportedInputHandles;
197  std::vector<ImportedTensorHandlePin> m_PreImportedOutputHandles;
198 
199  ImportedInputId m_CurImportedInputId = 0;
200  ImportedInputId m_CurImportedOutputId = 0;
201 
202  std::unordered_map<BackendId, std::vector<MemBlock>> m_MemBlockMap;
203  std::unordered_map<BackendId, std::vector<MemBin>> m_MemBinMap;
204 
205  std::vector<ITensorHandle*> m_Tensorhandles;
206 
207  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> m_TensorMemory;
208 
209  std::unique_ptr<MemoryManager> m_ExternalMemoryManager;
210 
211  std::unordered_map<BackendId, bool> m_SupportsExternallyManagedMemory;
212 
213  // For each network input/output binding, records the indexes into the workload
214  // queue together with the input/output slot indexes connected to that binding.
215  struct WorkloadIndices
216  {
217  unsigned int m_WorkloadIndex;
218  unsigned int m_SlotIndex;
219  };
220 
221  struct OutputWorkloadIndices
222  {
223  WorkloadIndices m_OutputSlotIndices;
224  std::vector<WorkloadIndices> m_InputSlotIndices;
225  };
226  std::unordered_map<LayerBindingId, std::vector<WorkloadIndices>> m_InputWorkloadSlotPairs;
227  std::unordered_map<LayerBindingId, OutputWorkloadIndices> m_OutputWorkloadSlotPairs;
228  std::vector<bool> m_IsInputImported;
229  std::vector<bool> m_IsOutputImported;
230 
231 };
232 
233 }
armnn::BindableLayer
Definition: Layer.hpp:470
armnn::ImportedInputId
unsigned int ImportedInputId
Definition: Types.hpp:310
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:321
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:59
armnn::LoadedNetwork::GetOutputTensorInfo
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:733
IMemoryOptimizerStrategy.hpp
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:394
armnn::LoadedNetwork::EnqueueWorkload
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
Definition: LoadedNetwork.cpp:872
armnn::LoadedNetwork::RegisterDebugCallback
void RegisterDebugCallback(const DebugCallbackFunction &func)
Definition: LoadedNetwork.cpp:2296
DefaultAllocator.hpp
armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:23
armnn::TensorInfo
Definition: Tensor.hpp:152
Profiling.hpp
armnn::LoadedNetwork
Definition: LoadedNetwork.hpp:42
armnn::ITensorHandle
Definition: ITensorHandle.hpp:16
armnn::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: ExecutionFrame.hpp:13
armnn::LoadedNetwork::SendNetworkStructure
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
Definition: LoadedNetwork.cpp:672
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:395
TensorHandleFactoryRegistry.hpp
armnn::LoadedNetwork::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: LoadedNetwork.hpp:45
LayerFwd.hpp
WorkloadFactory.hpp
armnn::LoadedNetwork::ImportInputs
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1486
armnn::Layer
Definition: Layer.hpp:230
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::MemorySource::Undefined
@ Undefined
IBackendInternal.hpp
Workload.hpp
armnn::LoadedNetwork::ImportOutputs
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1632
armnn::IWorkloadFactory
Definition: WorkloadFactory.hpp:22
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:309
armnn::INetworkProperties
Definition: IRuntime.hpp:43
SingleAxisPriorityList.hpp
armnn::LoadedNetwork::GetNetworkGuid
arm::pipe::ProfilingGuid GetNetworkGuid()
Definition: LoadedNetwork.cpp:710
armnn::LoadedNetwork::GetInputTensorInfo
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:715
armnn::ImportedOutputId
unsigned int ImportedOutputId
Definition: Types.hpp:311
Tensor.hpp
armnn::Status
Status
Definition: Types.hpp:42
MemoryManager.hpp
armnn::LoadedNetwork::MakeLoadedNetwork
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
Definition: LoadedNetwork.cpp:173
armnn::LoadedNetwork::CreateWorkingMemHandle
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
Definition: LoadedNetwork.cpp:2025
Network.hpp
std
Definition: BackendId.hpp:149
armnn::LoadedNetwork::Execute
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
Definition: LoadedNetwork.cpp:1803
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:398
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn::LoadedNetwork::GetProfiler
const std::shared_ptr< IProfiler > & GetProfiler() const
Definition: LoadedNetwork.hpp:87
armnn::LoadedNetwork::ClearImportedOutputs
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
Definition: LoadedNetwork.cpp:1782
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::LoadedNetwork::FreeWorkingMemory
void FreeWorkingMemory()
Definition: LoadedNetwork.cpp:1286
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::LoadedNetwork::IsAsyncEnabled
bool IsAsyncEnabled()
Definition: LoadedNetwork.hpp:95
armnn::experimental::BackendPtrMap
std::unordered_map< BackendId, IBackendInternalUniquePtr > BackendPtrMap
Definition: WorkingMemHandle.hpp:27
armnn::LoadedNetwork::ClearImportedInputs
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
Definition: LoadedNetwork.cpp:1761
armnn::LoadedNetwork::~LoadedNetwork
~LoadedNetwork()
Definition: LoadedNetwork.hpp:47