ArmNN 25.02
IRuntime.hpp
//
// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "BackendOptions.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"

#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>

#include <memory>
#include <map>

namespace arm
{

namespace pipe
{
class ILocalPacketHandler;
using ILocalPacketHandlerSharedPtr = std::shared_ptr<ILocalPacketHandler>;
} // pipe
} // arm

namespace armnn
{

using NetworkId = int;

class IGpuAccTunedParameters;

struct RuntimeImpl;
class IRuntime;
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;

struct INetworkProperties
{
    INetworkProperties(MemorySource inputSource,
                       MemorySource outputSource,
                       bool profilingEnabled = false,
                       ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
                       bool externalMemoryManagementEnabled = false)
        : m_ProfilingEnabled(profilingEnabled),
          m_OutputNetworkDetailsMethod(detailsMethod),
          m_InputSource(inputSource),
          m_OutputSource(outputSource),
          m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
    {}

    const bool m_ProfilingEnabled;

    const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;

    const MemorySource m_InputSource;
    const MemorySource m_OutputSource;

    const bool m_ExternalMemoryManagementEnabled;

    virtual ~INetworkProperties() {}
};
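
// Example (illustrative sketch): properties for a network whose input and output
// buffers are caller-allocated with malloc and imported by the runtime, with
// per-workload profiling enabled.
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
// INetworkProperties properties(MemorySource::Malloc,  // input source
//                               MemorySource::Malloc,  // output source
//                               true);                 // profiling enabled
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~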

class IRuntime
{
public:
    struct CreationOptions
    {
        CreationOptions()
            : m_GpuAccTunedParameters(nullptr)
            , m_EnableGpuProfiling(false)
            , m_DynamicBackendsPath("")
            , m_ProtectedMode(false)
            , m_CustomAllocatorMap()
            , m_MemoryOptimizerStrategyMap()
        {}

        /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
        /// It will also be updated with new tuned parameters if it is configured to do so.
        std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;

        /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
        bool m_EnableGpuProfiling;

        /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive.
        /// Only a single path is allowed for the override.
        /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
        std::string m_DynamicBackendsPath;

        /// Setting this flag will allow the user to create the Runtime in protected mode.
        /// It will run all the inferences on protected memory and requires that
        /// INetworkProperties::m_ImportEnabled is set to true with the MemorySource::DmaBufProtected option.
        /// This requires that the backend supports Protected Memory and has an allocator capable of
        /// allocating Protected Memory associated with it.
        bool m_ProtectedMode;

        /// @brief A map to define a custom memory allocator for specific backend Ids.
        ///
        /// @details A Custom Allocator is used for allocation of working memory in the backends.
        /// Set this if you need to take control of how memory is allocated on a backend. Required for
        /// Protected Mode in order to correctly allocate Protected Memory.
        ///
        /// @note Only supported for GpuAcc.
        std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
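
        // Example (illustrative sketch): registering a custom allocator for the
        // GpuAcc backend on a CreationOptions instance `options`.
        // `MyProtectedAllocator` is a hypothetical class implementing the
        // ICustomAllocator interface; it is not part of Arm NN.
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        // options.m_ProtectedMode = true;
        // options.m_CustomAllocatorMap["GpuAcc"] = std::make_shared<MyProtectedAllocator>();
        // IRuntimePtr runtime = IRuntime::Create(options);
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~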

        /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
        ///
        /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
        /// a network's memory requirements. This can also be used to return a pre-computed solution
        /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
        /// for a given backend.
        std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;
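
        // Example (illustrative sketch): selecting the built-in ConstantMemoryStrategy
        // for the CpuAcc backend. Its default constructibility and header location
        // are assumptions here, not guaranteed by this header.
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        // options.m_MemoryOptimizerStrategyMap["CpuAcc"] =
        //     std::make_shared<ConstantMemoryStrategy>();
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~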

        struct ExternalProfilingOptions
        {
            ExternalProfilingOptions()
                : m_EnableProfiling(false)
                , m_TimelineEnabled(false)
                , m_OutgoingCaptureFile("")
                , m_IncomingCaptureFile("")
                , m_FileOnly(false)
                , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
                , m_FileFormat("binary")
                , m_LocalPacketHandlers()
            {}

            /// Indicates whether external profiling is enabled or not.
            bool m_EnableProfiling;
            /// Indicates whether external timeline profiling is enabled or not.
            bool m_TimelineEnabled;
            /// Path to a file in which outgoing timeline profiling messages will be stored.
            std::string m_OutgoingCaptureFile;
            /// Path to a file in which incoming timeline profiling messages will be stored.
            std::string m_IncomingCaptureFile;
            /// Enable profiling output to file only.
            bool m_FileOnly;
            /// The interval at which captured profiling messages will be flushed.
            uint32_t m_CapturePeriod;
            /// The format of the file used for outputting profiling data.
            std::string m_FileFormat;
            std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
        };
        ExternalProfilingOptions m_ProfilingOptions;
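
        // Example (illustrative sketch): enabling external profiling that writes
        // timeline data to a capture file. The path is a placeholder.
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        // options.m_ProfilingOptions.m_EnableProfiling     = true;
        // options.m_ProfilingOptions.m_TimelineEnabled     = true;
        // options.m_ProfilingOptions.m_FileOnly            = true;
        // options.m_ProfilingOptions.m_OutgoingCaptureFile = "/tmp/armnn_capture.bin";
        // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~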

        /// Pass backend specific options.
        ///
        /// For example, to enable GpuAcc tuning add the following:
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOption.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///       {
        ///         {"TuningLevel", 2},
        ///         {"TuningFile", filename},
        ///         {"MemoryOptimizerStrategy", strategyname}
        ///       }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        /// Execute representative workloads through the runtime to generate tuning data.
        /// The tuning file is written once the runtime is destroyed.
        ///
        /// To execute with the tuning data, start up with just the tuning file specified:
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOption.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///       {
        ///         {"TuningFile", filename}
        ///       }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        ///
        /// The following backend options are available:
        /// AllBackends:
        ///   "MemoryOptimizerStrategy" : string [strategynameString]
        ///     (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
        /// GpuAcc:
        ///   "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
        ///   "TuningFile" : string [filenameString]
        ///   "KernelProfilingEnabled" : bool [true | false]
        std::vector<BackendOptions> m_BackendOptions;
    };

    static IRuntime* CreateRaw(const CreationOptions& options);
    static IRuntimePtr Create(const CreationOptions& options);
    static void Destroy(IRuntime* runtime);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
    /// @param [in] network - Complete network to load into the IRuntime.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
    /// @param [in] network Complete network to load into the IRuntime.
    /// @param [out] errorMessage Error message if there were any errors.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage);

    /// Loads a complete network into the IRuntime, with behaviour controlled by the given network properties.
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage,
                       const INetworkProperties& networkProperties);
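
    // Example (illustrative sketch): creating a runtime and loading an optimized
    // network. `net` is assumed to be an INetworkPtr built elsewhere; the backend
    // preferences are placeholders.
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    // IRuntime::CreationOptions options;
    // IRuntimePtr runtime = IRuntime::Create(options);
    //
    // IOptimizedNetworkPtr optNet = Optimize(*net, {"CpuAcc", "CpuRef"},
    //                                        runtime->GetDeviceSpec());
    // NetworkId networkId;
    // std::string errorMessage;
    // Status status = runtime->LoadNetwork(networkId, std::move(optNet), errorMessage);
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~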

    TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;

    /// ImportInputs separates the importing and mapping of InputTensors from network execution,
    /// allowing a set of InputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the InputTensors.
    /// Whether a tensor can be imported or not is backend specific.
    std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
                                              MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// ImportOutputs separates the importing and mapping of OutputTensors from network execution,
    /// allowing a set of OutputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the OutputTensors.
    /// Whether a tensor can be imported or not is backend specific.
    std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
                                                MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// Evaluates a network using the input in inputTensors, with the outputs filled into outputTensors.
    Status EnqueueWorkload(NetworkId networkId,
                           const InputTensors& inputTensors,
                           const OutputTensors& outputTensors,
                           std::vector<ImportedInputId> preImportedInputIds = {},
                           std::vector<ImportedOutputId> preImportedOutputIds = {});
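
    // Example (illustrative sketch): running one inference. `inputData` and
    // `outputData` are assumed to be caller-owned buffers of the correct size,
    // and binding id 0 is a placeholder for your network's bindings.
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    // TensorInfo inputInfo = runtime->GetInputTensorInfo(networkId, 0);
    // inputInfo.SetConstant(true); // input data is not modified during inference
    // InputTensors  inputs { {0, ConstTensor(inputInfo, inputData.data())} };
    // OutputTensors outputs{ {0, Tensor(runtime->GetOutputTensorInfo(networkId, 0),
    //                                   outputData.data())} };
    // runtime->EnqueueWorkload(networkId, inputs, outputs);
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~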

    /// Unloads a network from the IRuntime.
    /// At the moment this only removes the network from m_Impl->m_Network.
    /// This might need more work in the future to be AndroidNN compliant.
    /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
    /// @return armnn::Status
    Status UnloadNetwork(NetworkId networkId);

    const IDeviceSpec& GetDeviceSpec() const;

    /// Gets the profiler corresponding to the given network id.
    /// @param networkId The id of the network for which to get the profiler.
    /// @return A pointer to the requested profiler, or nullptr if not found.
    const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
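
    // Example (illustrative sketch): printing the collected profiling events after
    // a run; assumes profiling was enabled via INetworkProperties at load time.
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    // std::shared_ptr<IProfiler> profiler = runtime->GetProfiler(networkId);
    // if (profiler)
    // {
    //     profiler->Print(std::cout); // writes JSON-formatted profiling data
    // }
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~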

    /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
    /// @param networkId The id of the network to register the callback.
    /// @param func Callback function to pass to the debug layer.
    void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
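
    // Example (illustrative sketch): attaching a debug callback; the lambda
    // signature follows the DebugCallbackFunction typedef from Types.hpp.
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    // runtime->RegisterDebugCallback(networkId,
    //     [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)
    //     {
    //         // Inspect the intermediate tensor here.
    //     });
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~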

protected:
    IRuntime();
    IRuntime(const IRuntime::CreationOptions& options);
    ~IRuntime();

    std::unique_ptr<RuntimeImpl> pRuntimeImpl;
};

/// The following API is replaced by the backend options API.
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
/// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
/// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///   optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IGpuAccTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    virtual ~IGpuAccTunedParameters() {}
};
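
// Example (illustrative sketch): tuning once in UpdateTunedParameters mode, saving
// the result, and reusing it on later runs. The file path is a placeholder, and
// this API is superseded by the backend options shown in IRuntime::CreationOptions.
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
// IGpuAccTunedParametersPtr tunedParams = IGpuAccTunedParameters::Create(
//     IGpuAccTunedParameters::Mode::UpdateTunedParameters,
//     IGpuAccTunedParameters::TuningLevel::Normal);
//
// IRuntime::CreationOptions options;
// options.m_GpuAccTunedParameters = tunedParams;
// IRuntimePtr runtime = IRuntime::Create(options);
// // ... run representative workloads, then:
// tunedParams->Save("gpu_acc_tuned.bin");
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~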

} // namespace armnn