ArmNN
 25.11
LoadedNetwork.cpp
Go to the documentation of this file.
1//
2// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "LoadedNetwork.hpp"
7#include "Layer.hpp"
8#include "Graph.hpp"
9#include "Markers.hpp"
10#include "Profiling.hpp"
11#include "HeapProfiling.hpp"
12
15#include <armnn/Logging.hpp>
16
21
23
25
26#include <common/include/Processes.hpp>
27
28#include <fmt/format.h>
29
30namespace armnn
31{
32
33using namespace std;
34using namespace arm::pipe;
35
36namespace
37{
38
39template <typename ExceptionType>
40std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
41{
42 std::stringstream ss;
43 ss << prefix << " " << error.what();
44 return ss.str();
45}
46
47void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
48 const Layer& layer,
49 ProfilingGuid networkGuid)
50{
51 // Add layer to the post-optimisation network structure
52 std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
53 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
54 networkGuid,
55 layerName,
56 LabelsAndEventClasses::LAYER_GUID);
57 for (auto&& input : layer.GetInputSlots())
58 {
59 const IOutputSlot* source = input.GetConnectedOutputSlot();
60 if (!source)
61 {
62 throw armnn::NullPointerException("Null source found on input to layer \"" + layerName + "\".");
63 }
64 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65 source->GetOwningLayerGuid(),
66 layer.GetGuid());
67 }
68}
69
70void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71 std::unique_ptr<IWorkload>& workload,
72 const Layer& layer)
73{
74 // Add workload to the post-optimisation network structure
75 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77 layer.GetBackendId().Get(),
78 LabelsAndEventClasses::BACKENDID_GUID);
79
80 // Link the workload to the layer
81 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
82 layer.GetGuid(),
83 workload->GetGuid(),
84 LabelsAndEventClasses::CHILD_GUID);
85}
86
87} // anonymous
88
89/**
90 * This function performs a sanity check to ensure that the combination of input and output memory source matches the
91 * values for importEnabled and exportEnabled that were specified during optimization. During optimization the tensor
92 * handle factories are chosen based on whether import and export are enabled. If the user then specifies something
93 * incompatible here, it can lead to problems.
94 *
95 * @param optimizedOptions The backend options recorded at optimization time, which carry ImportEnabled and ExportEnabled.
96 * @param networkProperties The runtime properties for this load, including the input and output memory sources.
97 */
98void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
99 const INetworkProperties& networkProperties)
100{
101 // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
102 // added as backend options.
103 const vector<BackendOptions>::iterator& backendItr =
104 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
105 if (backend.GetBackendId().Get() == "Global")
106 {
107 return true;
108 }
109 else
110 {
111 return false;
112 }
113 });
114 bool importEnabled = false;
115 bool exportEnabled = false;
116 if (backendItr != optimizedOptions.end())
117 {
118 // Find the importEnabled and exportEnabled values.
119 for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
120 {
121 const BackendOptions::BackendOption& option = backendItr->GetOption(i);
122 if (option.GetName() == "ImportEnabled")
123 {
124 importEnabled = option.GetValue().AsBool();
125 }
126 if (option.GetName() == "ExportEnabled")
127 {
128 exportEnabled = option.GetValue().AsBool();
129 }
130 }
131 }
132
133 // Now that we have values for import and export, compare them to the MemorySource variables.
134 // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
135 if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
136 (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
137 {
138 auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
139 if (!importEnabled)
140 {
141 message.append(" requires that memory import be enabled. However, "
142 "it was disabled when this network was optimized.");
143 }
144 else
145 {
146 message.append(" requires that memory import be disabled. However, "
147 "it was enabled when this network was optimized.");
148 }
149 throw InvalidArgumentException(message);
150 }
151
152 if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
153 (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
154 {
155 auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
156 if (!exportEnabled)
157 {
158 message.append(" requires that memory export be enabled. However, "
159 "it was disabled when this network was optimized.");
160 }
161 else
162 {
163 message.append(" requires that memory export be disabled. However, "
164 "it was enabled when this network was optimized.");
165 }
166 throw InvalidArgumentException(message);
167 }
168}
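For context, the check above only passes when the load-time memory sources agree with what was requested at optimization time. A minimal sketch of a consistent configuration through the public API (illustrative only; "network", "runtime" and "errorMessage" are assumed to exist, and the exact INetworkProperties constructor arguments vary between ArmNN releases):

    armnn::OptimizerOptionsOpaque optimizerOptions;
    optimizerOptions.SetImportEnabled(true);   // inputs will be imported -> m_InputSource must not be Undefined
    optimizerOptions.SetExportEnabled(false);  // outputs will be copied  -> m_OutputSource must stay Undefined

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
        *network, {armnn::Compute::CpuAcc}, runtime->GetDeviceSpec(), optimizerOptions);

    armnn::INetworkProperties networkProperties(false,                           // asyncEnabled
                                                armnn::MemorySource::Malloc,     // m_InputSource
                                                armnn::MemorySource::Undefined); // m_OutputSource
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);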
169
170std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
171 std::string& errorMessage,
172 const INetworkProperties& networkProperties,
173 arm::pipe::IProfilingService* profilingService)
174{
175 std::unique_ptr<LoadedNetwork> loadedNetwork;
176
177 auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
178 {
179 errorMessage = ToErrorMessage("An error occurred when preparing the network workloads:", error);
180 ARMNN_LOG(error) << errorMessage;
181
182 return std::unique_ptr<LoadedNetwork>();
183 };
184
185 try
186 {
187 loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
188 }
189 catch (const armnn::RuntimeException& error)
190 {
191 return Fail(error);
192 }
193 catch (const armnn::Exception& error)
194 {
195 return Fail(error);
196 }
197 catch (const std::runtime_error& error)
198 {
199 return Fail(error);
200 }
201
202 return loadedNetwork;
203}
204
205LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
206 const INetworkProperties& networkProperties,
207 arm::pipe::IProfilingService* profilingService) :
208 m_OptimizedNetwork(std::move(net)),
209 m_NetworkProperties(networkProperties),
210 m_TensorHandleFactoryRegistry(),
211 m_ProfilingService(profilingService)
212{
214 // Get the profiler and register it for the current thread.
215 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
216 ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
217
218 profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
219
220 profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);
221
222 // We need to check that the memory sources match up with the values of import and export specified during the
223 // optimize phase. If they don't, this will throw an exception.
224 ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
225 m_NetworkProperties);
226
227 // First create tensor handles, backends and workload factories.
228 // Tensor handles are created before workloads because workload creation
229 // can modify some of the handles, for example for the splitter
230 // and concat layers.
231
232 bool useExternalMemoryManager = false;
233 bool useInternalMemoryManager = false;
234 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
235 // Ensure Topological order
236 order.SetLayersOutOfOrder();
237 order.TopologicalSort();
238
239 m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
240 m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
241
242 for (auto&& layer : order)
243 {
244 auto const& backendId = layer->GetBackendId();
245 if (m_Backends.count(backendId) == 0)
246 {
247 auto createBackend = BackendRegistryInstance().GetFactory(backendId);
248 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
249
250 IBackendInternal* backend = it.first->second.get();
251
252 m_SupportsExternallyManagedMemory[backend->GetId()] = false;
253 useInternalMemoryManager = true;
254
255 if (HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
256 backend->GetCapabilities())
257 && (m_NetworkProperties.m_ExternalMemoryManagementEnabled))
258 {
259 m_SupportsExternallyManagedMemory[backend->GetId()] = true;
260 useExternalMemoryManager = true;
261 useInternalMemoryManager = false;
262 }
263
264 IBackendInternal::IWorkloadFactoryPtr workloadFactory;
265 if (backend->SupportsTensorAllocatorAPI())
266 {
267 workloadFactory = backend->CreateWorkloadFactory(
268 m_TensorHandleFactoryRegistry,
269 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
270 static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
271 static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
272 }
273 else
274 {
275 m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
276 workloadFactory = backend->CreateWorkloadFactory(
277 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
278 }
279 m_WorkloadFactories[backendId] = std::move(workloadFactory);
280 }
281 }
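// The loop above creates exactly one IBackendInternal and one IWorkloadFactory
// per distinct BackendId in the graph. Externally managed memory is only used
// for a backend that both advertises the "ExternallyManagedMemory" capability
// and was loaded with m_ExternalMemoryManagementEnabled set; backends that
// support the tensor allocator API build their factory on the shared
// TensorHandleFactoryRegistry, while legacy backends get their own
// IMemoryManager instance.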
282
283 for (auto&& layer : order)
284 {
285 auto& workloadFactory = GetWorkloadFactory(*layer);
286 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
287
288 switch (layer->GetType())
289 {
290 case LayerType::Input:
291 case LayerType::MemImport:
292 {
293 // If input importing is enabled (m_InputSource != MemorySource::Undefined),
294 // IsMemoryManaged must be false when creating the TensorHandles.
295 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
296 workloadFactory,
297 !supportsExternalManager &&
298 (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
299 break;
300 }
301 case LayerType::Constant:
302 {
303 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
304 break;
305 }
306 default:
307 {
308 // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
309 // If export is enabled, disable memory management so we can export; otherwise we do a copy.
310 if ((layer->GetNumOutputSlots() == 1) &&
311 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
312 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
313 {
314 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
315 workloadFactory,
316 !supportsExternalManager &&
317 (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
318 }
319 else
320 {
321 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
322 workloadFactory,
323 !supportsExternalManager);
324 }
325 }
326 }
327 }
328
329 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
330 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
331 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
332 if (timelineUtils)
333 {
334 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
335 // Mark the network with a start of life event
336 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
337 // and with the process ID
338 int processID = arm::pipe::GetCurrentProcessId();
339 std::stringstream ss;
340 ss << processID;
341 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
342 }
343
344 std::vector<IWorkload*> ConstWorkloads;
345
346 //Then create workloads.
347 {
348 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
349 for (auto&& layer: order)
350 {
351 if (timelineUtils)
352 {
353 // Add layer to the post-optimisation network structure
354 AddLayerStructure(timelineUtils, *layer, networkGuid);
355 }
356
357 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
358
359 switch (layer->GetType())
360 {
361 case LayerType::Input:
362 case LayerType::Output:
363 {
364 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
365 break;
366 }
367 default:
368 {
369 auto workload = layer->CreateWorkload(workloadFactory);
370
371 if (!workload)
372 {
373 const char* const layerName =
374 layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
375 throw InvalidArgumentException(
376 fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
377 layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
378 ));
379 }
380
381 if (timelineUtils)
382 {
383 // Add workload to the post-optimisation network structure
384 AddWorkloadStructure(timelineUtils, workload, *layer);
385 }
386
387 m_WorkloadQueue.emplace_back(std::move(workload));
388
389 if (layer->GetType() == LayerType::Constant)
390 {
391 // Place the Constant Workloads into a queue so that they can be executed first
392 ConstWorkloads.emplace_back(m_WorkloadQueue.back().get());
393 }
394
395 // release the constant data in the layer.
396 layer->ReleaseConstantData();
397 break;
398 }
399 }
400 }
401 }
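// Constant layers are special-cased above: their workloads are collected into
// ConstWorkloads so they can be executed once at the end of this constructor
// instead of on every inference, and each layer's ReleaseConstantData() is
// called as soon as its workload exists, so the graph does not keep a second
// copy of the data.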
402
403 // Gather information about workloads for inputs & outputs
404 if (m_WorkloadQueue.size() != 0)
405 {
406 const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
407
408 // Get indices of all workloads connected to each input and
409 // check if they support tensor handle replacement
410 for (const BindableLayer* layer: order.GetInputLayers())
411 {
412 const auto bindingId = layer->GetBindingId();
413
414 bool supportsReplacement = true;
415
416 for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
417 {
418 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
419 workloadIndex -= noOfInputs;
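// Input layers get no entry in m_WorkloadQueue (they are handled by
// EnqueueInput() instead), and the topological sort places them first, so a
// layer's workload index is its position in the graph minus the number of
// input layers.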
420
421 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
422 armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
423
424 // Avoid if input is connected directly to an output
425 if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
426 {
427 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
428 supportsReplacement &= workload->SupportsTensorHandleReplacement();
429 }
430 }
431
432 ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
433 // Get matching import factory Id
434 ITensorHandleFactory::FactoryId importFactoryId =
435 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
436
437 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
438
439 if (supportsReplacement && importFactory)
440 {
441 m_PreImportedInputHandles.emplace_back(
442 bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
443 }
444 else
445 {
446 m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
447 }
448 }
449
450 // Get indices of all workloads connected to each output and
451 // check if they support tensor handle replacement
452 for (const BindableLayer* layer: order.GetOutputLayers())
453 {
454 const auto bindingId = layer->GetBindingId();
455
456 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
457 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
458
459 // Avoid if output is connected directly to an input
460 if (outputSlot->GetOwningLayer().GetType() != LayerType::Input)
461 {
462 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
463 workloadIndex -= noOfInputs;
464
465 indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
466 outputSlot->CalculateIndexOnOwner()};
467
468 bool supportsReplacement = true;
469 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
470 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
471
472 for (auto &inputSlot: outputSlot->GetConnections())
473 {
474 if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
475 {
476 auto inWorkloadIndex = std::distance(order.begin(),
477 order.GetPosInGraph(inputSlot->GetOwningLayer()));
478 inWorkloadIndex -= noOfInputs;
479 indices.m_InputSlotIndices.emplace_back(
480 WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
481 inputSlot->GetSlotIndex()});
482 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
483 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
484 }
485 }
486
487 ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
488 // Get matching import factory Id
489 ITensorHandleFactory::FactoryId importFactoryId =
490 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
491 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
492
493 if (supportsReplacement && importFactory)
494 {
495 m_PreImportedOutputHandles.emplace_back(
496 bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
497 }
498 else
499 {
500 m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
501 }
502 }
503 }
504 }
505
506 for (auto&& workloadFactory : m_WorkloadFactories)
507 {
508 workloadFactory.second->AfterWorkloadsCreated();
509 }
510
511 if (timelineUtils)
512 {
513 // Commit to send the post-optimisation network structure
514 timelineUtils->Commit();
515 }
516
517 if (useExternalMemoryManager)
518 {
519 CreateMemoryProfile();
520 auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
521 for (auto& backendMemoryProfile : m_MemBlockMap)
522 {
523 const BackendId& backendId = backendMemoryProfile.first;
524 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
525 {
526 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
527 }
528 else
529 {
530 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
531 }
532 }
533 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
534
535 // Sort m_TensorMemory so that its order matches m_Tensorhandles.
536 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
537 [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
538 const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
539 {
540 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
541 });
542 }
543
544
545 // Now that the intermediate tensor memory has been set up,
546 // do any post allocation configuration for each workload.
547
548 if (useInternalMemoryManager)
549 {
550 // Set up memory.
551 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
552 }
553
554 if (useExternalMemoryManager)
555 {
556 AllocateAndExecuteConstantWorkloads();
557 }
558
559 for (const auto& workload : m_WorkloadQueue)
560 {
561 workload->PostAllocationConfigure();
562 }
563
564 // Execute all constant layer workloads
565 for (auto workload: ConstWorkloads)
566 {
567 workload->Execute();
568 }
569 MARK_OPTIMIZED_NETWORK_LOADED();
570}
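In outline, the constructor proceeds in five phases: validate that the requested memory sources match the import/export settings baked in at optimization time, create one backend and workload factory per BackendId, create tensor handles for every output slot, create the workloads (recording which input and output workload slots could later have their tensor handles replaced by pre-imported ones), and finally set up intermediate tensor memory and run the constant workloads once.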
571
572void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
573{
574 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
575 for (auto& pair : m_ConstantWorkloads)
576 {
577 auto tensorHandle = m_ConstantTensorHandles[pair.first];
578 tensorHandle->Allocate();
579 pair.second->Execute();
580 }
581}
582
583void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
584{
585 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
586 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
587 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
588
589 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
590 TimelineUtilityMethods::GetTimelineUtils(profilingService);
591
592 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
593
594 for (auto&& layer : order)
595 {
596 // Add layer to the post-optimisation network structure
597 AddLayerStructure(timelineUtils, *layer, networkGuid);
598 switch (layer->GetType())
599 {
600 case LayerType::Input:
601 case LayerType::Output:
602 {
603 // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
604 break;
605 }
606 default:
607 {
608 for (auto& workload : m_WorkloadQueue)
609 {
610 // Add workload to the post-optimisation network structure
611 AddWorkloadStructure(timelineUtils, workload, *layer);
612 }
613 break;
614 }
615 }
616 }
617 // Commit to send the post-optimisation network structure
618 timelineUtils->Commit();
619}
620
621arm::pipe::ProfilingGuid LoadedNetwork::GetNetworkGuid()
622{
623 return m_OptimizedNetwork->GetGuid();
624}
625
626TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
627{
628 for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
629 {
630 if (inputLayer->GetNumOutputSlots() != 1)
631 {
632 throw armnn::GraphValidationException("Input layer should have exactly 1 output slot");
633 }
634
635 if (inputLayer->GetBindingId() == layerId)
636 {
637 return inputLayer->GetOutputSlot(0).GetTensorInfo();
638 }
639 }
640
641 throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
642}
643
644TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
645{
646 for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
647 {
648 if (outputLayer->GetNumInputSlots() != 1)
649 {
650 throw armnn::GraphValidationException("Output layer should have exactly 1 input slot");
651 }
652
653 if (!outputLayer->GetInputSlot(0).GetConnection())
654 {
655 throw armnn::GraphValidationException("Input slot on Output layer must be connected");
656 }
657
658 if (outputLayer->GetBindingId() == layerId)
659 {
660 return outputLayer->GetInputSlot(0).GetTensorInfo();
661 }
662 }
663
664 throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
665}
666
667const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
668{
669 const IWorkloadFactory* workloadFactory = nullptr;
670
671 auto it = m_WorkloadFactories.find(layer.GetBackendId());
672 if (it == m_WorkloadFactories.end())
673 {
674 throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
675 layer.GetBackendId().Get(),
676 layer.GetNameStr()),
677 CHECK_LOCATION());
678 }
679
680 workloadFactory = it->second.get();
681
682 if (!workloadFactory)
683 {
684 throw armnn::NullPointerException("No workload factory");
685 }
686
687 return *workloadFactory;
688}
689
690namespace {
691
692// Non-copyable class owning accelerator-specific tensor data.
693class TensorPin
694{
695public:
696 TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
697 : m_TensorHandle(std::move(handle))
698 , m_TensorInfo(info)
699 , m_Id(id)
700 {
701 }
702
703 ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
704 const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
705 LayerBindingId GetBindingId() const { return m_Id; }
706
707private:
708 std::unique_ptr<ITensorHandle> m_TensorHandle;
709 TensorInfo m_TensorInfo;
710 LayerBindingId m_Id;
711};
712
713static const TensorPin& GetTensorPin(LayerBindingId id,
714 const std::vector<TensorPin>& pins,
715 char const* bindingPointDesc)
716{
717 auto it = std::find_if(pins.begin(), pins.end(),
718 [id](const TensorPin& pin)
719 {
720 return pin.GetBindingId() == id;
721 });
722
723 if (it != pins.end())
724 {
725 return *it;
726 }
727 else
728 {
729 throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
730 }
731}
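The TensorPin class above and the WorkloadData class below keep the caller's raw input and output pointers wrapped in passthrough ITensorHandles that stay alive for the whole of EnqueueWorkload(). GetTensorPin() is a linear search by binding id and throws if the caller did not supply a tensor for a bound input or output.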
732
733// Stores data that needs to be kept accessible for the entire execution of a workload.
734class WorkloadData
735{
736public:
737 WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
738 {
739 m_InputTensorPins.reserve(inputTensors.size());
740 m_OutputTensorPins.reserve(outputTensors.size());
741
742 for (auto inputTensorPair : inputTensors)
743 {
744 auto inputTensor = inputTensorPair.second;
745
746 std::unique_ptr<ITensorHandle> tensorHandle =
747 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
748 LayerBindingId layerId = inputTensorPair.first;
749
750 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
751 }
752
753 for (auto outputTensorPair : outputTensors)
754 {
755 auto outputTensor = outputTensorPair.second;
756
757 std::unique_ptr<ITensorHandle> tensorHandle =
758 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
759 LayerBindingId layerId = outputTensorPair.first;
760
761 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
762 }
763 }
764
765 const TensorPin& GetInputTensorPin(LayerBindingId id) const
766 {
767 return GetTensorPin(id, m_InputTensorPins, "input");
768 }
769
770 const TensorPin& GetOutputTensorPin(LayerBindingId id) const
771 {
772 return GetTensorPin(id, m_OutputTensorPins, "output");
773 }
774
775private:
776
777 std::vector<TensorPin> m_InputTensorPins;
778 std::vector<TensorPin> m_OutputTensorPins;
779};
780
781}
782
783Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
784 const OutputTensors& outputTensors,
785 std::vector<ImportedInputId> preImportedInputIds,
786 std::vector<ImportedOutputId> preImportedOutputIds)
787{
788 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
789
790 // Walk graph to determine the order of execution.
791 if (graph.GetNumLayers() < 2)
792 {
793 ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
794 return Status::Failure;
795 }
796
797 // Data that must be kept alive for the entire execution of the workload.
798 WorkloadData workloadData(inputTensors, outputTensors);
799
800 // Input tensors can be provided as parameters or pre-imported. Either way, the number of
801 // tensors should match the number of inputs.
802 if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
803 {
804 throw InvalidArgumentException("Number of inputs provided does not match network.");
805 }
806
807 // For each input to the network, call EnqueueInput with the data passed by the user.
808 {
809 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
810 m_InputQueue.clear();
811 m_InputQueue.reserve(graph.GetNumInputs());
812
813 unsigned int inputIndex = 0;
814 unsigned int importedInputIdIndex = 0;
815 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
816 for (const BindableLayer* inputLayer : graph.GetInputLayers())
817 {
818 if (importedInputIdIndex < preImportedInputIds.size() &&
819 inputIndex == preImportedInputIds[importedInputIdIndex])
820 {
821 // Only replace tensor handles if they have not already been replaced.
822 if (!m_IsInputImported[inputIndex])
823 {
824 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
825
826 for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
827 {
828 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
829 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
830 }
831 m_IsInputImported[inputIndex] = true;
832 }
833 importedInputIdIndex++;
834 }
835 else
836 {
837 if (m_IsInputImported[inputIndex])
838 {
839 OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));
840
841 for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
842 {
843 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
844 workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
845 }
846
847 m_IsInputImported[inputIndex] = false;
848 }
849
850 // The input tensor handle has not been imported, so proceed to enqueue the input.
851 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
852 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
853 }
854 inputIndex++;
855 }
856 }
857 // For each output to the network, call EnqueueOutput with the data passed by the user.
858 {
859 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
860 m_OutputQueue.clear();
861 m_OutputQueue.reserve(graph.GetNumOutputs());
862
863 if (preImportedOutputIds.size() > graph.GetNumOutputs())
864 {
865 throw InvalidArgumentException("Invalid number of preImportedOutputIds");
866 }
867
868 unsigned int outputIndex = 0;
869 unsigned int importedOutputIdIndex = 0;
870 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
871 for (const BindableLayer* outputLayer : graph.GetOutputLayers())
872 {
873 if (importedOutputIdIndex < preImportedOutputIds.size() &&
874 outputIndex == preImportedOutputIds[importedOutputIdIndex])
875 {
876 // Only replace tensor handles if they have not already been replaced.
877 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
878
879 if (!m_IsOutputImported[outputIndex])
880 {
881 const auto bindingId = outputLayer->GetBindingId();
882 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
883
884 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
885
886 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
887 indices.m_OutputSlotIndices.m_SlotIndex);
888
889 for (const auto& workloadInfo: indices.m_InputSlotIndices)
890 {
891 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
892 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
893 }
894 m_IsOutputImported[outputIndex] = true;
895 }
896
897 if (!inputTensorHandle)
898 {
899 throw armnn::NullPointerException("Data should have been allocated.");
900 }
901
902 MemSyncQueueDescriptor syncDesc;
903 syncDesc.m_Inputs.push_back(inputTensorHandle);
904 WorkloadInfo info;
905 info.m_InputTensorInfos.push_back(outputLayer->GetInputSlot(0).GetTensorInfo());
906
907 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
908 if (!syncWorkload)
909 {
910 throw armnn::NullPointerException("No sync workload created");
911 }
912
913 m_OutputQueue.push_back(std::move(syncWorkload));
914 importedOutputIdIndex++;
915 }
916 else
917 {
918 if (m_IsOutputImported[outputIndex])
919 {
920 const auto bindingId = outputLayer->GetBindingId();
921 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
922
923 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
924 const OutputHandler& outputHandler =
925 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
926
927 outputWorkload->ReplaceOutputTensorHandle(
928 outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
929
930 for (const auto& workloadInfo: indices.m_InputSlotIndices)
931 {
932 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
933 inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
934 }
935 m_IsOutputImported[outputIndex] = false;
936 }
937
938 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
939 // The output tensor handle has not been imported, so proceed to enqueue the output.
940 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
941 }
942 outputIndex++;
943 }
944 }
945
946 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
947 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
948 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
949 if (timelineUtils)
950 {
951 // Add inference timeline trace if profiling is enabled.
952 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
953 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
954 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
955 networkGuid,
956 inferenceGuid,
957 LabelsAndEventClasses::EXECUTION_OF_GUID);
958 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
959 }
960
961 bool executionSucceeded = true;
962
963 {
964 if (m_ProfilingService->IsProfilingEnabled())
965 {
966 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
967 }
969 ARMNN_SCOPED_HEAP_PROFILING("Executing");
970 executionSucceeded = Execute(timelineUtils, inferenceGuid);
971 }
972
973 if (timelineUtils)
974 {
975 // Add end of life of the inference timeline if profiling is enabled.
976 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
977 timelineUtils->Commit();
978 }
979
980 return executionSucceeded ? Status::Success : Status::Failure;
981}
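For reference, this method is normally reached through IRuntime::EnqueueWorkload. A minimal sketch for a network whose single input and output are bound to id 0 ("runtime" and "networkId" as in the earlier sketch; note that recent ArmNN releases require the TensorInfo passed to a ConstTensor to be explicitly marked constant):

    armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(networkId, 0);
    inputInfo.SetConstant(true); // ConstTensor requires a constant TensorInfo
    std::vector<float> inputData(inputInfo.GetNumElements());

    const armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(networkId, 0);
    std::vector<float> outputData(outputInfo.GetNumElements());

    armnn::InputTensors inputTensors{{0, armnn::ConstTensor(inputInfo, inputData.data())}};
    armnn::OutputTensors outputTensors{{0, armnn::Tensor(outputInfo, outputData.data())}};

    armnn::Status status = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);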
982
983void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
984{
985 if (layer.GetType() != LayerType::Input)
986 {
987 throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
988 }
989
990 if (tensorHandle == nullptr)
991 {
992 throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
993 }
994
995 InputQueueDescriptor inputQueueDescriptor;
996 WorkloadInfo info;
997
998 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
999 info.m_InputTensorInfos.push_back(tensorInfo);
1000
1001 if (layer.GetNumOutputSlots() != 1)
1002 {
1003 throw armnn::GraphValidationException("Can only handle Input Layer with one output");
1004 }
1005
1006 const OutputHandler& handler = layer.GetOutputHandler();
1007 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1008 ITensorHandle* outputTensorHandle = handler.GetData();
1009
1010 if (!outputTensorHandle)
1011 {
1012 throw armnn::NullPointerException("Data should have been allocated.");
1013 }
1014
1015 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1016 info.m_OutputTensorInfos.push_back(outputTensorInfo);
1017
1018 MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1019 bool needMemCopy = true;
1020 if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try to import the input tensor
1021 {
1022 if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1023 {
1024 needMemCopy = false;
1025 // This assumes a CPU Tensor handle
1026 void* mem = tensorHandle->Map(false);
1027 if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1028 {
1029 tensorHandle->Unmap();
1030 return; // No need for a workload since the import has been done.
1031 }
1032 tensorHandle->Unmap();
1033 throw MemoryImportException("EnqueueInput: Memory Import failed");
1034 }
1035 }
1036 if (needMemCopy)
1037 {
1038 // Create a mem copy workload for input since we did not import
1039 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1040
1041 if (!inputWorkload)
1042 {
1043 throw armnn::NullPointerException("No input workload created");
1044 }
1045
1046 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1047 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1048 if (timelineUtils)
1049 {
1050 // Add Input Workload to the post-optimisation network structure
1051 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1052 timelineUtils->Commit();
1053 }
1054
1055 m_InputQueue.push_back(std::move(inputWorkload));
1056 }
1057}
1058
1059void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1060{
1061 if (layer.GetType() != LayerType::Output)
1062 {
1063 throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
1064 }
1065
1066 if (tensorHandle == nullptr)
1067 {
1068 throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
1069 }
1070
1071 OutputQueueDescriptor outputQueueDescriptor;
1072 WorkloadInfo info;
1073
1074 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1075 info.m_OutputTensorInfos.push_back(tensorInfo);
1076
1077 if (layer.GetNumInputSlots() != 1)
1078 {
1079 throw armnn::GraphValidationException("Output Layer should have exactly one input.");
1080 }
1081
1082 // Gets the output handler from the previous node.
1083 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1084
1085 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1086 ITensorHandle* inputTensorHandle = outputHandler.GetData();
1087 if (!inputTensorHandle)
1088 {
1089 throw armnn::NullPointerException("Data should have been allocated.");
1090 }
1091
1092 // Try to import the output tensor.
1093 // Note: We can only import the output pointer if all of the following hold true:
1094 // a) The imported pointer is aligned sufficiently
1095 // b) The tensor has zero padding
1096 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1097 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1098 // e) m_NetworkProperties.m_OutputSource != MemorySource::Undefined
1099 bool needMemCopy = true;
1100 if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
1101 (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1102 {
1103 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
1104 {
1105 MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1106 if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1107 {
1108 needMemCopy = false;
1109 void *mem = tensorHandle->Map(false);
1110 bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1111 tensorHandle->Unmap();
1112
1113 if (importOk)
1114 {
1115 // Insert synchronization workload
1116 MemSyncQueueDescriptor syncDesc;
1117 syncDesc.m_Inputs.push_back(inputTensorHandle);
1118 info.m_InputTensorInfos.push_back(inputTensorInfo);
1119 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1120 if (!syncWorkload)
1121 {
1122 throw armnn::NullPointerException("No sync workload created");
1123 }
1124 m_OutputQueue.push_back(std::move(syncWorkload));
1125 }
1126 else
1127 {
1128 throw MemoryExportException("EnqueueOutput: Memory Export failed");
1129 }
1130 }
1131 }
1132 }
1133 if (needMemCopy)
1134 {
1135 // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
1136 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1137 info.m_InputTensorInfos.push_back(inputTensorInfo);
1138
1139 std::unique_ptr<IWorkload> outputWorkload =
1140 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1141 if (!outputWorkload)
1142 {
1143 throw armnn::NullPointerException("No output workload created");
1144 }
1145
1146 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1147 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1148 if (timelineUtils)
1149 {
1150 // Add Output Workload to the post-optimisation network structure
1151 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1152 timelineUtils->Commit();
1153 }
1154
1155 m_OutputQueue.push_back(std::move(outputWorkload));
1156 }
1157}
1158
1159void LoadedNetwork::AllocateWorkingMemory(
1160#if !defined(ARMNN_DISABLE_THREADS)
1161 std::lock_guard<std::mutex>& lock
1162#endif
1163 )
1164{
1165 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
1166
1167#if !defined(ARMNN_DISABLE_THREADS)
1168 // This unused parameter ensures the function can only be called while holding a valid lock.
1169 IgnoreUnused(lock);
1170#endif
1171 if (m_IsWorkingMemAllocated)
1172 {
1173 return;
1174 }
1175
1176 if (m_ExternalMemoryManager)
1177 {
1178 m_ExternalMemoryManager->Allocate();
1179
1180 for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1181 {
1182 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1183 }
1184 }
1185
1186 for (auto&& memoryManager : m_BackendMemoryMangers)
1187 {
1188 if (memoryManager)
1189 {
1190 memoryManager->Acquire();
1191 }
1192 }
1193 m_TensorHandleFactoryRegistry.AquireMemory();
1194 m_IsWorkingMemAllocated = true;
1195}
1196
1197void LoadedNetwork::FreeWorkingMemory()
1198{
1199#if !defined(ARMNN_DISABLE_THREADS)
1200 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1201#endif
1202
1203 if (!m_IsWorkingMemAllocated)
1204 {
1205 return;
1206 }
1207
1208 if (m_ExternalMemoryManager)
1209 {
1210 m_ExternalMemoryManager->Deallocate();
1211 }
1212
1213 // Inform each memory manager to release the memory in its respective memory group.
1214 for (auto&& memoryManager : m_BackendMemoryMangers)
1215 {
1216 if (memoryManager)
1217 {
1218 memoryManager->Release();
1219 }
1220 }
1221 m_TensorHandleFactoryRegistry.ReleaseMemory();
1222 m_IsWorkingMemAllocated = false;
1223}
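AllocateWorkingMemory() and FreeWorkingMemory() form an idempotent pair guarded by m_IsWorkingMemAllocated: working memory is acquired lazily inside Execute() on the first inference, held across subsequent inferences, and only released when the owner explicitly frees it.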
1224
1225bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
1226 ProfilingGuid inferenceGuid)
1227{
1228 bool success = true;
1229
1230 auto Fail = [&](const std::exception& error)
1231 {
1232 ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1233 success = false;
1234 };
1235
1236 try
1237 {
1238#if !defined(ARMNN_DISABLE_THREADS)
1239 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1240 AllocateWorkingMemory(lockGuard);
1241#else
1242 AllocateWorkingMemory();
1243#endif
1244
1245 ProfilingDynamicGuid workloadInferenceID(0);
1246 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
1247 {
1248 for (auto& workload : queue)
1249 {
1250 if(timelineUtils)
1251 {
1252 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1253 inferenceGuid);
1254 }
1255
1256 MARK_WORKLOAD_EXECUTION_BEGIN();
1257 workload->Execute();
1258 MARK_WORKLOAD_EXECUTION_END();
1259 if(timelineUtils)
1260 {
1261 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1262 }
1263 }
1264 };
1265
1266 MARK_INFERENCE_EXECUTION_BEGIN();
1267 ExecuteQueue(m_InputQueue);
1268 ExecuteQueue(m_WorkloadQueue);
1269 ExecuteQueue(m_OutputQueue);
1270 MARK_INFERENCE_EXECUTION_END();
1271 }
1272 catch (const RuntimeException& error)
1273 {
1274 Fail(error);
1275 }
1276 catch (const std::runtime_error& error)
1277 {
1278 Fail(error);
1279 }
1280
1281 return success;
1282}
1283
1284void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1285{
1286 if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try to import the input tensor
1287 {
1288 MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1289 if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1290 {
1291 std::unique_ptr<ITensorHandle> tensorHandle =
1292 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1293 inputTensor.GetMemoryArea());
1294 void* mem = tensorHandle->Map(false);
1295
1296 if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1297 {
1298 tensorHandle->Unmap();
1299 return;
1300 }
1301 tensorHandle->Unmap();
1302 throw MemoryImportException("EnqueueInput: Memory Import failed");
1303 }
1304 else
1305 {
1306 throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
1307 }
1308 }
1309 else
1310 {
1311 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput");
1312 std::unique_ptr<ITensorHandle> tensorHandle =
1313 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1314
1315 auto copyFunc = [](void* dst, const void* src, size_t size)
1316 {
1317 memcpy(dst, src, size);
1318 };
1319
1320 CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
1321 }
1322}
1323
1324// Note: We can only import the output pointer if all of the following hold true:
1325// a) The imported pointer is aligned sufficiently
1326// b) The tensor has zero padding
1327// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1328// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1329 // e) m_NetworkProperties.m_OutputSource must not be MemorySource::Undefined
1330void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1331{
1332 if (!outputTensorHandle)
1333 {
1334 throw armnn::NullPointerException("Data should have been allocated.");
1335 }
1336
1337 MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1338 if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1339 {
1340 std::unique_ptr<ITensorHandle> tensorHandle =
1341 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1342 outputTensor.GetMemoryArea());
1343
1344 void* mem = tensorHandle->Map(false);
1345 bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1346 tensorHandle->Unmap();
1347
1348 if (!importOk)
1349 {
1350 throw MemoryExportException("ImportOutputTensor: Memory Export failed");
1351 }
1352 }
1353 else
1354 {
1355 throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1356 }
1357
1358}
1359
1360void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1361{
1362 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput");
1363 auto copyFunc = [](void* dst, const void* src, size_t size)
1364 {
1365 memcpy(dst, src, size);
1366 };
1367
1368 std::unique_ptr<ITensorHandle> tensorHandle =
1369 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1370 outputTensor.GetMemoryArea());
1371
1372 CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1373}
1374
1375
1376const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
1377{
1378 for (auto inputTensorPair : inputTensors)
1379 {
1380 LayerBindingId id = inputTensorPair.first;
1381 if (id == layerId)
1382 {
1383 return inputTensorPair.second;
1384 }
1385 }
1386 throw InvalidArgumentException("Input does not exist.");
1387}
1388
1389const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
1390{
1391 for (auto outputTensorPair : outputTensors)
1392 {
1393 LayerBindingId id = outputTensorPair.first;
1394 if (id == layerId)
1395 {
1396 return outputTensorPair.second;
1397 }
1398 }
1399 throw InvalidArgumentException("Output does not exist.");
1400}
1401
1402std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
1403 MemorySource forceImportMemorySource)
1404{
1405 // Importing is only possible when a forceImportMemorySource is specified.
1406 if (forceImportMemorySource == MemorySource::Undefined)
1407 {
1408 throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1409 }
1410 // The number of pre-imported tensors should not exceed the number of inputs.
1411 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1412 {
1413 throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
1414 }
1415
1416 std::vector<ImportedInputId> importedInputs;
1417 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1418 unsigned int inputIndex = 0;
1419 for (const BindableLayer* inputLayer : graph.GetInputLayers())
1420 {
1421 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1422
1423 if (!outputTensorHandle)
1424 {
1425 inputIndex++;
1426 continue;
1427 }
1428
1429 auto layerBindingId = inputLayer->GetBindingId();
1430 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
1431 {
1432 return inputTensor.first == layerBindingId;
1433 });
1434
1435 if (it == inputTensors.end())
1436 {
1437 inputIndex++;
1438 continue;
1439 }
1440
1441 const auto& inputTensor = *it;
1442 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1443 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1444 inputTensor.second.GetMemoryArea());
1445
1446 try
1447 {
1448 if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1449 && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1450 {
1451 importedInputs.push_back(inputIndex);
1452 }
1453 passThroughTensorHandle->Unmap();
1454 }
1455 catch(const MemoryImportException& exception)
1456 {
1457 ARMNN_LOG(error) << "An error occurred attempting to import input_"
1458 << inputIndex << " : " << exception.what();
1459 passThroughTensorHandle->Unmap();
1460 }
1461 inputIndex++;
1462 }
1463
1464 return importedInputs;
1465}
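ImportInputs exists for the repeated-inference case: import the buffers once, then pass the returned ids instead of the tensors. A sketch using the IRuntime wrappers (assuming the backend's tensor handles accept the buffers for import):

    std::vector<armnn::ImportedInputId> importedIds =
        runtime->ImportInputs(networkId, inputTensors, armnn::MemorySource::Malloc);

    // On later runs, hand over the pre-imported ids in place of the input tensors.
    runtime->EnqueueWorkload(networkId, {}, outputTensors, importedIds);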
1466
1467std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
1468 MemorySource forceImportMemorySource)
1469{
1470 // Importing is only possible when a forceImportMemorySource is specified.
1471 if (forceImportMemorySource == MemorySource::Undefined)
1472 {
1473 throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1474 }
1475 // If forceImportMemorySource is defined, try to import if the memory is aligned.
1476 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1477 {
1478 throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
1479 }
1480 std::vector<ImportedOutputId> importedOutputs;
1481 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1482
1483 unsigned int outputIndex = 0;
1484 for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
1485 {
1486 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1487 if (!inputTensorHandle)
1488 {
1489 outputIndex++;
1490 continue;
1491 }
1492
1493 auto layerBindingId = outputLayer->GetBindingId();
1494 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
1495 {
1496 return outputTensor.first == layerBindingId;
1497 });
1498
1499 if (it == outputTensors.end())
1500 {
1501 outputIndex++;
1502 continue;
1503 }
1504
1505 const auto outputTensor = *it;
1506 try
1507 {
1508 // Check if the output memory can be imported
1509 if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1510 && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1511 {
1512 importedOutputs.push_back(outputIndex);
1513 }
1514 }
1515 catch(const MemoryImportException& exception)
1516 {
1517 ARMNN_LOG(error) << "An error occurred attempting to import output_"
1518 << outputIndex << " : " << exception.what();
1519 }
1520 outputIndex++;
1521 }
1522 return importedOutputs;
1523}
1524
1525void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1526{
1527 for (auto id : inputIds)
1528 {
1529 if (id >= m_PreImportedInputHandles.size())
1530 {
1531 throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1532 }
1533
1534 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1535 if (!importedTensorHandle)
1536 {
1537 throw InvalidArgumentException(
1538 fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1539 }
1540 // Call Unimport then destroy the tensorHandle
1541 importedTensorHandle->Unimport();
1542 importedTensorHandle = {};
1543 }
1544}
1545
1546void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1547{
1548 for (auto id : outputIds)
1549 {
1550 if (id >= m_PreImportedOutputHandles.size())
1551 {
1552 throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1553 }
1554
1555 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1556 if (!importedTensorHandle)
1557 {
1558 throw InvalidArgumentException(
1559 fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1560 }
1561 // Call Unimport then destroy the tensorHandle
1562 importedTensorHandle->Unimport();
1563 importedTensorHandle = {};
1564 }
1565}
1566
1567void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
1568{
1569 for (auto&& workloadPtr: m_WorkloadQueue)
1570 {
1571 workloadPtr.get()->RegisterDebugCallback(func);
1572 }
1573}
1574
1575void LoadedNetwork::CreateMemoryProfile()
1576{
1577 // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
1578 // is a TensorHandle, the function just returns it
1579 auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
1580 {
1581 ITensorHandle* ancestor = subTensorHandle;
1582 while (ancestor && ancestor->GetParent())
1583 {
1584 ancestor = ancestor->GetParent();
1585 }
1586 return ancestor;
1587 };
1588
1589 struct PartialBlock
1590 {
1591 unsigned int m_StartOfLife;
1592 unsigned int m_Lifetime;
1593
1594 size_t m_MemSize;
1595 unsigned int m_Index;
1596
1597 BackendId m_BackendId;
1598 };
1599
1600 auto align = [](size_t numToAlign)
1601 {
1602 const size_t alignment = sizeof(float);
1603 return ((numToAlign + alignment - 1) / alignment) * alignment;
1604 };
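// align() rounds a byte count up to the next multiple of sizeof(float) == 4:
// align(10) = ((10 + 3) / 4) * 4 = 12, while align(8) stays 8.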
1605
1606 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
1607
1608 const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
1609 const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
1610
1611 unsigned int timestep = 0;
1612 unsigned int outputIndex = 0;
1613 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1614
1615 for (auto&& layer : order)
1616 {
1617 const LayerType& layerType = layer->GetType();
1618 // Don't manage memory if importing.
1619 if (layerType == LayerType::Input && inputImportingEnabled)
1620 {
1621 continue;
1622 }
1623 // Don't manage memory if importing.
1624 if (layerType == LayerType::Output && outputImportingEnabled
1625 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
1626 {
1627 continue;
1628 }
1629 // Because constant layer memory cannot be shared, it must persist for the lifetime of execution,
1630 // so it is managed separately.
1631 if (layerType == LayerType::Constant)
1632 {
1633 continue;
1634 }
1635
1636 BackendId backendId = layer->GetBackendId();
1637 for (auto& outputSlot : layer->GetOutputSlots())
1638 {
1639 if (!m_SupportsExternallyManagedMemory[backendId])
1640 {
1641 continue;
1642 }
1643
1644 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
1645 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1646
1647 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
1648 {
1649 PartialBlock partialBlock;
1650
1651 partialBlock.m_StartOfLife = timestep;
1652
1653 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
1654 partialBlock.m_MemSize = alignedSize;
1655 partialBlock.m_Index = outputIndex++;
1656 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
1657 partialBlock.m_BackendId = backendId;
1658
1659 if (partialBlock.m_Lifetime == 0)
1660 {
1661 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1662 partialBlock.m_StartOfLife,
1663 partialBlock.m_MemSize,
1664 0,
1665 partialBlock.m_Index);
1666 }
1667 else
1668 {
1669 memBlockTrackerMap[tensorHandle] = partialBlock;
1670 }
1671 m_Tensorhandles.push_back(tensorHandle);
1672
1673 }
1674 else
1675 {
1676 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
1677 }
1678 }
1679
1680 for (auto& inputSlot : layer->GetInputSlots())
1681 {
1682 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
1683 const LayerType& owningLayerType = connectedInputLayer.GetType();
1684
1685 if (owningLayerType == LayerType::Constant)
1686 {
1687 continue;
1688 }
1689 if (inputImportingEnabled && owningLayerType == LayerType::Input)
1690 {
1691 continue;
1692 }
1693 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
1694 {
1695 continue;
1696 }
1697
1698 auto outputSlot = inputSlot.GetConnectedOutputSlot();
1699
1700 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
1701 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1702
1703 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
1704
1705 auto& lifetime = partialBlock.m_Lifetime;
1706 --lifetime;
1707
1708 if (lifetime == 0)
1709 {
1710 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1711 timestep,
1712 partialBlock.m_MemSize,
1713 0,
1714 partialBlock.m_Index);
1715 }
1716 }
1717 ++timestep;
1718 }
1719
1720}
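CreateMemoryProfile() turns each externally managed tensor into a lifetime block (first and last timestep of use, aligned size, and a unique index), grouped per backend in m_MemBlockMap. The backend's registered memory optimizer strategy, or the constant fallback strategy, then packs those blocks into the bins that CreateExternalMemoryManger() later allocates; the trailing argument of 4 passed to StoreMemToAllocate() is presumably the buffer alignment in bytes, matching the align() helper above.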
1721
1722std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
1723 std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
1724{
1725 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
1726 auto allocatorMap = BackendRegistryInstance().GetAllocators();
1727
1728 for (auto& backend : m_MemBinMap)
1729 {
1730 std::vector<BufferStorage> bufferStorageVec;
1731
1732 std::shared_ptr<ICustomAllocator> backendAllocator; // custom allocator if one is registered, else the backend default
1733 if (allocatorMap.find(backend.first) != allocatorMap.end())
1734 {
1735 backendAllocator = allocatorMap[backend.first];
1736 }
1737 else
1738 {
1739 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
1740 }
1741
1742 for (auto& memBin : backend.second)
1743 {
1744 BufferStorage bufferStorage;
1745 bufferStorage.m_BufferSize = memBin.m_MemSize;
1746 bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
1747
1748 for (auto& memBlock : memBin.m_MemBlocks)
1749 {
1750 auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
1751
1752 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
1753 bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
1754 }
1755
1756 bufferStorageVec.emplace_back(std::move(bufferStorage));
1757 }
1758
1759 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4); // 4-byte type alignment
1760 }
1761
1762 return memoryManager;
1763}
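// (Editorial note, not part of the original source) A hedged sketch of how the
// returned manager and the out-parameter are typically consumed. Allocate() and
// the TensorMemory fields match the construction above; 'handleMap' is a
// hypothetical lookup from output-slot index to its pre-created ITensorHandle:
//
//   std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
//   auto externalManager = CreateExternalMemoryManger(tensorMemory);
//   externalManager->Allocate(); // gives each TensorMemory a data pointer at its offset
//   for (auto& [memory, source] : tensorMemory)
//   {
//       handleMap[memory->m_OutputSlotId]->Import(memory->m_Data, source);
//   }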
1764
1765LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
1766{
1767 try
1768 {
1769 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
1770 if (!importedTensorHandlePin.m_TensorHandle)
1771 {
1772 throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
1773 "PreImportedInput: {} has been deleted", id));
1774 }
1775 return importedTensorHandlePin.m_LayerBindingId;
1776 }
1777 catch (const std::out_of_range&)
1778 {
1779 throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
1780 }
1781}
1782
1783LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
1784{
1785 try
1786 {
1787 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
1788 if (!importedTensorHandlePin.m_TensorHandle)
1789 {
1790 throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
1791 "PreImportedOutput: {} has been deleted", id));
1792 }
1793 return importedTensorHandlePin.m_LayerBindingId;
1794 }
1795 catch (const std::out_of_range&)
1796 {
1797 throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
1798 }
1799}
1800
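// (Editorial note, not part of the original source) A hedged usage sketch for the
// two validators above, written against the public IRuntime wrappers that forward
// to this class; the tensors and network id are assumptions:
//
//   std::vector<ImportedInputId> inIds =
//       runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
//   std::vector<ImportedOutputId> outIds =
//       runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
//   // EnqueueWorkload validates each id via ValidateImportedInputID /
//   // ValidateImportedOutputID and throws InvalidArgumentException for unknown
//   // or already-cleared ids.
//   runtime->EnqueueWorkload(netId, {}, {}, inIds, outIds);
//   runtime->ClearImportedInputs(netId, inIds);
//   runtime->ClearImportedOutputs(netId, outIds);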
1801}