ArmNN
 25.11
Loading...
Searching...
No Matches
Network.cpp
Go to the documentation of this file.
1//
2// Copyright © 2017-2025 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "Network.hpp"
7#include "Graph.hpp"
8#include "Layer.hpp"
9#include "DeviceSpec.hpp"
10#include "Optimizer.hpp"
12#include "BackendSettings.hpp"
13#include "optimizations/All.hpp"
16
21
22#include <armnn/Exceptions.hpp>
23#include <armnn/TypesUtils.hpp>
25#include <armnn/Logging.hpp>
29
30#include <client/include/IProfilingService.hpp>
31
32#include <common/include/ProfilingGuid.hpp>
33
34#include <fmt/format.h>
35
36#include <fcntl.h>
37#include <algorithm>
38#include <memory>
39#include <vector>
40#include <armnn/ArmNN.hpp>
41
42namespace armnn
43{
44
45INetwork::INetwork(NetworkOptions networkOptions) : pNetworkImpl(new NetworkImpl(networkOptions)) {}
46
// Defaulted out-of-line. NOTE(review): defining it here (rather than in the
// header) is presumably required so the pimpl member's destructor is
// instantiated where NetworkImpl is a complete type — confirm pNetworkImpl is
// a smart pointer to the incomplete NetworkImpl.
INetwork::~INetwork() = default;
48
50 : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>())
51{
52}
53
55 : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(*other.p_OptimizerOptionsImpl))
56{
57}
58
60
61OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
62 bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
63 bool debugToFile)
64 : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
65 importEnabled, modelOptions,
66 exportEnabled, debugToFile))
67{
68}
69
70OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
71 ShapeInferenceMethod shapeInferenceMethod,
72 bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
73 bool debugToFile, bool allowExpandedDims)
74 : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
75 shapeInferenceMethod, importEnabled,
76 modelOptions, exportEnabled,
77 debugToFile, allowExpandedDims))
78{
79}
80
82 : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>())
83{
84 p_OptimizerOptionsImpl->m_ImportEnabled = OptimizerStruct.m_ImportEnabled;
85 p_OptimizerOptionsImpl->m_shapeInferenceMethod = OptimizerStruct.m_shapeInferenceMethod;
86 p_OptimizerOptionsImpl->m_ModelOptions = OptimizerStruct.m_ModelOptions;
87 p_OptimizerOptionsImpl->m_ProfilingEnabled = OptimizerStruct.m_ProfilingEnabled;
88 p_OptimizerOptionsImpl->m_DebugToFile = OptimizerStruct.m_DebugToFile;
89 p_OptimizerOptionsImpl->m_Debug = OptimizerStruct.m_Debug;
90 p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = OptimizerStruct.m_ReduceFp32ToFp16;
91 p_OptimizerOptionsImpl->m_ExportEnabled = OptimizerStruct.m_ExportEnabled;
92 p_OptimizerOptionsImpl->m_AllowExpandedDims = OptimizerStruct.m_AllowExpandedDims;
93 p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 = OptimizerStruct.m_ReduceFp32ToBf16;
94}
95
97{
98 p_OptimizerOptionsImpl->m_ImportEnabled = other.GetImportEnabled();
99 p_OptimizerOptionsImpl->m_shapeInferenceMethod = other.GetShapeInferenceMethod();
100 p_OptimizerOptionsImpl->m_ModelOptions = other.GetModelOptions();
101 p_OptimizerOptionsImpl->m_ProfilingEnabled = other.GetProfilingEnabled();
102 p_OptimizerOptionsImpl->m_DebugToFile = other.GetDebugToFileEnabled();
103 p_OptimizerOptionsImpl->m_Debug = other.GetDebugEnabled();
104 p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = other.GetReduceFp32ToFp16();
105 p_OptimizerOptionsImpl->m_ExportEnabled = other.GetExportEnabled();
106 p_OptimizerOptionsImpl->m_AllowExpandedDims = other.GetAllowExpandedDims();
107 p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 = other.GetReduceFp32ToBf16();
108 return *this;
109}
110
112{
113 p_OptimizerOptionsImpl->m_ImportEnabled = ImportState;
114}
115
117{
118 p_OptimizerOptionsImpl->m_ExportEnabled = ExportState;
119}
120
122{
123 p_OptimizerOptionsImpl->m_ProfilingEnabled = ProfilingState;
124}
125
127{
128 p_OptimizerOptionsImpl->m_Debug = DebugState;
129}
130
132{
133 p_OptimizerOptionsImpl->m_DebugToFile = DebugFileState;
134}
135
/// Sets the m_ReduceFp32ToFp16 option flag on the opaque implementation.
void OptimizerOptionsOpaque::SetReduceFp32ToFp16(bool ReduceFp32ToFp16State)
{
    p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = ReduceFp32ToFp16State;
}
140
142{
143 p_OptimizerOptionsImpl->m_shapeInferenceMethod = ShapeInferenceMethodType;
144}
145
147{
148 p_OptimizerOptionsImpl->m_AllowExpandedDims = ExpandedDimsAllowed;
149}
150
152{
153 p_OptimizerOptionsImpl->m_ModelOptions.push_back(NewModelOption);
154}
155
157{
158 return p_OptimizerOptionsImpl->m_ProfilingEnabled;
159};
160
162{
163 return p_OptimizerOptionsImpl->m_ImportEnabled;
164};
165
167{
168 return p_OptimizerOptionsImpl->m_ExportEnabled;
169};
170
172{
173 return p_OptimizerOptionsImpl->m_ReduceFp32ToFp16;
174};
175
177{
178 return p_OptimizerOptionsImpl->m_ReduceFp32ToBf16;
179}
180
182{
183 return p_OptimizerOptionsImpl->m_Debug;
184}
185
187{
188 return p_OptimizerOptionsImpl->m_DebugToFile;
189}
190
192{
193 return p_OptimizerOptionsImpl->m_AllowExpandedDims;
194}
195
197{
198 return p_OptimizerOptionsImpl->m_ModelOptions;
199}
200
202{
203 return p_OptimizerOptionsImpl->m_shapeInferenceMethod;
204}
205
206const std::string OptimizerOptionsOpaque::ToString() const
207{
208 std::stringstream stream;
209 stream << "OptimizerOptions: \n";
210 stream << "\tReduceFp32ToFp16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 << "\n";
211 stream << "\tReduceFp32ToBf16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 << "\n";
212 stream << "\tDebug: " << p_OptimizerOptionsImpl->m_Debug << "\n";
213 stream << "\tDebug to file: " << p_OptimizerOptionsImpl->m_DebugToFile << "\n";
214 stream << "\tShapeInferenceMethod: " <<
215 (p_OptimizerOptionsImpl->m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly ?
216 "ValidateOnly" : "InferAndValidate") << "\n";
217 stream << "\tImportEnabled: " << p_OptimizerOptionsImpl->m_ImportEnabled << "\n";
218 stream << "\tExportEnabled: " << p_OptimizerOptionsImpl->m_ExportEnabled << "\n";
219 stream << "\tProfilingEnabled: " << p_OptimizerOptionsImpl->m_ProfilingEnabled << "\n";
220 stream << "\tAllowExpandedDims: " << p_OptimizerOptionsImpl->m_AllowExpandedDims << "\n";
221
222 stream << "\tModelOptions: \n";
223 for (auto optionsGroup : p_OptimizerOptionsImpl->m_ModelOptions)
224 {
225 for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
226 {
227 const armnn::BackendOptions::BackendOption option = optionsGroup.GetOption(i);
228 stream << "\t\tBackend: " << optionsGroup.GetBackendId() << "\n"
229 << "\t\t\tOption: " << option.GetName() << "\n"
230 << "\t\t\tValue: " << std::string(option.GetValue().ToString()) << "\n";
231 }
232 }
233
234 return stream.str();
235}
236
238{
239 return pNetworkImpl->PrintGraph();
240}
241
243{
244 return pNetworkImpl->AddInputLayer(id, name);
245}
246
248 const char* name)
249{
250 return pNetworkImpl->AddArgMinMaxLayer(desc, name);
251}
252
254{
255 return pNetworkImpl->AddCastLayer(name);
256}
257
259 const char* name)
260{
261 return pNetworkImpl->AddComparisonLayer(comparisonDescriptor, name);
262}
263
264
266 const char* name)
267{
268 return pNetworkImpl->AddConcatLayer(concatDescriptor, name);
269}
270
271
273 const char* name)
274{
275 return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, name);
276}
277
279 const char* name)
280{
281 return pNetworkImpl->AddConvolution3dLayer(convolution3dDescriptor, name);
282}
283
284
286 const char* name)
287{
288 return pNetworkImpl->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
289}
290
291
293 const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
294 const char* name)
295{
296 return pNetworkImpl->AddDepthwiseConvolution2dLayer(convolution2dDescriptor, name);
297}
298
299
301{
302 return pNetworkImpl->AddDequantizeLayer(name);
303}
304
305
307 const DetectionPostProcessDescriptor& descriptor,
308 const ConstTensor& anchors,
309 const char* name)
310{
311 return pNetworkImpl->AddDetectionPostProcessLayer(descriptor, anchors, name);
312}
313
315 const char* name)
316{
317 return pNetworkImpl->AddElementwiseBinaryLayer(elementwiseBinaryDescriptor, name);
318}
319
321 const char* name)
322{
323 return pNetworkImpl->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
324}
325
327 const char* name)
328{
329 return pNetworkImpl->AddFillLayer(fillDescriptor, name);
330}
331
333 const char* name)
334{
335 return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, name);
336}
337
339 const char* name)
340{
341 return pNetworkImpl->AddFusedLayer(fusedDescriptor, name);
342}
343
345 const char* name)
346{
347 return pNetworkImpl->AddPermuteLayer(permuteDescriptor, name);
348}
349
351 const char* name)
352{
353 return pNetworkImpl->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
354}
355
357 const char* name)
358{
359 return pNetworkImpl->AddPooling2dLayer(pooling2dDescriptor, name);
360}
361
363 const char* name)
364{
365 return pNetworkImpl->AddPooling3dLayer(pooling3dDescriptor, name);
366}
367
369 CompiledBlobPtr compiledBlobPtr,
370 const Optional<BackendId>& backend,
371 const char* name)
372{
373 return pNetworkImpl->AddPrecompiledLayer(preCompiledDescriptor, std::move(compiledBlobPtr), backend, name);
374}
375
377 const char* name)
378{
379 return pNetworkImpl->AddActivationLayer(activationDescriptor, name);
380}
381
383 const char* name)
384{
385 return pNetworkImpl->AddNormalizationLayer(normalizationDescriptor, name);
386}
387
/// Adds a Slice layer to the network by forwarding to the NetworkImpl.
/// @param sliceDescriptor Parameters describing the slice operation.
/// @param name Optional layer name (may be nullptr).
/// @return Interface pointer to the newly added layer.
IConnectableLayer* INetwork::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
{
    return pNetworkImpl->AddSliceLayer(sliceDescriptor, name);
}
393 const char* name)
394{
395 return pNetworkImpl->AddSoftmaxLayer(softmaxDescriptor, name);
396}
397
399 const char* name)
400{
401 return pNetworkImpl->AddSplitterLayer(splitterDescriptor, name);
402}
403
405{
406 return pNetworkImpl->AddMergeLayer(name);
407}
408
410{
412 return pNetworkImpl->AddAdditionLayer(name);
414}
415
417{
419 return pNetworkImpl->AddMultiplicationLayer(name);
421}
422
424 const ConstTensor& mean,
425 const ConstTensor& variance,
426 const ConstTensor& beta,
427 const ConstTensor& gamma,
428 const char* name)
429{
430 return pNetworkImpl->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, name);
431}
432
434{
435 return pNetworkImpl->AddRankLayer(name);
436}
437
439 const char* name)
440{
441 return pNetworkImpl->AddResizeLayer(resizeDescriptor, name);
442}
443
445 const char* name)
446{
447 return pNetworkImpl->AddReduceLayer(reduceDescriptor, name);
448}
449
451 const char* name)
452{
453 return pNetworkImpl->AddInstanceNormalizationLayer(desc, name);
454}
455
457 const char* name)
458{
459 return pNetworkImpl->AddL2NormalizationLayer(desc, name);
460}
461
463 const char* name)
464{
465 return pNetworkImpl->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
466}
467
469 const char* name)
470{
471 return pNetworkImpl->AddConstantLayer(input, name);
472}
473
475 const char* name)
476{
477 return pNetworkImpl->AddReshapeLayer(reshapeDescriptor, name);
478}
479
481 const char* name)
482{
483 return pNetworkImpl->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
484}
485
487 const char* name)
488{
489 return pNetworkImpl->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
490}
491
493{
494 return pNetworkImpl->AddFloorLayer(name);
495}
497{
498 return pNetworkImpl->AddOutputLayer(id, name);
499}
500
502 const LstmInputParams& params,
503 const char* name)
504{
505 return pNetworkImpl->AddLstmLayer(descriptor, params, name);
506}
507
509{
511 return pNetworkImpl->AddDivisionLayer(name);
513}
514
516{
518 return pNetworkImpl->AddSubtractionLayer(name);
520}
521
523{
525 return pNetworkImpl->AddMaximumLayer(name);
527}
528
/// Adds a Mean (reduction) layer to the network by forwarding to the
/// NetworkImpl.
/// @param meanDescriptor Parameters describing the mean operation.
/// @param name Optional layer name (may be nullptr).
/// @return Interface pointer to the newly added layer.
IConnectableLayer* INetwork::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
{
    return pNetworkImpl->AddMeanLayer(meanDescriptor, name);
}
533
535 const char* name)
536{
537 return pNetworkImpl->AddPadLayer(padDescriptor, name);
538}
539
541{
542 return pNetworkImpl->AddQuantizeLayer(name);
543}
544
546 const char* name)
547{
548 return pNetworkImpl->AddStridedSliceLayer(stridedSliceDescriptor, name);
549}
550
552{
554 return pNetworkImpl->AddMinimumLayer(name);
556}
557
559 const char* name)
560{
561 return pNetworkImpl->AddGatherLayer(descriptor, name);
562}
563
565{
566 return pNetworkImpl->AddGatherNdLayer(name);
567}
568
570{
571 return pNetworkImpl->AddSwitchLayer(name);
572}
573
575{
576 return pNetworkImpl->AddPreluLayer(name);
577}
578
580 const ConstTensor& weights,
581 const Optional<ConstTensor>& biases,
582 const char* name)
583{
584 return pNetworkImpl->AddTransposeConvolution2dLayer(descriptor, weights, biases, name);
585}
586
588 const char* name)
589{
590 return pNetworkImpl->AddTransposeLayer(transposeDescriptor, name);
591}
592
594{
595 return pNetworkImpl->AddShapeLayer(name);
596}
597
599 const char* name)
600{
601 return pNetworkImpl->AddStackLayer(descriptor, name);
602}
603
605 const char* name)
606{
607 return pNetworkImpl->AddStandInLayer(descriptor, name);
608}
609
611 const char* name)
612{
613 return pNetworkImpl->AddQuantizedLstmLayer(params, name);
614}
615
617 const LstmInputParams& params,
618 const char* name)
619{
620 return pNetworkImpl->AddQLstmLayer(descriptor, params, name);
621}
622
624 const char* name)
625{
626 return pNetworkImpl->AddLogicalBinaryLayer(descriptor, name);
627}
628
630 const UnidirectionalSequenceLstmDescriptor& descriptor,
631 const LstmInputParams& params,
632 const char* name)
633{
634 return pNetworkImpl->AddUnidirectionalSequenceLstmLayer(descriptor, params, name);
635}
636
638 const char* name)
639{
640 return pNetworkImpl->AddChannelShuffleLayer(descriptor, name);
641}
642
644 const char* name)
645{
646 return pNetworkImpl->AddBatchMatMulLayer(descriptor, name);
647}
648
650{
651 return pNetworkImpl->AddReverseV2Layer(name);
652}
653
655 const char *name)
656{
657 return pNetworkImpl->AddTileLayer(descriptor, name);
658}
659
661 const char* name)
662{
663 return pNetworkImpl->AddBroadcastToLayer(descriptor, name);
664}
665
667 const char *name)
668{
669 return pNetworkImpl->AddScatterNdLayer(descriptor, name);
670}
671
673{
674 return pNetworkImpl->ExecuteStrategy(strategy);
675}
676
678{
679 return new INetwork(networkOptions);
680}
681
683{
684 return INetworkPtr(CreateRaw(networkOptions), &INetwork::Destroy);
685}
686
688{
689 delete network;
690}
691
694
695IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<Graph> graph)
696 : pOptimizedNetworkImpl(new OptimizedNetworkImpl(std::move(graph))) {}
697
698IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl)
699 : pOptimizedNetworkImpl(std::move(impl)) {}
700
701IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
702 : pOptimizedNetworkImpl(new OptimizedNetworkImpl(std::move(graph), modelOptions)) {}
703
705
707{
708 delete network;
709}
710
712{
713 return pOptimizedNetworkImpl->PrintGraph();
714}
715
/// Writes a Graphviz DOT representation of the optimized network to the
/// given stream, forwarding to the implementation.
/// @param stream Output stream receiving the DOT text.
/// @return Status reported by the implementation.
Status IOptimizedNetwork::SerializeToDot(std::ostream& stream) const
{
    return pOptimizedNetworkImpl->SerializeToDot(stream);
}
720
/// Returns the profiler attached to the underlying graph.
/// @return Reference to the shared profiler instance owned by the graph.
const std::shared_ptr<IProfiler>& IOptimizedNetwork::GetProfiler() const
{
    return pOptimizedNetworkImpl->GetGraph().GetProfiler();
}
725
/// Returns the profiling GUID identifying this optimized network, as
/// reported by the implementation.
arm::pipe::ProfilingGuid IOptimizedNetwork::GetGuid() const
{
    return pOptimizedNetworkImpl->GetGuid();
}
730
732{
733 return pOptimizedNetworkImpl->GetNumInputs();
734}
735
737{
738 return pOptimizedNetworkImpl->GetNumOutputs();
739}
740
742{
743 m_Graph->Print();
744 return Status::Success;
745}
746
748{
749 return m_Graph->SerializeToDot(stream);
750}
751
753{
754 return m_Graph->GetNumInputs();
755}
756
758{
759 return m_Graph->GetNumOutputs();
760}
761
762void ReportError(const std::string& errorMessage,
763 Optional<std::vector<std::string>&> errorMessages)
764{
765 std::stringstream fullErrorMessage;
766 fullErrorMessage << "ERROR: " << errorMessage;
767 ARMNN_LOG(warning) << fullErrorMessage.str();
768 if (errorMessages)
769 {
770 errorMessages.value().push_back(fullErrorMessage.str());
771 }
772}
773
774void ReportWarning(const std::string& warningMessage,
775 Optional<std::vector<std::string>&> warningMessages)
776{
777 std::stringstream fullWarningMessage;
778 fullWarningMessage << "WARNING: " << warningMessage;
779 ARMNN_LOG(warning) << fullWarningMessage.str();
780 if (warningMessages)
781 {
782 warningMessages.value().push_back(fullWarningMessage.str());
783 }
784}
785
786// Given an OptimizationResult, build and add an error message to the errMessages vector. Then return the result.
788 const Layer* layer,
789 const BackendSettings& backendSettings,
790 Optional<std::vector<std::string>&> errMessages)
791{
792 std::stringstream failureMsg;
793 failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
794 << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
795 ReportError(failureMsg.str(), errMessages);
796
797 res.m_Error = true;
798 return res;
799}
800
801bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
802{
803 bool noErrors = true;
804 unsigned int numOutputs = layer->GetNumOutputSlots();
805 for (unsigned int i = 0; i < numOutputs; i++) {
806 OutputSlot& outputSlot = layer->GetOutputSlot(i);
807 TensorInfo info = outputSlot.GetTensorInfo();
808 auto quantizationDataType = info.GetDataType();
809 auto quantizationScales = info.GetQuantizationScales();
810 // For any Quantized Tensor ensure scale(s) are set
811 switch(quantizationDataType) {
816 if ((quantizationDataType == DataType::QAsymmU8 || quantizationDataType == DataType::QAsymmS8)
817 && info.HasPerAxisQuantization()) {
818 throw InvalidArgumentException("Per Axis Quantization is not supported in "
819 "Asymmetric Quantization Datatype.");
820 }
821 // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
822 if (!info.HasPerAxisQuantization() && quantizationDataType == DataType::QAsymmU8 &&
823 (info.GetQuantizationScale() != (1.0f / 256.0f) ||
824 info.GetQuantizationOffset() != 0) &&
826 std::stringstream ss;
827 ss << "Quantization parameters for Softmax layer (Scale: " <<
828 info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
829 ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
830 ARMNN_LOG(warning) << ss.str();
831 info.SetQuantizationScale((1.0f / 256.0f));
832 info.SetQuantizationOffset(0);
833 outputSlot.SetTensorInfo(info);
834 ReportError(ss.str(), errMessages);
835 }
836 break;
837 default:
838 break;
839 }
840 }
841 return noErrors;
842}
843
845 Graph& graph,
846 Layer* layer,
847 BackendId backend,
848 DataType dataTypeIn,
849 DataType dataTypeOut,
850 const std::vector<BackendId>& availablePreferredBackends,
851 std::string& reasonIfUnsupported,
852 Optional<std::vector<std::string>&> messages)
853{
854 OptimizationResult result;
855
856 // Helper lambda to compose meaningful error message before returning with error
857 auto ReturnError = [&](const Layer* layer)
858 {
859 return ReturnWithError(result, layer, backendSettings, messages);
860 };
861
862 // need to set the compute device on the layer
863 // before we can check if it is supported
864 layer->SetBackendId(backend);
865 std::string currentReasonIfUnsupported;
866
867 // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
868 // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
869 // to be FP32 and inserting convert layers around the FP32 operator.
870 bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
871 reasonIfUnsupported += currentReasonIfUnsupported;
872 if (!isLayerSupported && HasCapability("AllOrNothing", backend))
873 {
874 // It has the capability but is it set to true?
875 if (GetCapability("AllOrNothing", backend).value().GetValue().AsBool())
876 {
877 // This is when a backend says it must execute all layers in a model. We'll report a message to say the
878 // backend will be ignored for the rest of this subgraph.
879 std::stringstream fullWarningMessage;
880 fullWarningMessage << "Backend: " << backend
881 << " has \"AllOrNothing\" enabled. A layer of type "
882 << GetLayerTypeAsCString(layer->GetType()) << " reports that it is not supported. "
883 << "This backend will not be considered to execute this subgraph.";
884 reasonIfUnsupported.append(fullWarningMessage.str());
885 // Also add it to the messages if they exist.
886 ReportWarning(fullWarningMessage.str(), messages);
887 result.m_Warning = true;
888 return result;
889 }
890 }
891 // This string matches the error message that is produced by acl when attempting to run FP16 kernels on
892 // a cpu or build that does not have fp16 support. We use this to check if we should add
893 // conversion layers or not.
894 std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
895 if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
896 {
897 if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
898 {
899 if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
902 {
903 auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
904 {
905 if (layer.GetType() == LayerType::Constant)
906 {
907 ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
908
909 auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
910
911 if (info.GetDataType() == DataType::Float16)
912 {
913 std::vector<float> newValues(info.GetNumElements());
914
916 constantLayer->m_LayerOutput->GetConstTensor<Half>(),
917 info.GetNumElements(),
918 newValues.data());
919
920 TensorInfo newInfo(info);
922 ConstTensor newInput(newInfo, newValues);
923 constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
924
925 layer.GetOutputSlot(0).SetTensorInfo(newInfo);
926 }
927 }
928 };
929
930 bool checkType = false;
931
932 for (auto inputSlot : layer->GetInputSlots())
933 {
934 auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
935 if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
936 {
937 if (connectedOutputSlot->GetNumConnections() == 1)
938 {
939 checkType = true;
940 ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
941 }
942 }
943 }
944
945 // Insert FP16 -> FP32 conversion layer before current layer
946 std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
947 if (dataTypeIn == DataType::Float16)
948 {
949 convertFp16ToFp32Layers =
950 InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
951 }
952
953 // Insert FP32 -> FP16 conversion layer after current layer
954 std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
955 if (dataTypeOut == DataType::Float16)
956 {
957 convertFp32ToFp16Layers =
959 }
960
961 // Assign a supported backend to the newly introduced conversion layers
962 auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
963 {
964 bool supportedBackendFound = false;
965 std::string reasonIfUnsupported;
966
967 // Try preferred backend first
968 layer->SetBackendId(preferredBackend);
971 reasonIfUnsupported))
972 {
973 supportedBackendFound = true;
974 }
975 else
976 {
977 for (const auto& backend : availablePreferredBackends)
978 {
979 // Skip preferred backend (we already determined that it is not supported)
980 if (backend == preferredBackend)
981 {
982 continue;
983 }
984
985 layer->SetBackendId(backend);
988 reasonIfUnsupported))
989 {
990 supportedBackendFound = true;
991 break;
992 }
993 }
994 }
995
996 return supportedBackendFound;
997 };
998
999 for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
1000 {
1001 if (!AssignFirstSupportedBackend(convertLayer, backend))
1002 {
1003 return ReturnError(convertLayer);
1004 }
1005 }
1006
1007 for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
1008 {
1009 if (!AssignFirstSupportedBackend(convertLayer, backend))
1010 {
1011 return ReturnError(convertLayer);
1012 }
1013 }
1014
1015 return result;
1016 }
1017 }
1018
1019 std::stringstream warningMsg;
1020 warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
1021 << " is not supported on requested backend " << layer->GetBackendId().Get()
1022 << " for input data type " << GetDataTypeName(dataTypeIn)
1023 << " and output data type " << GetDataTypeName(dataTypeOut)
1024 << " (reason: " << reasonIfUnsupported
1025 << "), falling back to the next backend.";
1026 ReportWarning(warningMsg.str(), messages);
1027
1028 return OptimizationResult(true, false);
1029 }
1030 else
1031 {
1032 return result;
1033 }
1034}
1035
1036inline std::vector<DataType> GetLayerInOutDatatype(const Layer* layer)
1037{
1038 DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
1040 DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
1042 return {dataTypeIn, dataTypeOut};
1043}
1044
1046 const std::vector<BackendId>& availablePreferredBackends)
1047{
1048 bool hasFp16 = false;
1049 // Check if the first preferred backend has FP16 support
1050 auto firstBackend = availablePreferredBackends[0];
1051 auto backendObjPtr = backends.find(firstBackend)->second.get();
1052
1053 auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
1054 auto backendCapabilities = backendObjPtr->GetCapabilities();
1055
1056 if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
1057 {
1058 // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled.
1059 hasFp16 = true;
1060 ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
1061 << ", has FP16 support.";
1062 }
1063 else
1064 {
1065 ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
1066 << ", does not have FP16 support. "
1067 << "The FP16 turbo mode option will be disable. It will run using FP32.";
1068 }
1069
1070 // Check if the rest of the available preferred backends have FP16 support
1071 for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
1072 {
1073 auto backend = availablePreferredBackends[i];
1074 backendObjPtr = backends.find(backend)->second.get();
1075 backendCapabilities = backendObjPtr->GetCapabilities();
1076 if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
1077 {
1078 ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", does not have FP16 support. "
1079 << "It will run using FP32 when falling back to this backend.";
1080 }
1081 else
1082 {
1083 ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
1084 }
1085 }
1086
1087 return hasFp16;
1088}
1089
1090// Refactor to allow passing the IConnectableLayer* rather than Layer Iterator
1091// on Graph and SubgraphView which are different types.
1094 Optional<std::vector<std::string>&> errMessages,
1095 OptimizationResult& result,
1096 BackendSettings& backendSettings,
1097 std::vector<BackendId>& availablePreferredBackends,
1098 bool& restart)
1099{
1100 auto ReturnError = [&](const Layer* layer)
1101 {
1102 return ReturnWithError(result, layer, backendSettings, errMessages);
1103 };
1104
1105 auto layer = PolymorphicDowncast<Layer*>(it);
1106
1107 if (layer->GetType() == LayerType::Input)
1108 {
1109 return;
1110 }
1111
1112 std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1113
1114 std::string reasonIfUnsupported;
1115 bool found = false;
1116 if (!CheckScaleSetOnQuantizedType(layer, errMessages))
1117 {
1118 // don't bomb immediately, find all the quantized outputs
1119 // which haven't had a scale set and report them all back.
1120 result.m_Error = true;
1121 }
1122
1123 // First try assign layer to hint backend
1124 if (layer->GetBackendHint().has_value() &&
1125 backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
1126 AttemptBackendAssignment(backendSettings,
1127 optNetObjPtr->GetGraph(),
1128 layer,
1129 layer->GetBackendHint().value(),
1130 inOutDataType[0],
1131 inOutDataType[1],
1132 availablePreferredBackends,
1133 reasonIfUnsupported,
1134 errMessages).IsOk())
1135 {
1136 found = true;
1137 backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
1138 }
1139 else
1140 {
1141 // Try assign layer to preferred list of backends
1142 for (const auto& backend : availablePreferredBackends)
1143 {
1144 if (layer->GetBackendHint().has_value() &&
1145 layer->GetBackendHint().value() == backend)
1146 {
1147 continue; //Don't re-test the backend hint
1148 }
1149
1150 OptimizationResult res = AttemptBackendAssignment(backendSettings,
1151 optNetObjPtr->GetGraph(),
1152 layer,
1153 backend,
1154 inOutDataType[0],
1155 inOutDataType[1],
1156 availablePreferredBackends,
1157 reasonIfUnsupported,
1158 errMessages);
1159
1160 if (res.IsOk())
1161 {
1162 found = true;
1163 backendSettings.m_SelectedBackends.insert(backend);
1164 break;
1165 }
1166 else if (res.IsError())
1167 {
1168 result = res; // Cannot continue.
1169 // Note: we don't need to log the error as it would already
1170 // be logged in AttemptBackendAssignment().
1171 }
1172 else if (res.IsWarningOnly())
1173 {
1174 // Does the warning message relate to an AllOrNothing backend saying it rejects the subgraph?
1175 if (reasonIfUnsupported.find("AllOrNothing") != std::string::npos)
1176 {
1177 // Layer not supported by all or nothing backend. Add this backend to the ignore list and
1178 // indicate that the backend search should restart.
1179 backendSettings.m_IgnoredBackends.insert(backend);
1180 restart = true;
1181 return;
1182 }
1183 }
1184 }
1185 }
1186
1187 // If the layer is unsupported by any devices, log and return a null network.
1188 if (!found)
1189 {
1190 // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
1191 // fallback we should set the compute device on the layer to CpuRef (these are not
1192 // available as accelerated operations, or are only available under certain
1193 // conditions, currently they comprise MemCopy, Constant, Permute)
1194 armnn::LayerType layerType = layer->GetType();
1195 if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
1196 layerType == armnn::LayerType::Constant ||
1197 layerType == armnn::LayerType::Permute))
1198 {
1199 BackendId cpuBackendId(armnn::Compute::CpuRef);
1200 layer->SetBackendId(cpuBackendId);
1201 backendSettings.m_SelectedBackends.insert(cpuBackendId);
1202 }
1203 else
1204 {
1205 result = ReturnError(layer);
1206 }
1207 }
1208
1209}
1210
1212 BackendSettings& backendSettings,
1213 Graph::Iterator& firstLayer,
1214 Graph::Iterator& lastLayer,
1215 Optional<std::vector<std::string>&> errMessages)
1216{
1217 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1218 OptimizationResult result;
1219
1220 bool restart = false;
1221 BackendIdVector availablePreferredBackends;
1222 for (auto it = firstLayer; it != lastLayer; it = (restart ? firstLayer : ++it))
1223 {
1224 if (it == firstLayer)
1225 {
1226 availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1227 if (availablePreferredBackends.empty())
1228 {
1229 ReportError("No preferred backends are available", errMessages);
1230 result.m_Error = true;
1231 return result;
1232 }
1233 }
1234 // In the case where we've set restart it must be reset before we continue looking at backends.
1235 if (restart)
1236 {
1237 restart = false;
1238 }
1239 AssignBackendsIConnectable(optNetObjPtr,
1240 *it,
1241 errMessages,
1242 result,
1243 backendSettings,
1244 availablePreferredBackends,
1245 restart);
1246 }
1247
1248 for (auto it = firstLayer; it != lastLayer; ++it)
1249 {
1250 auto layer = PolymorphicDowncast<Layer*>(*it);
1251 std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);
1252
1253 // In AttemptBackendAssignment() we check:
1254 // - if input/output datatypes of the layer are float16
1255 // - if the layer is supported with these datatypes
1256 // If the layer is not supported (failing on ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED() in clframework),
1257 // we attempt to insert conversion layers either side of the new fp32 layer.
1258 bool isFloat16 = false;
1259 for (auto type : inOutDataType)
1260 {
1261 if (type == DataType::Float16)
1262 {
1263 isFloat16 = true;
1264 break;
1265 }
1266 }
1267
1268 if (layer->GetBackendId() == "Unknown" || isFloat16)
1269 {
1270 AssignBackendsIConnectable(optNetObjPtr,
1271 *it,
1272 errMessages,
1273 result,
1274 backendSettings,
1275 availablePreferredBackends,
1276 restart);
1277 }
1278 }
1279
1280 for (auto it = firstLayer; it != lastLayer; ++it)
1281 {
1282 auto layer = PolymorphicDowncast<Layer*>(*it);
1283
1284 if(layer->GetType() == LayerType::Input)
1285 {
1286 BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1287 layer->SetBackendId(connectedBackendId);
1288 }
1289 }
1290
1291 return result;
1292}
1293
1295 BackendSettings& backendSettings,
1298 Optional<std::vector<std::string>&> errMessages)
1299{
1300 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1301 OptimizationResult result;
1302
1303 auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1304 if (availablePreferredBackends.empty())
1305 {
1306 std::stringstream failureMsg;
1307 failureMsg << "No preferred backends are available";
1308 ReportError(failureMsg.str(), errMessages);
1309
1310 result.m_Error = true;
1311 return result;
1312 }
1313
1314 bool restart = false;
1315 for (auto it = firstLayer; it != lastLayer; ++it)
1316 {
1317 AssignBackendsIConnectable(optNetObjPtr,
1318 *it,
1319 errMessages,
1320 result,
1321 backendSettings,
1322 availablePreferredBackends,
1323 restart);
1324 }
1325
1326 for (auto it = firstLayer; it != lastLayer; ++it)
1327 {
1328 auto layer = PolymorphicDowncast<Layer*>(*it);
1329
1330 if(layer->GetType() == LayerType::Input)
1331 {
1332 BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1333 layer->SetBackendId(connectedBackendId);
1334 }
1335 }
1336
1337 return result;
1338}
1339
1341 BackendSettings& backendSettings,
1342 SubgraphView& subgraph,
1343 Optional<std::vector<std::string>&> errMessages)
1344{
1345 SubgraphView::IConnectableLayerIterator firstLayer = subgraph.begin();
1346 SubgraphView::IConnectableLayerIterator lastLayer = subgraph.end();
1347 return AssignBackends(optNetObjPtr,
1348 backendSettings,
1349 firstLayer,
1350 lastLayer,
1351 errMessages);
1352}
1353
1355 BackendSettings& backendSettings)
1356{
1357 BackendsMap backends;
1358 auto const& backendRegistry = BackendRegistryInstance();
1359 for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
1360 {
1361 auto backendFactory = backendRegistry.GetFactory(selectedBackend);
1362 auto backendObjPtr = backendFactory();
1363
1364 backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
1365
1366 backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
1367 }
1368
1369 return backends;
1370}
1371
1373 BackendSettings& backendSettings,
1374 BackendsMap& backends,
1375 const ModelOptions& modelOptions,
1376 Optional<std::vector<std::string>&> errMessages)
1377{
1378 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
1379 OptimizationResult result;
1380
1381 // Get the optimized graph
1382 Graph& optGraph = optNetObjPtr->GetGraph();
1383
1384 // Run backend specific optimizations
1385 for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
1386 {
1387 auto backendObjPtr = backends.find(selectedBackend)->second.get();
1388 if (!backendObjPtr)
1389 {
1390 throw armnn::NullPointerException("backendObjPtr must not be null.");
1391 }
1392
1393
1394 if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
1395 {
1401 }
1402
1403 // Select sub-graphs based on backend
1406 // Select layers assigned to the requested backend
1407 [&backendObjPtr](const Layer& layer)
1408 {
1409
1410 return layer.GetType() != LayerType::Input &&
1411 layer.GetType() != LayerType::Output &&
1412 layer.GetBackendId() == backendObjPtr->GetId();
1413 });
1414 if (subgraphs.empty())
1415 {
1416 // No sub-graphs found, try with next selected backend
1417 continue;
1418 }
1419
1420 // Try to optimize each sub-graph
1421 for (auto& subgraph : subgraphs)
1422 {
1423 // Try to optimize the current sub-graph
1424 ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
1425 OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
1426 if (!optimizationViews.Validate(*subgraph))
1427 {
1428 throw armnn::Exception("optimizationViews must have a valid subgraph.");
1429 }
1430
1431 // Optimization attempted, check the resulting optimized sub-graph
1432 for (auto& substitution : optimizationViews.GetSubstitutions())
1433 {
1434 // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
1435 SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
1436 SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
1437 optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
1438
1439 // Assign the current backend to the optimized sub-graph
1440 const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
1441 std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
1442 {
1443 PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
1444 });
1445 }
1446
1447 // Remove deleted sub-graphs
1448 for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
1449 {
1450 for (auto& l : deletedSubgraph.GetIConnectableLayers())
1451 {
1452 Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
1453 for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
1454 {
1455 auto inputSlot = deletedLayer->GetInputSlot(in -1);
1456 OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
1457 parentOut->Disconnect(inputSlot);
1458 for (unsigned int out = deletedLayer->GetOutputSlot(in -1).GetNumConnections(); out > 0; --out)
1459 {
1460 InputSlot* childIn = deletedLayer->GetOutputSlot(in - 1).GetConnection(out -1);
1461 deletedLayer->GetOutputSlot(in - 1).Disconnect(*childIn);
1462 parentOut->Connect(*childIn);
1463 }
1464 }
1465 optGraph.EraseLayer(deletedLayer);
1466 }
1467 }
1468
1469 if (!optimizationViews.GetFailedSubgraphs().empty())
1470 {
1471 std::stringstream warningMsg;
1472 warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
1473 ReportWarning(warningMsg.str(), errMessages);
1474
1475 // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
1476 BackendSettings settingsCopy(backendSettings);
1477 if (!backendObjPtr->GetId().IsCpuRef())
1478 {
1479 // Add the current backend to the list of backends to ignore
1480 settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
1481 }
1482
1483 int count=0;
1484 for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
1485 {
1486 // An error occurred: the optimization was attempted but not performed, try different backends
1487 std::stringstream subgraphMsg;
1488 subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
1489 << " layers inside sub-graph " << count++;
1490 ReportWarning(subgraphMsg.str(), errMessages);
1491
1492 OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
1493 settingsCopy,
1494 *subgraph,
1495 errMessages);
1496 if (reassignmentResult.m_Error)
1497 {
1498 // Failed to re-assign one of the remaining backends to each layer of the sub-graph
1499 result.m_Error = true;
1500 return result;
1501 }
1502 }
1503 }
1504 }
1505 }
1506
1507 return result;
1508}
1509
1513{
1514 if (src != dst)
1515 {
1516 ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1517 ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1518
1519 if (srcFactory && dstFactory &&
1520 (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1521 {
1522 return false;
1523 }
1524 return true;
1525 }
1526 return false;
1527}
1528
1529// Find the handle factory for the input layer which results in fewest required copies.
1531 OutputSlot& slot,
1533 bool importEnabled)
1534{
1535 Layer& layer = slot.GetOwningLayer();
1536
1537 if (layer.GetType() != LayerType::Input)
1538 {
1539 throw armnn::Exception("layer must be of type \"Input\".");
1540 }
1541
1542 // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1543 // doesn't matter which backend it is assigned to because they all use the same implementation, which
1544 // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1545 // select a factory with maximum compatibility with the layers connected to the InputLayer.
1546
1547 // First ensure the from backends can support the TensorHandleAPI
1548 auto frmBackend = backends.find(layer.GetBackendId());
1549 if (frmBackend == backends.end() ||
1550 !frmBackend->second->SupportsTensorAllocatorAPI())
1551 {
1553 }
1554
1555 // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1556 // fewest copies.
1557 std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1558 int topScore = 0;
1560
1561 for (auto&& connection : slot.GetConnections())
1562 {
1563
1564 const Layer& connectedLayer = connection->GetOwningLayer();
1565
1566 auto toBackend = backends.find(connectedLayer.GetBackendId());
1567 if (toBackend == backends.end())
1568 {
1569 throw armnn::Exception("Backend id not found for the connected layer");
1570 }
1571
1572 if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1573 {
1574 // The destination backend does not support the tensor allocator API, move to the next one
1575 continue;
1576 }
1577
1578 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1579 for (auto&& dst : dstPrefs)
1580 {
1581 // Input layers use the mem copy workload or import, so the selected factory must
1582 // support either the map/unmap API or Import API
1583 ITensorHandleFactory* factory = registry.GetFactory(dst);
1584 if (importEnabled && factory->GetImportFlags() == 0)
1585 {
1586 continue;
1587 }
1588 else if (!importEnabled && !factory->SupportsMapUnmap())
1589 {
1590 continue;
1591 }
1592
1593 auto it = factoryScores.find(dst);
1594 if (it == factoryScores.end())
1595 {
1596 // Add new score to the table
1597 factoryScores[dst] = 0;
1599 {
1600 topChoice = dst;
1601 }
1602 }
1603 else
1604 {
1605 // Increase the score
1606 factoryScores[dst]++;
1607
1608 // Track the best option
1609 if (factoryScores[dst] > topScore)
1610 {
1611 topScore = factoryScores[dst];
1612 topChoice = dst;
1613 }
1614 }
1615 }
1616 }
1617
1618 return topChoice;
1619}
1620
1621// Find the handle factory for the output layer which results in fewest required copies.
1629
1630// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
1631// when considering all connections.
1633 OutputSlot& outputSlot,
1635 bool exportEnabled)
1636{
1637 // First ensure the from backends can support the TensorHandleAPI
1638 Layer& layer = outputSlot.GetOwningLayer();
1639 auto frmBackend = backends.find(layer.GetBackendId());
1640 if (frmBackend == backends.end() ||
1641 !frmBackend->second->SupportsTensorAllocatorAPI())
1642 {
1644 }
1645
1646 bool outputConnection = false;
1647 for (auto&& connection : outputSlot.GetConnections())
1648 {
1649 const Layer& connectedLayer = connection->GetOwningLayer();
1650 if (connectedLayer.GetType() == LayerType::Output)
1651 {
1652 outputConnection = true;
1653 }
1654 }
1655
1656 IBackendInternal* srcBackend = frmBackend->second.get();
1657 auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1658
1659 // Initialize the scores
1660 std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1661 for (auto&& pref : srcPrefs)
1662 {
1663 if (exportEnabled)
1664 {
1665 ITensorHandleFactory* factory = registry.GetFactory(pref);
1666 if (outputConnection)
1667 {
1668 // Check if this is fallback case
1669 bool fallbackConnection = false;
1670 for (auto&& inputSlot : layer.GetInputSlots())
1671 {
1672 if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1673 {
1674 fallbackConnection = true;
1675 }
1676 }
1677 if (fallbackConnection)
1678 {
1679 auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1680 // Cannot use factory import if fallback import is not supported.
1681 if (!factoryCap.empty())
1682 {
1683 continue;
1684 }
1685 }
1686 else if (factory->GetExportFlags() == 0)
1687 {
1688 continue;
1689 }
1690 }
1691 if (!outputConnection)
1692 {
1693 auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1694 // Cannot use factory import if fallback import is not supported.
1695 if (!factoryCap.empty())
1696 {
1697 continue;
1698 }
1699 }
1700
1701 }
1702 else
1703 {
1704 // Only consider factories that support map/unmap
1705 ITensorHandleFactory* factory = registry.GetFactory(pref);
1706 if (!factory->SupportsMapUnmap())
1707 {
1708 // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1709 continue;
1710 }
1711 }
1712
1713
1714 auto it = factoryScores.find(pref);
1715 if (it == factoryScores.end())
1716 {
1717 // Add new score to the table
1718 factoryScores[pref] = 0;
1719 }
1720 }
1721
1722 // Score each handle factory based on how many times it requires copies on the slot connections
1723 for (auto&& connection : outputSlot.GetConnections())
1724 {
1725 const Layer& connectedLayer = connection->GetOwningLayer();
1726
1727 auto toBackend = backends.find(connectedLayer.GetBackendId());
1728 if (toBackend == backends.end())
1729 {
1730 throw armnn::Exception("Backend id not found for the connected layer");
1731 }
1732
1733 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1734 for (auto&& src : srcPrefs)
1735 {
1736 if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1737 {
1738 continue;
1739 }
1740
1741 for (auto&& dst : dstPrefs)
1742 {
1743 if (RequiresCopy(src, dst, registry))
1744 {
1745 // A copy is required for this pairing, so increase the score (lower scores win)
1746 factoryScores[src]++;
1747 break;
1748 }
1749 }
1750 }
1751 }
1752
1753 // Find the lowest score
1754 int minScore = std::numeric_limits<int>::max();
1755 for (auto it : factoryScores)
1756 {
1757 minScore = std::min(minScore, it.second);
1758 }
1759
1760 // Collect factories matching the best(lowest) score
1761 std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1762 for (auto it : factoryScores)
1763 {
1764 if (it.second == minScore)
1765 {
1766 optimalFactories.push_back(it.first);
1767 }
1768 }
1769
1770 // For all compatible Factories matching the best score, find the preferred one for the current layer.
1771 for (auto&& srcPref : srcPrefs)
1772 {
1773 for (auto&& comp : optimalFactories)
1774 {
1775 if (comp == srcPref)
1776 {
1777 return comp;
1778 }
1779 }
1780 }
1781
1783}
1784
1787 const Layer& layer,
1788 const Layer& connectedLayer,
1790 bool importEnabled)
1791{
1792 auto toBackend = backends.find(connectedLayer.GetBackendId());
1793 if (toBackend == backends.end())
1794 {
1795 throw armnn::Exception("Backend id not found for the connected layer");
1796 }
1797
1798 auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1799
1800 // Legacy API check for backward compatibility
1801 if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1802 {
1803 if (layer.GetBackendId() != connectedLayer.GetBackendId())
1804 {
1806 }
1807 else
1808 {
1810 }
1811 }
1812
1813 // TensorHandleFactory API present, so perform more sophisticated strategies.
1814 // Dst Output layers don't require copy because they use import or map/unmap
1815 if (connectedLayer.GetType() == LayerType::Output)
1816 {
1818 }
1819
1820 // Search for direct match in prefs
1821 for (auto&& pref : dstPrefs)
1822 {
1823 if (pref == srcFactoryId)
1824 {
1826 }
1827 }
1828
1829 // Search for export/import options
1830 ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1831 if (srcFactory->GetExportFlags() != 0 && importEnabled)
1832 {
1833 for (auto&& pref : dstPrefs)
1834 {
1835 ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1836
1837 // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1838 if (!dstFactory) {
1839 continue;
1840 }
1841 if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1842 {
1843 auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1844 auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1845 &connectedLayer,
1847 auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1848 auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1849 &connectedLayer,
1851 // Do not require memory copy if the source and destination do not require padding.
1852 if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1853 {
1855 }
1856 }
1857 }
1858 }
1859
1860 // Search for copy options via map/unmap
1861 if (srcFactory->SupportsMapUnmap())
1862 {
1863 for (auto&& pref : dstPrefs)
1864 {
1865 ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1866 if (dstFactory && dstFactory->SupportsMapUnmap())
1867 {
1869 }
1870 }
1871 }
1872
1874}
1875
1876// Select the TensorHandleFactories and the corresponding memory strategy
1878 BackendsMap& backends,
1880 bool importEnabled,
1881 bool exportEnabled,
1882 Optional<std::vector<std::string>&> errMessages)
1883{
1884 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1885 OptimizationResult result;
1886
1887 optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
1888 {
1889 // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
1890 // assignment if this check fails
1891 if (backends.find(layer->GetBackendId()) == backends.end())
1892 {
1893 throw armnn::Exception("Backend id not found for the layer");
1894 }
1895
1896 // Check each output separately
1897 for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1898 {
1899 OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1900
1902
1903 // Calculate the factory to use which results in the fewest copies being made.
1904 switch(layer->GetType())
1905 {
1906 case LayerType::Input:
1907 slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1908 break;
1909 case LayerType::Output:
1910 slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1911 break;
1912 default:
1913 slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
1914 break;
1915 }
1916 outputSlot.SetTensorHandleFactory(slotOption);
1917
1918 // Now determine the "best" edge strategy for each connection given the slotOption.
1919 unsigned int connectionIdx = 0;
1920 for (auto&& connection : outputSlot.GetConnections())
1921 {
1922 const Layer& connectedLayer = connection->GetOwningLayer();
1923
1924 EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1925 registry, importEnabled);
1926
1927 if (strategy == EdgeStrategy::Undefined)
1928 {
1929 result.m_Error = true;
1930 if (errMessages)
1931 {
1932 errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1933 " between backends.");
1934 }
1935 return;
1936 }
1937
1938 outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1939
1940 connectionIdx++;
1941 }
1942 }
1943 });
1944
1945 return result;
1946}
1947
1948bool CheckFastMathSupport(const std::vector<BackendId>& availablePreferredBackends,
1949 const ModelOptions& modelOptions)
1950{
1951 bool hasFastMath = false;
1952 // Check if the first preferred backend has Fastmath support
1953 auto firstBackend = availablePreferredBackends[0];
1954 if (!modelOptions.empty())
1955 {
1956 ParseOptions(modelOptions, firstBackend, [&](std::string name, const BackendOptions::Var& value)
1957 {
1958 if (name == "FastMathEnabled")
1959 {
1960 hasFastMath = value.AsBool();
1961 ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
1962 << ", has FastMath support.";
1963 }
1964 });
1965 }
1966 else
1967 {
1968 ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
1969 << ", does not have FastMath support. "
1970 << "Support for Turbo mode for TfLite post quantized FP16 models wil be disabled.";
1971 }
1972
1973 return hasFastMath;
1974}
1975
1976bool IsTfLiteTurboModel(const Graph& optGraph)
1977{
1978 // We will define a TfLiteTurboModel as follows:
1979 // All constant layers which are followed by a dequantize layer convert from Fp16 to FP32
1980 Graph::ConstIterator firstLayer = optGraph.begin();
1981 Graph::ConstIterator lastLayer = optGraph.end();
1982 // There must be at least one constant layer to dequantize layer converting from FP16 to Fp32
1983 bool atLeastOneDequantizeEncountered = false;
1984 for (auto it = firstLayer; it != lastLayer; ++it)
1985 {
1986 auto layer = *it;
1987 if (layer->GetType() == LayerType::Constant)
1988 {
1989 auto& connectedLayer = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer();
1990 if (connectedLayer.GetType() == LayerType::Dequantize)
1991 {
1992 if(!(connectedLayer.GetInputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16 &&
1993 connectedLayer.GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32))
1994 {
1995 return false;
1996 }
1997 else
1998 {
1999 atLeastOneDequantizeEncountered = true;
2000 }
2001 }
2002 }
2003 }
2004 if (!atLeastOneDequantizeEncountered)
2005 {
2006 return false;
2007 }
2008 return true;
2009}
2010
2011
2012// Forwarding function to remain backward compatible with legacy OptimizerOptions
2014 const std::vector<BackendId>& backendPreferences,
2015 const IDeviceSpec& deviceSpec,
2016 const OptimizerOptions& options,
2017 Optional<std::vector<std::string>&> messages)
2018{
2019 return Optimize(inGraph,
2020 backendPreferences,
2021 deviceSpec,
2022 OptimizerOptionsOpaque(options),
2023 messages);
2024}
2025
2027 const std::vector<BackendId>& backendPreferences,
2028 const IDeviceSpec& deviceSpec,
2029 const OptimizerOptionsOpaque& options,
2030 Optional<std::vector<std::string>&> messages)
2031{
2032 ARMNN_LOG(debug) << options.ToString();
2033
2034 // Enable profiling
2035 auto profiler = inGraph.GetProfiler();
2037 profiler->EnableProfiling(options.GetProfilingEnabled());
2038
2040 if (backendPreferences.empty())
2041 {
2042 throw InvalidArgumentException("Invoked Optimize with no backends specified");
2043 }
2044
2045 if (options.GetReduceFp32ToBf16())
2046 {
2047 throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
2048 "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
2049 }
2050
2051 if (options.GetReduceFp32ToFp16() && options.GetReduceFp32ToBf16())
2052 {
2053 throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
2054 }
2055
2056 // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
2058
2059 std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
2060
2061 // We need to pass on the information about whether import and export is enabled to the LoadNetwork phase.
2062 // The mechanism to do that is to add model options to the optimized network.
2063 armnn::BackendOptions importExport("Global",
2064 {{"ImportEnabled", options.GetImportEnabled()},
2065 {"ExportEnabled", options.GetExportEnabled()}});
2066 ModelOptions optimizedOptions(options.GetModelOptions());
2067 optimizedOptions.push_back(importExport);
2068
2069 auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
2071
2072 IOptimizedNetwork* optNetObjPtr = optNet.get();
2073
2074 // Get the optimized graph
2075 Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
2076
2078 {
2079 // Infer the tensor infos for all output slots. Throws an exception on failure
2080 optGraph.InferTensorInfos();
2081 }
2082
2083 using namespace optimizations;
2084 // Substitute Max + Min with Bounded Relu before AddBroadcastReshapeLayer optimisation,
2085 // as Bounded ReLu needs the constants to be 1D size 1
2086 Optimizer::Pass(optGraph, MakeOptimizations(MaxMinIntoBoundedRelu()));
2087
2088 // Perform BroadcastToOptimizationLayer before AddBroadcastReshapeLayer optimisation
2089 Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
2090
2091 Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
2092
2094 {
2095 // Validate the tensor infos for all output slots. Throws an exception on failure
2096 optGraph.InferTensorInfos();
2097 }
2098
2099 // Initialize backend settings
2100 BackendSettings backendSettings(backendPreferences, deviceSpec);
2101 auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
2102 if (availablePreferredBackends.empty())
2103 {
2104 std::stringstream failureMsg;
2105 failureMsg << "None of the preferred backends " << backendPreferences
2106 << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
2107 ReportError(failureMsg.str(), messages);
2108 throw InvalidArgumentException(failureMsg.str());
2109 }
2110
2111 // Create a map to temporarily hold initialized backend objects
2112 TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
2113 BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
2114 bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
2115
2116 bool reduceFp32ToFp16 = options.GetReduceFp32ToFp16();
2117 // If fp16 is supported on the backend and fastmath has been enabled and the model is a TfLite converted Fp16
2118 // model: enable turbo mode optimizations
2119 if (hasFp16 && CheckFastMathSupport(availablePreferredBackends, optimizedOptions) && IsTfLiteTurboModel(optGraph))
2120 {
2121 Optimizer::Pass(optGraph, MakeOptimizations(TurboConvertConstDequantisationLayersToConstLayers()));
2122 reduceFp32ToFp16 = true;
2123 }
2124 else
2125 {
2126 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
2127 }
2128
2129 // Group Constant Layer optimizations together where possible.
2130 // This is important as:
2131 // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
2132 // FuseBatchNormIntoDepthwiseConvolution2D.
2133 Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer()));
2134 // Perform optimisation passes
2135 Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
2136 SquashEqualTransposeSiblings(),
2137 SquashEqualReshapeSiblings(),
2138 OptimizeInversePermutes(),
2139 OptimizeInverseTransposes(),
2140 MovePermuteUp(),
2141 MoveTransposeUp(),
2142 PermuteAsReshape(),
2143 TransposeAsReshape(),
2144 OptimizeConsecutiveReshapes(),
2145 BroadcastToOptimizationLayer(),
2146 PermuteAndBatchToSpaceAsDepthToSpace(),
2147 TransposeAndBatchToSpaceAsDepthToSpace(),
2148 FuseBatchNormIntoConvolution2DFloat32(),
2149 FuseBatchNormIntoConvolution2DFloat16(),
2150 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
2151 FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
2152
2153 const std::vector<BackendId> mappedGpuBackends = BackendRegistryInstance().GetMappedGpuBackends();
2154
2155 // All or nothing Gpu backends cannot be used as fallback
2156 for (auto backend : mappedGpuBackends)
2157 {
2158 if (std::count(backendPreferences.begin(), backendPreferences.end(), backend)
2159 && (backendPreferences[0] != backend) &&
2160 (backendPreferences[0] != armnn::BackendId("GpuAcc")))
2161 {
2162 std::stringstream failureMsg;
2163 failureMsg << backend << " backend cannot be specified as fallback.";
2164 ReportError(failureMsg.str(), messages);
2165 throw InvalidArgumentException(failureMsg.str());
2166 }
2167 }
2168
2169 std::vector<BackendId> amendedBackendPreferences = backendPreferences;
2170 std::unordered_set<BackendId> supportedBackends = armnn::BackendRegistryInstance().GetBackendIds();
2171 if (amendedBackendPreferences[0] == armnn::BackendId("GpuAcc"))
2172 {
2173 // Add mapped Gpu backends if not already there and GpuAcc is first backend requested
2174 for (auto backend : mappedGpuBackends)
2175 {
2176 if (!std::count(amendedBackendPreferences.begin(), amendedBackendPreferences.end(), backend))
2177 {
2178 amendedBackendPreferences.insert(amendedBackendPreferences.begin(), backend);
2179 }
2180 }
2181 }
2182
2183 if (reduceFp32ToFp16 && hasFp16)
2184 {
2185 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
2186 Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
2187 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2188 }
2189 // Assign an available backend to each layer
2190 Graph::Iterator firstLayer = optGraph.begin();
2191 Graph::Iterator lastLayer = optGraph.end();
2192 OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
2193 backendSettings,
2194 firstLayer,
2195 lastLayer,
2196 messages);
2197 if (assignBackendsResult.m_Error)
2198 {
2199 // Failed to assign a backend to each layer
2200 throw InvalidArgumentException("Failed to assign a backend to each layer");
2201 }
2202
2203 Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
2204 OptimizeInverseConversionsFp32()));
2205
2206 // Apply the backend-specific optimizations
2207 OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
2208 backendSettings,
2209 backends,
2210 options.GetModelOptions(),
2211 messages);
2212 if (backendOptimizationResult.m_Error)
2213 {
2214 // Failed to apply the backend-specific optimizations
2215 throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
2216 }
2217
2218 // Convert constants
2219 {
2220 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
2221 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
2222 Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
2223 }
2224
2225 // This must occur after all topological changes to the graph and any redirection of variables
2226 // If the debug flag is set, then insert a DebugLayer after each layer
2227 // Doing this after applying the backend optimizations as they might have changed some layers
2228 if (options.GetDebugEnabled() && !options.GetDebugToFileEnabled())
2229 {
2230 Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
2231 }
2232 else if (options.GetDebugToFileEnabled())
2233 {
2234 // Setup the output file path
2235 try
2236 {
2237#if !defined(ARMNN_DISABLE_FILESYSTEM)
2238 auto result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
2239 ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
2240#endif
2241 Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
2242 }
2243 catch (const armnn::RuntimeException& e)
2244 {
2245 // If we cannot create the output directory then we'll issue a warning and continue.
2246 ARMNN_LOG(warning) << "Unable to print intermediate layer outputs : " << e.what();
2247 }
2248 }
2249
2250 // Calculate the compatibility strategies for tensor handles
2251 OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
2252 backends,
2253 tensorHandleFactoryRegistry,
2254 options.GetImportEnabled(),
2255 options.GetExportEnabled(),
2256 messages);
2257
2258 if (strategyResult.m_Error)
2259 {
2260 // Failed to apply the backend-specific optimizations
2262 }
2263
2264 // Based on the tensor handle strategy determined above, insert copy layers where required.
2265 {
2266 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
2267 optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
2268 }
2269
2270 return optNet;
2271}
2272
2273// Forwarding function to remain backward compatible with legacy OptimizerOptions
2275 const std::vector<BackendId>& backendPreferences,
2276 const IDeviceSpec& deviceSpec,
2277 const OptimizerOptions& options,
2278 Optional<std::vector<std::string>&> messages)
2279{
2280 return Optimize(inNetwork,
2281 backendPreferences,
2282 deviceSpec,
2283 OptimizerOptionsOpaque(options),
2284 messages);
2285}
2286
2288 const std::vector<BackendId>& backendPreferences,
2289 const IDeviceSpec& deviceSpec,
2290 const OptimizerOptionsOpaque& options,
2291 Optional<std::vector<std::string>&> messages)
2292{
2293 return Optimize(inNetwork.pNetworkImpl->GetGraph(),
2294 backendPreferences,
2295 deviceSpec,
2296 options,
2297 messages);
2298}
2299
2300bool NetworkImpl::GetShapeInferenceMethod()
2301{
2302 bool shapeInferenceMethod = false;
2303
2304 ParseOptions(m_NetworkOptions, "ShapeInferenceMethod", [&](std::string name, const BackendOptions::Var& value)
2305 {
2306 if (name == "InferAndValidate")
2307 {
2308 shapeInferenceMethod |= value.AsBool();
2309 }
2310 });
2311 return shapeInferenceMethod;
2312}
2313
2314bool NetworkImpl::GetAllowExpandedDims()
2315{
2316 bool allowExpandedDims = false;
2317
2318 ParseOptions(m_NetworkOptions, "AllowExpandedDims", [&](std::string name, const BackendOptions::Var& value)
2319 {
2320 if (name == "AllowExpandedDims")
2321 {
2322 allowExpandedDims |= value.AsBool();
2323 }
2324 });
2325 return allowExpandedDims;
2326}
2327
2329: m_NetworkOptions(networkOptions),
2330 m_Graph(std::make_unique<Graph>(GetShapeInferenceMethod(), GetAllowExpandedDims()))
2331{}
2332
2336
2338{
2339 m_Graph->Print();
2340 return Status::Success;
2341}
2342
2344{
2345 return m_Graph->AddLayer<InputLayer>(id, name);
2346}
2347
2349 const char* name)
2350{
2351 return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
2352}
2353
2355{
2356 return m_Graph->AddLayer<CastLayer>(name);
2357}
2359 const char* name)
2360{
2361 return m_Graph->AddLayer<ChannelShuffleLayer>(channelShuffleDescriptor, name);
2362}
2363
2365 const char* name)
2366{
2367 return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
2368}
2369
2371 const char* name)
2372{
2373 return m_Graph->AddLayer<ElementwiseBinaryLayer>(elementwiseBinaryDesc, name);
2374}
2375
2377 const char* name)
2378{
2379 return m_Graph->AddLayer<ElementwiseUnaryLayer>(elementwiseUnaryDescriptor, name);
2380}
2381
2383 const char* name)
2384{
2385 return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
2386}
2387
2389 const char* name)
2390{
2391 return m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name);
2392}
2393
2395 const char* name)
2396{
2397 return m_Graph->AddLayer<FusedLayer>(fusedDescriptor, name);
2398}
2399
2401 const char* name)
2402{
2403 return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);
2404}
2405
2407 const char* name)
2408{
2409 return m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name);
2410}
2411
2413{
2414 return m_Graph->AddLayer<ConvertFp16ToFp32Layer>(name);
2415}
2416
2418{
2419 return m_Graph->AddLayer<ConvertFp32ToFp16Layer>(name);
2420}
2421
2423 const char* name)
2424{
2425 return m_Graph->AddLayer<Convolution3dLayer>(convolution3dDescriptor, name);
2426}
2427
2429 const char* name)
2430{
2431 return m_Graph->AddLayer<DepthToSpaceLayer>(depthToSpaceDescriptor, name);
2432}
2433
2435 const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
2436 const char* name)
2437{
2438 return m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
2439}
2440
2442 const ConstTensor& anchors, const char* name)
2443{
2444 const auto layer = m_Graph->AddLayer<DetectionPostProcessLayer>(descriptor, name);
2445
2446 layer->m_Anchors = std::make_shared<ScopedTensorHandle>(anchors);
2447
2448 return layer;
2449}
2450
2452 const char* name)
2453{
2454 return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
2455}
2456
2458 const char* name)
2459{
2460 return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
2461}
2462
2464 const char* name)
2465{
2466 return m_Graph->AddLayer<Pooling3dLayer>(pooling3dDescriptor, name);
2467}
2468
2470 const char* name)
2471{
2472 return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
2473}
2474
2476 const char* name)
2477{
2478 return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
2479}
2480
2482normalizationDescriptor,
2483 const char* name)
2484{
2485 return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
2486}
2487
2488IConnectableLayer* NetworkImpl::AddSliceLayer(const SliceDescriptor& sliceDescriptor, const char* name)
2489{
2490 return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
2491}
2492
2494 const char* name)
2495{
2496 return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
2497}
2498
2500 const char* name)
2501{
2502 return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
2503}
2504
2506{
2507 return m_Graph->AddLayer<MaximumLayer>(name);
2508}
2509
2511{
2512 return m_Graph->AddLayer<MinimumLayer>(name);
2513}
2514
2516{
2517 return m_Graph->AddLayer<AdditionLayer>(name);
2518}
2519
2521{
2522 return m_Graph->AddLayer<MultiplicationLayer>(name);
2523}
2524
2526{
2527 return m_Graph->AddLayer<OutputLayer>(id, name);
2528}
2529
2531 const ConstTensor& mean,
2532 const ConstTensor& variance,
2533 const ConstTensor& beta,
2534 const ConstTensor& gamma,
2535 const char* name)
2536{
2537 const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name);
2538
2539 layer->m_Mean = std::make_shared<ScopedTensorHandle>(mean);
2540 layer->m_Variance = std::make_shared<ScopedTensorHandle>(variance);
2541 layer->m_Beta = std::make_shared<ScopedTensorHandle>(beta);
2542 layer->m_Gamma = std::make_shared<ScopedTensorHandle>(gamma);
2543
2544 return layer;
2545}
2546
2548{
2549 return m_Graph->AddLayer<RankLayer>(name);
2550}
2551
2553 const char* name)
2554{
2555 return m_Graph->AddLayer<ReduceLayer>(reduceDescriptor, name);
2556}
2557
2558IConnectableLayer* NetworkImpl::AddResizeLayer(const ResizeDescriptor& resizeDescriptor, const char* name)
2559{
2560 return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
2561}
2562
2564{
2565 return m_Graph->AddLayer<ShapeLayer>(name);
2566}
2567
2569 const char* name)
2570{
2571 return m_Graph->AddLayer<InstanceNormalizationLayer>(desc, name);
2572}
2573
2575 const char* name)
2576{
2577 return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
2578}
2579
2581 const char* name)
2582{
2583 return m_Graph->AddLayer<LogSoftmaxLayer>(desc, name);
2584}
2585
2587{
2588 auto layer = m_Graph->AddLayer<ConstantLayer>(name);
2589
2590 layer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(input);
2591
2592 return layer;
2593}
2594
2596 const char* name)
2597{
2598 return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
2599}
2600
2602 const char* name)
2603{
2604 return m_Graph->AddLayer<SpaceToBatchNdLayer>(spaceToBatchNdDescriptor, name);
2605}
2606
2608 const char* name)
2609{
2610 return m_Graph->AddLayer<SpaceToDepthLayer>(spaceToDepthDescriptor, name);
2611}
2612
2614{
2615 return m_Graph->AddLayer<FloorLayer>(name);
2616}
2617
2619 const LstmInputParams& params,
2620 const char* name)
2621{
2622 const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);
2623
2624 //Lstm Basic Parameters
2626 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
2627 layer->m_BasicParameters.m_InputToCellWeights =
2628 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
2629 layer->m_BasicParameters.m_InputToOutputWeights =
2630 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
2631 layer->m_BasicParameters.m_RecurrentToForgetWeights =
2632 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
2633 layer->m_BasicParameters.m_RecurrentToCellWeights =
2634 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
2635 layer->m_BasicParameters.m_RecurrentToOutputWeights =
2636 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
2637 layer->m_BasicParameters.m_ForgetGateBias =
2638 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
2639 layer->m_BasicParameters.m_CellBias =
2640 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
2641 layer->m_BasicParameters.m_OutputGateBias =
2642 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
2643
2644 //Lstm Cifg parameters
2645 if(!descriptor.m_CifgEnabled)
2646 {
2647 if(params.m_InputToInputWeights == nullptr)
2648 {
2649 throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
2650 "when CIFG is disabled.");
2651 }
2652 if(params.m_RecurrentToInputWeights == nullptr)
2653 {
2655 "AddLstmLayer: Recurrent To Input Weights cannot be NULL "
2656 "when CIFG is disabled.");
2657 }
2658 if(params.m_InputGateBias == nullptr)
2659 {
2660 throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
2661 "when CIFG is disabled.");
2662 }
2663 layer->m_CifgParameters.m_InputToInputWeights =
2664 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
2665 layer->m_CifgParameters.m_RecurrentToInputWeights =
2666 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
2667 layer->m_CifgParameters.m_InputGateBias =
2668 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
2669 }
2670
2671 //Lstm projection parameters
2672 if(descriptor.m_ProjectionEnabled)
2673 {
2674 if(params.m_ProjectionWeights == nullptr)
2675 {
2676 throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
2677 "when projection is enabled.");
2678 }
2679 layer->m_ProjectionParameters.m_ProjectionWeights =
2680 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
2681 if(params.m_ProjectionBias != nullptr)
2682 {
2683 layer->m_ProjectionParameters.m_ProjectionBias =
2684 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
2685 }
2686 }
2687
2688 //Lstm Peephole params
2689 if(descriptor.m_PeepholeEnabled)
2690 {
2691 if(!descriptor.m_CifgEnabled)
2692 {
2693 if(params.m_CellToInputWeights == nullptr)
2694 {
2695 throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
2696 "when Peephole is enabled and CIFG disabled.");
2697 }
2698
2699 layer->m_PeepholeParameters.m_CellToInputWeights =
2700 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
2701 }
2702
2703 if(params.m_CellToForgetWeights == nullptr)
2704 {
2705 throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
2706 "when Peephole is enabled.");
2707 }
2708 if(params.m_CellToOutputWeights == nullptr)
2709 {
2710 throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
2711 "when Peephole is enabled.");
2712 }
2713
2714 layer->m_PeepholeParameters.m_CellToForgetWeights =
2715 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
2716 layer->m_PeepholeParameters.m_CellToOutputWeights =
2717 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
2718 }
2719
2720 //Lstm Layer Normalization params
2721 if(descriptor.m_LayerNormEnabled)
2722 {
2723 if(!descriptor.m_CifgEnabled)
2724 {
2725 if(params.m_InputLayerNormWeights == nullptr)
2726 {
2727 throw InvalidArgumentException("AddLstmLayer: Input layer normalization weights cannot be NULL "
2728 "when layer normalization is enabled and CIFG disabled.");
2729 }
2730 layer->m_LayerNormParameters.m_InputLayerNormWeights =
2731 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
2732 }
2733
2734 if(params.m_ForgetLayerNormWeights == nullptr)
2735 {
2736 throw InvalidArgumentException("AddLstmLayer: Forget layer normalization weights cannot be NULL "
2737 "when layer normalization is enabled.");
2738 }
2739 if(params.m_CellLayerNormWeights == nullptr)
2740 {
2741 throw InvalidArgumentException("AddLstmLayer: Cell layer normalization weights cannot be NULL "
2742 "when layer normalization is enabled.");
2743 }
2744 if(params.m_OutputLayerNormWeights == nullptr)
2745 {
2746 throw InvalidArgumentException("AddLstmLayer: Output layer normalization weights cannot be NULL "
2747 "when layer normalization is enabled.");
2748 }
2749 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
2750 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
2751 layer->m_LayerNormParameters.m_CellLayerNormWeights =
2752 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
2753 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
2754 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
2755 }
2756 return layer;
2757}
2758
2760{
2761 return m_Graph->AddLayer<DivisionLayer>(name);
2762}
2763
2765{
2766 return m_Graph->AddLayer<SubtractionLayer>(name);
2767}
2768
2769IConnectableLayer* NetworkImpl::AddMeanLayer(const MeanDescriptor& meanDescriptor, const char* name)
2770{
2771 return m_Graph->AddLayer<MeanLayer>(meanDescriptor,name);
2772}
2773
2774IConnectableLayer* NetworkImpl::AddPadLayer(const PadDescriptor& padDescriptor, const char* name)
2775{
2776 return m_Graph->AddLayer<PadLayer>(padDescriptor,name);
2777}
2778
2780{
2781 return m_Graph->AddLayer<QuantizeLayer>(name);
2782}
2783
2785{
2786 return m_Graph->AddLayer<DequantizeLayer>(name);
2787}
2788
2790 const char* name)
2791{
2792 return m_Graph->AddLayer<StridedSliceLayer>(stridedSliceDescriptor, name);
2793}
2794
2796 const char* name)
2797{
2798 return m_Graph->AddLayer<GatherLayer>(gatherDescriptor, name);
2799}
2800
2802{
2803 return m_Graph->AddLayer<GatherNdLayer>(name);
2804}
2805
2807{
2808 return m_Graph->AddLayer<MergeLayer>(name);
2809}
2810
2812{
2813 return m_Graph->AddLayer<SwitchLayer>(name);
2814}
2815
2817{
2818 return m_Graph->AddLayer<PreluLayer>(name);
2819}
2820
2822 const ConstTensor& weights,
2823 const Optional<ConstTensor>& biases,
2824 const char* name)
2825{
2826 if (descriptor.m_BiasEnabled && !biases.has_value())
2827 {
2828 throw InvalidArgumentException("AddTransposeConvolution2dLayer: Biases cannot be empty");
2829 }
2830
2831 const auto layer = m_Graph->AddLayer<TransposeConvolution2dLayer>(descriptor, name);
2832
2833 layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
2834
2835 if (descriptor.m_BiasEnabled)
2836 {
2837 layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
2838 }
2839
2840 return layer;
2841}
2842
2844 const char* name)
2845{
2846 return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
2847}
2848
2850 const char* name)
2851{
2852 return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
2853}
2854
2855
2857 const char* name)
2858{
2859 return m_Graph->AddLayer<StandInLayer>(desc, name);
2860}
2861
2863 const char* name)
2864{
2865 const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);
2866
2867 // InputToX weights
2869 std::make_shared<ScopedTensorHandle>(params.GetInputToInputWeights());
2870 layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
2871 std::make_shared<ScopedTensorHandle>(params.GetInputToForgetWeights());
2872 layer->m_QuantizedLstmParameters.m_InputToCellWeights =
2873 std::make_shared<ScopedTensorHandle>(params.GetInputToCellWeights());
2874 layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
2875 std::make_shared<ScopedTensorHandle>(params.GetInputToOutputWeights());
2876
2877 // RecurrentToX weights
2878 layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
2879 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToInputWeights());
2880 layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
2881 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToForgetWeights());
2882 layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
2883 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToCellWeights());
2884 layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
2885 std::make_shared<ScopedTensorHandle>(params.GetRecurrentToOutputWeights());
2886
2887 // Bias
2888 layer->m_QuantizedLstmParameters.m_InputGateBias =
2889 std::make_shared<ScopedTensorHandle>(params.GetInputGateBias());
2890 layer->m_QuantizedLstmParameters.m_ForgetGateBias =
2891 std::make_shared<ScopedTensorHandle>(params.GetForgetGateBias());
2892 layer->m_QuantizedLstmParameters.m_CellBias =
2893 std::make_shared<ScopedTensorHandle>(params.GetCellBias());
2894 layer->m_QuantizedLstmParameters.m_OutputGateBias =
2895 std::make_shared<ScopedTensorHandle>(params.GetOutputGateBias());
2896
2897 return layer;
2898}
2899
2901 const LstmInputParams& params,
2902 const char* name)
2903{
2904 const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);
2905
2906 // QLstm Basic Parameters
2908 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
2909 layer->m_BasicParameters.m_InputToCellWeights =
2910 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
2911 layer->m_BasicParameters.m_InputToOutputWeights =
2912 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
2913 layer->m_BasicParameters.m_RecurrentToForgetWeights =
2914 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
2915 layer->m_BasicParameters.m_RecurrentToCellWeights =
2916 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
2917 layer->m_BasicParameters.m_RecurrentToOutputWeights =
2918 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
2919 layer->m_BasicParameters.m_ForgetGateBias =
2920 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
2921 layer->m_BasicParameters.m_CellBias =
2922 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
2923 layer->m_BasicParameters.m_OutputGateBias =
2924 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
2925
2926 // QLstm Cifg parameters
2927 if(!descriptor.m_CifgEnabled)
2928 {
2929 if(params.m_InputToInputWeights == nullptr)
2930 {
2931 throw InvalidArgumentException("AddQLstmLayer: Input To Input Weights cannot be NULL");
2932 }
2933
2934 if(params.m_RecurrentToInputWeights == nullptr)
2935 {
2937 "AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
2938 }
2939
2940 if(params.m_InputGateBias == nullptr)
2941 {
2942 throw InvalidArgumentException("AddQLstmLayer: Input Gate Bias cannot be NULL");
2943 }
2944
2945 layer->m_CifgParameters.m_InputToInputWeights =
2946 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
2947 layer->m_CifgParameters.m_RecurrentToInputWeights =
2948 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
2949 layer->m_CifgParameters.m_InputGateBias =
2950 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
2951 }
2952
2953 // QLstm Projection parameters
2954 if(descriptor.m_ProjectionEnabled)
2955 {
2956 if(params.m_ProjectionWeights == nullptr)
2957 {
2958 throw InvalidArgumentException("AddQLstmLayer: Projection Weights cannot be NULL");
2959 }
2960
2961 layer->m_ProjectionParameters.m_ProjectionWeights =
2962 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
2963
2964 // Projection bias is optional even if projection is enabled
2965 if(params.m_ProjectionBias != nullptr)
2966 {
2967 layer->m_ProjectionParameters.m_ProjectionBias =
2968 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
2969 }
2970
2971 }
2972
2973 // QLstm Peephole params
2974 if(descriptor.m_PeepholeEnabled)
2975 {
2976 if(params.m_CellToForgetWeights == nullptr)
2977 {
2978 throw InvalidArgumentException("AddQLstmLayer: Cell To Forget Weights cannot be NULL");
2979 }
2980
2981 if(params.m_CellToOutputWeights == nullptr)
2982 {
2983 throw InvalidArgumentException("AddQLstmLayer: Cell To Output Weights cannot be NULL");
2984 }
2985
2986 if(!descriptor.m_CifgEnabled)
2987 {
2988 if(params.m_CellToInputWeights == nullptr)
2989 {
2990 throw InvalidArgumentException("AddQLstmLayer: Cell To Input Weights cannot be NULL");
2991 }
2992
2993 layer->m_PeepholeParameters.m_CellToInputWeights =
2994 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
2995 }
2996
2997 layer->m_PeepholeParameters.m_CellToForgetWeights =
2998 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
2999 layer->m_PeepholeParameters.m_CellToOutputWeights =
3000 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
3001 }
3002
3003 // QLstm Layer Normalization params
3004 if(descriptor.m_LayerNormEnabled)
3005 {
3006 if(params.m_ForgetLayerNormWeights == nullptr)
3007 {
3008 throw InvalidArgumentException("AddQLstmLayer: Forget layer normalization weights cannot be NULL");
3009 }
3010
3011 if(params.m_CellLayerNormWeights == nullptr)
3012 {
3013 throw InvalidArgumentException("AddQLstmLayer: Cell layer normalization weights cannot be NULL");
3014 }
3015
3016 if(params.m_OutputLayerNormWeights == nullptr)
3017 {
3018 throw InvalidArgumentException("AddQLstmLayer: Output layer normalization weights cannot be NULL");
3019 }
3020
3021 if(!descriptor.m_CifgEnabled)
3022 {
3023 if(params.m_InputLayerNormWeights == nullptr)
3024 {
3025 throw InvalidArgumentException("AddQLstmLayer: Input layer normalization weights cannot be NULL");
3026 }
3027
3028 layer->m_LayerNormParameters.m_InputLayerNormWeights =
3029 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
3030 }
3031
3032 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
3033 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
3034 layer->m_LayerNormParameters.m_CellLayerNormWeights =
3035 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
3036 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
3037 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
3038 }
3039 return layer;
3040}
3041
3043 const char* name)
3044{
3045 return m_Graph->AddLayer<LogicalBinaryLayer>(logicalBinaryDescriptor, name);
3046}
3047
3049 const UnidirectionalSequenceLstmDescriptor& descriptor,
3050 const LstmInputParams& params,
3051 const char* name)
3052{
3053 const auto layer = m_Graph->AddLayer<UnidirectionalSequenceLstmLayer>(descriptor, name);
3054
3055 //Lstm Basic Parameters
3057 std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
3058 layer->m_BasicParameters.m_InputToCellWeights =
3059 std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
3060 layer->m_BasicParameters.m_InputToOutputWeights =
3061 std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
3062 layer->m_BasicParameters.m_RecurrentToForgetWeights =
3063 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
3064 layer->m_BasicParameters.m_RecurrentToCellWeights =
3065 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
3066 layer->m_BasicParameters.m_RecurrentToOutputWeights =
3067 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
3068 layer->m_BasicParameters.m_ForgetGateBias =
3069 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
3070 layer->m_BasicParameters.m_CellBias =
3071 std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
3072 layer->m_BasicParameters.m_OutputGateBias =
3073 std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));
3074
3075 //Lstm Cifg parameters
3076 if(!descriptor.m_CifgEnabled)
3077 {
3078 if(params.m_InputToInputWeights == nullptr)
3079 {
3080 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input To Input Weights cannot be NULL "
3081 "when CIFG is disabled.");
3082 }
3083 if(params.m_RecurrentToInputWeights == nullptr)
3084 {
3086 "AddUnidirectionalSequenceLstmLayer: Recurrent To Input Weights cannot be NULL "
3087 "when CIFG is disabled.");
3088 }
3089 if(params.m_InputGateBias == nullptr)
3090 {
3091 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input Gate Bias cannot be NULL "
3092 "when CIFG is disabled.");
3093 }
3094 layer->m_CifgParameters.m_InputToInputWeights =
3095 std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
3096 layer->m_CifgParameters.m_RecurrentToInputWeights =
3097 std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
3098 layer->m_CifgParameters.m_InputGateBias =
3099 std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
3100 }
3101
3102 //Lstm projection parameters
3103 if(descriptor.m_ProjectionEnabled)
3104 {
3105 if(params.m_ProjectionWeights == nullptr)
3106 {
3107 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Projection Weights cannot be NULL "
3108 "when projection is enabled.");
3109 }
3110 layer->m_ProjectionParameters.m_ProjectionWeights =
3111 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
3112 if(params.m_ProjectionBias != nullptr)
3113 {
3114 layer->m_ProjectionParameters.m_ProjectionBias =
3115 std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
3116 }
3117 }
3118
3119 //Lstm Peephole params
3120 if(descriptor.m_PeepholeEnabled)
3121 {
3122 if(!descriptor.m_CifgEnabled)
3123 {
3124 if(params.m_CellToInputWeights == nullptr)
3125 {
3126 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Input Weights "
3127 "cannot be NULL when Peephole is enabled and CIFG disabled.");
3128 }
3129
3130 layer->m_PeepholeParameters.m_CellToInputWeights =
3131 std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
3132 }
3133
3134 if(params.m_CellToForgetWeights == nullptr)
3135 {
3136 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Forget Weights cannot be NULL "
3137 "when Peephole is enabled.");
3138 }
3139 if(params.m_CellToOutputWeights == nullptr)
3140 {
3141 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Output Weights cannot be NULL "
3142 "when Peephole is enabled.");
3143 }
3144
3145 layer->m_PeepholeParameters.m_CellToForgetWeights =
3146 std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
3147 layer->m_PeepholeParameters.m_CellToOutputWeights =
3148 std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
3149 }
3150
3151 //Lstm Layer Normalization params
3152 if(descriptor.m_LayerNormEnabled)
3153 {
3154 if(!descriptor.m_CifgEnabled)
3155 {
3156 if(params.m_InputLayerNormWeights == nullptr)
3157 {
3158 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input layer normalization weights "
3159 "cannot be NULL when layer normalization is enabled and CIFG disabled.");
3160 }
3161 layer->m_LayerNormParameters.m_InputLayerNormWeights =
3162 std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
3163 }
3164
3165 if(params.m_ForgetLayerNormWeights == nullptr)
3166 {
3167 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Forget layer normalization weights "
3168 "cannot be NULL when layer normalization is enabled.");
3169 }
3170 if(params.m_CellLayerNormWeights == nullptr)
3171 {
3172 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell layer normalization weights "
3173 "cannot be NULL when layer normalization is enabled.");
3174 }
3175 if(params.m_OutputLayerNormWeights == nullptr)
3176 {
3177 throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Output layer normalization weights "
3178 "cannot be NULL when layer normalization is enabled.");
3179 }
3180 layer->m_LayerNormParameters.m_ForgetLayerNormWeights =
3181 std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
3182 layer->m_LayerNormParameters.m_CellLayerNormWeights =
3183 std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
3184 layer->m_LayerNormParameters.m_OutputLayerNormWeights =
3185 std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
3186 }
3187 return layer;
3188}
3189
3191{
3192 return m_Graph->AddLayer<BatchMatMulLayer>(desc, name);
3193}
3194
3196{
3197 return m_Graph->AddLayer<ReverseV2Layer>(name);
3198}
3199
3201{
3202 return m_Graph->AddLayer<TileLayer>(desc, name);
3203}
3204
3206 CompiledBlobPtr compiledBlobPtr,
3207 const Optional<BackendId>& backend,
3208 const char* name)
3209{
3210 // Method use is for backend users.
3211 PreCompiledLayer* layer;
3212 if (name)
3213 {
3214 layer = m_Graph->AddLayer<PreCompiledLayer>(preCompiledDescriptor, name);
3215 }
3216 else
3217 {
3218 layer = m_Graph->AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled");
3219 }
3220
3221 // Assign the pre-compiled object to layer
3222 // Pass only one compiled network, Arm NN does not handle multiple
3223 // pre-compiled objects in a single pre-compiled layer currently
3224 layer->SetPreCompiledObject(std::move(compiledBlobPtr));
3225
3226 if (backend.has_value())
3227 {
3228 layer->SetBackendId(backend.value());
3229 }
3230 else if (layer->GetBackendHint().has_value())
3231 {
3232 layer->SetBackendId(layer->GetBackendHint().value());
3233 }
3234
3235 return layer;
3236}
3237
3239{
3240 return m_Graph->AddLayer<BroadcastToLayer>(desc, name);
3241}
3242
3244{
3245 return m_Graph->AddLayer<ScatterNdLayer>(desc, name);
3246}
3247
3249{
3250 for (auto layer : GetGraph())
3251 {
3252 layer->ExecuteStrategy(strategy);
3253 };
3254}
3255
3257 : m_Graph(new Graph(*other.m_Graph.get()))
3258 , m_Guid(arm::pipe::IProfilingService::GetNextGuid())
3259 , m_ModelOptions(modelOptions)
3260{
3261}
3262
3264 : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid())
3265{
3266}
3267
3268OptimizedNetworkImpl::OptimizedNetworkImpl(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
3269 : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid()), m_ModelOptions(modelOptions)
3270{
3271}
3272
3276
3278{
3279 pOptimizedNetworkImpl->ExecuteStrategy(strategy);
3280}
3281
3283{
3284 for (auto layer : GetGraph())
3285 {
3286 layer->ExecuteStrategy(strategy);
3287 };
3288}
3289
3290} // namespace armnn
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_LOG(severity)
Definition Logging.hpp:212
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
This layer represents an activation operation with the specified activation function.
This layer represents an addition operation.
This layer represents a ArgMinMax operation.
const std::string & Get() const
Very basic type safe variant.
bool AsBool() const
Value getters.
BackendIdSet GetBackendIds() const
BackendIdVector GetMappedGpuBackends()
This layer represents a batch normalization operation.
std::shared_ptr< ConstTensorHandle > m_Mean
A unique pointer to store Mean values.
This layer represents a BatchToSpaceNd operation.
This layer represents a cast operation.
Definition CastLayer.hpp:15
This layer represents a comparison operation.
This layer represents a merge operation.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition Tensor.hpp:330
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
This layer converts data type Float 16 to Float 32.
This layer converts data type Float 32 to Float 16.
This layer represents a convolution 2d operation.
This layer represents a convolution 3d operation.
This layer represents a DepthToSpace operation.
This layer represents a depthwise convolution 2d operation.
This layer dequantizes the input tensor.
This layer represents a detection postprocess operator.
std::shared_ptr< ConstTensorHandle > m_Anchors
A unique pointer to store Anchor values.
This layer represents a division operation.
This layer represents a elementwiseBinary operation.
This layer represents a elementwiseUnary operation.
Base class for all ArmNN exceptions so that users can filter to just those.
virtual const char * what() const noexcept override
This layer represents a fill operation.
Definition FillLayer.hpp:14
This layer represents a floor operation.
This layer represents a fully connected operation.
This layer represents a Gather operator.
This layer represents a GatherNd operator.
Iterator begin()
Returns iterator pointing to the beginning of the list. Lowercase for range-based for loops.
Definition Graph.hpp:176
void InferTensorInfos()
Definition Graph.cpp:645
void VerifyConstantLayerSetTensorInfo() const
For each ConstantLayer in Graph, ensures TensorInfo is set on all output slots.
Definition Graph.cpp:622
const std::shared_ptr< IProfiler > & GetProfiler() const
Definition Graph.cpp:733
void EraseLayer(Iterator pos)
Deletes the layer at the specified position.
Definition Graph.hpp:517
void SubstituteSubgraph(SubgraphView &subgraph, IConnectableLayer *substituteLayer)
Substitutes the given sub-graph with either a new layer or a new sub-graph.
Definition Graph.cpp:475
Iterator end()
Returns iterator pointing to the end of the list. Lowercase for range-based for loops.
Definition Graph.hpp:178
TransformIterator< decltype(&PtrCast< const Layer >), Iterator > ConstIterator
Definition Graph.hpp:56
LayerList::const_iterator Iterator
Definition Graph.hpp:53
void ForEachLayer(Func func) const
Definition Graph.hpp:40
void AddCompatibilityLayers(std::map< BackendId, std::unique_ptr< class IBackendInternal > > &backends, TensorHandleFactoryRegistry &registry)
Modifies the graph in-place, removing edges connecting layers using different compute devices,...
Definition Graph.cpp:330
virtual std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition INetwork.hpp:81
Device specific knowledge to be passed to the optimizer.
Definition Types.hpp:302
Main network class which provides the interface for building up a neural network.
Definition INetwork.hpp:348
IConnectableLayer * AddFusedLayer(const FusedDescriptor &fusedDescriptor, const char *name=nullptr)
Adds a Fused layer to the network.
Definition Network.cpp:338
IConnectableLayer * AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor &elementwiseUnaryDescriptor, const char *name=nullptr)
Add an ElementwiseUnary layer to the network.
Definition Network.cpp:320
IConnectableLayer * AddLstmLayer(const LstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Add a Lstm layer to the network.
Definition Network.cpp:501
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
Definition Network.cpp:508
IConnectableLayer * AddQuantizeLayer(const char *name=nullptr)
Add a quantize layer to the network.
Definition Network.cpp:540
IConnectableLayer * AddMergeLayer(const char *name=nullptr)
Adds a merge layer to the network.
Definition Network.cpp:404
IConnectableLayer * AddPermuteLayer(const PermuteDescriptor &permuteDescriptor, const char *name=nullptr)
Adds a permute layer to the network.
Definition Network.cpp:344
IConnectableLayer * AddSpaceToDepthLayer(const SpaceToDepthDescriptor &spaceToDepthDescriptor, const char *name=nullptr)
Adds a space to depth layer to the network.
Definition Network.cpp:486
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
Definition Network.cpp:468
IConnectableLayer * AddGatherLayer(const GatherDescriptor &descriptor, const char *name=nullptr)
Add Gather layer to the network.
Definition Network.cpp:558
IConnectableLayer * AddRankLayer(const char *name=nullptr)
Adds a rank layer to the network.
Definition Network.cpp:433
IConnectableLayer * AddSwitchLayer(const char *name=nullptr)
Adds a switch layer to the network.
Definition Network.cpp:569
IConnectableLayer * AddQLstmLayer(const QLstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Add a QLstm layer to the network.
Definition Network.cpp:616
INetwork(NetworkOptions networkOptions={})
Definition Network.cpp:45
IConnectableLayer * AddSoftmaxLayer(const SoftmaxDescriptor &softmaxDescriptor, const char *name=nullptr)
Adds a softmax layer to the network.
Definition Network.cpp:392
IConnectableLayer * AddDequantizeLayer(const char *name=nullptr)
Adds a Dequantize layer to the network.
Definition Network.cpp:300
IConnectableLayer * AddBroadcastToLayer(const BroadcastToDescriptor &descriptor, const char *name=nullptr)
Add a BroadcastTo layer to the network.
Definition Network.cpp:660
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D convolution layer to the network.
Definition Network.cpp:272
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
Definition Network.cpp:409
IConnectableLayer * AddQuantizedLstmLayer(const QuantizedLstmInputParams &params, const char *name=nullptr)
Add a QuantizedLstm layer to the network.
Definition Network.cpp:610
static INetworkPtr Create(const NetworkOptions &networkOptions={})
Definition Network.cpp:682
IConnectableLayer * AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor &descriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D transpose convolution layer to the network.
Definition Network.cpp:579
std::unique_ptr< NetworkImpl > pNetworkImpl
Definition INetwork.hpp:895
IConnectableLayer * AddFloorLayer(const char *name=nullptr)
Adds a floor layer to the network.
Definition Network.cpp:492
IConnectableLayer * AddConvolution3dLayer(const Convolution3dDescriptor &convolution3dDescriptor, const char *name=nullptr)
Adds a 3D convolution layer to the network.
Definition Network.cpp:278
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
Definition Network.cpp:332
IConnectableLayer * AddMinimumLayer(const char *name=nullptr)
Add a Minimum layer to the network.
Definition Network.cpp:551
IConnectableLayer * AddStackLayer(const StackDescriptor &descriptor, const char *name=nullptr)
Adds a stack layer to the network.
Definition Network.cpp:598
static void Destroy(INetwork *network)
Definition Network.cpp:687
IConnectableLayer * AddMaximumLayer(const char *name=nullptr)
Add a Maximum layer to the network.
Definition Network.cpp:522
IConnectableLayer * AddNormalizationLayer(const NormalizationDescriptor &normalizationDescriptor, const char *name=nullptr)
Adds a normalization layer to the network.
Definition Network.cpp:382
IConnectableLayer * AddPreluLayer(const char *name=nullptr)
Adds a PReLU layer to the network.
Definition Network.cpp:574
IConnectableLayer * AddPadLayer(const PadDescriptor &padDescriptor, const char *name=nullptr)
Adds a fully pad layer to the network.
Definition Network.cpp:534
IConnectableLayer * AddSplitterLayer(const ViewsDescriptor &splitterDescriptor, const char *name=nullptr)
Adds a splitter layer to the network.
Definition Network.cpp:398
void ExecuteStrategy(IStrategy &strategy) const
Definition Network.cpp:672
IConnectableLayer * AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor &spaceToBatchNdDescriptor, const char *name=nullptr)
Adds a space to batch layer to the network.
Definition Network.cpp:480
IConnectableLayer * AddCastLayer(const char *name=nullptr)
Adds a cast layer to the network.
Definition Network.cpp:253
IConnectableLayer * AddStandInLayer(const StandInDescriptor &descriptor, const char *name=nullptr)
Add a stand-in layer for a type unknown to the Arm NN framework.
Definition Network.cpp:604
IConnectableLayer * AddChannelShuffleLayer(const ChannelShuffleDescriptor &descriptor, const char *name=nullptr)
Add a ChannelShuffle layer to the network.
Definition Network.cpp:637
IConnectableLayer * AddLogicalBinaryLayer(const LogicalBinaryDescriptor &descriptor, const char *name=nullptr)
Adds a Logical Binary layer to the network.
Definition Network.cpp:623
IConnectableLayer * AddLogSoftmaxLayer(const LogSoftmaxDescriptor &logSoftmaxDescriptor, const char *name=nullptr)
Adds a log softmax layer to the network.
Definition Network.cpp:462
IConnectableLayer * AddReshapeLayer(const ReshapeDescriptor &reshapeDescriptor, const char *name=nullptr)
Adds a reshape layer to the network.
Definition Network.cpp:474
IConnectableLayer * AddSliceLayer(const SliceDescriptor &sliceDescriptor, const char *name=nullptr)
Adds a slice layer to the network.
Definition Network.cpp:388
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
Definition Network.cpp:423
IConnectableLayer * AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor &batchToSpaceNdDescriptor, const char *name=nullptr)
Adds a batch to space ND layer to the network.
Definition Network.cpp:350
IConnectableLayer * AddActivationLayer(const ActivationDescriptor &activationDescriptor, const char *name=nullptr)
Adds an activation layer to the network.
Definition Network.cpp:376
IConnectableLayer * AddInputLayer(LayerBindingId id, const char *name=nullptr)
Adds an input layer to the network.
Definition Network.cpp:242
IConnectableLayer * AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor &elementwiseBinaryDescriptor, const char *name=nullptr)
Add an ElementwiseBinary layer to the network.
Definition Network.cpp:314
IConnectableLayer * AddL2NormalizationLayer(const L2NormalizationDescriptor &desc, const char *name=nullptr)
Adds an L2 normalization layer to the network.
Definition Network.cpp:456
IConnectableLayer * AddTransposeLayer(const TransposeDescriptor &transposeDescriptor, const char *name=nullptr)
Adds a transpose layer to the network.
Definition Network.cpp:587
static INetwork * CreateRaw(const NetworkOptions &networkOptions={})
Definition Network.cpp:677
IConnectableLayer * AddUnidirectionalSequenceLstmLayer(const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Add a UnidirectionalSequenceLstm layer to the network.
Definition Network.cpp:629
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
Definition Network.cpp:416
IConnectableLayer * AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor &desc, const char *name=nullptr)
Adds an instance normalization layer to the network.
Definition Network.cpp:450
IConnectableLayer * AddDetectionPostProcessLayer(const DetectionPostProcessDescriptor &descriptor, const ConstTensor &anchors, const char *name=nullptr)
Adds a Detection PostProcess layer to the network.
Definition Network.cpp:306
IConnectableLayer * AddStridedSliceLayer(const StridedSliceDescriptor &stridedSliceDescriptor, const char *name=nullptr)
Adds a strided slice layer to the network.
Definition Network.cpp:545
IConnectableLayer * AddTileLayer(const TileDescriptor &descriptor, const char *name=nullptr)
Add a Tile layer to the network.
Definition Network.cpp:654
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
Definition Network.cpp:292
IConnectableLayer * AddComparisonLayer(const ComparisonDescriptor &comparisonDescriptor, const char *name=nullptr)
Add a Comparison layer to the network.
Definition Network.cpp:258
IConnectableLayer * AddMeanLayer(const MeanDescriptor &meanDescriptor, const char *name=nullptr)
Add a Mean layer to the network.
Definition Network.cpp:529
IConnectableLayer * AddResizeLayer(const ResizeDescriptor &resizeDescriptor, const char *name=nullptr)
Adds a resize layer to the network.
Definition Network.cpp:438
IConnectableLayer * AddArgMinMaxLayer(const ArgMinMaxDescriptor &desc, const char *name=nullptr)
Adds an ArgMinMax layer to the network.
Definition Network.cpp:247
IConnectableLayer * AddReduceLayer(const ReduceDescriptor &reduceDescriptor, const char *name=nullptr)
Adds a reduce layer to the network.
Definition Network.cpp:444
IConnectableLayer * AddPooling2dLayer(const Pooling2dDescriptor &pooling2dDescriptor, const char *name=nullptr)
Adds a 2D pooling layer to the network.
Definition Network.cpp:356
IConnectableLayer * AddConcatLayer(const ConcatDescriptor &concatDescriptor, const char *name=nullptr)
Adds a concatenation layer to the network.
Definition Network.cpp:265
IConnectableLayer * AddBatchMatMulLayer(const BatchMatMulDescriptor &descriptor, const char *name=nullptr)
Add a BatchMatMul layer to the network.
Definition Network.cpp:643
IConnectableLayer * AddPooling3dLayer(const Pooling3dDescriptor &pooling3dDescriptor, const char *name=nullptr)
Adds a 3D pooling layer to the network.
Definition Network.cpp:362
IConnectableLayer * AddPrecompiledLayer(const PreCompiledDescriptor &preCompiledDescriptor, CompiledBlobPtr compiledBlobPtr, const Optional< BackendId > &backend, const char *name=nullptr)
Adds a Precompiled layer to the network.
Definition Network.cpp:368
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
Definition Network.cpp:515
IConnectableLayer * AddDepthToSpaceLayer(const DepthToSpaceDescriptor &depthToSpaceDescriptor, const char *name=nullptr)
Adds a depth to space layer to the network.
Definition Network.cpp:285
IConnectableLayer * AddOutputLayer(LayerBindingId id, const char *name=nullptr)
Adds an output layer to the network.
Definition Network.cpp:496
IConnectableLayer * AddReverseV2Layer(const char *name=nullptr)
Add a ReverseV2 layer to the network.
Definition Network.cpp:649
IConnectableLayer * AddGatherNdLayer(const char *name=nullptr)
Add GatherNd layer to the network.
Definition Network.cpp:564
IConnectableLayer * AddShapeLayer(const char *name=nullptr)
Adds a shape layer to the network.
Definition Network.cpp:593
IConnectableLayer * AddFillLayer(const FillDescriptor &fillDescriptor, const char *name=nullptr)
Add an Fill layer to the network.
Definition Network.cpp:326
IConnectableLayer * AddScatterNdLayer(const ScatterNdDescriptor &descriptor, const char *name=nullptr)
Add a ScatterNd layer to the network.
Definition Network.cpp:666
Status PrintGraph()
Definition Network.cpp:237
Status SerializeToDot(std::ostream &stream) const
Definition Network.cpp:716
IOptimizedNetwork(const IOptimizedNetwork &other, const ModelOptions &modelOptions)
Creates a copy of the IOptimizedNetwork.
Definition Network.cpp:692
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition INetwork.hpp:944
static void Destroy(IOptimizedNetwork *network)
Definition Network.cpp:706
size_t GetNumOutputs() const
Definition Network.cpp:736
void ExecuteStrategy(IStrategy &strategy) const
Definition Network.cpp:3277
const std::shared_ptr< IProfiler > & GetProfiler() const
Definition Network.cpp:721
size_t GetNumInputs() const
Definition Network.cpp:731
arm::pipe::ProfilingGuid GetGuid() const
Definition Network.cpp:726
virtual std::vector< Capability > GetCapabilities(const IConnectableLayer *layer, const IConnectableLayer *connectedLayer, CapabilityClass capabilityClass)
virtual MemorySourceFlags GetExportFlags() const
static const FactoryId LegacyFactoryId
virtual MemorySourceFlags GetImportFlags() const
static const FactoryId DeferredFactoryId
Use the workload factory to create the tensor handle.
virtual bool SupportsMapUnmap() const
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
A layer user-provided data can be bound to (e.g. inputs, outputs).
const OutputSlot * GetConnectedOutputSlot() const
Definition Layer.hpp:56
This layer represents an instance normalization operation.
This layer represents a L2 normalization operation.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition Layer.hpp:335
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
void SetBackendId(const BackendId &id) override
Set the backend of the IConnectableLayer.
Definition Layer.hpp:291
const std::vector< InputSlot > & GetInputSlots() const
Definition Layer.hpp:258
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition Layer.hpp:339
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
Definition Layer.hpp:334
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition Layer.hpp:286
const BackendId & GetBackendId() const
Definition Layer.hpp:290
Optional< BackendId > GetBackendHint() const
Definition Layer.hpp:355
This layer represents a log softmax operation.
This layer represents a Logical Binary operation.
This layer represents a LSTM operation.
Definition LstmLayer.hpp:17
LstmBasicParameters m_BasicParameters
Definition LstmLayer.hpp:20
This layer represents a maximum operation.
This layer represents a mean operation.
Definition MeanLayer.hpp:15
This layer dequantizes the input tensor.
This layer represents a minimum operation.
This layer represents a multiplication operation.
Private implementation of INetwork.
Definition Network.hpp:33
IConnectableLayer * AddFusedLayer(const FusedDescriptor &fusedDescriptor, const char *name=nullptr)
Definition Network.cpp:2394
IConnectableLayer * AddElementwiseUnaryLayer(const ElementwiseUnaryDescriptor &elementwiseUnaryDescriptor, const char *name=nullptr)
Definition Network.cpp:2376
IConnectableLayer * AddLstmLayer(const LstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Definition Network.cpp:2618
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Definition Network.cpp:2759
IConnectableLayer * AddQuantizeLayer(const char *name=nullptr)
Definition Network.cpp:2779
IConnectableLayer * AddMergeLayer(const char *name=nullptr)
Definition Network.cpp:2806
IConnectableLayer * AddPermuteLayer(const PermuteDescriptor &permuteDescriptor, const char *name=nullptr)
Definition Network.cpp:2451
IConnectableLayer * AddSpaceToDepthLayer(const SpaceToDepthDescriptor &spaceToDepthDescriptor, const char *name=nullptr)
Definition Network.cpp:2607
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Definition Network.cpp:2586
NetworkImpl(const NetworkOptions &networkOptions={})
Definition Network.cpp:2328
IConnectableLayer * AddLogicalBinaryLayer(const LogicalBinaryDescriptor &logicalBinaryDescriptor, const char *name=nullptr)
Definition Network.cpp:3042
IConnectableLayer * AddConvertFp16ToFp32Layer(const char *name=nullptr)
Definition Network.cpp:2412
IConnectableLayer * AddRankLayer(const char *name=nullptr)
Definition Network.cpp:2547
IConnectableLayer * AddSwitchLayer(const char *name=nullptr)
Definition Network.cpp:2811
IConnectableLayer * AddQLstmLayer(const QLstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Definition Network.cpp:2900
IConnectableLayer * AddSoftmaxLayer(const SoftmaxDescriptor &softmaxDescriptor, const char *name=nullptr)
Definition Network.cpp:2493
IConnectableLayer * AddDequantizeLayer(const char *name=nullptr)
Definition Network.cpp:2784
IConnectableLayer * AddBroadcastToLayer(const BroadcastToDescriptor &descriptor, const char *name=nullptr)
Definition Network.cpp:3238
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Definition Network.cpp:2406
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Definition Network.cpp:2515
IConnectableLayer * AddQuantizedLstmLayer(const QuantizedLstmInputParams &params, const char *name=nullptr)
Definition Network.cpp:2862
IConnectableLayer * AddTransposeConvolution2dLayer(const TransposeConvolution2dDescriptor &descriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Definition Network.cpp:2821
IConnectableLayer * AddFloorLayer(const char *name=nullptr)
Definition Network.cpp:2613
IConnectableLayer * AddConvolution3dLayer(const Convolution3dDescriptor &convolution3dDescriptor, const char *name=nullptr)
Definition Network.cpp:2422
IConnectableLayer * AddStackLayer(const StackDescriptor &stackDescriptor, const char *name=nullptr)
Definition Network.cpp:2849
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Definition Network.cpp:2388
IConnectableLayer * AddMinimumLayer(const char *name=nullptr)
Definition Network.cpp:2510
IConnectableLayer * AddMaximumLayer(const char *name=nullptr)
Definition Network.cpp:2505
IConnectableLayer * AddChannelShuffleLayer(const ChannelShuffleDescriptor &channelShuffleDescriptor, const char *name=nullptr)
Definition Network.cpp:2358
IConnectableLayer * AddNormalizationLayer(const NormalizationDescriptor &normalizationDescriptor, const char *name=nullptr)
Definition Network.cpp:2481
IConnectableLayer * AddPreluLayer(const char *name=nullptr)
Definition Network.cpp:2816
IConnectableLayer * AddPadLayer(const PadDescriptor &padDescriptor, const char *name=nullptr)
Definition Network.cpp:2774
IConnectableLayer * AddSplitterLayer(const ViewsDescriptor &splitterDescriptor, const char *name=nullptr)
Definition Network.cpp:2499
void ExecuteStrategy(IStrategy &strategy) const
Definition Network.cpp:3248
IConnectableLayer * AddSpaceToBatchNdLayer(const SpaceToBatchNdDescriptor &spaceToBatchNdDescriptor, const char *name=nullptr)
Definition Network.cpp:2601
IConnectableLayer * AddCastLayer(const char *name=nullptr)
Definition Network.cpp:2354
IConnectableLayer * AddStandInLayer(const StandInDescriptor &descriptor, const char *name=nullptr)
Definition Network.cpp:2856
IConnectableLayer * AddScatterNdLayer(const ScatterNdDescriptor &scatterDescriptor, const char *name=nullptr)
Definition Network.cpp:3243
IConnectableLayer * AddBatchMatMulLayer(const BatchMatMulDescriptor &desc, const char *name=nullptr)
Definition Network.cpp:3190
IConnectableLayer * AddLogSoftmaxLayer(const LogSoftmaxDescriptor &logSoftmaxDescriptor, const char *name=nullptr)
Definition Network.cpp:2580
const Graph & GetGraph() const
Definition Network.hpp:38
IConnectableLayer * AddReshapeLayer(const ReshapeDescriptor &reshapeDescriptor, const char *name=nullptr)
Definition Network.cpp:2595
IConnectableLayer * AddSliceLayer(const SliceDescriptor &sliceDescriptor, const char *name=nullptr)
Definition Network.cpp:2488
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Definition Network.cpp:2530
IConnectableLayer * AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor &batchToSpaceNdDescriptor, const char *name=nullptr)
Definition Network.cpp:2348
IConnectableLayer * AddActivationLayer(const ActivationDescriptor &activationDescriptor, const char *name=nullptr)
Definition Network.cpp:2469
IConnectableLayer * AddInputLayer(LayerBindingId id, const char *name=nullptr)
Definition Network.cpp:2343
IConnectableLayer * AddElementwiseBinaryLayer(const ElementwiseBinaryDescriptor &elementwiseBinaryDescriptor, const char *name=nullptr)
Definition Network.cpp:2370
IConnectableLayer * AddTileLayer(const TileDescriptor &tileDescriptor, const char *name=nullptr)
Definition Network.cpp:3200
IConnectableLayer * AddGatherLayer(const GatherDescriptor &gatherDescriptor, const char *name=nullptr)
Definition Network.cpp:2795
IConnectableLayer * AddL2NormalizationLayer(const L2NormalizationDescriptor &desc, const char *name=nullptr)
Definition Network.cpp:2574
IConnectableLayer * AddTransposeLayer(const TransposeDescriptor &transposeDescriptor, const char *name=nullptr)
Definition Network.cpp:2843
IConnectableLayer * AddConvertFp32ToFp16Layer(const char *name=nullptr)
Definition Network.cpp:2417
IConnectableLayer * AddUnidirectionalSequenceLstmLayer(const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParams &params, const char *name=nullptr)
Definition Network.cpp:3048
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Definition Network.cpp:2520
IConnectableLayer * AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor &desc, const char *name=nullptr)
Definition Network.cpp:2568
IConnectableLayer * AddDetectionPostProcessLayer(const DetectionPostProcessDescriptor &descriptor, const ConstTensor &anchors, const char *name=nullptr)
Definition Network.cpp:2441
IConnectableLayer * AddStridedSliceLayer(const StridedSliceDescriptor &stridedSliceDescriptor, const char *name=nullptr)
Definition Network.cpp:2789
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Definition Network.cpp:2434
IConnectableLayer * AddComparisonLayer(const ComparisonDescriptor &comparisonDescriptor, const char *name=nullptr)
Definition Network.cpp:2364
IConnectableLayer * AddMeanLayer(const MeanDescriptor &meanDescriptor, const char *name=nullptr)
Definition Network.cpp:2769
IConnectableLayer * AddResizeLayer(const ResizeDescriptor &resizeDescriptor, const char *name=nullptr)
Definition Network.cpp:2558
IConnectableLayer * AddArgMinMaxLayer(const ArgMinMaxDescriptor &desc, const char *name=nullptr)
Definition Network.cpp:2475
IConnectableLayer * AddReduceLayer(const ReduceDescriptor &reduceDescriptor, const char *name=nullptr)
Definition Network.cpp:2552
IConnectableLayer * AddPooling2dLayer(const Pooling2dDescriptor &pooling2dDescriptor, const char *name=nullptr)
Definition Network.cpp:2457
IConnectableLayer * AddConcatLayer(const ConcatDescriptor &concatDescriptor, const char *name=nullptr)
Definition Network.cpp:2400
IConnectableLayer * AddPooling3dLayer(const Pooling3dDescriptor &pooling3dDescriptor, const char *name=nullptr)
Definition Network.cpp:2463
IConnectableLayer * AddPrecompiledLayer(const PreCompiledDescriptor &preCompiledDescriptor, CompiledBlobPtr compiledBlobPtr, const Optional< BackendId > &backend, const char *name=nullptr)
Definition Network.cpp:3205
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Definition Network.cpp:2764
IConnectableLayer * AddDepthToSpaceLayer(const DepthToSpaceDescriptor &depthToSpaceDescriptor, const char *name=nullptr)
Definition Network.cpp:2428
IConnectableLayer * AddOutputLayer(LayerBindingId id, const char *name=nullptr)
Definition Network.cpp:2525
IConnectableLayer * AddReverseV2Layer(const char *name=nullptr)
Definition Network.cpp:3195
IConnectableLayer * AddGatherNdLayer(const char *name=nullptr)
Definition Network.cpp:2801
IConnectableLayer * AddShapeLayer(const char *name=nullptr)
Definition Network.cpp:2563
IConnectableLayer * AddFillLayer(const FillDescriptor &fillDescriptor, const char *name=nullptr)
Definition Network.cpp:2382
This layer represents a normalization operation.
bool Validate(const SubgraphView &originalSubgraph) const
const Subgraphs & GetDeletedSubgraphs() const
const Subgraphs & GetFailedSubgraphs() const
const Substitutions & GetSubstitutions() const
virtual Status SerializeToDot(std::ostream &stream) const
Definition Network.cpp:747
virtual size_t GetNumOutputs() const
Definition Network.cpp:757
void ExecuteStrategy(IStrategy &strategy) const
Definition Network.cpp:3282
virtual size_t GetNumInputs() const
Definition Network.cpp:752
OptimizedNetworkImpl(const OptimizedNetworkImpl &other, const ModelOptions &modelOptions)
Definition Network.cpp:3256
virtual Status PrintGraph()
Definition Network.cpp:741
static void Pass(Graph &graph, const Optimizations &optimizations)
Definition Optimizer.cpp:16
void AddModelOption(armnn::BackendOptions)
Definition Network.cpp:151
void SetDebugEnabled(bool DebugState)
Definition Network.cpp:126
OptimizerOptionsOpaque & operator=(OptimizerOptionsOpaque other)
Definition Network.cpp:96
void SetReduceFp32ToFp16(bool ReduceFp32ToFp16State)
Definition Network.cpp:136
armnn::ShapeInferenceMethod GetShapeInferenceMethod() const
Definition Network.cpp:201
void SetAllowExpandedDims(bool ExpandedDimsAllowed)
Definition Network.cpp:146
void SetProfilingEnabled(bool ProfilingState)
Definition Network.cpp:121
bool GetDebugToFileEnabled() const
Definition Network.cpp:186
void SetDebugToFileEnabled(bool DebugFileState)
Definition Network.cpp:131
void SetExportEnabled(bool ExportState)
Definition Network.cpp:116
const std::string ToString() const
Definition Network.cpp:206
void SetImportEnabled(bool ImportState)
Definition Network.cpp:111
armnn::ModelOptions GetModelOptions() const
Definition Network.cpp:196
void SetShapeInferenceMethod(armnn::ShapeInferenceMethod ShapeInferenceMethodType)
Definition Network.cpp:141
bool GetAllowExpandedDims() const
Definition Network.cpp:191
bool has_value() const noexcept
Definition Optional.hpp:53
A layer user-provided data can be bound to (e.g. inputs, outputs).
const InputSlot * GetConnection(unsigned int index) const override
Definition Layer.cpp:83
unsigned int GetNumConnections() const override
Definition Layer.hpp:158
void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy)
Definition Layer.cpp:223
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition Layer.cpp:95
Layer & GetOwningLayer() const
Definition Layer.hpp:132
const std::vector< InputSlot * > & GetConnections() const
Definition Layer.hpp:145
void Disconnect(InputSlot &slot)
Definition Layer.cpp:131
const TensorInfo & GetTensorInfo() const override
Definition Layer.cpp:100
int Connect(InputSlot &destination)
Definition Layer.cpp:123
void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId &id)
Definition Layer.cpp:213
This layer represents a pad operation.
Definition PadLayer.hpp:15
This layer represents a permutation operation.
This layer represents a pooling 2d operation.
This layer represents a pooling 3d operation.
void SetPreCompiledObject(PreCompiledObjectPtr preCompiledObject)
void RegisterProfiler(IProfiler *profiler)
static ProfilerManager & GetInstance()
This layer represents a QLstm operation.
QLstmBasicParameters m_BasicParameters
This layer represents a QuantizedLstm operation.
QuantizedLstmParameters m_QuantizedLstmParameters
This layer represents a reduction operation.
This layer represents a reshape operation.
This layer represents a resize operation.
This layer represents a ReverseV2 operation.
This layer represents a ScatterNd operator.
This layer represents a softmax operation.
This layer represents a SpaceToBatchNd operation.
This layer represents a SpaceToDepth operation.
This layer represents a split operation.
This layer represents a stack operation.
This layer represents an unknown operation in the input graph.
This layer represents a strided slice operation.
The SubgraphView class represents a subgraph of a Graph.
IConnectableLayers::iterator IConnectableLayerIterator
IConnectableLayerIterator begin()
const IConnectableLayers & GetIConnectableLayers() const
std::list< IConnectableLayer * > IConnectableLayers
IConnectableLayerIterator end()
static Subgraphs SelectSubgraphs(Graph &graph, const LayerSelectorFunction &selector)
Selects subgraphs from a graph based on the selector function and the algorithm.
std::vector< SubgraphView::SubgraphViewPtr > Subgraphs
This layer represents a subtraction operation.
This layer calculates both true and false outputs for input.
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id Returns nullptr if not found.
void SetDataType(DataType type)
Definition Tensor.hpp:201
DataType GetDataType() const
Definition Tensor.hpp:200
This layer represents a 2D transpose convolution operation.
std::shared_ptr< ConstTensorHandle > m_Weight
A shared pointer to store weight values.
This layer represents a transpose operation.
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
OptimizeForExclusiveConnection< PadLayer, DepthwiseConvolution2dLayer, pad_fold::FoldPadIntoDepthwiseConvolution2dImpl > FoldPadIntoDepthwiseConvolution2d
OptimizeForType< Layer, PermuteDepthwiseConv2dWeightsImpl > PermuteDepthwiseConv2dWeights
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
OptimizeForExclusiveConnection< PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl > FoldPadIntoPooling2d
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Copyright (c) 2021 ARM Limited and Contributors.
bool IsTfLiteTurboModel(const Graph &optGraph)
Definition Network.cpp:1976
half_float::half Half
Definition Half.hpp:22
EdgeStrategy CalculateEdgeStrategy(BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition Network.cpp:1785
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > & > warningMessages)
Definition Network.cpp:774
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > & > errMessages)
Definition Network.cpp:787
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
Definition Network.cpp:1354
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition Optimizer.hpp:43
constexpr const char * GetDataTypeName(DataType dataType)
bool CheckFp16Support(BackendsMap &backends, const std::vector< BackendId > &availablePreferredBackends)
Definition Network.cpp:1045
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
Definition Network.cpp:1622
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities str...
std::vector< BackendOptions > NetworkOptions
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition Types.hpp:494
bool CheckFastMathSupport(const std::vector< BackendId > &availablePreferredBackends, const ModelOptions &modelOptions)
Definition Network.cpp:1948
std::vector< BackendOptions > ModelOptions
SpaceToDepthDescriptor DepthToSpaceDescriptor
A DepthToSpaceDescriptor for the DepthToSpaceLayer.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition INetwork.hpp:340
Status
enumeration
Definition Types.hpp:43
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability The BackendCapability must then be in...
OptimizationResult AttemptBackendAssignment(BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > & > messages)
Definition Network.cpp:844
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition Network.cpp:1530
OriginsDescriptor ConcatDescriptor
bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
Definition Network.cpp:1510
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > & > errMessages)
Definition Network.cpp:1211
std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr
Definition INetwork.hpp:343
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptionsOpaque &options=OptimizerOptionsOpaque(), Optional< std::vector< std::string > & > messages=EmptyOptional())
Create an optimized version of the network.
Definition Network.cpp:2287
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > & > errMessages)
Definition Network.cpp:1372
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition Types.hpp:311
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > & > errorMessages)
Definition Network.cpp:762
std::vector< DataType > GetLayerInOutDatatype(const Layer *layer)
Definition Network.cpp:1036
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > & > errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends, bool &restart)
Definition Network.cpp:1092
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilities struct.
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter(Graph &graph, Layer &layer)
BackendRegistry & BackendRegistryInstance()
std::vector< BackendId > BackendIdVector
OptimizationResult SelectTensorHandleStrategy(Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, bool exportEnabled, Optional< std::vector< std::string > & > errMessages)
Definition Network.cpp:1877
SoftmaxDescriptor LogSoftmaxDescriptor
A LogSoftmaxDescriptor for the LogSoftmaxLayer.
LstmDescriptor UnidirectionalSequenceLstmDescriptor
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition INetwork.hpp:339
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
void ParseOptions(const std::vector< BackendOptions > &options, BackendId backend, F f)
DataType
Definition Types.hpp:49
const char * GetLayerTypeAsCString(LayerType type)
ShapeInferenceMethod
The ShapeInferenceMethod modify how the output shapes are treated.
Definition Types.hpp:237
@ InferAndValidate
Infer missing output shapes and validate all output shapes.
Definition Types.hpp:241
@ ValidateOnly
Validate all output shapes.
Definition Types.hpp:239
@ CpuAcc
CPU Execution: NEON: ArmCompute.
Definition BackendId.hpp:27
@ CpuRef
CPU Execution: Reference C++ kernels.
Definition BackendId.hpp:25
@ GpuAcc
GPU Execution: OpenCL: ArmCompute.
Definition BackendId.hpp:29
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool exportEnabled)
Definition Network.cpp:1632
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition Network.hpp:285
bool CheckScaleSetOnQuantizedType(Layer *layer, Optional< std::vector< std::string > & > errMessages)
Definition Network.cpp:801
@ DirectCompatibility
Destination backend can work directly with tensors on source backend.
@ ExportToTarget
Source backends tensor data can be exported to destination backend tensor without copy.
@ CopyToTarget
Copy contents from source backend tensor to destination backend tensor.
void IgnoreUnused(Ts &&...)
std::string CreateDirectory(std::string sPath)
Returns full path to temporary folder.
An ActivationDescriptor for the ActivationLayer.
An ArgMinMaxDescriptor for ArgMinMaxLayer.
Struct for the users to pass backend specific options.
bool IsBackendSupported(const BackendId &backend) const
BackendIdVector GetAvailablePreferredBackends() const
BackendIdVector m_PreferredBackends
A BatchMatMulDescriptor for the BatchMatMul operator.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
A ChannelShuffleDescriptor for the ChannelShuffle operator.
A ComparisonDescriptor for the ComparisonLayer.
A Convolution2dDescriptor for the Convolution2dLayer.
A Convolution3dDescriptor for the Convolution3dLayer.
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A ElementwiseBinaryDescriptor for the ElementwiseBinaryLayer.
A ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer.
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition Optional.hpp:32
A FillDescriptor for the FillLayer.
A FullyConnectedDescriptor for the FullyConnectedLayer.
A FusedDescriptor for the FusedLayer.
A GatherDescriptor for the GatherLayer.
An InstanceNormalizationDescriptor for InstanceNormalizationLayer.
A L2NormalizationDescriptor for the L2NormalizationLayer.
A LogicalBinaryDescriptor for the LogicalBinaryLayer.
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A shared pointer to represent 2D weights tensor with dimensions [input_size, num_units].
An LstmDescriptor for the LstmLayer.
bool m_PeepholeEnabled
Enable/disable peephole.
bool m_LayerNormEnabled
Enable/disable layer normalization.
bool m_ProjectionEnabled
Enable/disable the projection layer.
bool m_CifgEnabled
Enable/disable cifg (coupled input & forget gate).
const ConstTensor * m_InputLayerNormWeights
const ConstTensor * m_RecurrentToCellWeights
const ConstTensor * m_InputToForgetWeights
const ConstTensor * m_CellToForgetWeights
const ConstTensor * m_RecurrentToInputWeights
const ConstTensor * m_ProjectionBias
const ConstTensor * m_CellToInputWeights
const ConstTensor * m_InputToCellWeights
const ConstTensor * m_CellBias
const ConstTensor * m_RecurrentToOutputWeights
const ConstTensor * m_InputToOutputWeights
const ConstTensor * m_OutputGateBias
const ConstTensor * m_OutputLayerNormWeights
const ConstTensor * m_InputGateBias
const ConstTensor * m_ProjectionWeights
const ConstTensor * m_ForgetGateBias
const ConstTensor * m_CellLayerNormWeights
const ConstTensor * m_RecurrentToForgetWeights
const ConstTensor * m_ForgetLayerNormWeights
const ConstTensor * m_CellToOutputWeights
const ConstTensor * m_InputToInputWeights
A MeanDescriptor for the MeanLayer.
A NormalizationDescriptor for the NormalizationLayer.
bool IsWarningOnly() const
Definition Network.hpp:278
bool m_ExportEnabled
Enable Export.
Definition INetwork.hpp:262
bool m_ImportEnabled
Enable Import.
Definition INetwork.hpp:253
bool m_ReduceFp32ToBf16
@Note This feature has been replaced by enabling Fast Math in compute library backend options.
Definition INetwork.hpp:247
bool m_ProfilingEnabled
Enable profiling dump of the optimizer phase.
Definition INetwork.hpp:259
bool m_Debug
Add debug data for easier troubleshooting.
Definition INetwork.hpp:240
bool m_ReduceFp32ToFp16
Reduces all Fp32 operators in the model to Fp16 for faster processing.
Definition INetwork.hpp:237
ModelOptions m_ModelOptions
Enable Model Options.
Definition INetwork.hpp:256
ShapeInferenceMethod m_shapeInferenceMethod
Infer output size when not available.
Definition INetwork.hpp:250
bool m_AllowExpandedDims
When calculating tensor sizes, dimensions of size == 1 will be ignored.
Definition INetwork.hpp:265
bool m_DebugToFile
Pass debug data to separate output files for easier troubleshooting.
Definition INetwork.hpp:243
A PadDescriptor for the PadLayer.
A PermuteDescriptor for the PermuteLayer.
A Pooling2dDescriptor for the Pooling2dLayer.
A Pooling3dDescriptor for the Pooling3dLayer.
A PreCompiledDescriptor for the PreCompiledLayer.
std::shared_ptr< ConstTensorHandle > m_InputToForgetWeights
A shared pointer to represent 2D weights tensor with dimensions [num_units, inputSize] (QSymmS8).
A QLstmDescriptor for the QLstmLayer.
bool m_PeepholeEnabled
Enable/disable peephole.
bool m_LayerNormEnabled
Enable/disable layer normalization.
bool m_ProjectionEnabled
Enable/disable the projection layer.
bool m_CifgEnabled
Enable/disable CIFG (coupled input & forget gate).
const ConstTensor & GetCellBias() const
const ConstTensor & GetRecurrentToCellWeights() const
const ConstTensor & GetOutputGateBias() const
const ConstTensor & GetInputToCellWeights() const
const ConstTensor & GetInputToOutputWeights() const
const ConstTensor & GetRecurrentToOutputWeights() const
const ConstTensor & GetForgetGateBias() const
const ConstTensor & GetInputGateBias() const
const ConstTensor & GetInputToInputWeights() const
const ConstTensor & GetInputToForgetWeights() const
const ConstTensor & GetRecurrentToForgetWeights() const
const ConstTensor & GetRecurrentToInputWeights() const
std::shared_ptr< ConstTensorHandle > m_InputToInputWeights
A shared pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8).
A ReduceDescriptor for the REDUCE operators.
A ReshapeDescriptor for the ReshapeLayer.
A ResizeDescriptor for the ResizeLayer.
A ScatterNdDescriptor for the ScatterNdLayer.
A SliceDescriptor for the SliceLayer.
A SoftmaxDescriptor for the SoftmaxLayer.
A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer.
A SpaceToDepthDescriptor for the SpaceToDepthLayer.
A StackDescriptor for the StackLayer.
A StandInDescriptor for the StandIn layer.
A StridedSliceDescriptor for the StridedSliceLayer.
A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer.
A TransposeDescriptor for the TransposeLayer.
A ViewsDescriptor for the SplitterLayer.