#include <client/include/IProfilingService.hpp>
#include <common/include/ProfilingGuid.hpp>

#include <fmt/format.h>
OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
                                               bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
                                               bool debugToFile)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
                                                                          importEnabled, modelOptions,
                                                                          exportEnabled, debugToFile))
{
}
OptimizerOptionsOpaque::OptimizerOptionsOpaque(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16,
                                               ShapeInferenceMethod shapeInferenceMethod,
                                               bool importEnabled, ModelOptions modelOptions, bool exportEnabled,
                                               bool debugToFile, bool allowExpandedDims)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>(reduceFp32ToFp16, debug, reduceFp32ToBf16,
                                                                          shapeInferenceMethod, importEnabled,
                                                                          modelOptions, exportEnabled,
                                                                          debugToFile, allowExpandedDims))
{
}
OptimizerOptionsOpaque::OptimizerOptionsOpaque(const OptimizerOptions& OptimizerStruct)
    : p_OptimizerOptionsImpl(std::make_unique<OptimizerOptionsOpaqueImpl>())
{
    p_OptimizerOptionsImpl->m_ImportEnabled = OptimizerStruct.m_ImportEnabled;
    p_OptimizerOptionsImpl->m_ModelOptions = OptimizerStruct.m_ModelOptions;
    p_OptimizerOptionsImpl->m_DebugToFile = OptimizerStruct.m_DebugToFile;
    p_OptimizerOptionsImpl->m_Debug = OptimizerStruct.m_Debug;
    p_OptimizerOptionsImpl->m_ExportEnabled = OptimizerStruct.m_ExportEnabled;
}
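// Setter bodies follow; judging by the parameter names, these are the bodies of
// SetImportEnabled, SetExportEnabled, SetProfilingEnabled, SetDebugEnabled,
// SetDebugToFileEnabled, SetReduceFp32ToFp16, SetShapeInferenceMethod,
// SetAllowExpandedDims and AddModelOption. Each writes straight through to the pimpl.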
p_OptimizerOptionsImpl->m_ImportEnabled = ImportState;
p_OptimizerOptionsImpl->m_ExportEnabled = ExportState;
p_OptimizerOptionsImpl->m_ProfilingEnabled = ProfilingState;
p_OptimizerOptionsImpl->m_Debug = DebugState;
p_OptimizerOptionsImpl->m_DebugToFile = DebugFileState;
p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 = ReduceFp32ToFp16State;
p_OptimizerOptionsImpl->m_shapeInferenceMethod = ShapeInferenceMethodType;
p_OptimizerOptionsImpl->m_AllowExpandedDims = ExpandedDimsAllowed;
p_OptimizerOptionsImpl->m_ModelOptions.push_back(NewModelOption);
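// Example (hypothetical caller code, not part of this file): configuring the options
// before optimization:
//   armnn::OptimizerOptionsOpaque options;
//   options.SetReduceFp32ToFp16(true);
//   options.SetProfilingEnabled(true);
//   armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*network, {"GpuAcc", "CpuRef"},
//                                                        runtime->GetDeviceSpec(), options);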
// Getter bodies follow; each reads straight from the pimpl struct.
return p_OptimizerOptionsImpl->m_ProfilingEnabled;
return p_OptimizerOptionsImpl->m_ImportEnabled;
return p_OptimizerOptionsImpl->m_ExportEnabled;
return p_OptimizerOptionsImpl->m_ReduceFp32ToFp16;
return p_OptimizerOptionsImpl->m_ReduceFp32ToBf16;
return p_OptimizerOptionsImpl->m_Debug;
return p_OptimizerOptionsImpl->m_DebugToFile;
return p_OptimizerOptionsImpl->m_AllowExpandedDims;
return p_OptimizerOptionsImpl->m_ModelOptions;
return p_OptimizerOptionsImpl->m_shapeInferenceMethod;
const std::string OptimizerOptionsOpaque::ToString() const
{
    std::stringstream stream;
    stream << "OptimizerOptions: \n";
    stream << "\tReduceFp32ToFp16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToFp16 << "\n";
    stream << "\tReduceFp32ToBf16: " << p_OptimizerOptionsImpl->m_ReduceFp32ToBf16 << "\n";
    stream << "\tDebug: " << p_OptimizerOptionsImpl->m_Debug << "\n";
    stream << "\tDebug to file: " << p_OptimizerOptionsImpl->m_DebugToFile << "\n";
    stream << "\tShapeInferenceMethod: " <<
              (p_OptimizerOptionsImpl->m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly ?
               "ValidateOnly" : "InferAndValidate") << "\n";
    stream << "\tImportEnabled: " << p_OptimizerOptionsImpl->m_ImportEnabled << "\n";
    stream << "\tExportEnabled: " << p_OptimizerOptionsImpl->m_ExportEnabled << "\n";
    stream << "\tProfilingEnabled: " << p_OptimizerOptionsImpl->m_ProfilingEnabled << "\n";
    stream << "\tAllowExpandedDims: " << p_OptimizerOptionsImpl->m_AllowExpandedDims << "\n";

    stream << "\tModelOptions: \n";
    for (auto optionsGroup : p_OptimizerOptionsImpl->m_ModelOptions)
    {
        for (size_t i = 0; i < optionsGroup.GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
            stream << "\t\tBackend: " << optionsGroup.GetBackendId() << "\n"
                   << "\t\t\tOption: " << option.GetName() << "\n";
        }
    }

    return stream.str();
}
return pNetworkImpl->AddComparisonLayer(comparisonDescriptor, name);
return pNetworkImpl->AddConcatLayer(concatDescriptor, name);
return pNetworkImpl->AddConvolution2dLayer(convolution2dDescriptor, name);
return pNetworkImpl->AddConvolution3dLayer(convolution3dDescriptor, name);
return pNetworkImpl->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
return pNetworkImpl->AddDepthwiseConvolution2dLayer(convolution2dDescriptor, name);
return pNetworkImpl->AddDetectionPostProcessLayer(descriptor, anchors, name);
return pNetworkImpl->AddElementwiseBinaryLayer(elementwiseBinaryDescriptor, name);
return pNetworkImpl->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
return pNetworkImpl->AddFillLayer(fillDescriptor, name);
return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, name);
return pNetworkImpl->AddFusedLayer(fusedDescriptor, name);
return pNetworkImpl->AddPermuteLayer(permuteDescriptor, name);
return pNetworkImpl->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
return pNetworkImpl->AddPooling2dLayer(pooling2dDescriptor, name);
return pNetworkImpl->AddPooling3dLayer(pooling3dDescriptor, name);
return pNetworkImpl->AddPrecompiledLayer(preCompiledDescriptor, std::move(compiledBlobPtr), backend, name);
return pNetworkImpl->AddActivationLayer(activationDescriptor, name);
return pNetworkImpl->AddNormalizationLayer(normalizationDescriptor, name);
return pNetworkImpl->AddSliceLayer(sliceDescriptor, name);
return pNetworkImpl->AddSoftmaxLayer(softmaxDescriptor, name);
return pNetworkImpl->AddSplitterLayer(splitterDescriptor, name);
return pNetworkImpl->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, name);
return pNetworkImpl->AddResizeLayer(resizeDescriptor, name);
return pNetworkImpl->AddReduceLayer(reduceDescriptor, name);
return pNetworkImpl->AddInstanceNormalizationLayer(desc, name);
return pNetworkImpl->AddL2NormalizationLayer(desc, name);
return pNetworkImpl->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
return pNetworkImpl->AddReshapeLayer(reshapeDescriptor, name);
return pNetworkImpl->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
return pNetworkImpl->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
return pNetworkImpl->AddLstmLayer(descriptor, params, name);
return pNetworkImpl->AddMeanLayer(meanDescriptor, name);
return pNetworkImpl->AddStridedSliceLayer(stridedSliceDescriptor, name);
return pNetworkImpl->AddTransposeConvolution2dLayer(descriptor, weights, biases, name);
return pNetworkImpl->AddTransposeLayer(transposeDescriptor, name);
return pNetworkImpl->AddQuantizedLstmLayer(params, name);
return pNetworkImpl->AddQLstmLayer(descriptor, params, name);
return pNetworkImpl->AddLogicalBinaryLayer(descriptor, name);
return pNetworkImpl->AddUnidirectionalSequenceLstmLayer(descriptor, params, name);
return pNetworkImpl->AddChannelShuffleLayer(descriptor, name);
return pNetworkImpl->AddBatchMatMulLayer(descriptor, name);
return pNetworkImpl->AddBroadcastToLayer(descriptor, name);
return pNetworkImpl->AddScatterNdLayer(descriptor, name);

return new INetwork(networkOptions);
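// Example (hypothetical caller code): the public factory wraps the raw pointer in a
// smart pointer with a custom deleter, so callers never delete an INetwork directly:
//   armnn::INetworkPtr network = armnn::INetwork::Create();
//   armnn::IConnectableLayer* input = network->AddInputLayer(0, "input");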
IOptimizedNetwork::IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions)
    : pOptimizedNetworkImpl(new OptimizedNetworkImpl(*other.pOptimizedNetworkImpl.get(), modelOptions)) {}

IOptimizedNetwork::IOptimizedNetwork(std::unique_ptr<OptimizedNetworkImpl> impl)
    : pOptimizedNetworkImpl(std::move(impl)) {}
// Graph pass-throughs (SerializeToDot, GetNumInputs, GetNumOutputs).
return m_Graph->SerializeToDot(stream);
return m_Graph->GetNumInputs();
return m_Graph->GetNumOutputs();
void ReportError(const std::string& errorMessage,
                 Optional<std::vector<std::string>&> errorMessages)
{
    std::stringstream fullErrorMessage;
    fullErrorMessage << "ERROR: " << errorMessage;
    ARMNN_LOG(warning) << fullErrorMessage.str();
    if (errorMessages)
    {
        errorMessages.value().push_back(fullErrorMessage.str());
    }
}
void ReportWarning(const std::string& warningMessage,
                   Optional<std::vector<std::string>&> warningMessages)
{
    std::stringstream fullWarningMessage;
    fullWarningMessage << "WARNING: " << warningMessage;
    ARMNN_LOG(warning) << fullWarningMessage.str();
    if (warningMessages)
    {
        warningMessages.value().push_back(fullWarningMessage.str());
    }
}
OptimizationResult ReturnWithError(OptimizationResult res,
                                   const Layer* layer,
                                   const BackendSettings& backendSettings,
                                   Optional<std::vector<std::string>&> errMessages)
{
    std::stringstream failureMsg;
    failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
    ReportError(failureMsg.str(), errMessages);

    res.m_Error = true;
    return res;
}
bool CheckScaleSetOnQuantizedType(Layer* layer, Optional<std::vector<std::string>&> errMessages)
{
    bool noErrors = true;
    unsigned int numOutputs = layer->GetNumOutputSlots();
    for (unsigned int i = 0; i < numOutputs; i++) {
        OutputSlot& outputSlot = layer->GetOutputSlot(i);
        TensorInfo info = outputSlot.GetTensorInfo();
        auto quantizationDataType = info.GetDataType();
        auto quantizationScales = info.GetQuantizationScales();
        // For any quantized tensor, make sure the scale and offset are usable.
        switch(quantizationDataType) {
            case DataType::QAsymmU8:
            case DataType::QSymmS16:
            case DataType::QSymmS8:
            case DataType::QAsymmS8:
                if ((quantizationDataType == DataType::QAsymmU8 || quantizationDataType == DataType::QAsymmS8)
                    && info.HasPerAxisQuantization()) {
                    throw InvalidArgumentException("Per Axis Quantization is not supported in "
                                                   "Asymmetric Quantization Datatype.");
                }
                // Softmax under QAsymmU8 must use scale 1/256 and offset 0; anything else
                // is corrected in place with a warning.
                if (layer->GetType() == LayerType::Softmax &&
                    (info.GetQuantizationScale() != (1.0f / 256.0f) ||
                     info.GetQuantizationOffset() != 0) &&
                    quantizationDataType == DataType::QAsymmU8) {
                    std::stringstream ss;
                    ss << "Quantization parameters for Softmax layer (Scale: " <<
                          info.GetQuantizationScale() << " and Offset: " <<
                          info.GetQuantizationOffset() <<
                          ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
                    ARMNN_LOG(warning) << ss.str();
                    info.SetQuantizationScale((1.0f / 256.0f));
                    info.SetQuantizationOffset(0);
                    outputSlot.SetTensorInfo(info);
                }
                break;
            default:
                break;
        }
    }
    return noErrors;
}
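// Note: 1.0f / 256.0f == 0.00390625, which is exactly the scale quoted in the
// warning message above.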
OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
                                            Graph& graph,
                                            Layer* layer,
                                            BackendId backend,
                                            DataType dataTypeIn,
                                            DataType dataTypeOut,
                                            const std::vector<BackendId>& availablePreferredBackends,
                                            std::string& reasonIfUnsupported,
                                            Optional<std::vector<std::string>&> messages)
{
    OptimizationResult result;

    // Helper to compose a meaningful error message before returning with error.
    auto ReturnError = [&](const Layer* layer)
    {
        return ReturnWithError(result, layer, backendSettings, messages);
    };

    // The backend must be set on the layer before querying support.
    layer->SetBackendId(backend);
    std::string currentReasonIfUnsupported;
    bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
    reasonIfUnsupported += currentReasonIfUnsupported;

    // A backend with "AllOrNothing" enabled either takes a whole subgraph or none of it,
    // so a single unsupported layer disqualifies it for the entire subgraph.
    if (!isLayerSupported && HasCapability("AllOrNothing", backend))
    {
        if (GetCapability("AllOrNothing", backend).value().GetValue().AsBool())
        {
            std::stringstream fullWarningMessage;
            fullWarningMessage << "Backend: " << backend
                               << " has \"AllOrNothing\" enabled. A layer of type "
                               << GetLayerTypeAsCString(layer->GetType()) << " is not supported. "
                               << "This backend will not be considered to execute this subgraph.";
            reasonIfUnsupported.append(fullWarningMessage.str());
        }
    }

    // Armv8.0 CPUs cannot run FP16 workloads; this reason string flags layers that
    // should instead be bracketed by FP16 <-> FP32 conversion layers.
    std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
    if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
    {
        // Convert a Constant layer's FP16 payload to FP32 in place.
        auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
        {
            ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
            auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
            std::vector<float> newValues(info.GetNumElements());

            armnnUtils::FloatingPointConverter::ConvertFloat16To32(
                constantLayer->m_LayerOutput->GetConstTensor<Half>(),
                info.GetNumElements(),
                newValues.data());

            TensorInfo newInfo(info);
            newInfo.SetDataType(DataType::Float32);
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        };

        bool checkType = false;

        for (auto& inputSlot : layer->GetInputSlots())
        {
            auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
            if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
            {
                // Only convert in place when this layer is the constant's sole consumer.
                if (connectedOutputSlot->GetNumConnections() == 1)
                {
                    checkType = true;
                    ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
                }
            }
        }
        // Insert FP16 -> FP32 conversion layers before this layer and FP32 -> FP16 after it.
        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
        if (dataTypeIn == DataType::Float16)
        {
            convertFp16ToFp32Layers =
                InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
        }
        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
        if (dataTypeOut == DataType::Float16)
        {
            convertFp32ToFp16Layers =
                InsertConvertFp32ToFp16LayersAfter(graph, *layer);
        }

        // Assign a backend to each newly inserted conversion layer, trying the backend
        // that triggered the conversion first.
        auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
        {
            bool supportedBackendFound = false;
            std::string reasonIfUnsupported;

            layer->SetBackendId(preferredBackend);
            if (IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
            {
                supportedBackendFound = true;
            }
            else
            {
                for (const auto& backend : availablePreferredBackends)
                {
                    // Skip the preferred backend, it was already tested.
                    if (backend == preferredBackend)
                    {
                        continue;
                    }

                    layer->SetBackendId(backend);
                    if (IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
                    {
                        supportedBackendFound = true;
                        break;
                    }
                }
            }

            return supportedBackendFound;
        };

        for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
        {
            if (!AssignFirstSupportedBackend(convertLayer, backend))
            {
                return ReturnError(convertLayer);
            }
        }

        for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
        {
            if (!AssignFirstSupportedBackend(convertLayer, backend))
            {
                return ReturnError(convertLayer);
            }
        }
    std::stringstream warningMsg;
    warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
               << " is not supported on requested backend " << layer->GetBackendId().Get()
               << " for input data type " << GetDataTypeName(dataTypeIn)
               << " and output data type " << GetDataTypeName(dataTypeOut)
               << " (reason: " << reasonIfUnsupported
               << "), falling back to the next backend.";
    ReportWarning(warningMsg.str(), messages);
// Helper returning {input data type, output data type} for a layer.
return {dataTypeIn, dataTypeOut};
bool CheckFp16Support(BackendsMap& backends,
                      const std::vector<BackendId>& availablePreferredBackends)
{
    bool hasFp16 = false;

    // Check if the first preferred backend has FP16 support.
    auto firstBackend = availablePreferredBackends[0];
    auto backendObjPtr = backends.find(firstBackend)->second.get();
    auto backendCapabilities = backendObjPtr->GetCapabilities();

    if (HasMatchingCapability(BackendOptions::BackendOption{"HasFp16", true}, backendCapabilities))
    {
        hasFp16 = true;
        ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
                         << ", has FP16 support.";
    }
    else
    {
        ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
                           << ", does not have FP16 support. "
                           << "The FP16 turbo mode option will be disabled. It will run using FP32.";
    }

    // Check whether the remaining preferred backends have FP16 support.
    for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
    {
        auto backend = availablePreferredBackends[i];
        backendObjPtr = backends.find(backend)->second.get();
        backendCapabilities = backendObjPtr->GetCapabilities();
        if (!HasMatchingCapability(BackendOptions::BackendOption{"HasFp16", true}, backendCapabilities))
        {
            ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
                               << "It will run using FP32 when falling back to this backend.";
        }
        else
        {
            ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
        }
    }

    return hasFp16;
}
void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr,
                                IConnectableLayer* it,
                                Optional<std::vector<std::string>&> errMessages,
                                OptimizationResult& result,
                                BackendSettings& backendSettings,
                                std::vector<BackendId>& availablePreferredBackends,
                                bool& restart)
{
    auto ReturnError = [&](const Layer* layer)
    {
        return ReturnWithError(result, layer, backendSettings, errMessages);
    };

    auto layer = PolymorphicDowncast<Layer*>(it);

    std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);

    std::string reasonIfUnsupported;
    bool found = false;

    // First try to assign the layer to its hinted backend.
    if (layer->GetBackendHint().has_value() &&
        backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
        AttemptBackendAssignment(backendSettings, optNetObjPtr->GetGraph(), layer,
                                 layer->GetBackendHint().value(),
                                 inOutDataType[0], inOutDataType[1],
                                 availablePreferredBackends,
                                 reasonIfUnsupported,
                                 errMessages).IsOk())
    {
        found = true;
        backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
    }
    else
    {
        // Otherwise try the preferred backends in order.
        for (const auto& backend : availablePreferredBackends)
        {
            if (layer->GetBackendHint().has_value() &&
                layer->GetBackendHint().value() == backend)
            {
                continue; // Don't re-test the backend hint.
            }

            OptimizationResult res = AttemptBackendAssignment(backendSettings, optNetObjPtr->GetGraph(),
                                                              layer, backend,
                                                              inOutDataType[0], inOutDataType[1],
                                                              availablePreferredBackends,
                                                              reasonIfUnsupported,
                                                              errMessages);
            if (res.IsOk())
            {
                found = true;
                backendSettings.m_SelectedBackends.insert(backend);
                break;
            }

            // An "AllOrNothing" refusal removes that backend from consideration for this
            // subgraph and triggers a restart of the assignment pass without it.
            if (reasonIfUnsupported.find("AllOrNothing") != std::string::npos)
            {
                restart = true;
            }
        }
    }

    if (!found)
    {
        // MemCopy/Constant/Permute layers may silently fall back to CpuRef; anything
        // else that no backend accepts is a hard error.
        armnn::LayerType layerType = layer->GetType();
        if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
                                                layerType == armnn::LayerType::Constant ||
                                                layerType == armnn::LayerType::Permute))
        {
            BackendId cpuBackendId(armnn::Compute::CpuRef);
            layer->SetBackendId(cpuBackendId);
            backendSettings.m_SelectedBackends.insert(cpuBackendId);
        }
        else
        {
            result = ReturnError(layer);
        }
    }
}
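// Net effect of the routine above: a layer receives (1) its backend hint when that
// backend accepts it, otherwise (2) the first preferred backend that accepts it,
// otherwise (3) a CpuRef fallback for the few layer types that allow it; anything
// else is reported through ReturnError.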
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  Graph::Iterator& firstLayer,
                                  Graph::Iterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();

    // Restart from the first layer whenever an "AllOrNothing" backend fails.
    bool restart = false;
    for (auto it = firstLayer; it != lastLayer; it = (restart ? firstLayer : ++it))
    {
        if (it == firstLayer)
        {
            availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
            if (availablePreferredBackends.empty())
            {
                ReportError("No preferred backends are available", errMessages);
                result.m_Error = true;
                return result;
            }
            restart = false;
        }

        AssignBackendsIConnectable(optNetObjPtr, *it, errMessages, result, backendSettings,
                                   availablePreferredBackends, restart);
    }

    // Second pass: re-assign FP16 layers (or layers left "Unknown") now that conversion
    // layers may have been inserted.
    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);
        std::vector<DataType> inOutDataType = GetLayerInOutDatatype(layer);

        bool isFloat16 = false;
        for (auto type : inOutDataType)
        {
            if (type == DataType::Float16)
            {
                isFloat16 = true;
                break;
            }
        }

        if (layer->GetBackendId() == "Unknown" || isFloat16)
        {
            AssignBackendsIConnectable(optNetObjPtr, *it, errMessages, result, backendSettings,
                                       availablePreferredBackends, restart);
        }
    }

    // Finally, give each Input layer the backend of the layer it feeds.
    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);
        if (layer->GetType() == LayerType::Input)
        {
            BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
            layer->SetBackendId(connectedBackendId);
        }
    }

    return result;
}
// Overload used for sub-graph re-assignment: same flow over SubgraphView iterators.
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView::IConnectableLayerIterator& firstLayer,
                                  SubgraphView::IConnectableLayerIterator& lastLayer,
                                  Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "No preferred backends are available";
        ReportError(failureMsg.str(), errMessages);
        result.m_Error = true;
        return result;
    }

    bool restart = false;
    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        AssignBackendsIConnectable(optNetObjPtr, *it, errMessages, result, backendSettings,
                                   availablePreferredBackends, restart);
    }

    for (auto it = firstLayer; it != lastLayer; ++it)
    {
        auto layer = PolymorphicDowncast<Layer*>(*it);
        if (layer->GetType() == LayerType::Input)
        {
            BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
            layer->SetBackendId(connectedBackendId);
        }
    }

    return result;
}
// SubgraphView overload: forwards to the iterator-range AssignBackends above.
OptimizationResult AssignBackends(OptimizedNetworkImpl* optNetObjPtr,
                                  BackendSettings& backendSettings,
                                  SubgraphView& subgraph,
                                  Optional<std::vector<std::string>&> errMessages)
{
    SubgraphView::IConnectableLayerIterator firstLayer = subgraph.begin();
    SubgraphView::IConnectableLayerIterator lastLayer  = subgraph.end();
    return AssignBackends(optNetObjPtr, backendSettings, firstLayer, lastLayer, errMessages);
}

BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
                                    BackendSettings& backendSettings)
{
    BackendsMap backends;
    auto const& backendRegistry = BackendRegistryInstance();
    for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
    {
        auto backendFactory = backendRegistry.GetFactory(selectedBackend);
        auto backendObjPtr = backendFactory();

        backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);

        backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
    }

    return backends;
}
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr,
                                             BackendSettings& backendSettings,
                                             BackendsMap& backends,
                                             const ModelOptions& modelOptions,
                                             Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;
    Graph& optGraph = optNetObjPtr->GetGraph();

    // Run backend-specific optimizations on the sub-graphs assigned to each selected backend.
    for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
    {
        auto backendObjPtr = backends.find(selectedBackend)->second.get();

        // Select the sub-graphs assigned to this backend (Input/Output layers excluded).
        SubgraphViewSelector::Subgraphs subgraphs =
            SubgraphViewSelector::SelectSubgraphs(optGraph,
                                                  [&backendObjPtr](const Layer& layer)
                                                  {
                                                      return layer.GetType() != LayerType::Input &&
                                                             layer.GetType() != LayerType::Output &&
                                                             layer.GetBackendId() == backendObjPtr->GetId();
                                                  });
        if (subgraphs.empty())
        {
            continue; // No sub-graphs found: try the next selected backend.
        }

        for (auto& subgraph : subgraphs)
        {
            OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
            if (!optimizationViews.Validate(*subgraph))
            {
                throw armnn::Exception("optimizationViews must have a valid subgraph");
            }

            // Substitute each optimized sub-graph into the main graph and claim its layers.
            for (auto& substitution : optimizationViews.GetSubstitutions())
            {
                SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
                SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
                optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);

                const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
                std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
                {
                    PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
                });
            }

            // Remove deleted sub-graphs, detaching each deleted layer from its parents.
            for (auto& deletedSubgraph : optimizationViews.GetDeletedSubgraphs())
            {
                for (auto& l : deletedSubgraph.GetIConnectableLayers())
                {
                    Layer* deletedLayer = PolymorphicDowncast<Layer*>(l);
                    for (unsigned int in = deletedLayer->GetNumInputSlots(); in > 0; --in)
                    {
                        InputSlot& inputSlot = deletedLayer->GetInputSlot(in - 1);
                        OutputSlot* parentOut = inputSlot.GetConnectedOutputSlot();
                        parentOut->Disconnect(inputSlot);
                    }
                }
            }

            // Sub-graphs the backend could not optimize get re-assigned to other backends.
            if (!optimizationViews.GetFailedSubgraphs().empty())
            {
                std::stringstream warningMsg;
                warningMsg << "Some sub-graph(s) failed to optimize on " << backendObjPtr->GetId() << " backend.";
                ReportWarning(warningMsg.str(), errMessages);

                BackendSettings settingsCopy(backendSettings);
                if (!backendObjPtr->GetId().IsCpuRef())
                {
                    // Add the current backend to the list of backends to ignore.
                    settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
                }

                int count = 0;
                for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
                {
                    std::stringstream subgraphMsg;
                    subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
                                << " layers inside sub-graph " << count++;
                    ReportWarning(subgraphMsg.str(), errMessages);

                    OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr, settingsCopy,
                                                                           failedSubgraph, errMessages);
                    if (reassignmentResult.m_Error)
                    {
                        result.m_Error = true;
                        return result;
                    }
                }
            }
        }
    }

    return result;
}
// From RequiresCopy(): no mem-copy is needed when the source factory can export
// directly into the destination factory.
if (srcFactory && dstFactory &&
    (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
                                                            OutputSlot& slot,
                                                            TensorHandleFactoryRegistry& registry,
                                                            bool importEnabled)
{
    Layer& layer = slot.GetOwningLayer();

    // Fall back to the legacy allocator API if the owning backend does not support factories.
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Score each destination factory by how many consumers it satisfies; the factory
    // seen most often wins, minimising the copies out of the input layer.
    ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
    int topScore = 0;
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& connection : slot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        if (toBackend == backends.end())
        {
            throw armnn::Exception("Backend id not found for the connected layer");
        }

        if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
        {
            // Cannot use the factory API on the destination backend: use the legacy API.
            return ITensorHandleFactory::LegacyFactoryId;
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& dst : dstPrefs)
        {
            auto it = factoryScores.find(dst);
            if (it == factoryScores.end())
            {
                // Add a new score to the table.
                factoryScores[dst] = 0;
            }
            else
            {
                // Increase the score and track the best option so far.
                factoryScores[dst]++;

                if (factoryScores[dst] > topScore)
                {
                    topScore = factoryScores[dst];
                    topChoice = dst;
                }
            }
        }
    }

    return topChoice;
}
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
                                                    OutputSlot& outputSlot,
                                                    TensorHandleFactoryRegistry& registry,
                                                    bool exportEnabled)
{
    // First make sure the "from" backend supports the tensor-allocator API at all.
    Layer& layer = outputSlot.GetOwningLayer();
    auto frmBackend = backends.find(layer.GetBackendId());
    if (frmBackend == backends.end() ||
        !frmBackend->second->SupportsTensorAllocatorAPI())
    {
        return ITensorHandleFactory::LegacyFactoryId;
    }

    // Remember whether this slot feeds an Output layer.
    bool outputConnection = false;
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();
        if (connectedLayer.GetType() == LayerType::Output)
        {
            outputConnection = true;
        }
    }

    IBackendInternal* srcBackend = frmBackend->second.get();
    auto srcPrefs = srcBackend->GetHandleFactoryPreferences();

    // Initialise the scores, excluding factories that cannot serve this slot.
    std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
    for (auto&& pref : srcPrefs)
    {
        ITensorHandleFactory* factory = registry.GetFactory(pref);
        if (exportEnabled)
        {
            if (outputConnection)
            {
                // Detect a fallback connection: an input arriving from a different backend.
                bool fallbackConnection = false;
                for (auto&& inputSlot : layer.GetInputSlots())
                {
                    if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
                    {
                        fallbackConnection = true;
                    }
                }
                if (fallbackConnection)
                {
                    auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
                    // Cannot use factory import if fallback import is not supported.
                    if (!factoryCap.empty())
                    {
                        continue;
                    }
                }
            }
            if (!outputConnection)
            {
                auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
                if (!factoryCap.empty())
                {
                    continue;
                }
            }
        }

        auto it = factoryScores.find(pref);
        if (it == factoryScores.end())
        {
            factoryScores[pref] = 0;
        }
    }

    // Score each surviving factory by how many connections would require a copy.
    for (auto&& connection : outputSlot.GetConnections())
    {
        const Layer& connectedLayer = connection->GetOwningLayer();

        auto toBackend = backends.find(connectedLayer.GetBackendId());
        if (toBackend == backends.end())
        {
            throw armnn::Exception("Backend id not found for the connected layer");
        }

        auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
        for (auto&& src : srcPrefs)
        {
            if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories.
            {
                continue;
            }

            for (auto&& dst : dstPrefs)
            {
                if (RequiresCopy(src, dst, registry))
                {
                    factoryScores[src]++;
                    break;
                }
            }
        }
    }

    // Find the lowest score (fewest required copies).
    int minScore = std::numeric_limits<int>::max();
    for (auto it : factoryScores)
    {
        minScore = std::min(minScore, it.second);
    }

    // Collect the factories matching the best (lowest) score.
    std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
    for (auto it : factoryScores)
    {
        if (it.second == minScore)
        {
            optimalFactories.push_back(it.first);
        }
    }

    // Among the optimal factories, prefer the source backend's own preference order.
    for (auto&& srcPref : srcPrefs)
    {
        for (auto&& comp : optimalFactories)
        {
            if (comp == srcPref)
            {
                return comp;
            }
        }
    }

    return ITensorHandleFactory::LegacyFactoryId;
}
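// Scoring sketch for the function above: each surviving factory accumulates one point
// per connection that would force a copy, the minimum score wins, and ties are broken
// by walking srcPrefs so the source backend's own preference order decides.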
EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
                                   ITensorHandleFactory::FactoryId srcFactoryId,
                                   const Layer& layer,
                                   const Layer& connectedLayer,
                                   TensorHandleFactoryRegistry& registry,
                                   bool importEnabled)
{
    auto toBackend = backends.find(connectedLayer.GetBackendId());
    if (toBackend == backends.end())
    {
        throw armnn::Exception("Backend id not found for the connected layer");
    }

    auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();

    // 1. Direct match: the destination already prefers the source factory.
    for (auto&& pref : dstPrefs)
    {
        if (pref == srcFactoryId)
        {
            return EdgeStrategy::DirectCompatibility;
        }
    }

    // 2. Zero-copy export/import, allowed only when neither side requires padding
    //    and fallback import is not disabled.
    ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
    if (srcFactory->GetExportFlags() != 0 && importEnabled)
    {
        for (auto&& pref : dstPrefs)
        {
            ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
            if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
            {
                auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
                auto dstCapability = dstFactory->GetCapabilities(&connectedLayer, &connectedLayer,
                                                                 CapabilityClass::PaddingRequired);
                auto srcFallback = srcFactory->GetCapabilities(&layer, &layer,
                                                               CapabilityClass::FallbackImportDisabled);
                auto dstFallback = dstFactory->GetCapabilities(&connectedLayer, &connectedLayer,
                                                               CapabilityClass::FallbackImportDisabled);
                if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
                {
                    return EdgeStrategy::ExportToTarget;
                }
            }
        }
    }

    // 3. Otherwise fall back to a mem-copy via map/unmap.
    for (auto&& pref : dstPrefs)
    {
        ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
        if (dstFactory && dstFactory->SupportsMapUnmap())
        {
            return EdgeStrategy::CopyToTarget;
        }
    }

    return EdgeStrategy::Undefined;
}
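// Strategy precedence implemented above: DirectCompatibility (destination already
// prefers the source factory), then ExportToTarget (zero-copy import/export), then
// CopyToTarget (map/unmap mem-copy); EdgeStrategy::Undefined is reported as an error
// by the caller.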
OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
                                              BackendsMap& backends,
                                              TensorHandleFactoryRegistry& registry,
                                              bool importEnabled,
                                              bool exportEnabled,
                                              Optional<std::vector<std::string>&> errMessages)
{
    OptimizationResult result;

    optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled, exportEnabled](Layer* layer)
    {
        // Backend assignment must already have succeeded for every layer.
        if (backends.find(layer->GetBackendId()) == backends.end())
        {
            throw armnn::Exception("Backend id not found for the layer");
        }

        // Check each output slot separately.
        for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
        {
            OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);

            // Pick the tensor-handle factory that results in the fewest copies.
            ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
            switch (layer->GetType())
            {
                case LayerType::Input:
                    slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
                    break;
                case LayerType::Output:
                    slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
                    break;
                default:
                    slotOption = CalculateSlotOption(backends, outputSlot, registry, exportEnabled);
                    break;
            }
            outputSlot.SetTensorHandleFactory(slotOption);

            // Now determine the "best" edge strategy for each connection given the slotOption.
            unsigned int connectionIdx = 0;
            for (auto&& connection : outputSlot.GetConnections())
            {
                const Layer& connectedLayer = connection->GetOwningLayer();

                EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
                                                              registry, importEnabled);

                if (strategy == EdgeStrategy::Undefined)
                {
                    result.m_Error = true;
                    if (errMessages)
                    {
                        errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
                                                         " between backends.");
                    }
                    return;
                }

                outputSlot.SetEdgeStrategy(connectionIdx, strategy);

                connectionIdx++;
            }
        }
    });

    return result;
}
bool CheckFastMathSupport(const std::vector<BackendId>& availablePreferredBackends,
                          const ModelOptions& modelOptions)
{
    bool hasFastMath = false;

    // Only the first available preferred backend is checked for FastMath support.
    auto firstBackend = availablePreferredBackends[0];
    if (!modelOptions.empty())
    {
        ParseOptions(modelOptions, firstBackend, [&](std::string name, const BackendOptions::Var& value)
        {
            if (name == "FastMathEnabled")
            {
                hasFastMath = value.AsBool();
                ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
                                 << ", has FastMath support.";
            }
        });
    }
    else
    {
        ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
                           << ", does not have FastMath support. "
                           << "Support for Turbo mode for TfLite post quantized FP16 models will be disabled.";
    }

    return hasFastMath;
}
// Scan for Constant layers that feed an FP16 -> FP32 conversion; only those constants
// are eligible to stay in FP16 form.
for (auto it = firstLayer; it != lastLayer; ++it)
{
    auto layer = PolymorphicDowncast<Layer*>(*it);
    if (layer->GetType() == LayerType::Constant)
    {
        auto& connectedLayer = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer();

        if (!(connectedLayer.GetInputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16 &&
              connectedLayer.GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32))
        {
            return false;
        }
    }
}
IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    return Optimize(inGraph, backendPreferences, deviceSpec, OptimizerOptionsOpaque(options), messages);
}

IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptionsOpaque& options,
                              Optional<std::vector<std::string>&> messages)
{
    // Enable profiling.
    auto profiler = inGraph.GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
    profiler->EnableProfiling(options.GetProfilingEnabled());

    if (backendPreferences.empty())
    {
        throw InvalidArgumentException("Invoked Optimize with no backends specified");
    }

    if (options.GetReduceFp32ToBf16())
    {
        throw InvalidArgumentException("BFloat16 optimization is currently ignored. In order to use Bf16 optimization "
                                       "Please use the FastMathEnabled backend option for CpuAcc or GpuAcc.");
    }

    std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);

    // Pass the import/export settings on to LoadNetwork by adding them to the optimized
    // network as a "Global" model option.
    armnn::BackendOptions importExport("Global",
                                       {{"ImportEnabled", options.GetImportEnabled()},
                                        {"ExportEnabled", options.GetExportEnabled()}});
    ModelOptions optimizedOptions(options.GetModelOptions());
    optimizedOptions.push_back(importExport);

    auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), optimizedOptions),
                                       &IOptimizedNetwork::Destroy);
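    // The "Global" options group added above is how the import/export flags travel with
    // the optimized network. A sketch of reading them back (hypothetical consumer code):
    //   ParseOptions(modelOptions, "Global", [&](std::string name, const BackendOptions::Var& v)
    //   {
    //       if (name == "ImportEnabled") { importEnabled = v.AsBool(); }
    //   });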
    using namespace optimizations;

    // Ensure at least one preferred backend is actually available on this platform.
    if (availablePreferredBackends.empty())
    {
        std::stringstream failureMsg;
        failureMsg << "None of the preferred backends " << backendPreferences
                   << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
        ReportError(failureMsg.str(), messages);
        throw InvalidArgumentException(failureMsg.str());
    }

    // FP16 turbo mode: requested explicitly, or engaged for TfLite post-quantized FP16
    // models when FastMath is available.
    bool reduceFp32ToFp16 = false;
    if (options.GetReduceFp32ToFp16())
    {
        reduceFp32ToFp16 = true;
    }

    // A mapped GPU backend may only be the primary preference, never a fallback.
    for (auto backend : mappedGpuBackends)
    {
        if (std::count(backendPreferences.begin(), backendPreferences.end(), backend)
            && (backendPreferences[0] != backend))
        {
            std::stringstream failureMsg;
            failureMsg << backend << " backend cannot be specified as fallback.";
            ReportError(failureMsg.str(), messages);
            throw InvalidArgumentException(failureMsg.str());
        }
    }

    std::vector<BackendId> amendedBackendPreferences = backendPreferences;
    for (auto backend : mappedGpuBackends)
    {
        if (!std::count(amendedBackendPreferences.begin(), amendedBackendPreferences.end(), backend))
        {
            // Add the mapped GPU backend to the front of the preference list.
            amendedBackendPreferences.insert(amendedBackendPreferences.begin(), backend);
        }
    }

    // If FP32-to-FP16 optimization is on and the backends can run FP16, convert the network.
    if (reduceFp32ToFp16 && hasFp16)
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
        Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
    }

    // Assign an available backend to each layer.
    if (assignBackendsResult.m_Error)
    {
        // Failed to assign a backend to each layer.
        throw InvalidArgumentException("Failed to assign a backend to each layer");
    }

    // Run the backend-specific optimizations.
    if (backendOptimizationResult.m_Error)
    {
        // Failed to apply the backend-specific optimizations.
        throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
    }

    if (options.GetDebugToFileEnabled())
    {
#if !defined(ARMNN_DISABLE_FILESYSTEM)
        std::string result = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
        ARMNN_LOG(info) << "Intermediate tensors will be written to: " << result;
#endif
    }

    // Calculate the compatibility strategies for tensor handles.
    OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                                   backends,
                                                                   tensorHandleFactoryRegistry,
                                                                   options.GetImportEnabled(),
                                                                   options.GetExportEnabled(),
                                                                   messages);
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptions& options,
                              Optional<std::vector<std::string>&> messages)
{
    return Optimize(inNetwork, backendPreferences, deviceSpec, OptimizerOptionsOpaque(options), messages);
}

IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                              const std::vector<BackendId>& backendPreferences,
                              const IDeviceSpec& deviceSpec,
                              const OptimizerOptionsOpaque& options,
                              Optional<std::vector<std::string>&> messages)
{
    return Optimize(inNetwork.pNetworkImpl->GetGraph(),
                    backendPreferences,
                    deviceSpec,
                    options,
                    messages);
}
bool NetworkImpl::GetShapeInferenceMethod()
{
    bool shapeInferenceMethod = false;

    ParseOptions(m_NetworkOptions, "ShapeInferenceMethod", [&](std::string name, const BackendOptions::Var& value)
    {
        if (name == "InferAndValidate")
        {
            shapeInferenceMethod |= value.AsBool();
        }
    });
    return shapeInferenceMethod;
}
bool NetworkImpl::GetAllowExpandedDims()
{
    bool allowExpandedDims = false;

    ParseOptions(m_NetworkOptions, "AllowExpandedDims", [&](std::string name, const BackendOptions::Var& value)
    {
        if (name == "AllowExpandedDims")
        {
            allowExpandedDims |= value.AsBool();
        }
    });
    return allowExpandedDims;
}
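// Example (hypothetical caller code): both options are supplied as NetworkOptions at
// network-creation time and parsed by the two getters above:
//   armnn::NetworkOptions netOpts = {
//       armnn::BackendOptions("ShapeInferenceMethod", {{"InferAndValidate", true}}),
//       armnn::BackendOptions("AllowExpandedDims", {{"AllowExpandedDims", true}})};
//   armnn::INetworkPtr net = armnn::INetwork::Create(netOpts);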
NetworkImpl::NetworkImpl(const NetworkOptions& networkOptions)
    : m_NetworkOptions(networkOptions),
      m_Graph(std::make_unique<Graph>(GetShapeInferenceMethod(), GetAllowExpandedDims()))
{}
// NetworkImpl::Add*Layer bodies: each creates the corresponding layer in the Graph.
return m_Graph->AddLayer<InputLayer>(id, name);
return m_Graph->AddLayer<CastLayer>(name);
return m_Graph->AddLayer<ComparisonLayer>(comparisonDescriptor, name);
return m_Graph->AddLayer<FillLayer>(fillDescriptor, name);
return m_Graph->AddLayer<FusedLayer>(fusedDescriptor, name);
return m_Graph->AddLayer<ConcatLayer>(concatDescriptor, name);

// From AddDetectionPostProcessLayer: the anchors ConstTensor is copied into the layer.
layer->m_Anchors = std::make_shared<ScopedTensorHandle>(anchors);

return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name);
return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name);
return m_Graph->AddLayer<Pooling3dLayer>(pooling3dDescriptor, name);
return m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name);
return m_Graph->AddLayer<ArgMinMaxLayer>(argMinMaxDescriptor, name);
return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name);
return m_Graph->AddLayer<SliceLayer>(sliceDescriptor, name);
return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name);
return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name);
// From AddBatchNormalizationLayer: the statistics tensors are copied into the layer.
layer->m_Mean = std::make_shared<ScopedTensorHandle>(mean);
layer->m_Variance = std::make_shared<ScopedTensorHandle>(variance);
layer->m_Beta = std::make_shared<ScopedTensorHandle>(beta);
layer->m_Gamma = std::make_shared<ScopedTensorHandle>(gamma);
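// Example (hypothetical caller code): statistics are supplied as ConstTensors whose
// memory is copied here, so the caller's buffers need not outlive the call:
//   armnn::TensorInfo info({channels}, armnn::DataType::Float32, 0.0f, 0, true);
//   armnn::ConstTensor mean(info, meanValues.data());
//   armnn::ConstTensor variance(info, varValues.data());
//   armnn::ConstTensor beta(info, betaValues.data());
//   armnn::ConstTensor gamma(info, gammaValues.data());
//   network->AddBatchNormalizationLayer(desc, mean, variance, beta, gamma, "bn");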
return m_Graph->AddLayer<RankLayer>(name);
return m_Graph->AddLayer<ReduceLayer>(reduceDescriptor, name);
return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);

// From AddConstantLayer: the constant's payload becomes the layer's owned output.
layer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(input);

return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name);
const auto layer = m_Graph->AddLayer<LstmLayer>(descriptor, name);

// Basic parameters (always required).
layer->m_BasicParameters.m_InputToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
layer->m_BasicParameters.m_InputToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
layer->m_BasicParameters.m_InputToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
layer->m_BasicParameters.m_ForgetGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
layer->m_BasicParameters.m_CellBias = std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
layer->m_BasicParameters.m_OutputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));

// CIFG parameters become mandatory when CIFG is disabled.
if (!descriptor.m_CifgEnabled)
{
    if (params.m_InputToInputWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Input To Input Weights cannot be NULL "
                                       "when CIFG is disabled.");
    }
    if (params.m_RecurrentToInputWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Recurrent To Input Weights cannot be NULL "
                                       "when CIFG is disabled.");
    }
    if (params.m_InputGateBias == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Input Gate Bias cannot be NULL "
                                       "when CIFG is disabled.");
    }
    layer->m_CifgParameters.m_InputToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
    layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
    layer->m_CifgParameters.m_InputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
}

// Projection parameters.
if (descriptor.m_ProjectionEnabled)
{
    if (params.m_ProjectionWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Projection Weights cannot be NULL "
                                       "when projection is enabled.");
    }
    layer->m_ProjectionParameters.m_ProjectionWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
    if (params.m_ProjectionBias != nullptr)
    {
        layer->m_ProjectionParameters.m_ProjectionBias = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
    }
}

// Peephole parameters.
if (descriptor.m_PeepholeEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_CellToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Cell To Input Weights cannot be NULL "
                                           "when Peephole is enabled and CIFG disabled.");
        }
        layer->m_PeepholeParameters.m_CellToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
    }
    if (params.m_CellToForgetWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Cell To Forget Weights cannot be NULL "
                                       "when Peephole is enabled.");
    }
    if (params.m_CellToOutputWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Cell To Output Weights cannot be NULL "
                                       "when Peephole is enabled.");
    }
    layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
    layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
}

// Layer-normalization parameters.
if (descriptor.m_LayerNormEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddLstmLayer: Input Layer Norm Weights cannot be NULL "
                                           "when layer normalization is enabled and CIFG disabled.");
        }
        layer->m_LayerNormParameters.m_InputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
    }
    if (params.m_ForgetLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Forget Layer Norm Weights cannot be NULL "
                                       "when layer normalization is enabled.");
    }
    if (params.m_CellLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Cell Layer Norm Weights cannot be NULL "
                                       "when layer normalization is enabled.");
    }
    if (params.m_OutputLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddLstmLayer: Output Layer Norm Weights cannot be NULL "
                                       "when layer normalization is enabled.");
    }
    layer->m_LayerNormParameters.m_ForgetLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
    layer->m_LayerNormParameters.m_CellLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
    layer->m_LayerNormParameters.m_OutputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
}

return layer;
return m_Graph->AddLayer<MeanLayer>(meanDescriptor, name);
return m_Graph->AddLayer<PadLayer>(padDescriptor, name);
return m_Graph->AddLayer<GatherLayer>(gatherDescriptor, name);

// From AddTransposeConvolution2dLayer: weights are mandatory, the bias only when enabled.
layer->m_Weight = std::make_shared<ScopedTensorHandle>(weights);
if (descriptor.m_BiasEnabled)
{
    layer->m_Bias = std::make_shared<ScopedTensorHandle>(biases.value());
}

return m_Graph->AddLayer<TransposeLayer>(transposeDescriptor, name);
return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
// InputToX weights.
layer->m_QuantizedLstmParameters.m_InputToForgetWeights = std::make_shared<ScopedTensorHandle>(params.GetInputToForgetWeights());
layer->m_QuantizedLstmParameters.m_InputToCellWeights = std::make_shared<ScopedTensorHandle>(params.GetInputToCellWeights());
layer->m_QuantizedLstmParameters.m_InputToOutputWeights = std::make_shared<ScopedTensorHandle>(params.GetInputToOutputWeights());

// RecurrentToX weights.
layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights = std::make_shared<ScopedTensorHandle>(params.GetRecurrentToInputWeights());
layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights = std::make_shared<ScopedTensorHandle>(params.GetRecurrentToForgetWeights());
layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights = std::make_shared<ScopedTensorHandle>(params.GetRecurrentToCellWeights());
layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights = std::make_shared<ScopedTensorHandle>(params.GetRecurrentToOutputWeights());

// Bias tensors.
layer->m_QuantizedLstmParameters.m_InputGateBias = std::make_shared<ScopedTensorHandle>(params.GetInputGateBias());
layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_shared<ScopedTensorHandle>(params.GetForgetGateBias());
layer->m_QuantizedLstmParameters.m_CellBias = std::make_shared<ScopedTensorHandle>(params.GetCellBias());
layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_shared<ScopedTensorHandle>(params.GetOutputGateBias());
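// Note the API difference: QuantizedLstmInputParams exposes Get*() accessors returning
// references (used above), whereas LstmInputParams holds ConstTensor pointers that must
// be null-checked before dereferencing (as in AddLstmLayer).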
const auto layer = m_Graph->AddLayer<QLstmLayer>(descriptor, name);

// QLstm basic parameters.
layer->m_BasicParameters.m_InputToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
layer->m_BasicParameters.m_InputToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
layer->m_BasicParameters.m_InputToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
layer->m_BasicParameters.m_ForgetGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
layer->m_BasicParameters.m_CellBias = std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
layer->m_BasicParameters.m_OutputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));

// CIFG parameters become mandatory when CIFG is disabled.
if (!descriptor.m_CifgEnabled)
{
    if (params.m_RecurrentToInputWeights == nullptr)
    {
        throw InvalidArgumentException("AddQLstmLayer: Recurrent To Input Weights cannot be NULL");
    }
    layer->m_CifgParameters.m_InputToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
    layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
    layer->m_CifgParameters.m_InputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
}

// Projection parameters.
if (descriptor.m_ProjectionEnabled)
{
    layer->m_ProjectionParameters.m_ProjectionWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
    // Projection bias is optional even when projection is enabled.
    if (params.m_ProjectionBias != nullptr)
    {
        layer->m_ProjectionParameters.m_ProjectionBias = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
    }
}

// Peephole parameters.
if (descriptor.m_PeepholeEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        layer->m_PeepholeParameters.m_CellToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
    }
    layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
    layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
}

// Layer-normalization parameters.
if (descriptor.m_LayerNormEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        layer->m_LayerNormParameters.m_InputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
    }
    layer->m_LayerNormParameters.m_ForgetLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
    layer->m_LayerNormParameters.m_CellLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
    layer->m_LayerNormParameters.m_OutputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
}

return layer;
const auto layer = m_Graph->AddLayer<UnidirectionalSequenceLstmLayer>(descriptor, name);

// Basic parameters.
layer->m_BasicParameters.m_InputToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToForgetWeights));
layer->m_BasicParameters.m_InputToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToCellWeights));
layer->m_BasicParameters.m_InputToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToOutputWeights));
layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToForgetWeights));
layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToCellWeights));
layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToOutputWeights));
layer->m_BasicParameters.m_ForgetGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetGateBias));
layer->m_BasicParameters.m_CellBias = std::make_shared<ScopedTensorHandle>(*(params.m_CellBias));
layer->m_BasicParameters.m_OutputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_OutputGateBias));

// CIFG parameters become mandatory when CIFG is disabled.
if (!descriptor.m_CifgEnabled)
{
    if (params.m_InputToInputWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input To Input Weights cannot be NULL "
                                       "when CIFG is disabled.");
    }
    if (params.m_RecurrentToInputWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Recurrent To Input Weights cannot be NULL "
                                       "when CIFG is disabled.");
    }
    if (params.m_InputGateBias == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input Gate Bias cannot be NULL "
                                       "when CIFG is disabled.");
    }
    layer->m_CifgParameters.m_InputToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputToInputWeights));
    layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_RecurrentToInputWeights));
    layer->m_CifgParameters.m_InputGateBias = std::make_shared<ScopedTensorHandle>(*(params.m_InputGateBias));
}

// Projection parameters.
if (descriptor.m_ProjectionEnabled)
{
    if (params.m_ProjectionWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Projection Weights cannot be NULL "
                                       "when projection is enabled.");
    }
    layer->m_ProjectionParameters.m_ProjectionWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionWeights));
    if (params.m_ProjectionBias != nullptr)
    {
        layer->m_ProjectionParameters.m_ProjectionBias = std::make_shared<ScopedTensorHandle>(*(params.m_ProjectionBias));
    }
}

// Peephole parameters.
if (descriptor.m_PeepholeEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_CellToInputWeights == nullptr)
        {
            throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Input Weights "
                                           "cannot be NULL when Peephole is enabled and CIFG disabled.");
        }
        layer->m_PeepholeParameters.m_CellToInputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToInputWeights));
    }
    if (params.m_CellToForgetWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Forget Weights cannot be NULL "
                                       "when Peephole is enabled.");
    }
    if (params.m_CellToOutputWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell To Output Weights cannot be NULL "
                                       "when Peephole is enabled.");
    }
    layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToForgetWeights));
    layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellToOutputWeights));
}

// Layer-normalization parameters.
if (descriptor.m_LayerNormEnabled)
{
    if (!descriptor.m_CifgEnabled)
    {
        if (params.m_InputLayerNormWeights == nullptr)
        {
            throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Input Layer Norm Weights "
                                           "cannot be NULL when layer normalization is enabled and CIFG disabled.");
        }
        layer->m_LayerNormParameters.m_InputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_InputLayerNormWeights));
    }
    if (params.m_ForgetLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Forget Layer Norm Weights "
                                       "cannot be NULL when layer normalization is enabled.");
    }
    if (params.m_CellLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Cell Layer Norm Weights "
                                       "cannot be NULL when layer normalization is enabled.");
    }
    if (params.m_OutputLayerNormWeights == nullptr)
    {
        throw InvalidArgumentException("AddUnidirectionalSequenceLstmLayer: Output Layer Norm Weights "
                                       "cannot be NULL when layer normalization is enabled.");
    }
    layer->m_LayerNormParameters.m_ForgetLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_ForgetLayerNormWeights));
    layer->m_LayerNormParameters.m_CellLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_CellLayerNormWeights));
    layer->m_LayerNormParameters.m_OutputLayerNormWeights = std::make_shared<ScopedTensorHandle>(*(params.m_OutputLayerNormWeights));
}

return layer;
return m_Graph->AddLayer<TileLayer>(desc, name);

// From AddPrecompiledLayer: the layer is created first, then receives the compiled blob.
layer = m_Graph->AddLayer<PreCompiledLayer>(preCompiledDescriptor, "pre-compiled");

void NetworkImpl::ExecuteStrategy(IStrategy& strategy) const
{
    for (auto layer : GetGraph())
    {
        layer->ExecuteStrategy(strategy);
    }
}
OptimizedNetworkImpl::OptimizedNetworkImpl(const OptimizedNetworkImpl& other, const ModelOptions& modelOptions)
    : m_Graph(new Graph(*other.m_Graph.get()))
    , m_Guid(arm::pipe::IProfilingService::GetNextGuid())
    , m_ModelOptions(modelOptions)
{
}

OptimizedNetworkImpl::OptimizedNetworkImpl(std::unique_ptr<Graph> graph)
    : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid())
{
}

OptimizedNetworkImpl::OptimizedNetworkImpl(std::unique_ptr<Graph> graph, const ModelOptions& modelOptions)
    : m_Graph(std::move(graph)), m_Guid(arm::pipe::IProfilingService::GetNextGuid()), m_ModelOptions(modelOptions)
{
}

void OptimizedNetworkImpl::ExecuteStrategy(IStrategy& strategy) const
{
    for (auto layer : GetGraph())
    {
        layer->ExecuteStrategy(strategy);
    }
}
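// ExecuteStrategy implements the visitor pattern: the IStrategy is invoked once per
// layer. A minimal sketch (assuming the public IStrategy interface; the counter class
// is illustrative only):
//   struct LayerCounter : armnn::IStrategy
//   {
//       size_t m_Count = 0;
//       void ExecuteStrategy(const armnn::IConnectableLayer*, const armnn::BaseDescriptor&,
//                            const std::vector<armnn::ConstTensor>&, const char*,
//                            const armnn::LayerBindingId = 0) override { ++m_Count; }
//   };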