#if defined(ARMNN_SERIALIZER)
#include <armnnDeserializer/IDeserializer.hpp>
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <cxxopts/cxxopts.hpp>
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <fstream>
#include <iterator>
#include <map>
#include <string>
#include <type_traits>
#include <vector>

// Returns true if every requested backend id is registered with the runtime.
// Unknown ids are appended, comma-separated, to invalidBackendIds when provided.
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();
    bool allValid = true;

    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}
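As a hedged usage sketch (the backend list and the "MadeUpBackend" id are invented for the example), callers typically collect the rejected ids into one diagnostic string:

std::vector<armnn::BackendId> requested = { "CpuRef", "MadeUpBackend" };
std::string invalid;
if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalid)))
{
    // On any build this reports at least "MadeUpBackend".
    ARMNN_LOG(error) << "Unknown backends: " << invalid;
}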
template <typename IParser>
struct CreateNetworkImpl
{
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        // Collect any caller-supplied input shapes, keyed by input binding name.
        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }
            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        // Handle text and binary model files with the matching parser entry point.
        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
            parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }
        return network;
    }
};
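As a hedged usage sketch (MyParser is a placeholder for any parser type exposing this IParser interface; the path and layer names are invented):

InferenceModelInternal::Params params;
params.m_ModelPath      = "model.prototxt";                           // invented path
params.m_IsModelBinary  = false;
params.m_InputBindings  = { "data" };                                 // invented input layer
params.m_InputShapes    = { armnn::TensorShape({ 1, 3, 224, 224 }) };
params.m_OutputBindings = { "prob" };                                 // invented output layer

std::vector<armnn::BindingPointInfo> inputBindings;
std::vector<armnn::BindingPointInfo> outputBindings;
armnn::INetworkPtr network = CreateNetworkImpl<MyParser>::Create(params, inputBindings, outputBindings);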
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        // Serialized (.armnn) models are always binary; fail early if the file is missing.
        std::error_code errorCode;
        fs::path pathToFile(params.m_ModelPath);
        if (!fs::exists(pathToFile, errorCode))
        {
            throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                           params.m_ModelPath,
                                                           errorCode.message(),
                                                           CHECK_LOCATION().AsString()));
        }
        std::ifstream file(params.m_ModelPath, std::ios::binary);
        armnn::INetworkPtr network = parser->CreateNetworkFromBinary(file);

        const unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }
        return network;
    }
};
#endif // ARMNN_SERIALIZER
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk; .tflite models are always binary.
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }
        return network;
    }
};
#endif // ARMNN_TF_LITE_PARSER
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        // ONNX models may be stored as binary protobuf or as text.
        armnn::INetworkPtr network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
            parser->CreateNetworkFromTextFile(modelPath.c_str()));

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }
        return network;
    }
};
#endif // ARMNN_ONNX_PARSER
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer
        = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };
    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path where to load any available dynamic backend from. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value<std::string>(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.",
                 cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        // ... (adopt the supplied runtime, or create one via armnn::IRuntime::Create)

        // Reject unknown backend ids up front with a readable error message.
        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        // Parse the model and time it.
        const auto parsing_start_time = armnn::GetTimeNow();
        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";

        // Optimize the network for the requested backends and time it.
        armnn::OptimizerOptions options;
        options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
        options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
        // ... (remaining optimizer and model options elided)

        const auto optimization_start_time = armnn::GetTimeNow();
        armnn::IOptimizedNetworkPtr optNet =
            armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

        ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

        // Optionally dump the optimized graph next to the model as a .dot file.
        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        // Load the optimized network into the runtime and time it.
        const auto loading_start_time = armnn::GetTimeNow();
        armnn::INetworkProperties networkProperties; // ... (import/export settings elided)
        std::string errorMessage;
        armnn::Status ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

        ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        // Check that every output container can hold the data its binding will produce.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Run the inference and time it.
        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));
        const auto duration = armnn::GetTimeDuration(start_time);

        // Print the profiling info, if requested.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        return duration;
    }
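As a hedged usage sketch (the `model` instance and the zero-filled input are invented for the example; TContainer is the variant typedef declared at the top of the class):

// `model` is an already-constructed InferenceModel instance (hypothetical).
std::vector<TContainer> inputs  = { std::vector<float>(model.GetInputSize(), 0.0f) };
std::vector<TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };

auto elapsed = model.Run(inputs, outputs);   // throws armnn::Exception on failure
ARMNN_LOG(info) << "Inference took " << elapsed.count() << " ms";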
    std::tuple<armnn::profiling::ProfilingGuid, std::chrono::duration<double, std::milli>> RunAsync(
            armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        // Validate the output containers exactly as in Run() above.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Run the inference through the asynchronous Execute path and time it.
        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));
        auto inferenceID = workingMemHandleRef.GetInferenceId();
        const auto duration = armnn::GetTimeDuration(start_time);

        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        return std::make_tuple(inferenceID, duration);
    }
    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }
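Since QuantizationParams is a (scale, offset) pair, a raw quantized output maps back to a real value via the usual affine rule real = scale * (quantized - offset); a hedged sketch with an invented raw value:

auto qParams = model.GetQuantizationParams(0);   // `model` as in the Run() sketch above
float   scale  = qParams.first;
int32_t offset = qParams.second;

unsigned char raw  = 131;                        // invented raw uint8 output value
float         real = scale * (static_cast<int32_t>(raw) - offset);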
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }
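RunAsync() pairs one of these handles with each in-flight inference; a hedged sketch reusing `model`, `inputs` and `outputs` from the Run() example above:

auto workingMemHandle = model.CreateWorkingMemHandle();
auto [inferenceId, duration] = model.RunAsync(*workingMemHandle, inputs, outputs);
// inferenceId is the ProfilingGuid reported by the working memory handle;
// duration is the wall-clock time of the Execute() call in milliseconds.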
private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    std::string m_DynamicBackendsPath;

    template <typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template <typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};