// ... (core Arm NN includes omitted in this excerpt) ...
#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <cxxopts/cxxopts.hpp>
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <type_traits>

// Returns true when every requested backend ID is registered with the backend
// registry; unknown IDs are appended to the optional invalidBackendIds string.
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}
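// Illustrative usage sketch (not part of the original header): callers can pass
// an optional string to collect the names of any unknown backends for an error
// message. The variable names below are hypothetical.
//
//     std::vector<armnn::BackendId> requested = { "CpuAcc", "GpuAcc" };
//     std::string invalid;
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalid)))
//     {
//         ARMNN_LOG(error) << "Invalid backend(s): " << invalid;
//     }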
// ... (namespace InferenceModelInternal, including the Params struct and the
//      QuantizationParams alias used below, is omitted in this excerpt) ...

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        const size_t numInputShapes   = params.m_InputShapes.size();
        const size_t numInputBindings = params.m_InputBindings.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(fmt::format(
                "Not every input has its tensor shape specified: expected={0}, got={1}",
                numInputBindings, numInputShapes));
        }
        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;

        // Handle text and binary input differently by calling the corresponding parser function.
        armnn::INetworkPtr network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
            parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }
        return network;
    }
};
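// Illustrative usage sketch (not part of the original header): a caller fills in
// InferenceModelInternal::Params and lets the factory resolve bindings by layer
// name. The file name and layer names below are hypothetical.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.prototxt";
//     params.m_IsModelBinary  = false;
//     params.m_InputBindings  = { "data" };
//     params.m_OutputBindings = { "prob" };
//
//     std::vector<armnn::BindingPointInfo> inputBindings, outputBindings;
//     armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, inputBindings, outputBindings);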
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1}",
                                                               params.m_ModelPath,
                                                               errorCode.message()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif
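// Illustrative usage sketch (not part of the original header): the TF Lite
// parser can also be driven directly. The option fields mirror the ones set
// above; the file name is hypothetical.
//
//     armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions opts;
//     opts.m_InferAndValidate = true; // infer missing output shapes, then validate them
//     auto tfLiteParser = armnnTfLiteParser::ITfLiteParser::Create(opts);
//     armnn::INetworkPtr net = tfLiteParser->CreateNetworkFromBinaryFile("model.tflite");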
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer
        = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
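    // Illustrative usage sketch (not part of the original header): because
    // TContainer is a variant over the supported element types, generic code
    // visits it instead of switching on the held type by hand.
    //
    //     TContainer data = std::vector<float>(16, 0.0f);
    //     mapbox::util::apply_visitor([](auto&& v) { std::cout << v.size() << " elements\n"; }, data);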
    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };
        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path where to load any available dynamic backend from. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value<std::string>(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.",
                 cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        // Use the runtime supplied by the caller, otherwise create one, honouring
        // the dynamic-backends path and the GPU profiling flag.
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling  = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }
        // Reject unknown backends up front, collecting their names for the error message.
        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        const auto parsing_start_time = armnn::GetTimeNow();
        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
            options.m_Debug            = params.m_PrintIntermediateLayers;

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";

            // For asynchronous execution, pre-create one working-memory handle per
            // thread-pool thread so inferences do not contend for intermediate tensors.
            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }
                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }
    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        // Validate that every output container is large enough for its tensor.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize,
                                    actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Time the inference, in milliseconds.
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));
        const auto duration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        return duration;
    }
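    // Illustrative usage sketch (not part of the original header): a minimal
    // synchronous inference. Sizing the containers with GetInputSize() and
    // GetOutputSize() keeps the checks above happy; 'params' is assumed to be
    // set up as shown earlier.
    //
    //     InferenceModel<IParser, float> model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
    //     std::vector<typename InferenceModel<IParser, float>::TContainer>
    //         inputs  = { std::vector<float>(model.GetInputSize(),  0.0f) },
    //         outputs = { std::vector<float>(model.GetOutputSize(), 0.0f) };
    //     auto inferenceTime = model.Run(inputs, outputs);
    //     ARMNN_LOG(info) << "Inference took " << inferenceTime.count() << " ms";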
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<TContainer>& inputContainers,
        std::vector<TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        // Validate that every output container is large enough for its tensor.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize,
                                    actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));
        const auto duration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        return std::make_tuple(inferenceID, duration);
    }
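    // Illustrative usage sketch (not part of the original header): each caller
    // thread owns its intermediate tensors via a working-memory handle, so
    // several inferences can run concurrently against the same loaded network.
    //
    //     auto handle = model.CreateWorkingMemHandle();
    //     auto [inferenceId, time] = model.RunAsync(*handle, inputs, outputs, /*inferenceID=*/0);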
    void RunAsync(const std::vector<TContainer>& inputContainers,
                  std::vector<TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        // Validate that every output container is large enough for its tensor.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize,
                                    actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Hand the inference to the thread pool; 'cb' is notified on completion.
        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // If profiling is enabled print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }
    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }
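    // Illustrative usage sketch (not part of the original header):
    // QuantizationParams is a (scale, offset) pair, so a float maps into the
    // quantized domain as round(value / scale) + offset. 'value' below is
    // assumed to be the float to quantize.
    //
    //     auto [scale, offset] = model.GetInputQuantizationParams();
    //     auto quantized = static_cast<uint8_t>(std::lround(value / scale) + offset);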
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    std::string m_DynamicBackendsPath;
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};