// One vector of input/output containers per iteration.
std::vector<std::vector<TContainer>> inputs;
std::vector<std::vector<TContainer>> outputs;
// Quantization parameters are only needed when the input is quantized on the fly.
armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
    armnn::MakeOptional<QuantizationParams>(model.GetInputQuantizationParams()) :
    armnn::EmptyOptional();
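// -----------------------------------------------------------------------------
// Aside (added for illustration, not part of the original listing): the
// armnn::Optional / MakeOptional / EmptyOptional trio behaves much like
// std::optional. A minimal standalone sketch of the same idiom, using
// std::optional and a made-up file name:
// -----------------------------------------------------------------------------
#include <optional>
#include <string>

int main()
{
    bool generateData = false; // stand-in for params.m_GenerateTensorData
    std::optional<std::string> dataFile = generateData
        ? std::optional<std::string>{}                   // like armnn::EmptyOptional()
        : std::make_optional<std::string>("input0.bin"); // like armnn::MakeOptional
    if (dataFile) { /* read tensor data from *dataFile */ }
}
// -----------------------------------------------------------------------------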
ARMNN_LOG(info) << "Given network has " << numInputs
                << " input/s. One input-tensor-data file is required "
                << "for each input. The user provided " << params.m_InputTensorDataFilePaths.size()
                << " input-tensor-data file/s which will be used to fill the input/s.\n";
for (unsigned int j = 0; j < params.m_Iterations; ++j)
{
    std::vector<TContainer> inputDataContainers;
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        size_t inputFileIndex = j * numInputs + i;
        armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
            armnn::EmptyOptional() :
            armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths.at(inputFileIndex));
        unsigned int numElements = model.GetInputSize(i);
        TContainer tensorData;
        PopulateTensorWithData(tensorData, numElements, params.m_InputTypes[i], qParams, dataFile);
        inputDataContainers.push_back(tensorData);
    }
    inputs.push_back(inputDataContainers);
}
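// -----------------------------------------------------------------------------
// Aside (added for illustration, not part of the original listing): when fewer
// input files are supplied than iterations * numInputs require, the file index
// can wrap around with a modulo; that is the "reused recursively" behaviour
// the log message below describes. A standalone sketch with made-up names:
// -----------------------------------------------------------------------------
#include <cstddef>
#include <string>
#include <vector>

int main()
{
    // Two hypothetical input files for a 2-input network, run for 3 iterations.
    std::vector<std::string> files = {"in0.bin", "in1.bin"};
    const size_t numInputs = 2, iterations = 3;
    for (size_t j = 0; j < iterations; ++j)
    {
        for (size_t i = 0; i < numInputs; ++i)
        {
            // Wrap around so later iterations reuse in0.bin / in1.bin again.
            size_t idx = (j * numInputs + i) % files.size();
            (void)files[idx];
        }
    }
}
// -----------------------------------------------------------------------------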
std::vector<TContainer> outputDataContainers;
for (unsigned int i = 0; i < numOutputs; ++i)
{
    // Pre-size one container per output, typed to match the requested output type.
    if (params.m_OutputTypes[i].compare("float") == 0)
        outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
    else if (params.m_OutputTypes[i].compare("int") == 0)
        outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
    else if (params.m_OutputTypes[i].compare("qasymmu8") == 0)
        outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
    else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
        outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
}
outputs.push_back(outputDataContainers);
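// -----------------------------------------------------------------------------
// Aside (added for illustration, not part of the original listing): TContainer
// is a mapbox::util::variant over the supported element-type vectors, visited
// later via mapbox::util::apply_visitor. A standalone sketch of the same
// pattern using std::variant / std::visit as stand-ins:
// -----------------------------------------------------------------------------
#include <cstdint>
#include <iostream>
#include <variant>
#include <vector>

// Stand-in for TContainer: one vector per supported tensor element type.
using Container = std::variant<std::vector<float>,
                               std::vector<int>,
                               std::vector<uint8_t>,
                               std::vector<int8_t>>;

// A printer visitor: std::visit dispatches to whichever alternative is held,
// just as apply_visitor(printer, outputs[x][i]) does in the listing.
struct Printer
{
    template <typename T>
    void operator()(const std::vector<T>& data) const
    {
        std::cout << data.size() << " elements\n";
    }
};

int main()
{
    std::vector<Container> outputs;
    outputs.push_back(std::vector<float>(10));  // a float output
    outputs.push_back(std::vector<uint8_t>(4)); // a quantized output
    for (const auto& c : outputs)
    {
        std::visit(Printer{}, c);
    }
}
// -----------------------------------------------------------------------------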
std::stringstream msg;
msg << "Network will be executed " << params.m_Iterations;
if (params.m_Concurrent)
{
    msg << " times in an asynchronous manner. ";
}
else
{
    msg << " times successively. ";
}
msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
       "cover each execution.";
ARMNN_LOG(info) << msg.str();
// Synchronous execution: run the network once per iteration.
for (size_t x = 0; x < params.m_Iterations; x++)
{
    // model.Run returns the inference time elapsed, in milliseconds.
    auto inference_duration = model.Run(inputs[x], outputs[x]);

    if (params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    // Print the output tensors, writing them to file if requested.
    const auto& infosOut = model.GetOutputBindingInfos();
    for (size_t i = 0; i < numOutputs; i++)
    {
        // Output files are reused across iterations if fewer were supplied
        // than numOutputs * iterations.
        size_t outputFileIndex = x * numOutputs + i;
        ARMNN_LOG(info) << "Writing output " << i << " named: '"
                        << inferenceModelParams.m_OutputBindings[i]
                        << "' of iteration: " << x + 1 << " to file: '"
                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
        // ... construct the per-output TensorPrinter from infosOut[i] (elided) ...
        mapbox::util::apply_visitor(printer, outputs[x][i]);
    }

    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                    << std::fixed << inference_duration.count() << " ms\n";

    // Only check against the threshold if the user supplied one.
    if (params.m_ThresholdTime != 0.0)
    {
        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                        << std::fixed << params.m_ThresholdTime << " ms";
        auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                        << std::fixed << thresholdMinusInference << " ms" << "\n";

        if (thresholdMinusInference < 0)
        {
            std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
            ARMNN_LOG(fatal) << errorMessage;
        }
    }
}
// Asynchronous execution using the Arm NN thread pool.
ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
armnn::AsyncCallbackManager callbackManager;
std::unordered_map<armnn::InferenceId, std::vector<TContainer>&> inferenceOutputMap;

// Track the earliest start and latest end across all inferences so the
// overall wall-clock time can be derived afterwards.
std::chrono::high_resolution_clock::time_point earliestStartTime;
std::chrono::high_resolution_clock::time_point latestEndTime =
    std::chrono::high_resolution_clock::now();

// Schedule every iteration on the thread pool, one callback per inference.
for (size_t i = 0; i < params.m_Iterations; ++i)
{
    std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
    inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
    model.RunAsync(inputs[i], outputs[i], cb);
}

// Collect the results in completion order.
for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
{
    auto cb = callbackManager.GetNotifiedCallback();

    auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
    auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
    auto inferenceDuration = endTime - startTime;

    if (latestEndTime < cb->GetEndTime())
    {
        latestEndTime = cb->GetEndTime();
    }

    // A default-constructed time_point has a zero epoch offset, which marks
    // earliestStartTime as not yet set.
    if (earliestStartTime.time_since_epoch().count() == 0)
    {
        earliestStartTime = cb->GetStartTime();
    }
    else if (earliestStartTime > cb->GetStartTime())
    {
        earliestStartTime = cb->GetStartTime();
    }

    if (params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    // Print the output tensors, writing them to file if requested.
    const auto& infosOut = model.GetOutputBindingInfos();
    for (size_t i = 0; i < numOutputs; i++)
    {
        size_t outputFileIndex = iteration * numOutputs + i;
        ARMNN_LOG(info) << "Writing output " << i << " named: '"
                        << inferenceModelParams.m_OutputBindings[i]
                        << "' of iteration: " << iteration + 1 << " to file: '"
                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
        // ... construct the per-output TensorPrinter from infosOut[i] (elided) ...
        mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
    }

    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                    << std::fixed << inferenceDuration.count() << " ms\n";

    // Only check against the threshold if the user supplied one.
    if (params.m_ThresholdTime != 0.0)
    {
        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                        << std::fixed << params.m_ThresholdTime << " ms";
        auto thresholdMinusInference =
            params.m_ThresholdTime - duration<double, std::milli>(inferenceDuration).count();
        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                        << std::fixed << thresholdMinusInference << " ms" << "\n";

        if (thresholdMinusInference < 0)
        {
            ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
        }
    }
}

// Overall wall-clock time: latest end minus earliest start.
auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
auto totalInferenceDuration = overallEndTime - overallStartTime;
ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                << std::fixed << totalInferenceDuration.count() << " ms\n";
// Asynchronous execution using std::launch::async.
ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
std::vector<std::future<std::tuple<unsigned int,
    std::chrono::duration<double, std::milli>>>> inferenceResults;
inferenceResults.reserve(params.m_Iterations);

// One working memory handle per in-flight inference.
std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
workingMemHandles.reserve(params.m_Iterations);
for (unsigned int i = 0; i < params.m_Iterations; ++i)
{
    workingMemHandles.push_back(model.CreateWorkingMemHandle());
}

// Launch each inference on its own thread, sharing a single start timestamp.
const auto start_time = armnn::GetTimeNow();
for (unsigned int i = 0; i < params.m_Iterations; ++i)
{
    armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
    inferenceResults.push_back(std::async(
        std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
            return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
        }));
}

// Check the results.
for (unsigned int j = 0; j < inferenceResults.size(); ++j)
{
    // Blocks until the j-th inference has finished.
    auto inferenceResult = inferenceResults[j].get();
    auto inferenceDuration = std::get<1>(inferenceResult);
    auto inferenceID = std::get<0>(inferenceResult);

    if (params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    // Print the output tensors, writing them to file if requested.
    const auto& infosOut = model.GetOutputBindingInfos();
    for (size_t i = 0; i < numOutputs; i++)
    {
        size_t outputFileIndex = j * numOutputs + i;
        ARMNN_LOG(info) << "Writing output " << i << " named: '"
                        << inferenceModelParams.m_OutputBindings[i]
                        << "' of iteration: " << j + 1 << " to file: '"
                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
        // ... construct the per-output TensorPrinter from infosOut[i] (elided) ...
        mapbox::util::apply_visitor(printer, outputs[j][i]);
    }

    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                    << std::fixed << inferenceDuration.count() << " ms\n";

    // Only check against the threshold if the user supplied one.
    if (params.m_ThresholdTime != 0.0)
    {
        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                        << std::fixed << params.m_ThresholdTime << " ms";
        auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count();
        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                        << std::fixed << thresholdMinusInference << " ms" << "\n";

        if (thresholdMinusInference < 0)
        {
            ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
        }
    }

    ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
}

// Overall wall-clock time from the shared start timestamp.
auto duration = armnn::GetTimeDuration(start_time);
ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                << std::fixed << duration.count() << " ms\n";