6 #define LOG_TAG "arm-armnn-sl"
11 #include <DefaultExecution.h>
12 #include <LegacyUtils.h>
13 #include <nnapi/IBurst.h>
14 #include <nnapi/IPreparedModel.h>
15 #include <nnapi/Result.h>
16 #include <nnapi/SharedMemory.h>
17 #include <nnapi/TypeUtils.h>
18 #include <nnapi/Types.h>
19 #include <nnapi/Validation.h>
// Zero-initialised Timing value handed back when no timing was measured.
29 static const Timing g_NoTiming = {};
35 unsigned long MicrosecondsDuration(android::nn::TimePoint endPoint, android::nn::TimePoint startPoint)
37 return static_cast<unsigned long>(std::chrono::duration_cast<std::chrono::microseconds>(
38 endPoint - startPoint).count());
41 bool ValidateRequestArgument(
const Request::Argument& requestArg,
const armnn::TensorInfo& tensorInfo)
43 if (requestArg.dimensions.size() != 0)
47 VLOG(DRIVER) <<
"Mismatched dimensions (request argument: "
48 << requestArg.dimensions.size() <<
" expected: " << tensorInfo.
GetNumDimensions();
54 if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.
GetShape()[d])
56 VLOG(DRIVER) <<
"Mismatched dimensions " << d
57 <<
" (request argument: " << requestArg.dimensions[d]
58 <<
" expected: " << tensorInfo.
GetShape()[d];
// Maps one request argument onto an armnn::Tensor backed by the appropriate
// memory: the argument is validated first, then the backing store is chosen
// by its lifetime (POINTER = client-supplied buffer, POOL = shared memory
// pool from the request).
// NOTE(review): this extract is incomplete — the tensorInfo parameter line
// and the body's return statements are not visible here.
67 armnn::Tensor GetTensorForRequestArgument(
const Request::Argument& requestArg,
69 const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
// Reject arguments whose dimensions disagree with the model's tensor info.
71 if (!ValidateRequestArgument(requestArg, tensorInfo))
// Client passed a raw pointer buffer for this argument.
76 if (requestArg.lifetime == Request::Argument::LifeTime::POINTER)
// Argument lives inside one of the request's memory pools.
80 else if (requestArg.lifetime == Request::Argument::LifeTime::POOL)
// Builds a tensor dump name by appending the binding index to the given
// prefix, e.g. ("Input", 2) -> "Input2".
inline std::string BuildTensorName(
        const char* tensorNamePrefix, std::size_t index)
{
    std::string name{tensorNamePrefix};
    name += std::to_string(index);
    return name;
}
92 bool IsPointerTypeMemory(
const Request& request)
94 for (
auto& input : request.inputs)
96 if (input.lifetime != Request::Argument::LifeTime::POINTER)
102 for (
auto& output: request.outputs)
104 if (output.lifetime != Request::Argument::LifeTime::POINTER)
// One-time post-construction set-up: propagates the driver's GPU profiling
// flag to the ArmNN profiler attached to this network.
120 void ArmnnPreparedModel::Init()
123 m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
// Constructor for a model prepared from source: m_PrepareFromCache is fixed
// to false, so later executions will re-validate the request against the
// model. NOTE(review): the opening lines of the parameter list are not
// visible in this extract.
129 const std::string& requestInputsAndOutputsDumpDir,
130 const bool gpuProfilingEnabled,
132 : m_NetworkId(networkId)
135 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
136 , m_GpuProfilingEnabled(gpuProfilingEnabled)
137 , m_ModelPriority(priority)
138 , m_PrepareFromCache(false)
// Constructor overload that also records whether the model was prepared
// from a cached blob; when m_PrepareFromCache is true, execute()/
// executeFenced() skip request validation against the (absent) model.
// NOTE(review): the opening lines of the parameter list are not visible in
// this extract.
145 const std::string& requestInputsAndOutputsDumpDir,
146 const bool gpuProfilingEnabled,
148 const bool prepareModelFromCache)
149 : m_NetworkId(networkId)
151 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
152 , m_GpuProfilingEnabled(gpuProfilingEnabled)
153 , m_ModelPriority(priority)
154 , m_PrepareFromCache(prepareModelFromCache)
// Builds the ArmNN input tensor bindings for a request: one (index, Tensor)
// pair per request input, each backed by pointer or pool memory via
// GetTensorForRequestArgument.
// Returns NONE on success, GENERAL_FAILURE when any input cannot be mapped.
160 ErrorStatus ArmnnPreparedModel::PrepareMemoryForInputs(
162 const Request& request,
163 const std::vector<android::nn::RunTimePoolInfo>& memPools)
const
// One entry is appended per input; reserve avoids reallocation.
165 inputs.reserve(request.inputs.size());
166 for (
unsigned int i = 0; i < request.inputs.size(); i++)
168 const auto& inputArg = request.inputs[i];
174 const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
// An unusable tensor means validation or memory mapping failed above.
178 VLOG(DRIVER) <<
"Cannot execute request. Error converting request input " << i <<
"to tensor.";
179 return ErrorStatus::GENERAL_FAILURE;
181 inputs.emplace_back(i, inputTensor);
184 return ErrorStatus::NONE;
// Builds the ArmNN output tensor bindings plus the OutputShape list for a
// request. Each output's client buffer (and, for POOL lifetime, its backing
// pool) must be large enough for the tensor ArmNN will produce; otherwise
// the shape entry is flagged isSufficient=false and
// OUTPUT_INSUFFICIENT_SIZE is returned so the client can retry with bigger
// buffers.
187 ErrorStatus ArmnnPreparedModel::PrepareMemoryForOutputs(
189 std::vector<OutputShape> &outputShapes,
190 const Request& request,
191 const std::vector<android::nn::RunTimePoolInfo>& memPools)
const
193 outputs.reserve(request.outputs.size());
194 for (
unsigned int i = 0; i < request.outputs.size(); i++)
196 auto& outputArg = request.outputs[i];
199 armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
202 VLOG(DRIVER) <<
"Cannot execute request. Error converting request output " << i <<
"to tensor.";
203 return ErrorStatus::GENERAL_FAILURE;
// Number of bytes ArmNN will write for this output.
206 const size_t outputSize = outputTensorInfo.
GetNumBytes();
// Copy the client-requested dimensions into the shape that is reported back.
208 unsigned int count = 0;
209 std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](
auto dim)
213 outputTensorInfo.GetShape()[count] = dim;
217 outputTensorInfo.GetShape()[count] = outputArg.dimensions.size();
223 outputs.emplace_back(i, outputTensor);
// The client-declared buffer length must cover the whole tensor.
226 if (outputArg.location.length < outputSize)
228 VLOG(DRIVER) <<
"ArmnnPreparedModel::Execute failed outputArg.location.length "
229 << std::to_string(outputArg.location.length).c_str()
230 <<
" < outputSize " << std::to_string(outputSize).c_str();
231 outputShapes[i].isSufficient =
false;
232 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
// For pool-backed outputs the pool itself must also be big enough.
235 if (outputArg.lifetime == Request::Argument::LifeTime::POOL)
237 size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
238 if (bufferSize < outputSize)
240 VLOG(DRIVER) <<
"ArmnnPreparedModel::Execute failed bufferSize "
// NOTE(review): this log prints location.length although the check was on
// bufferSize — looks like a copy/paste slip; confirm against upstream.
241 << std::to_string(outputArg.location.length).c_str()
242 <<
" < outputSize " << std::to_string(outputSize).c_str();
243 outputShapes[i].isSufficient =
false;
244 return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
248 return ErrorStatus::NONE;
// Orchestrates memory preparation for one execution: maps the request's
// memory pools (skipped when every argument is POINTER-backed client
// memory), then fills the input and output tensor bindings. armnn and
// standard-library exceptions are converted to GENERAL_FAILURE.
// NOTE(review): the function head and the try-block opener are not visible
// in this extract.
253 std::vector<android::nn::RunTimePoolInfo>& memPools,
254 const Request& request,
255 const bool pointerMemory)
const
// Pool mapping failure means the request referenced invalid memory.
261 if (!pointerMemory && !setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
263 return ErrorStatus::INVALID_ARGUMENT;
266 if (PrepareMemoryForInputs(inputs, request, memPools) != ErrorStatus::NONE)
268 VLOG(DRIVER) <<
"Failed when preparing memory for Inputs";
269 return ErrorStatus::GENERAL_FAILURE;
272 std::vector<OutputShape> outputShapes(request.outputs.size());
274 auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools);
275 if (errorStatus != ErrorStatus::NONE)
282 VLOG(DRIVER) <<
"armnn::Exception caught while preparing for EnqueueWorkload: " << e.
what();
283 return ErrorStatus::GENERAL_FAILURE;
285 catch (std::exception& e)
287 VLOG(DRIVER) <<
"std::exception caught while preparing for EnqueueWorkload: " << e.what();
288 return ErrorStatus::GENERAL_FAILURE;
291 return ErrorStatus::NONE;
// NNAPI entry point for a synchronous execution: validates the request
// (skipped when the model was prepared from cache and no model is held),
// checks the deadline, prepares I/O memory, runs the graph and returns the
// output shapes plus timing. NOTE(review): the function head and several
// interior lines are not visible in this extract.
295 const Request& request,
296 MeasureTiming measureTiming,
297 const OptionalTimePoint& deadline,
298 const OptionalDuration&,
299 const std::vector<android::nn::TokenValuePair>& hints,
300 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
302 VLOG(DRIVER) <<
"CanonicalDriver::PreparedModel::execute()";
305 if (measureTiming == MeasureTiming::YES)
// Cache-prepared models carry no Model to validate against.
311 if (!m_PrepareFromCache)
313 const auto modelRequest = validateRequestForModel(request, m_Model)
314 if (!modelRequest.ok())
316 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
318 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(): " <<
GetModelSummary(m_Model).c_str();
320 if (hasDeadlinePassed(deadline))
322 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
// Pools/tensors are shared_ptrs so they can outlive this frame if needed.
327 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
330 auto inputTensors = std::make_shared<armnn::InputTensors>();
331 auto outputTensors = std::make_shared<armnn::OutputTensors>();
333 auto isPointerTypeMemory = IsPointerTypeMemory(request);
334 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
338 isPointerTypeMemory);
// Translate memory-preparation failures into canonical NN errors.
340 switch(theErrorStatus)
342 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
343 return NN_ERROR(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
344 case ErrorStatus::GENERAL_FAILURE:
345 return NN_ERROR(ErrorStatus::GENERAL_FAILURE);
346 case ErrorStatus::INVALID_ARGUMENT:
347 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
// Report one OutputShape per bound output tensor.
352 std::vector<OutputShape> outputShapes(outputTensors->size());
353 for (
unsigned int i = 0; i < outputTensors->size(); i++)
355 std::pair<int, armnn::Tensor> outputTensorPair = (*outputTensors)[i];
363 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) before ExecuteGraph";
364 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
365 if (errorStatus != ErrorStatus::NONE)
367 return NN_ERROR(errorStatus) <<
"execute() failed";
369 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute(...) after ExecuteGraph";
371 return std::make_pair(outputShapes, theTiming);
// Runs the prepared network on the already-bound input/output tensors.
// Dumps tensors before and after when a dump directory is configured, and
// appears to filter already-imported bindings out of the enqueue lists (the
// erase calls are only partly visible in this extract). EnqueueWorkload
// failures and exceptions map to GENERAL_FAILURE.
375 std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
379 const bool pointerMemory)
const
381 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph(...)";
383 DumpTensorsIfRequired(
"Input", inputTensors);
384 std::vector<armnn::ImportedInputId> importedInputIds;
385 std::vector<armnn::ImportedOutputId> importedOutputIds;
393 VLOG(DRIVER) <<
"ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false";
// Drop inputs already handled via import so they are not enqueued twice.
395 if (!importedInputIds.empty())
403 inputTensors.begin(), inputTensors.end(),
404 [&importedId](std::pair<armnn::LayerBindingId, class armnn::ConstTensor>& element) {
405 return (element.first == static_cast<int>(importedId));
// Same filtering for imported outputs.
411 if (!importedOutputIds.empty())
419 outputTensors.begin(), outputTensors.end(),
420 [&importedId](std::pair<armnn::LayerBindingId, class armnn::Tensor>& element) {
421 return (element.first == static_cast<int>(importedId));
423 outputTensors.end());
438 VLOG(DRIVER) <<
"ArmnnPreparedModel:ExecuteGraph EnqueueWorkload failed";
439 return ErrorStatus::GENERAL_FAILURE;
444 VLOG(DRIVER) <<
"armnn:Exception caught from EnqueueWorkload: " << e.
what();
445 return ErrorStatus::GENERAL_FAILURE;
447 catch (std::exception& e)
449 VLOG(DRIVER) <<
"std::exception caught from EnqueueWorkload: " << e.what();
450 return ErrorStatus::GENERAL_FAILURE;
453 if (!pointerMemory && (!importedInputIds.empty() || !importedOutputIds.empty()))
457 DumpTensorsIfRequired(
"Output", outputTensors);
465 VLOG(DRIVER) <<
"ArmnnPreparedModel::execute timing - Device = "
466 << timing.timeOnDevice <<
"Driver = " << timing.timeInDriver;
468 return ErrorStatus::NONE;
// Accessor body: returns the priority this model was prepared with
// (function head not visible in this extract).
473 return m_ModelPriority;
// NNAPI fenced-execution entry point: waits on every supplied sync fence,
// then runs the graph synchronously and returns an already-signalled fence
// plus a callback reporting timing/status. NOTE(review): the function head
// and several interior lines are not visible in this extract.
478 const Request& request,
479 const std::vector<SyncFence>& waitFor,
480 MeasureTiming measureTiming,
481 const OptionalTimePoint& deadline,
482 const OptionalDuration&,
483 const OptionalDuration&,
484 const std::vector<android::nn::TokenValuePair>& hints,
485 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
487 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced()";
// Cache-prepared models carry no Model to validate against.
489 if (!m_PrepareFromCache) {
490 const auto modelRequest = validateRequestForModel(request, m_Model);
491 if (!modelRequest.ok())
493 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
495 VLOG(DRIVER) <<
"ArmnnPreparedModel::executeFenced(): " <<
GetModelSummary(m_Model).c_str();
497 if (hasDeadlinePassed(deadline))
499 return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
503 if (measureTiming == MeasureTiming::YES)
// Block until every dependency fence is signalled; a fence without a
// handle is an invalid argument.
510 for (
const auto& syncFence : waitFor)
512 if (!syncFence.getSharedHandle())
514 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
516 if (syncFence.syncWait({}) != SyncFence::FenceState::SIGNALED)
518 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"syncWait failed";
// Start of the post-fence timing window.
522 android::nn::TimePoint fenceExecutionStart;
523 if (measureTiming == MeasureTiming::YES)
525 fenceExecutionStart = Clock::now();
530 auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
533 auto inputTensors = std::make_shared<armnn::InputTensors>();
534 auto outputTensors = std::make_shared<armnn::OutputTensors>();
536 auto isPointerTypeMemory = IsPointerTypeMemory(request);
537 ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
541 isPointerTypeMemory);
543 if (theErrorStatus != ErrorStatus::NONE)
545 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) <<
"executeFenced() failed";
548 Timing timingSinceLaunch = {};
549 Timing timingAfterFence = {};
550 if (measureTiming == MeasureTiming::YES)
553 timingAfterFence.timeInDriver = ctx.
driverEnd - fenceExecutionStart;
554 VLOG(DRIVER) <<
"executeFenced timingSinceLaunch = " << timingAfterFence.timeOnDevice;
555 VLOG(DRIVER) <<
"executeFenced timingAfterFence = " << timingAfterFence.timeInDriver;
558 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) before ExecuteGraph";
559 auto errorStatus =
ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
560 VLOG(DRIVER) <<
"ArmnnCanonicalPreparedModel::executeFenced(...) after ExecuteGraph";
// Callback the runtime invokes to fetch timing/status after the fence.
562 ExecuteFencedInfoCallback armnnFencedExecutionCallback =
563 [timingSinceLaunch, timingAfterFence, errorStatus]() {
565 GeneralResult<std::pair<Timing, Timing>> result;
// NOTE(review): no break/return is visible between these cases — if none
// exists upstream, every error case falls through; verify before relying
// on the reported error code.
569 case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
570 result.error().code = (ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
571 case ErrorStatus::GENERAL_FAILURE:
572 result.error().code = (ErrorStatus::GENERAL_FAILURE);
573 case ErrorStatus::INVALID_ARGUMENT:
574 result.error().code = (ErrorStatus::INVALID_ARGUMENT);
577 result.value() = std::make_pair(timingSinceLaunch, timingAfterFence);
// Work completed synchronously above, so the returned fence is already
// signalled.
582 return std::make_pair(SyncFence::createAsSignaled(), std::move(armnnFencedExecutionCallback ));
// Creates a reusable execution object for this prepared model by wrapping
// it in DefaultExecution (see DefaultExecution.h); shared_from_this keeps
// the model alive for the execution's lifetime.
// NOTE(review): the function head and two argument lines of the
// make_shared call are not visible in this extract.
586 const Request& request,
587 MeasureTiming measureTiming,
588 const OptionalDuration& loopTimeoutDuration,
589 const std::vector<android::nn::TokenValuePair>& hints,
590 const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix)
const
592 VLOG(DRIVER) <<
"ArmnnPreparedModel::createReusableExecution()";
593 return std::make_shared<DefaultExecution>(shared_from_this(),
596 loopTimeoutDuration);
// Dumps every tensor in the binding collection to the configured dump
// directory; does nothing when no directory was set. The dump file name is
// "<networkId>.dump" and each tensor is named "<prefix><index>" via
// BuildTensorName. Works for both input (ConstTensor) and output (Tensor)
// collections through the template parameter.
609 template<
typename TensorBindingCollection>
610 void ArmnnPreparedModel::DumpTensorsIfRequired(
char const* tensorNamePrefix,
611 const TensorBindingCollection& tensorBindings)
const
613 if (!m_RequestInputsAndOutputsDumpDir.empty())
615 const std::string requestName = std::to_string(m_NetworkId) +
".dump";
616 for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
620 BuildTensorName(tensorNamePrefix, i),
621 tensorBindings[i].second);
// Destructor body: logs teardown and, when GPU profiling is enabled,
// fetches the network's profiler to dump the collected profiling data (the
// dump call is only partly visible in this extract; the destructor's head
// is above it).
628 VLOG(DRIVER) <<
"ArmnnPreparedModel::~ArmnnPreparedModel()";
630 if (m_GpuProfilingEnabled)
632 auto profiler = m_Runtime->
GetProfiler(m_NetworkId);
637 m_RequestInputsAndOutputsDumpDir,
// Body of a helper that runs the network once with freshly allocated dummy
// buffers for every input and output (the enclosing function's signature is
// not visible in this extract; presumably a warm-up/self-test path — TODO
// confirm). Returns true when execution reports ErrorStatus::NONE.
// `storage` owns all backing memory; vector<char> elements are
// value-initialised to zero.
648 std::vector<std::vector<char>> storage;
650 for (
unsigned int i = 0; i < numInputs; i++)
// One buffer per input, sized from the network's tensor info.
656 storage.emplace_back(inputTensorInfo.
GetNumBytes());
659 inputTensors.emplace_back(i, inputTensor);
663 for (
unsigned int i = 0; i < numOutputs; i++)
666 storage.emplace_back(outputTensorInfo.
GetNumBytes());
667 const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
669 outputTensors.emplace_back(i, outputTensor);
// No request pools are involved for dummy buffers; pass an empty pool list.
673 auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();
680 return errorStatus == ErrorStatus::NONE;