// Builds the NNAPI Capabilities structure the Arm NN driver advertises.
// Every operand type listed below is reported with the same default
// PerformanceInfo, whose first field is defaultPerfValue (0.1f) — the
// remaining initializer fields are not visible in this extraction.
// NOTE(review): several original lines (operand types, closing braces and
// the tail of the returned aggregate) are missing from this view; the
// comments describe only what is visible.
19 Capabilities GenerateCapabilities()
21 VLOG(DRIVER) <<
"ArmnnDriverImpl::GenerateCapabilities()";
23 float defaultPerfValue = .1f;
24 const Capabilities::PerformanceInfo defaultPerfInfo = { defaultPerfValue,
// Operand types the driver reports performance numbers for.
27 std::vector<OperandType> operandsTypes({
31 OperandType::TENSOR_FLOAT32,
32 OperandType::TENSOR_INT32,
33 OperandType::TENSOR_QUANT8_ASYMM,
35 OperandType::TENSOR_QUANT16_SYMM,
36 OperandType::TENSOR_FLOAT16,
37 OperandType::TENSOR_BOOL8,
39 OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
40 OperandType::TENSOR_QUANT16_ASYMM,
41 OperandType::TENSOR_QUANT8_SYMM,
42 OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
// One OperandPerformance entry per supported type, all with the same
// default performance info; reserve avoids reallocation in the loop.
45 std::vector<Capabilities::OperandPerformance> operandPerformances;
46 operandPerformances.reserve(operandsTypes.size());
48 for (
auto opType : operandsTypes)
50 operandPerformances.push_back(
51 Capabilities::OperandPerformance{ opType, defaultPerfInfo });
// create(...) returns a result type; .value() asserts construction
// succeeded (the entries are well-formed by construction here).
54 auto operandPerformanceTable =
55 Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();
57 return { defaultPerfInfo,
59 std::move(operandPerformanceTable),
// Rolling multiply-by-31 hash over a byte buffer, seeded with the buffer's
// size.  ((hash << 5) - hash) is the classic strength-reduced form of
// hash * 31.  Used below to fingerprint serialized network / cache data.
// NOTE(review): the function's return statement is not visible in this
// extraction; callers below treat the result as a size_t checksum.
64 size_t Hash(std::vector<uint8_t>& cacheData)
66 std::size_t hash = cacheData.size();
67 for (
auto& i : cacheData)
69 hash = ((hash << 5) - hash) + i;
// Validates that a shared file-descriptor handle is usable as a cache file:
// the fd must be non-negative and its access mode (per fcntl F_GETFL,
// masked with O_ACCMODE) must be O_RDWR.
// NOTE(review): the return statements of both checks are missing from this
// extraction; presumably each failing check returns false and the function
// ends with `return true;` — confirm against the full source.
81 bool ArmnnDriverImpl::ValidateSharedHandle(
const SharedHandle& sharedHandle)
85 if (*sharedHandle < 0)
90 int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
91 if (dataCacheFileAccessMode != O_RDWR)
// Converts an NNAPI model into an Arm NN network, optimizes and loads it,
// optionally serializes it into the supplied data/model cache handles, and
// returns a SharedPreparedModel ready for execution.
// Visible parameters: modelCacheHandle/dataCacheHandle (cache fds supplied
// by the NNAPI runtime), token (cache token), float32ToFloat16 (reduce-fp32
// option).  Errors are reported via NN_ERROR with an ErrorStatus code.
// NOTE(review): many original lines (try blocks, the Optimize() and
// LoadNetwork() calls, the GpuAcc backend loop header, closing braces) are
// missing from this extraction; comments describe only the visible code.
99 GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
104 const std::vector<SharedHandle>& modelCacheHandle,
105 const std::vector<SharedHandle>& dataCacheHandle,
106 const CacheToken& token,
107 bool float32ToFloat16,
110 VLOG(DRIVER) <<
"ArmnnDriverImpl::PrepareArmnnModel()";
// Fail fast when the Arm NN runtime/device is not available.
114 return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) <<
"Device unavailable";
// Reject structurally invalid NNAPI models before any conversion work.
117 if (
const auto result = validate(model); !result.ok())
119 return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) <<
"Invalid model passed as input";
// Convert the NNAPI model to an Arm NN INetwork; unsupported operation
// indices are collected into this set.
125 std::set<unsigned int> unsupportedOperations;
128 unsupportedOperations);
132 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"ModelToINetworkConverter failed";
// Serialize the network only when the runtime supplied a data cache fd.
137 std::vector<uint8_t> dataCacheData;
138 bool serializeToFile = dataCacheHandle.size() < 1 ? false :
true;
139 auto serializedNetworkFileName =
// Count cache files expected by the backends and pick the fd/flags used
// for GpuAcc cached-network support.
154 unsigned int numberOfCachedModelFiles = 0;
155 if (modelCacheHandle.size() > 0)
157 unsigned int index = 0;
163 if (numberOfCacheFiles > 0)
165 numberOfCachedModelFiles += numberOfCacheFiles;
169 cachedFd = *modelCacheHandle[index];
170 saveCachedNetwork =
true;
// NOTE(review): this advances by the cumulative numberOfCachedModelFiles,
// while the FromCache path advances by numberOfCacheFiles — confirm which
// is intended.
172 index += numberOfCachedModelFiles;
// Backend options forwarded to GpuAcc for cached-network handling.
180 {
"SaveCachedNetwork", saveCachedNetwork },
183 {
"CachedFileDescriptor", cachedFd }
// Optimize the network; messages from a failed optimization are gathered
// into errMessages and reported below.
194 std::vector<std::string> errMessages;
199 runtime->GetDeviceSpec(),
203 catch (std::exception& e)
205 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
211 std::stringstream message;
212 message <<
"Invalid optimized network";
213 for (
const std::string& msg : errMessages)
215 message <<
"\n" << msg;
217 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
// Load the optimized network into the runtime (memory sources undefined:
// buffers are copied rather than imported).
229 MemorySource::Undefined,
230 MemorySource::Undefined,
232 auto numInputs =
getMainModel(model).inputIndexes.size();
233 auto numOutputs =
getMainModel(model).outputIndexes.size();
238 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"Network could not be loaded";
241 catch (std::exception& e)
243 std::stringstream message;
244 message <<
"Exception (" << e.what()<<
") caught from LoadNetwork.";
245 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
// Fingerprint the serialized network and every readable model cache file;
// the XOR-combined hash is written ahead of the data so the FromCache path
// can verify integrity.
256 size_t hashValue = 0;
257 if (dataCacheHandle.size() == 1 )
259 hashValue = Hash(dataCacheData);
263 if (modelCacheHandle.size() > 0)
265 if (modelCacheHandle.size() == numberOfCachedModelFiles)
267 for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
269 int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
270 if (modelCacheFileAccessMode != O_RDONLY)
272 struct stat statBuffer;
273 if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
275 long modelDataSize = statBuffer.st_size;
276 if (modelDataSize > 0)
278 std::vector<uint8_t> modelData(modelDataSize);
// NOTE(review): pread return value (bytes actually read) is not checked.
279 pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
280 hashValue ^= Hash(modelData);
// Layout of the data cache file: raw hash bytes first, then the serialized
// network starting at offset sizeof(hashValue).
287 if (dataCacheHandle.size() == 1 && hashValue != 0)
289 std::vector<uint8_t> theHashValue(
sizeof(hashValue));
290 ::memcpy(theHashValue.data(), &hashValue,
sizeof(hashValue));
// NOTE(review): write/pwrite return values are not checked — short writes
// would silently corrupt the cache.
292 write(*dataCacheHandle[0], theHashValue.data(), theHashValue.size());
293 pwrite(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), theHashValue.size());
// A dummy-input run is used (per the std::find over the configured
// backends) to warm up / tune the network before real inferences.
296 bool executeWithDummyInputs = (std::find(options.
GetBackends().begin(),
300 auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
314 if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
316 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"Network could not be executed";
// Best-effort save of OpenCL tuned parameters — failure is only logged,
// never turned into an error.
319 if (clTunedParameters &&
328 catch (std::exception&
error)
330 VLOG(DRIVER) <<
"ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file"
335 return std::move(preparedModel);
// Rebuilds a prepared model from the cache files written by
// PrepareArmnnModel: validates the handles and token, verifies the stored
// hash against a recomputed one, deserializes the network, then optimizes
// and loads it exactly like the non-cached path.
// NOTE(review): many original lines (the deserializer call, the backend
// loop header, Optimize()/LoadNetwork() calls, closing braces) are missing
// from this extraction; comments describe only the visible code.
338 GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
342 const std::vector<SharedHandle>& modelCacheHandle,
343 const std::vector<SharedHandle>& dataCacheHandle,
344 const CacheToken& token,
345 bool float32ToFloat16)
347 VLOG(DRIVER) <<
"ArmnnDriverImpl::PrepareArmnnModelFromCache()";
351 return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
352 <<
"ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
// The NNAPI cache token has a fixed byte size; anything else is rejected.
355 if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
357 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
358 <<
"ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
// Exactly one data cache handle is expected, and it must be a valid
// readable/writable fd (see ValidateSharedHandle).
362 if (dataCacheHandle.size() != 1)
364 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
365 <<
"ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
368 if (!ValidateSharedHandle(dataCacheHandle[0]))
370 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
371 <<
"ArmnnDriverImpl::prepareModelFromCache(): Not valid data cache handle!";
// An empty cache file means there is nothing to deserialize.
374 size_t cachedDataSize = 0;
375 struct stat dataStatBuffer;
376 if (fstat(*dataCacheHandle[0], &dataStatBuffer) == 0)
378 cachedDataSize = dataStatBuffer.st_size;
380 if (cachedDataSize == 0)
382 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
383 <<
"ArmnnDriverImpl::prepareModelFromCache(): Not valid cached data!";
// The runtime must hand back exactly as many model cache files as the
// backends reported they need.
387 unsigned int numberOfCachedModelFiles = 0;
392 if (modelCacheHandle.size() != numberOfCachedModelFiles)
394 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
395 <<
"ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
// Cache layout (mirrors the writer): raw size_t hash first, serialized
// network after it.
399 std::vector<uint8_t> hashValue(
sizeof(
size_t));
// NOTE(review): pread return values in this function are not checked.
400 pread(*dataCacheHandle[0], hashValue.data(), hashValue.size(), 0);
403 if (cachedDataSize < hashValue.size())
405 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
406 <<
"ArmnnDriverImpl::prepareModelFromCache(): cachedDataSize is less than hashValue!";
408 std::vector<uint8_t> dataCacheData(cachedDataSize - hashValue.size());
409 pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), hashValue.size());
410 auto calculatedHashValue = Hash(dataCacheData);
// Fold each model cache file into the hash (XOR, matching the writer) and
// remember the fd to hand to GpuAcc as CachedFileDescriptor.
412 int gpuAccCachedFd = -1;
413 if (modelCacheHandle.size() > 0)
415 unsigned int index = 0;
421 if (numberOfCacheFiles > 0)
423 if (!ValidateSharedHandle(modelCacheHandle[index]))
425 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
426 <<
"ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
428 int cachedFd = *modelCacheHandle[index];
429 struct stat statBuffer;
430 if (fstat(cachedFd, &statBuffer) == 0)
432 long modelDataSize = statBuffer.st_size;
433 if (modelDataSize > 0)
435 std::vector<uint8_t> modelData(modelDataSize);
436 pread(cachedFd, modelData.data(), modelData.size(), 0);
437 calculatedHashValue ^= Hash(modelData);
441 gpuAccCachedFd = cachedFd;
445 index += numberOfCacheFiles;
// Byte-compare the recomputed hash against the stored one; any mismatch
// means the cache is stale or corrupt.
450 std::vector<uint8_t> calculatedHashData(
sizeof(calculatedHashValue));
451 ::memcpy(calculatedHashData.data(), &calculatedHashValue,
sizeof(calculatedHashValue));
452 if (hashValue != calculatedHashData)
454 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
455 <<
"ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
// Deserialize the cached network (call not visible here); any exception is
// mapped to GENERAL_FAILURE.
464 catch (std::exception&)
466 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
467 <<
"ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
// GpuAcc backend options: never re-save (the cache already exists), reuse
// the cached fd found above.
479 {
"SaveCachedNetwork",
false },
482 {
"CachedFileDescriptor", gpuAccCachedFd }
// Optimize + load, mirroring PrepareArmnnModel's error handling.
493 std::vector<std::string> errMessages;
498 runtime->GetDeviceSpec(),
502 catch (std::exception& e)
504 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
510 std::stringstream message;
511 message <<
"Invalid optimized network";
512 for (
const std::string& msg : errMessages)
514 message <<
"\n" << msg;
516 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
528 MemorySource::Undefined,
529 MemorySource::Undefined,
535 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) <<
"Network could not be loaded";
538 catch (std::exception& e)
540 std::stringstream message;
541 message <<
"Exception (" << e.what()<<
") caught from LoadNetwork.";
542 return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
545 auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
551 return std::move(preparedModel);
// Visible tail of ArmnnDriverImpl::GetCapabilities() — the signature is not
// present in this extraction.  Capabilities are generated once via
// GenerateCapabilities() and cached in a function-local static, so repeated
// calls return the same object without recomputation.
556 VLOG(DRIVER) <<
"ArmnnDriverImpl::GetCapabilities()";
557 static const Capabilities theCapabilities = GenerateCapabilities();
558 return theCapabilities;