armnn/latest/_quantize_operator_8cpp_source.html

//

// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.

// SPDX-License-Identifier: MIT

//

// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.

// SPDX-License-Identifier: Apache-2.0

//


#include "QuantizeOperator.hpp"


#include "TosaRescaleOperatorUtils.hpp"


#include <fmt/format.h>


// This function is paraphrased from:

// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp


// Builds the TOSA basic block that implements an ArmNN Quantize layer.
//
// Two lowerings are produced, selected by the input data type:
//   - FP16/FP32 input: CONST(zeroPoint), CONST(1/scale), MUL, ADD, CAST
//   - any other input: a single operator created by CreateRescaleTosaOperator
//
// layer   - Optional layer being converted. When non-null, the tensor names are generated from the layer's
//           connections and its backend id is searched for "TosaRef". When null, the placeholder names
//           "input_" and "output0_" are used.
// inputs  - Must contain exactly one TensorInfo.
// outputs - Must contain exactly one TensorInfo; its quantization scale and offset drive the conversion.
//
// Returns a block allocated with new.
// NOTE(review): ownership of the block (and of the operators/tensors placed in it) is presumed to pass to
// the caller / the block itself - confirm against the TOSA serialization library.
//
// Fails via ARMNN_THROW_INVALIDARG_MSG_IF_FALSE when inputs or outputs do not hold exactly one entry, and
// throws armnn::Exception when an unsigned input is fed by a tensor whose name does not contain "input_".
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    bool tosaRefBackend {false};

    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );

    std::string inputName           = std::string("input_");
    std::string outputName          = std::string("output0_");
    std::string blockName           = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so input and output names need to be determined
    // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
    if(layer != nullptr)
    {
        inputName  = GenerateUniqueInputName(layer->GetInputSlot(0));
        outputName = GenerateUniqueOutputName(*layer);

        tosaRefBackend = (layer->GetBackendId().Get().find("TosaRef") != std::string::npos);
    }

    // The tensor infos are only read, so bind them by reference instead of copying them.
    const TensorInfo& inputInfo  = *inputs[0];
    const TensorInfo& outputInfo = *outputs[0];

    // Extract quantization detail from Tensor
    float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
    // No per axis support in Tensorflow TOSA code
    float scale = outputInfo.GetQuantizationScale();

    // As per the Tensorflow quantization specification
    // Tensorflow TOSA code calculates quantization using multiplication by scale
    // Armnn code calculates quantization using division by scale
    // Invert scale factor passed from Armnn for tf TOSA code
    // A zero scale is passed through unchanged to avoid dividing by zero.
    scale = (scale != 0) ?  (1 / scale) : scale;

    std::vector<TosaSerializationTensor*> tensors;

    const std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
    const DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
    const bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;

    // Only add input tensors if connected layer is an input layer.
    // As intermediate or constant tensors will be created separately.
    // There also can't be duplicate tensor.
    if(inputName.find("input_") != std::string::npos)
    {
        DType tmp = inputDType0;

        if (IsUnsignedDataType(tmp) && !tosaRefBackend)
        {
            // TOSA rescale only supports signed types. Need to override type
            // when using unsigned attribute
            FlipSignage(tmp);
        }
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, tmp, {}));
    }
    else
    {
        if (IsUnsignedDataType(inputDType0))
        {
            // Can't modify the type of a previously created TosaSerializationTensor
            throw armnn::Exception(fmt::format("ConvertQuantizeToTosaOperator: {} intermediate input"
                                               " layer not supported.",EnumNamesDType()[inputDType0]));
        }
    }

    const std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

    if (isFloatInput)
    {
        // quantize:
        // const_zeroPoint = constant(zeroPoint)
        // const_scale = constant(scale)
        // out_mul = mul(input, const_scale)
        // out_add = add(out_mul, const_zeroPoint)
        // output = cast<output_type>(out_add)

        // The order of these GetUniqueTosaMappingID() calls determines the generated names; keep it stable.
        std::string outputNameScale     = std::string("constant0") + GetUniqueTosaMappingID();
        std::string outputNameZeroPoint = std::string("constant1") + GetUniqueTosaMappingID();
        std::string outputNameMul       = std::string("layer_intermediate0_") + GetUniqueTosaMappingID();
        std::string outputNameAdd       = std::string("layer_intermediate1_") + GetUniqueTosaMappingID();

        // const_zeroPoint
        TosaSerializationOperator* zeroPointOp = nullptr;
        TosaSerializationTensor* zeroPointTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameZeroPoint,
                                       zeroPoint,
                                       inputDType0,
                                       inputShape0,
                                       zeroPointOp,
                                       zeroPointTensor);
        tensors.push_back(zeroPointTensor);

        // const_scale
        TosaSerializationOperator* scaleOp = nullptr;
        TosaSerializationTensor* scaleTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameScale,
                                       scale,
                                       inputDType0,
                                       inputShape0,
                                       scaleOp,
                                       scaleTensor);
        tensors.push_back(scaleTensor);

        // mul
        int32_t shift = 0;
        TosaMulAttribute mulAttribute(shift);
        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                         Attribute_MulAttribute,
                                                                         &mulAttribute,
                                                                         {inputName, outputNameScale},
                                                                         {outputNameMul});
        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

        // add
        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                         Attribute_NONE,
                                                                         nullptr,
                                                                         {outputNameMul, outputNameZeroPoint},
                                                                         {outputNameAdd});
        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

        // cast
        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                          Attribute_NONE,
                                                                          nullptr,
                                                                          {outputNameAdd},
                                                                          {outputName});

        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,                                       // name
                                               mainName,                                        // region name
                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp},    // operators
                                               tensors,                                         // tensors
                                               {inputName},                                     // inputs
                                               {outputName});                                   // outputs
    }
    else
    {
        // Requantize: a single rescale maps the input quantization space onto the output one.
        double scale_alpha = inputInfo.GetQuantizationScale() / outputInfo.GetQuantizationScale();
        int32_t input_zp   = inputInfo.GetQuantizationOffset();
        int32_t output_zp  = outputInfo.GetQuantizationOffset();

        TosaSerializationOperator* rescaleOp = nullptr;
        CreateRescaleTosaOperator(inputName,
                                  outputName,
                                  scale_alpha,
                                  input_zp,
                                  output_zp,
                                  IsUnsignedDataType(inputDType0),
                                  IsUnsignedDataType(outputDType0),
                                  true,   // double_round
                                  true,   // scale32
                                  &rescaleOp);

        if (IsUnsignedDataType(outputDType0) && !tosaRefBackend)
        {
            // TOSA rescale only supports signed types. Need to override type
            // when using unsigned attribute
            FlipSignage(outputDType0);
        }

        // The output tensor is described by the output TensorInfo's shape, matching the float path.
        tensors.push_back(new TosaSerializationTensor(outputName,
                                                      outputShape0,
                                                      outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,      // name
                                               mainName,       // region name
                                               {rescaleOp},    // operators
                                               tensors,        // tensors
                                               {inputName},    // inputs
                                               {outputName});  // outputs
    }
}


ARMNN_THROW_INVALIDARG_MSG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str)
Definition Exceptions.hpp:210

ConvertQuantizeToTosaOperator
TosaSerializationBasicBlock * ConvertQuantizeToTosaOperator(const Layer *layer, const std::vector< const TensorInfo * > &inputs, const std::vector< const TensorInfo * > &outputs)
Definition QuantizeOperator.cpp:17

QuantizeOperator.hpp

GenerateUniqueOutputName
std::string GenerateUniqueOutputName(const Layer &layer, uint32_t layerSlot=0)
Definition TosaOperatorUtils.hpp:137

mainName
const std::string mainName
Definition TosaOperatorUtils.hpp:23

IsUnsignedDataType
bool IsUnsignedDataType(DType type)
Definition TosaOperatorUtils.hpp:528

ArmNNToDType
DType ArmNNToDType(const DataType &type)
Definition TosaOperatorUtils.hpp:26

GenerateUniqueInputName
std::string GenerateUniqueInputName(const armnn::InputSlot &slot)
Definition TosaOperatorUtils.hpp:113

GetUniqueTosaMappingID
std::string GetUniqueTosaMappingID()
Definition TosaOperatorUtils.hpp:155

FlipSignage
void FlipSignage(DType &type)
Definition TosaOperatorUtils.hpp:544

GetTosaTensorShape
std::vector< int32_t > GetTosaTensorShape(const TensorShape &shape)
Definition TosaOperatorUtils.hpp:83

CreateConstTosaOperator
void CreateConstTosaOperator(const std::string &outputName, const T value, DType dtype, const std::vector< int32_t > &shape, TosaSerializationOperator *&op, TosaSerializationTensor *&tensor)
Definition TosaOperatorUtils.hpp:507

TosaRescaleOperatorUtils.hpp

CreateRescaleTosaOperator
void CreateRescaleTosaOperator(const std::string &inputName, const std::string &outputName, double scale, int32_t input_zp, int32_t output_zp, bool input_unsigned, bool output_unsigned, bool double_round, bool scale32, TosaSerializationOperator **op)
Creates a Tosa rescale operator.
Definition TosaRescaleOperatorUtils.hpp:197

armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition Exceptions.hpp:47

armnn::Layer
Definition Layer.hpp:231

armnn::Layer::GetInputSlot
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337

armnn::Layer::GetBackendId
const BackendId & GetBackendId() const
Definition Layer.hpp:290

armnn::TensorInfo
Definition Tensor.hpp:153

armnn::TensorInfo::GetQuantizationScale
float GetQuantizationScale() const
Definition Tensor.cpp:461

armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition Tensor.hpp:193

armnn::TensorInfo::GetQuantizationOffset
int32_t GetQuantizationOffset() const
Definition Tensor.cpp:482

armnn::TensorInfo::GetDataType
DataType GetDataType() const
Definition Tensor.hpp:200