ArmNN 24.02
ConvertConstDequantisationLayersToConstLayers.hpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/Permute.hpp>

namespace armnn
{
namespace optimizations
{

class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));

    }
protected:
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:

    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation finds situations where a constant set of inputs is provided to a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Const layer to be reused, as they will
         * not change.
         */
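        //
        // In graph terms the substitution looks like this:
        //   before: ConstantLayer (quantised / FP16) -> DequantizeLayer -> consumer
        //   after:  ConstantLayer (FP32, dequantised once)              -> consumer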
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias.
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in],
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices, so we permute it to OIHW if the DataLayout is NCHW.
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                       "Convolution layer and requires permute on weights.";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype: " << armnn::GetDataTypeName(constantInfo.GetDataType())
                        << " inputDequantizeInfo datatype: " << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
                        << " outputDequantizeInfo datatype: " << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (((constantInfo.GetDataType() == DataType::QAsymmS8
                   && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
                  || (constantInfo.GetDataType() == DataType::QSymmS8
                      && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
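            // PermutationVector semantics in armnnUtils::Permute: mappings[i] is the
            // destination index of source dimension i, so {0, 2, 3, 1} sends O->0,
            // H->2, W->3, I->1, turning an OHWI filter into OIHW.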
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            ConstTensor newInput(newInfo, permutedValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }
        else
        {
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }

        // Moves connections in dequantize output to the constant layer.
        // Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Updating the output tensor
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in all input tensor infos that constantLayer is now connected to
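        // (connections with index < numConnections existed before MoveAllConnections;
        // only the newly moved ones need their tensor infos refreshed)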
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                            .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                            .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }


static void ConvertInt8To32(const void* srcInt8Buffer,
                            size_t numElements,
                            const float scale,
                            const int32_t offset,
                            float* dstFloat32Buffer)
{
    ARMNN_ASSERT(srcInt8Buffer != nullptr);
    ARMNN_ASSERT(dstFloat32Buffer != nullptr);

    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

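    // Affine dequantization: real = (q - offset) * scale.
    // For example, with scale = 0.5f and offset = -1, q = 5 maps to (5 - (-1)) * 0.5f = 3.0f.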
    const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

    for (size_t i = 0; i < numElements; ++i)
    {
        dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
    }
}

};

using ConvertConstDequantisationLayersToConstLayers
    = OptimizeForConnection<ConstantLayer,
                            DequantizeLayer,
                            ConvertConstDequantisationLayersToConstLayersImpl>;

} // namespace optimizations
} // namespace armnn
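
Usage note: this optimization is applied to an armnn::Graph through the Optimizer, like the
other OptimizeForConnection substitutions in this directory. A minimal sketch, assuming a
populated Graph named graph (the wrapper function is illustrative, not part of this header):

    #include "Optimizer.hpp"
    #include "ConvertConstDequantisationLayersToConstLayers.hpp"

    using namespace armnn;

    void RunPass(Graph& graph)
    {
        // Substitute every Constant -> Dequantize pair with a single pre-dequantized
        // Constant layer; Dequantize layers left unconnected are then cleaned up.
        Optimizer::Pass(graph,
                        MakeOptimizations(optimizations::ConvertConstDequantisationLayersToConstLayers()));
    }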