ArmNN
 25.11
Loading...
Searching...
No Matches
ConvertConstDequantisationLayersToConstLayers.hpp
Go to the documentation of this file.
1//
2// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/FloatingPointConverter.hpp>
#include <armnnUtils/Permute.hpp>

#include <vector>

13namespace armnn
14{
15namespace optimizations
16{
17
19{
20public:
21 void Run(Graph& graph, InputSlot& connection) const
22 {
23 Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
24 Layer& child = connection.GetOwningLayer();
25
28
29 ReplaceConstDequantisationLayer(graph,
32
33 }
34protected:
37private:
38
39 static void ReplaceConstDequantisationLayer(Graph&,
40 ConstantLayer* constantLayer,
41 DequantizeLayer* dequantizeLayer)
42 {
43 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
44 /**
45 * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
46 * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
47 * want to Dequantize them once and store them in a Const layer to be used everytime as they will not change.
48 */
49 TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
50 TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
51 TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
52
53 bool requiresPermute = false;
54
55 auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
56 if (connection)
57 {
58 if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
59 {
60 /**
61 * ArmNN does not currently support non-fixed weights or bias
62 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
63 * but ArmNN expects the filter's height and width indices to match the input's height
64 * and width indices so we permute it to OIHW if the DataLayout is NCHW
65 */
66 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
67 "Convolution layer.";
68 auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
69 if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
70 {
71 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
72 "Convolution layer and requires permute on weights. ";
73 requiresPermute = true;
74 }
75 }
76 }
77
78 ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
79 auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
80
81 ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
82 << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
83 << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());
84
85 std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
86 if (constantInfo.GetDataType() == DataType::Float16 &&
87 inputDequantizeInfo.GetDataType() == DataType::Float16 &&
88 outputDequantizeInfo.GetDataType() == DataType::Float32)
89 {
90 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
92 outputDequantizeInfo.GetNumElements(),
93 newValues.data());
94 }
95 else if (((constantInfo.GetDataType() == DataType::QAsymmS8
96 && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
97 || (constantInfo.GetDataType() == DataType::QSymmS8
98 && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
99 outputDequantizeInfo.GetDataType() == DataType::Float32)
100 {
101 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
102 ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
103 outputDequantizeInfo.GetNumElements(),
104 inputDequantizeInfo.GetQuantizationScale(),
105 inputDequantizeInfo.GetQuantizationOffset(),
106 newValues.data());
107 }
108
109 TensorInfo newInfo = outputDequantizeInfo;
110 newInfo.SetConstant(true);
111 if (requiresPermute)
112 {
113 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
114 const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
115 std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
116 armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
117 newValues.data(), permutedValues.data(),
118 GetDataTypeSize(outputDequantizeInfo.GetDataType()));
119 ConstTensor newInput(newInfo, permutedValues);
120 constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
121 }
122 else
123 {
124 ConstTensor newInput(newInfo, newValues);
125 constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
126 }
127
128 // Moves connections in dequantize output to the constant layer.
129 // Dequantize layer will be removed if left unconnected.
130 dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
131
132 // Updating the output tensor
133 constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
134 ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
135
136 // Set isConstant to true in all input tensor infos where constantLayer is now connected to
137 for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
138 {
139 auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
141 info.SetConstant();
142 constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
144 }
145 }
146
147
148static void ConvertInt8To32(const void* srcInt8Buffer,
149 size_t numElements,
150 const float scale,
151 const int32_t offset,
152 float* dstFloat32Buffer)
153{
154 ARMNN_ASSERT(srcInt8Buffer != nullptr);
155 ARMNN_ASSERT(dstFloat32Buffer != nullptr);
156
157 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
158 ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;
159
160 const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
161
162 for (size_t i = 0; i < numElements; ++i)
163 {
164 dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
165 }
166}
167
168};
169
174
175} // namespace optimizations
176} // namespace armnn
#define ARMNN_ASSERT(COND)
Definition Assert.hpp:14
#define ARMNN_LOG(severity)
Definition Logging.hpp:212
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition Tensor.hpp:330
A layer that the constant data can be bound to.
std::shared_ptr< ConstTensorHandle > m_LayerOutput
This layer dequantizes the input tensor.
Layer & GetOwningLayer() const
Definition Layer.hpp:53
const OutputSlot * GetConnectedOutputSlot() const
Definition Layer.hpp:56
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition Layer.hpp:335
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition Layer.hpp:337
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
Definition Layer.hpp:339
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition Layer.hpp:286
const InputSlot * GetConnection(unsigned int index) const override
Definition Layer.cpp:83
void MoveAllConnections(OutputSlot &destination)
Moves all connections to another OutputSlot.
Definition Layer.cpp:156
unsigned int GetNumConnections() const override
Definition Layer.hpp:158
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition Layer.cpp:95
Layer & GetOwningLayer() const
Definition Layer.hpp:132
const TensorInfo & GetTensorInfo() const override
Definition Layer.cpp:100
float GetQuantizationScale() const
Definition Tensor.cpp:461
const TensorShape & GetShape() const
Definition Tensor.hpp:193
int32_t GetQuantizationOffset() const
Definition Tensor.cpp:482
unsigned int GetNumElements() const
Definition Tensor.hpp:198
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition Tensor.cpp:518
bool IsConstant() const
Definition Tensor.cpp:513
DataType GetDataType() const
Definition Tensor.hpp:200
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
OptimizeForConnection< ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl > ConvertConstDequantisationLayersToConstLayers
Copyright (c) 2021 ARM Limited and Contributors.
constexpr const char * GetDataTypeName(DataType dataType)
constexpr unsigned int GetDataTypeSize(DataType dataType)
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.