ArmNN 24.08 — ArmComputeTensorUtils.hpp (documentation listing of this file follows).
1 //
2 // Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
#include <armnn/Tensor.hpp>
#include <armnn/DescriptorsFwd.hpp>

#include <armnn/utility/NumericCast.hpp>

#include <arm_compute/core/ITensor.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/Types.h>
#include <arm_compute/function_info/ScatterInfo.h>

#include <Half.hpp>
19 namespace armnn
20 {
21 class ITensorHandle;
22 
23 namespace armcomputetensorutils
24 {
25 
/// Utility function to map an armnn::DataType to the corresponding arm_compute::DataType.
/// \param multiScales true when per-channel (multiple) quantization scales are in use.
arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales);

/// Utility function to map an arm_compute::DataType to the corresponding armnn::DataType.
armnn::DataType GetArmNNDataType(arm_compute::DataType datatype);

/// Utility function used to set up an arm_compute::Coordinates from a vector of ArmNN Axes for reduction functions.
arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
                                                             unsigned int originalInputRank,
                                                             const std::vector<unsigned int>& armnnAxes);

/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);

/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape. This will
/// attempt to reduce the number of leading 1s until the dimension length is equal to the dimensions passed in.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions);

/// Utility function used to setup an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo);

/// Utility function used to setup an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo. This will attempt to reduce the number of leading 1s until the dimension length is equal
/// to the dimensions passed in.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions);

/// Utility function used to setup an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo and armnn::DataLayout. This will attempt to reduce the number of leading 1s until the
/// dimension length is equal to the dimensions passed in.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout,
                                                  unsigned int dimensions);

/// Utility function used to setup an arm_compute::TensorInfo object whose dimensions are based on the given
/// armnn::TensorInfo and armnn::DataLayout.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout);

/// NOTE(review): duplicate redeclaration of the (tensorInfo, dataLayout, dimensions) overload declared above.
/// Harmless in C++ but could be removed for clarity.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout, unsigned int dimensions);

/// Utility function used to convert an armnn::DataLayout to the corresponding arm_compute::DataLayout.
arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout);

/// Utility function used to setup an arm_compute::PoolingLayerInfo object from a given
/// armnn::Pooling2dDescriptor.
/// \param fpMixedPrecision whether the pooling may use mixed floating-point precision.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
                                                              bool fpMixedPrecision = false);

/// Utility function used to setup an arm_compute::Pooling3dLayerInfo object from a given
/// armnn::Pooling3dDescriptor.
/// \param fpMixedPrecision whether the pooling may use mixed floating-point precision.
arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
                                                                  bool fpMixedPrecision = false);

/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor.
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);

/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
/// \param perm PermutationVector used in Arm NN Permute layer
/// \return PermutationVector used in ACL Transpose layer
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm);

/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
/// \param perm PermutationVector used in Arm NN Transpose layer
/// \return PermutationVector used in ACL Transpose layer
arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm);

/// Utility function used to setup an arm_compute::Size2D object from width and height values.
arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height);

/// Gets the appropriate PixelValue for the TensorInfo DataType.
arm_compute::PixelValue GetPixelValue(const arm_compute::ITensorInfo* tensorInfo, float value);

/// Computes the depth multiplier parameter for the Depthwise Conv2d ACL workload.
unsigned int ComputeDepthwiseConv2dDepthMultiplier(armnn::DataLayout layout,
                                                   const arm_compute::TensorShape& weightsShape,
                                                   const arm_compute::TensorShape& inputShape);

/// Utility function used to setup an arm_compute::ScatterInfo from an ArmNN ScatterNd descriptor.
arm_compute::ScatterInfo BuildArmComputeScatterInfo(const ScatterNdDescriptor& descriptor);
114 
115 /// Utility function used to setup an arm_compute::PadStrideInfo object from an ArmNN layer descriptor.
116 template <typename Descriptor>
117 arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor& descriptor)
118 {
119  return arm_compute::PadStrideInfo(descriptor.m_StrideX,
120  descriptor.m_StrideY,
121  descriptor.m_PadLeft,
122  descriptor.m_PadRight,
123  descriptor.m_PadTop,
124  descriptor.m_PadBottom,
125  arm_compute::DimensionRoundingType::FLOOR);
126 }
127 
128 /// Utility function used to setup an arm_compute::Padding2D object from an armnn layer descriptor.
129 template <typename Descriptor>
130 arm_compute::Padding2D BuildArmComputePaddingInfo(const Descriptor &descriptor)
131 {
132  return arm_compute::Padding2D(descriptor.m_PadLeft,
133  descriptor.m_PadRight,
134  descriptor.m_PadTop,
135  descriptor.m_PadBottom);
136 }
137 
138 /// Utility function used to setup an arm_compute::CropInfo object from an ArmNN layer descriptor.
139 template <typename Descriptor>
140 arm_compute::CropInfo BuildArmComputeCropInfo(const Descriptor& descriptor, const unsigned int rank = 4)
141 {
142  if (rank == 3)
143  {
144  return arm_compute::CropInfo(0, 0,
145  descriptor.m_Crops[0].first, descriptor.m_Crops[0].second);
146  }
147  else if (rank == 4)
148  {
149  return arm_compute::CropInfo(descriptor.m_Crops[1].first, descriptor.m_Crops[1].second,
150  descriptor.m_Crops[0].first, descriptor.m_Crops[0].second);
151  }
152  else
153  {
154  throw InvalidArgumentException("Tensor rank must be either 3 or 4", CHECK_LOCATION());
155  }
156 }
157 
/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
/// Only initialises the tensor's metadata via its allocator; memory allocation is a
/// separate step (see InitialiseArmComputeTensorEmpty).
template <typename Tensor>
void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo)
{
    tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo));
}
164 
/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor,
/// taking the data layout (e.g. NCHW/NHWC) into account when building the TensorInfo.
/// Only initialises metadata; memory allocation is a separate step.
template <typename Tensor>
void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo, DataLayout dataLayout)
{
    tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo, dataLayout));
}
171 
/// Allocates the tensor's backing memory via its allocator, leaving the contents
/// uninitialised. The tensor's info must already have been set up (see BuildArmComputeTensor).
template <typename Tensor>
void InitialiseArmComputeTensorEmpty(Tensor& tensor)
{
    tensor.allocator()->allocate();
}
177 
/// Utility function to free unused tensors after a workload is configured and prepared.
/// A tensor is released only when it exists and its is_used() flag reports false;
/// a null pointer or a still-in-use tensor is left untouched.
template <typename Tensor>
void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
{
    if (tensor == nullptr)
    {
        return;
    }
    if (!tensor->is_used())
    {
        tensor.reset();
    }
}
187 
188 // Helper function to obtain byte offset into tensor data
189 inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info,
190  uint32_t depthIndex,
191  uint32_t batchIndex,
192  uint32_t channelIndex,
193  uint32_t y,
194  uint32_t x)
195 {
197  coords.set(4, static_cast<int>(depthIndex));
198  coords.set(3, static_cast<int>(batchIndex));
199  coords.set(2, static_cast<int>(channelIndex));
200  coords.set(1, static_cast<int>(y));
201  coords.set(0, static_cast<int>(x));
202  return armnn::numeric_cast<size_t>(info.offset_element_in_bytes(coords));
203 }
204 
205 // Helper function to obtain element offset into data buffer representing tensor data (assuming no strides).
206 inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info,
207  uint32_t depthIndex,
208  uint32_t batchIndex,
209  uint32_t channelIndex,
210  uint32_t y,
211  uint32_t x)
212 {
213  const arm_compute::TensorShape& shape = info.tensor_shape();
214  uint32_t width = static_cast<uint32_t>(shape[0]);
215  uint32_t height = static_cast<uint32_t>(shape[1]);
216  uint32_t numChannels = static_cast<uint32_t>(shape[2]);
217  uint32_t numBatches = static_cast<uint32_t>(shape[3]);
218  return (((depthIndex * numBatches + batchIndex) * numChannels + channelIndex) * height + y) * width + x;
219 }
220 
221 template <typename T>
222 void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData)
223 {
224  // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
225  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
226  {
227  const arm_compute::ITensorInfo& info = *srcTensor.info();
228  const arm_compute::TensorShape& shape = info.tensor_shape();
229  const uint8_t* const bufferPtr = srcTensor.buffer();
230  uint32_t width = static_cast<uint32_t>(shape[0]);
231  uint32_t height = static_cast<uint32_t>(shape[1]);
232  uint32_t numChannels = static_cast<uint32_t>(shape[2]);
233  uint32_t numBatches = static_cast<uint32_t>(shape[3]);
234  uint32_t depth = static_cast<uint32_t>(shape[4]);
235 
236  for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
237  {
238  for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
239  {
240  for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
241  {
242  for (unsigned int y = 0; y < height; ++y)
243  {
244  // Copies one row from arm_compute tensor buffer to linear memory buffer.
245  // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
246  memcpy(
247  dstData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
248  bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
249  width * sizeof(T));
250  }
251  }
252  }
253  }
254  }
255 }
256 
257 template <typename T>
258 void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor)
259 {
260  // If MaxNumOfTensorDimensions is increased, this loop will need fixing.
261  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyArmComputeITensorData");
262  {
263  const arm_compute::ITensorInfo& info = *dstTensor.info();
264  const arm_compute::TensorShape& shape = info.tensor_shape();
265  uint8_t* const bufferPtr = dstTensor.buffer();
266  uint32_t width = static_cast<uint32_t>(shape[0]);
267  uint32_t height = static_cast<uint32_t>(shape[1]);
268  uint32_t numChannels = static_cast<uint32_t>(shape[2]);
269  uint32_t numBatches = static_cast<uint32_t>(shape[3]);
270  uint32_t depth = static_cast<uint32_t>(shape[4]);
271 
272  for (unsigned int depthIndex = 0; depthIndex < depth; ++depthIndex)
273  {
274  for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex)
275  {
276  for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex)
277  {
278  for (unsigned int y = 0; y < height; ++y)
279  {
280  // Copies one row from linear memory buffer to arm_compute tensor buffer.
281  // A row is the largest contiguous region we can copy, as the tensor data may be using strides.
282  memcpy(
283  bufferPtr + GetTensorOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
284  srcData + GetLinearBufferOffset(info, depthIndex, batchIndex, channelIndex, y, 0),
285  width * sizeof(T));
286  }
287  }
288  }
289  }
290  }
291 }
292 
293 /// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions.
294 /// \tparam ArmComputeType Any type that implements the Dimensions interface
295 /// \tparam T Shape value type
296 /// \param shapelike An ArmCompute object that implements the Dimensions interface
297 /// \param initial A default value to initialise the shape with
298 /// \return A TensorShape object filled from the Acl shapelike object.
299 template<typename ArmComputeType, typename T>
300 TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial)
301 {
302  std::vector<unsigned int> s(MaxNumOfTensorDimensions, initial);
303  for (unsigned int i=0; i < shapelike.num_dimensions(); ++i)
304  {
305  s[(shapelike.num_dimensions()-1)-i] = armnn::numeric_cast<unsigned int>(shapelike[i]);
306  }
307  return TensorShape(armnn::numeric_cast<unsigned int>(shapelike.num_dimensions()), s.data());
308 };
309 
/// Get the strides from an ACL strides object as an armnn::TensorShape,
/// using 0 as the fill value for the intermediate buffer.
inline TensorShape GetStrides(const arm_compute::Strides& strides)
{
    return GetTensorShape(strides, 0U);
}
315 
/// Get the shape from an ACL shape object as an armnn::TensorShape,
/// using 1 as the fill value for the intermediate buffer.
inline TensorShape GetShape(const arm_compute::TensorShape& shape)
{
    return GetTensorShape(shape, 1U);
}
321 
322 } // namespace armcomputetensorutils
323 } // namespace armnn
armnn::DataLayout
DataLayout
Definition: Types.hpp:62
armnn::TensorInfo
Definition: Tensor.hpp:152
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::MaxNumOfTensorDimensions
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
armnn::Coordinates
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
Definition: InternalTypes.hpp:15
NumericCast.hpp
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::DataType
DataType
Definition: Types.hpp:48
DescriptorsFwd.hpp
armnn::PermutationVector
Definition: Types.hpp:314
armnn::BoostLogSeverityMapping::info
@ info
Half.hpp
Tensor.hpp
armnnDeserializer::Pooling3dDescriptor
const armnnSerializer::Pooling3dDescriptor * Pooling3dDescriptor
Definition: Deserializer.hpp:22
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnnUtils::GetTensorShape
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:21
armnnDeserializer::Pooling2dDescriptor
const armnnSerializer::Pooling2dDescriptor * Pooling2dDescriptor
Definition: Deserializer.hpp:21