ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ArmComputeTensorUtils.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
#include "ArmComputeTensorUtils.hpp"
#include "ArmComputeUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/Exceptions.hpp>

#include <fmt/format.h>

#include <algorithm>
#include <cmath>
#include <string>
#include <vector>
14 
15 namespace armnn
16 {
17 namespace armcomputetensorutils
18 {
19 
20 arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales)
21 {
22  switch(dataType)
23  {
25  return arm_compute::DataType::BFLOAT16;
27  return arm_compute::DataType::U8;
29  return arm_compute::DataType::F16;
31  return arm_compute::DataType::F32;
33  {
34  // Per Channel Quantization not supported for QAsymmS8 in ACL (as of 2024-08)
35  return multiScales ? arm_compute::DataType::UNKNOWN : arm_compute::DataType::QASYMM8_SIGNED;
36  }
38  {
39  // Per Channel Quantization not supported for QAsymmU8 in ACL (as of 2024-08)
40  return multiScales ? arm_compute::DataType::UNKNOWN : arm_compute::DataType::QASYMM8;
41  }
43  {
44  // Per Channel Quantization not supported for QSymmS16 in ACL (as of 2024-08)
45  return multiScales ? arm_compute::DataType::UNKNOWN : arm_compute::DataType::QSYMM16;
46  }
48  return arm_compute::DataType::S64;
50  {
51  return multiScales ? arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8;
52  }
54  return arm_compute::DataType::S32;
55  default:
56  return arm_compute::DataType::UNKNOWN;
57  }
58 }
59 
60 armnn::DataType GetArmNNDataType(arm_compute::DataType dataType)
61 {
62  switch(dataType)
63  {
64  case arm_compute::DataType::BFLOAT16:
66  case arm_compute::DataType::U8:
68  case arm_compute::DataType::F16:
70  case arm_compute::DataType::F32:
72  case arm_compute::DataType::QASYMM8_SIGNED:
74  case arm_compute::DataType::QASYMM8:
76  case arm_compute::DataType::QSYMM16:
78  case arm_compute::DataType::S64:
80  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
82  case arm_compute::DataType::QSYMM8:
84  case arm_compute::DataType::S32:
86  default:
87  throw InvalidArgumentException("Unknown arm_compute::DataType data type");
88  }
89 }
90 
91 arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
92  unsigned int originalInputRank,
93  const std::vector<unsigned int>& armnnAxes)
94 {
95  arm_compute::Coordinates outAclCoords;
96 
97  if (armnnAxes.empty())
98  {
99  // If no reduction axes were provided, then the input must be reduced along all dimensions.
100  // Since Compute Library does not accept an empty vector as the reduction dimensions, we then
101  // manually create a vector including all the input dimensions (in reversed order) as:
102  //
103  // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
104  //
105  outAclCoords.set_num_dimensions(inputDimensions);
106  std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
107  }
108  else
109  {
110  // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
111  //
112  // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
113  // dimension correction).
114  // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
115  // new value for that reduction axis should be 1.
116  //
117  // Example:
118  // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
119  // ArmNN reduction axis = { 2 } -> ACL reduction axis = { 1 }
120  // ArmNN reduction axis = { 3 } -> ACL reduction axis = { 0 }
121  //
122  // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
123  //
124  outAclCoords.set_num_dimensions(armnnAxes.size());
125  std::transform(armnnAxes.begin(), armnnAxes.end(),
126  outAclCoords.begin(),
127  [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
128  }
129 
130  return outAclCoords;
131 }
132 
133 arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
134 {
135  arm_compute::TensorShape shape;
136 
137  // armnn tensors are (batch, channels, height, width).
138  // arm_compute tensors are (width, height, channels, batch).
139  for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
140  {
141  // Note that our dimensions are stored in the opposite order to ACL's.
142  shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i], false);
143 
144  // TensorShape::set() flattens leading ones, so that batch size 1 cannot happen.
145  // arm_compute tensors expect this.
146  }
147 
148  // prevent arm_compute issue where tensor is flattened to nothing
149  if (shape.num_dimensions() == 0)
150  {
151  shape.set_num_dimensions(1);
152  }
153 
154  return shape;
155 }
156 
157 std::vector<unsigned int> ReduceDimsForACL(const armnn::TensorShape tensorShape, unsigned int dimensions)
158 {
159  std::vector<unsigned int> newShape;
160 
161  unsigned int dimsToSkip = 0;
162 
163  if (tensorShape.GetNumDimensions() > dimensions)
164  {
165  dimsToSkip = tensorShape.GetNumDimensions() - dimensions;
166  }
167  unsigned int dimsSkipped = 0;
168  bool insertRemainder = false;
169 
170  for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
171  {
172  if (tensorShape[i] == 1 && dimsSkipped < dimsToSkip && !insertRemainder)
173  {
174  ++dimsSkipped;
175  continue;
176  }
177  newShape.insert(newShape.begin(), tensorShape[i]);
178  // Once we insert the first dimension we can't skip any more
179  insertRemainder = true;
180  }
181  return newShape;
182 }
183 
184 arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions)
185 {
186  arm_compute::TensorShape shape;
187  std::vector<unsigned int> strippedShape = ReduceDimsForACL(tensorShape, dimensions);
188 
189  for (unsigned int i = 0; i < strippedShape.size(); i++)
190  {
191  shape.set(i, strippedShape[i], false);
192  }
193 
194  // prevent arm_compute issue where tensor is flattened to nothing
195  if (shape.num_dimensions() == 0)
196  {
197  shape.set_num_dimensions(1);
198  }
199  return shape;
200 }
201 
202 // Utility function used to build a TensorInfo object, that can be used to initialise
203 // ARM Compute Tensor and CLTensor allocators.
204 // Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
205 // default to constant but Arm NN ones default to non constant. In the cases where
206 // we expect ACL to treat a tensor as constant that value must be set after this
207 // utility has been called.
208 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
209 {
210  bool multiScales = tensorInfo.HasMultipleQuantizationScales();
211  const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape());
212  const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
213 
214  const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
215  arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
216  arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
217 
218  return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
219 }
220 
221 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
222  armnn::DataLayout dataLayout)
223 {
224  arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo);
225  aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));
226 
227  return aclTensorInfo;
228 }
229 
230 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions)
231 {
232  bool multiScales = tensorInfo.HasMultipleQuantizationScales();
233  const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape(), dimensions);
234  const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
235 
236  const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
237  arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
238  arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
239 
240  return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
241 }
242 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
243  armnn::DataLayout dataLayout, unsigned int dimensions)
244 {
245  arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo, dimensions);
246  aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));
247 
248  return aclTensorInfo;
249 }
250 
251 
252 arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
253 {
254  switch(dataLayout)
255  {
256  case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
257 
258  case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
259 
260  case armnn::DataLayout::NDHWC : return arm_compute::DataLayout::NDHWC;
261 
262  case armnn::DataLayout::NCDHW : return arm_compute::DataLayout::NCDHW;
263 
264  default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
265  std::to_string(static_cast<int>(dataLayout)) + "]");
266  }
267 }
268 
269 arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
270  bool fpMixedPrecision)
271 {
272  // Resolve ARM Compute layer parameters.
273  const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
274 
275  const arm_compute::DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);
276 
277  bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0);
278  //use specific constructor if global pooling
279  if(isGlobalPooling)
280  {
281  return arm_compute::PoolingLayerInfo(poolingType, dataLayout);
282  }
283 
284  const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
285  descriptor.m_OutputShapeRounding);
286  const arm_compute::PadStrideInfo padStrideInfo(descriptor.m_StrideX,
287  descriptor.m_StrideY,
288  descriptor.m_PadLeft,
289  descriptor.m_PadRight,
290  descriptor.m_PadTop,
291  descriptor.m_PadBottom,
292  rounding);
293 
294  const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
295 
296  const arm_compute::Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
297 
298  return arm_compute::PoolingLayerInfo(poolingType, poolSize, dataLayout, padStrideInfo, excludePadding,
299  fpMixedPrecision);
300 }
301 
// Translates an Arm NN Pooling3dDescriptor into ACL's Pooling3dLayerInfo.
// fpMixedPrecision is forwarded to ACL unchanged.
arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
                                                                  bool fpMixedPrecision)
{
    const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);

    // All-zero strides signal global pooling, which has a dedicated ACL constructor.
    bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0 && descriptor.m_StrideZ==0);
    //use specific constructor if global pooling
    if(isGlobalPooling)
    {
        return arm_compute::Pooling3dLayerInfo(poolingType);
    }

    const arm_compute::Size3D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight, descriptor.m_PoolDepth);

    const arm_compute::Size3D stride(descriptor.m_StrideX,
                                     descriptor.m_StrideY,
                                     descriptor.m_StrideZ);

    // Padding order matches the ACL Padding3D constructor:
    // (left, right, top, bottom, front, back).
    const arm_compute::Padding3D padding(descriptor.m_PadLeft,
                                         descriptor.m_PadRight,
                                         descriptor.m_PadTop,
                                         descriptor.m_PadBottom,
                                         descriptor.m_PadFront,
                                         descriptor.m_PadBack);

    // PaddingMethod::Exclude -> padded elements do not count towards the pooled value.
    const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);

    const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
            descriptor.m_OutputShapeRounding);

    return arm_compute::Pooling3dLayerInfo(poolingType,
                                           poolSize,
                                           stride,
                                           padding,
                                           excludePadding,
                                           fpMixedPrecision,
                                           rounding);
}
340 
341 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
342 {
343  const arm_compute::NormType normType =
344  ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType);
345  return arm_compute::NormalizationLayerInfo(normType,
346  descriptor.m_NormSize,
347  descriptor.m_Alpha,
348  descriptor.m_Beta,
349  descriptor.m_K,
350  false);
351 }
352 
// Converts an Arm NN PermutationVector into ACL's PermutationVector format.
// Leading identity entries (perm[i] == i) are stripped and the remaining mappings
// are rebased by the number of stripped entries before being handed to ACL.
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm)
{
    arm_compute::PermutationVector aclPerm;

    // Find the first index that does not map to itself.
    unsigned int start = 0;
    while ((start < perm.GetSize()) && (start == perm[start]))
    {
        ++start;
    }

    // Copy the remainder, rebasing both the position and the mapped value by 'start'.
    for (unsigned int i = start; i < perm.GetSize(); ++i)
    {
        aclPerm.set(i - start, perm[i] - start);
    }
    return aclPerm;
}
369 
370 arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm)
371 {
372  // As ArmNN indexes are left to right and ACL indexes are right to left,
373  // the permutation vector has to be reversed and then translated into ACL axis.
374  // i.e. {1, 0, 2, 3} --> {3, 2, 0, 1} --> {0, 1, 3, 2}
375 
376  // Below an example of how the ArmNN and ACL index format work:
377  // ArmNN Format:
378  // Input Shape {1, 10, 20, 30}
379  // Permutation Vector {1, 0, 2, 3}
380  // Output Shape {10, 1, 20, 30}
381  // dim "1" of input goes into index 0 of the output ([ 10, X, X, X])
382  // dim "0" of input goes into index 1 of the output ([ 10, 1, X, X ])
383  // dim "2" of input goes into index 2 of the output ([ 10, 1, 20, X ])
384  // dim "3" of input goes into index 3 of the output ([ 10, 1, 20, 30 ])
385  // ACL Format:
386  // Input Shape {30, 20, 10, 1}
387  // Permutation Vector {0, 1, 3, 2}
388  // Output Shape {30, 20, 1, 10}
389  // dim "0" of input goes into index 0 of the output ([ 30, X, X, X])
390  // dim "1" of input goes into index 1 of the output ([ 30, 20, X, X ])
391  // dim "3" of input goes into index 2 of the output ([ 30, 20, 1, X ])
392  // dim "2" of input goes into index 3 of the output ([ 30, 20, 1, 10 ])
393 
394  arm_compute::PermutationVector aclPerm;
395  auto rank = perm.GetSize();
396 
397  // Reverse the order. i.e. {1, 0, 2, 3} --> {3, 2, 0, 1}
398  std::vector<unsigned int> reversedPerm;
399  reversedPerm.reserve(rank);
400  for (unsigned int i = rank; i > 0; --i)
401  {
402  reversedPerm.push_back(perm[i-1]);
403  }
404 
405  // Translate from Arm NN axis to ACL axis. i.e. {3, 2, 0, 1} --> {0, 1, 3, 2}
406  for (unsigned int i = 0; i < rank; ++i)
407  {
408  auto aclAxis = rank - 1 - reversedPerm[i];
409  aclPerm.set(i, aclAxis);
410  }
411  return aclPerm;
412 }
413 
414 arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height)
415 {
416  return arm_compute::Size2D(width, height);
417 }
418 
419 arm_compute::PixelValue GetPixelValue(const arm_compute::ITensorInfo* tensorInfo, float value)
420 {
421  switch (tensorInfo->data_type())
422  {
423  case arm_compute::DataType::F16:
424  {
425  arm_compute::PixelValue pixelValue = arm_compute::PixelValue(static_cast<Half>(value));
426  if (isinf(pixelValue.get<Half>())) {
427  throw InvalidArgumentException("Under/Overflow converting float value [" + std::to_string(value) +
428  "] to fp16: [" + std::to_string(pixelValue.get<Half>()) + "]");
429  }
430  return pixelValue;
431  }
432  case arm_compute::DataType::F32:
433  return arm_compute::PixelValue(value);
434  case arm_compute::DataType::QASYMM8:
435  return arm_compute::PixelValue(static_cast<uint8_t>(value));
436  case arm_compute::DataType::QSYMM16:
437  return arm_compute::PixelValue(static_cast<int16_t>(value));
438  case arm_compute::DataType::QSYMM8:
439  case arm_compute::DataType::QASYMM8_SIGNED:
440  case arm_compute::DataType::QSYMM8_PER_CHANNEL:
441  return arm_compute::PixelValue(static_cast<int8_t>(value));
442  case arm_compute::DataType::S32:
443  return arm_compute::PixelValue(static_cast<int32_t>(value));
444  default:
445  throw InvalidArgumentException("Unsupported DataType: [" +
446  std::to_string(static_cast<int>(tensorInfo->data_type())) + "]");
447  }
448 }
449 
450 unsigned int ComputeDepthwiseConv2dDepthMultiplier(armnn::DataLayout layout,
451  const arm_compute::TensorShape& weightsShape,
452  const arm_compute::TensorShape& inputShape)
453 {
454  unsigned int depthMultiplier;
455  if (layout == armnn::DataLayout::NHWC)
456  {
457  depthMultiplier = static_cast<uint32_t>(weightsShape[0]) / static_cast<uint32_t>(inputShape[0]);
458  }
459  else if (layout == armnn::DataLayout::NCHW)
460  {
461  depthMultiplier = static_cast<uint32_t>(weightsShape[2]) / static_cast<uint32_t>(inputShape[2]);
462  }
463  else
464  {
465  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
466  GetDataLayoutName(layout)));
467  }
468  return depthMultiplier;
469 }
470 
471 arm_compute::ScatterInfo BuildArmComputeScatterInfo(const ScatterNdDescriptor& descriptor)
472 {
473  arm_compute::ScatterFunction scatterFunction;
474  switch(descriptor.m_Function)
475  {
477  scatterFunction = arm_compute::ScatterFunction::Update;
478  break;
480  scatterFunction = arm_compute::ScatterFunction::Add;
481  break;
483  scatterFunction = arm_compute::ScatterFunction::Sub;
484  break;
486  scatterFunction = arm_compute::ScatterFunction::Max;
487  break;
489  scatterFunction = arm_compute::ScatterFunction::Min;
490  break;
491  default: throw InvalidArgumentException("Unknown ArmNN::ScatterNd Function: [" +
492  std::to_string(static_cast<int>(descriptor.m_Function)) + "]");
493  }
494 
495  return arm_compute::ScatterInfo(scatterFunction, !descriptor.m_InputEnabled);
496 }
497 } // namespace armcomputetensorutils
498 } // namespace armnn
SizeType GetSize() const
Definition: Types.hpp:359
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
std::vector< float > GetQuantizationScales() const
Definition: Tensor.cpp:451
DataType GetDataType() const
Definition: Tensor.hpp:200
bool HasMultipleQuantizationScales() const
Definition: Tensor.hpp:203
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
const armnnSerializer::Pooling3dDescriptor * Pooling3dDescriptor
const armnnSerializer::Pooling2dDescriptor * Pooling2dDescriptor
Copyright (c) 2021 ARM Limited and Contributors.
half_float::half Half
Definition: Half.hpp:22
@ Exclude
The padding fields don't count and are ignored.
arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding rounding)
arm_compute::NormType ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType)
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
DataLayout
Definition: Types.hpp:63
arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm)
DataType
Definition: Types.hpp:49
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:254