ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WorkloadUtils.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017-2024 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
10 #include <armnn/Tensor.hpp>
12 #include <armnnUtils/Permute.hpp>
13 
14 #include <Half.hpp>
15 #include <Profiling.hpp>
16 
17 
18 namespace armnn
19 {
namespace
{

// Writes array[(num - 1) - idx] into arg (i.e. consumes the array from its
// back towards its front) and advances idx. Once idx has walked past the
// number of valid entries the call is a no-op, leaving arg at its default.
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx < num)
    {
        const unsigned int reversedIndex = (num - 1) - idx;
        arg = array[reversedIndex];
        ++idx;
    }
}

// Unpacks up to num values of array into the given assignees, assigning the
// last array element to the first assignee, the second-to-last to the second,
// and so on. Assignees beyond num keep their initial values.
template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);
    (AssignValues(num, idx, array, args), ...);
}

} // anonymous namespace
44 
45 template <typename CopyFunc>
46 void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
47 {
48  // For ease of understanding, names are assigned to the dimensions
49  // of the tensor as if NHWC, however this routine works with any 5D tensor
50  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52  TensorShape srcStrides = srcTensor->GetStrides();
53  const TensorShape& srcShape = srcTensor->GetShape();
54  auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
55  TensorShape dstStrides = dstTensor->GetStrides();
56  const TensorShape& dstShape = dstTensor->GetShape();
57  const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
58 
59  size_t srcDepth = 1;
60  size_t srcBatches = 1;
61  size_t srcHeight = 1;
62  size_t srcWidth = 1;
63  size_t srcChannels = 1;
64  AssignValues(srcShape.GetNumDimensions(),
65  0,
66  srcShape,
67  srcChannels,
68  srcWidth,
69  srcHeight,
70  srcBatches,
71  srcDepth);
72 
73  size_t srcDepthStride = 0;
74  size_t srcBatchStride = 0;
75  size_t srcHeightStride = 0;
76  size_t srcWidthStride = 0;
77  size_t srcChannelStride = 0;
78  AssignValues(srcStrides.GetNumDimensions(),
79  0,
80  srcStrides,
81  srcChannelStride,
82  srcWidthStride,
83  srcHeightStride,
84  srcBatchStride,
85  srcDepthStride);
86 
87  size_t dstDepth = 1;
88  size_t dstBatches = 1;
89  size_t dstHeight = 1;
90  size_t dstWidth = 1;
91  size_t dstChannels = 1;
92  AssignValues(dstShape.GetNumDimensions(),
93  0,
94  dstShape,
95  dstChannels,
96  dstWidth,
97  dstHeight,
98  dstBatches,
99  dstDepth);
100 
101  size_t dstDepthStride = 0;
102  size_t dstBatchStride = 0;
103  size_t dstHeightStride = 0;
104  size_t dstWidthStride = 0;
105  size_t dstChannelStride = 0;
106  AssignValues(dstStrides.GetNumDimensions(),
107  0,
108  dstStrides,
109  dstChannelStride,
110  dstWidthStride,
111  dstHeightStride,
112  dstBatchStride,
113  dstDepthStride);
114 
115  const unsigned char* srcDataStart;
116  unsigned char* dstDataStart;
117  {
118  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
119  srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
120  dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
121  }
122  if (srcDataStart == nullptr)
123  {
124  throw MemoryValidationException("The source tensor is null.");
125  }
126  if (dstDataStart == nullptr)
127  {
128  throw MemoryValidationException("The destination tensor is null.");
129  }
130 
131  size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
132  size_t copyWidth = std::min(srcWidth, dstWidth);
133  size_t copyHeight = std::min(srcHeight, dstHeight);
134  size_t copyBatches = std::min(srcBatches, dstBatches);
135  size_t copyDepth = std::min(srcDepth, dstDepth);
136 
137  // Edge case fix for DTS 1.In1D_Int32End & 5.In1D_Int32End, This was down to how ACL handled 1D tensors.
138  if(copyLength != srcSize &&
139  srcSize != dstSize &&
140  srcWidthStride == copyLength &&
141  srcWidthStride == dstSize)
142  {
143  srcSize = dstSize;
144  srcWidthStride = dstWidthStride;
145  }
146 
147  // Coalesce inner dimensions where possible
148  // to reduce overheard calling copy() and to
149  // allow for memory bandwidth optimisations
150  if (copyLength == srcWidthStride &&
151  copyLength == dstWidthStride)
152  {
153  // There is no special padding between rows,
154  // and sizes are compatible, so copy whole rows
155  copyLength *= copyWidth;
156  copyWidth = 1;
157 
158  if (copyLength == srcHeightStride &&
159  copyLength == dstHeightStride)
160  {
161  // There is no special padding between batches
162  // and sizes are compatible so copy whole batches
163  copyLength *= copyHeight;
164  copyHeight = 1;
165  }
166  }
167 
168  const unsigned char* srcData = srcDataStart;
169  unsigned char* dstData = dstDataStart;
170  for (unsigned int d = 0; d < copyDepth; ++d)
171  {
172  auto srcPtrDepth = srcData;
173  auto dstPtrDepth = dstData;
174  for (unsigned int b = 0; b < copyBatches; ++b)
175  {
176  auto srcPtrBatch = srcData;
177  auto dstPtrBatch = dstData;
178  for (unsigned int h = 0; h < copyHeight; ++h)
179  {
180  auto srcPtrChannel = srcData;
181  auto dstPtrChannel = dstData;
182  for (unsigned int w = 0; w < copyWidth; ++w)
183  {
184  // Sanity check the memory area we've been asked to copy from and to.
185  if (copyLength > srcSize)
186  {
188  "The source tensor size does not match the size of the allocated tensor.");
189  }
190  if (copyLength > dstSize)
191  {
193  "The destination tensor size will overrun the destination tensor.");
194  }
195  copy(dstData, srcData, copyLength);
196  dstData += dstWidthStride;
197  srcData += srcWidthStride;
198  }
199  dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
200  srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
201  }
202  dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
203  srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
204  }
205  dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
206  srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
207  }
208 
209  srcTensor->Unmap();
210  dstTensor->Unmap();
211 }
212 
213 template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
214 void GatherTensorHandlePairs(const DescriptorType& descriptor,
215  std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
216 {
217  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
218  tensorHandlePairs.reserve(numInputs);
219 
220  for (unsigned int i = 0; i < numInputs; ++i)
221  {
222  SrcTensorHandleType* const srcTensorHandle =
223  PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
224  DstTensorHandleType* const dstTensorHandle =
225  PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
226 
227  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
228  }
229 }
230 
231 int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);
232 
233 armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
234  const PermutationVector& permutationVector,
235  void* permuteBuffer);
236 
237 void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
238 
239 TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);
240 
241 /// Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M]
242 /// This function coverts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC)
243 /// as required by the compute library
244 /// Returns a tuple of converted weights tensor info and depth multiplier
245 std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
246  const TensorInfo& inputInfo,
247  const DataLayout dataLayout);
248 
249 armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
250  DataLayout dataLayout,
251  void* permuteBuffer);
252 
253 /// Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M]
254 /// This function coverts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
255 /// keeps it at [1,H,W,I*M] (if NHWC) as required by the compute library
256 ///
257 /// \param weightTensor - ConstTensorHandle of weights tensor
258 /// \param inputInfo - TensorInfo of input tensor
259 /// \param dataLayout - DataLayout of the input tensor
260 /// \param permuteBuffer - Pointer to memory with the size of tensor. Used for the permutation
261 /// \return tuple of transformed weights-ConstTensor and depthwise multiplier
262 std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
263  const TensorInfo& inputInfo,
264  const DataLayout dataLayout,
265  void* permuteBuffer);
266 
267 /// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]
268 ///
269 /// \param weightTensor - ConstTensorHandle of the weight tensor that should be converted
270 /// \param inputInfo - TensorInfo of the corresponding input tensor
271 /// \param dataLayout - DataLayout of the input tensor e.g. NHWC or NCHW
272 /// \param permuteBuffer - Memory location with the same size as the weight tensor to write converted data to
273 /// \return - A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier
274 std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
275  const TensorInfo& inputInfo,
276  const DataLayout& dataLayout,
277  void* permuteBuffer);
278 
279 /// Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
280 ///
281 /// \param inputInfo0 - TensorInfo of the corresponding input tensor: params
282 /// \param inputInfo1 - TensorInfo of the corresponding input tensor: indices
283 /// \return - A map with names and values for N, ND, K, W, C
284 std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);
285 
286 /// Generates a permutation vector of size rank that permutes the 2 most right dimensions
287 ///
288 /// \param rank - Tensor rank, i.e. number of dimensions in the tensors
289 /// \return - A permutation vector that permutes the 2 last dimensions
291 
292 /// Calculates the axis values for split operation.
293 ///
294 /// \param desc - Splitter Descriptor
295 /// \param input - Input tensor shape
296 /// \return - A set containing axis values of slitter operation
297  std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input);
298 
299 } //namespace armnn
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:330
virtual TensorShape GetStrides() const =0
Get the strides for each dimension ordered from largest to smallest where the smallest value is the s...
virtual void Unmap() const =0
Unmap the tensor data.
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest ite...
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
Copyright (c) 2021 ARM Limited and Contributors.
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
Generates a permutation vector of size rank that permutes the 2 most right dimensions.
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo &weightInfo, DataLayout dataLayout)
std::set< unsigned int > ComputeSplitAxis(const armnn::SplitterDescriptor &desc, const TensorShape &input)
Calculates the axis values for split operation.
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
void GatherTensorHandlePairs(const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType * >> &tensorHandlePairs)
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW(const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function converts a TensorInfo...
DataLayout
Definition: Types.hpp:63
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl(const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function converts a ConstCpuTe...
A ViewsDescriptor for the SplitterLayer.