ArmNN 25.11
WorkloadUtils.hpp
//
// Copyright © 2017-2024 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/Tensor.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

namespace armnn
{
namespace
{

template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}

} // anonymous namespace
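
// Illustration: AssignValues writes array entries into the argument pack in reverse order.
// For a 4-D NHWC shape {N, H, W, C}, the call
//
//     AssignValues(shape.GetNumDimensions(), 0, shape, channels, width, height, batches, depth);
//
// yields channels = C, width = W, height = H, batches = N, while depth keeps its initial value,
// because only the first four arguments receive an index below GetNumDimensions().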

template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions
    // of the tensor as if NHWC, however this routine works with any 5D tensor
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides      = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    auto srcSize                = srcTensor->GetStrides()[0] * srcShape[0];
    TensorShape dstStrides      = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];

    size_t srcDepth    = 1;
    size_t srcBatches  = 1;
    size_t srcHeight   = 1;
    size_t srcWidth    = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride   = 0;
    size_t srcBatchStride   = 0;
    size_t srcHeightStride  = 0;
    size_t srcWidthStride   = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth    = 1;
    size_t dstBatches  = 1;
    size_t dstHeight   = 1;
    size_t dstWidth    = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride   = 0;
    size_t dstBatchStride   = 0;
    size_t dstHeightStride  = 0;
    size_t dstWidthStride   = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }
    if (srcDataStart == nullptr)
    {
        throw MemoryValidationException("The source tensor is null.");
    }
    if (dstDataStart == nullptr)
    {
        throw MemoryValidationException("The destination tensor is null.");
    }

    size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth   = std::min(srcWidth, dstWidth);
    size_t copyHeight  = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth   = std::min(srcDepth, dstDepth);

    // Edge case fix for DTS 1.In1D_Int32End & 5.In1D_Int32End; this was down to how ACL handled 1D tensors.
    if (copyLength != srcSize &&
        srcSize != dstSize &&
        srcWidthStride == copyLength &&
        srcWidthStride == dstSize)
    {
        srcSize = dstSize;
        srcWidthStride = dstWidthStride;
    }

    // Coalesce inner dimensions where possible
    // to reduce the overhead of calling copy() and to
    // allow for memory bandwidth optimisations
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between batches,
            // and sizes are compatible, so copy whole batches
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }
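    // When both tensors are densely packed (width and height strides match the coalesced copy
    // length for source and destination), both folds above apply: the width and height loops
    // below collapse to a single iteration and each batch is transferred with one copy() call.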

    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    // Sanity check the memory area we've been asked to copy from and to.
                    if (copyLength > srcSize)
                    {
                        throw MemoryValidationException(
                            "The source tensor size does not match the size of the allocated tensor.");
                    }
                    if (copyLength > dstSize)
                    {
                        throw MemoryValidationException(
                            "The destination tensor size will overrun the destination tensor.");
                    }
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}
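
// Typical usage (a minimal sketch; srcTensorHandle and dstTensorHandle are placeholder
// ITensorHandle pointers): CopyFunc is just a callable with a memcpy-like signature, so a plain
// memcpy lambda (from <cstring>) is enough when both handles map to CPU-accessible memory.
//
//     auto copyFunc = [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); };
//     CopyTensorContentsGeneric(srcTensorHandle, dstTensorHandle, copyFunc);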

template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}
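
// Usage sketch (illustrative): any queue descriptor with matching m_Inputs/m_Outputs vectors can
// be used, e.g. a MemCopyQueueDescriptor; the handle types are deduced from the vector element
// type, so the descriptor's ITensorHandle pointers are downcast to that type.
//
//     std::vector<std::pair<ITensorHandle*, ITensorHandle*>> pairs;
//     GatherTensorHandlePairs(memCopyQueueDescriptor, pairs);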

int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);
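
// Note (an assumption based on ACL's reversed dimension ordering, not stated in this header):
// the mask bits are mirrored across the numDim dimensions, e.g. for numDim = 4 an ArmNN mask of
// 0b0011 would map to 0b1100 on the ACL side.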

armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

/// Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at
/// [1,H,W,I*M] (if NHWC), as required by the compute library.
/// Returns a tuple of the converted weights tensor info and the depth multiplier.
std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
                                                                const TensorInfo& inputInfo,
                                                                const DataLayout dataLayout);
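
// Worked example (illustrative): for an input of shape [N, H, W, I] and weights of shape
// [1, H, W, I*M], the returned TensorInfo describes [1, I*M, H, W] under NCHW and stays
// [1, H, W, I*M] under NHWC; the returned depth multiplier is M, i.e. the weight channel
// count I*M divided by the input channel count I.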

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

/// Weights for depthwise convolution have a data layout of [1,H,W,O] = [1,H,W,I*M].
/// This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
/// keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.
///
/// \param weightTensor - ConstTensorHandle of weights tensor
/// \param inputInfo - TensorInfo of input tensor
/// \param dataLayout - DataLayout of the input tensor
/// \param permuteBuffer - Pointer to memory with the size of tensor. Used for the permutation
/// \return tuple of transformed weights-ConstTensor and depthwise multiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
                                                             const TensorInfo& inputInfo,
                                                             const DataLayout dataLayout,
                                                             void* permuteBuffer);

/// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]
///
/// \param weightTensor - ConstTensorHandle of the weight tensor that should be converted
/// \param inputInfo - TensorInfo of the corresponding input tensor
/// \param dataLayout - DataLayout of the input tensor e.g. NHWC or NCHW
/// \param permuteBuffer - Memory location with the same size as the weight tensor to write converted data to
/// \return - A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier
std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
                                                        const TensorInfo& inputInfo,
                                                        const DataLayout& dataLayout,
                                                        void* permuteBuffer);

/// Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
///
/// \param inputInfo0 - TensorInfo of the corresponding input tensor: params
/// \param inputInfo1 - TensorInfo of the corresponding input tensor: indices
/// \return - A map with names and values for N, ND, K, W, C
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);

/// Generates a permutation vector of size rank that permutes the two rightmost dimensions
///
/// \param rank - Tensor rank, i.e. number of dimensions in the tensors
/// \return - A permutation vector that permutes the two last dimensions
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank);

/// Calculates the axis values for the split operation.
///
/// \param desc - Splitter Descriptor
/// \param input - Input tensor shape
/// \return - A set containing the axis values of the splitter operation
std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input);

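// Illustrative reading (inferred from the description above, not spelled out in this header):
// the returned set contains the dimensions along which the views differ from the input shape.
// For example, splitting a [1, 4, 4, 6] input into two [1, 4, 4, 3] views would yield {3}.
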
} // namespace armnn