ArmNN
 24.08
GpuFsaDepthwiseConvolution2d.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
#include "GpuFsaDepthwiseConvolution2d.hpp"
#include "UtilsGpuFsa.hpp"

#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <backendsCommon/WorkloadUtils.hpp>

#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>

#include <vector>
19 
20 using namespace arm_compute::experimental::dynamic_fusion;
21 using namespace armnn::armcomputetensorutils;
22 
23 namespace armnn
24 {
25 
27  const DepthwiseConvolution2dDescriptor& descriptor,
28  const TensorInfo& weights,
29  const Optional<TensorInfo>& biases)
30 {
31  // Create a new workload sketch, for validation purposes
32  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
33  auto workloadContext = GpuWorkloadContext(&compileCtx);
34  GpuWorkloadSketch sketch{ &workloadContext };
35 
36  // Build and create tensor infos using the sketch
37  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
38 
39  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40  //
41  // ACL format for weights for depthwise is:
42  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43  // - [1, C, H, W] for [N, C, H, W] input/output layout
44  //
45  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47  // so we do the permute here for the TensorInfo weights.
48  unsigned int aclDepthMultiplier;
49  TensorInfo weightsPermuted;
50  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
51  auto weightsShape = weightsPermuted.GetShape();
52  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
53 
54  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
55  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
56 
57  auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
58  auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
59 
60  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
61  arm_compute::TensorInfo aclBiasInfo;
62  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
63 
64  if (descriptor.m_BiasEnabled)
65  {
66  if(!biases.has_value())
67  {
69  "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
70  }
71  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
72  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
73 
74  biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
75  }
76 
77  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
78 
79  // Validate operator, check status and update reasonIfUnsupported
80  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
81  inputInfo,
82  weightInfo,
83  biasSketchInfoPtr,
84  depthwiseConv2dAttributes);
85 
86  return aclStatus;
87 }
88 
90  const TensorInfo& input,
91  const DepthwiseConvolution2dDescriptor& descriptor,
92  const TensorInfo& weights,
93  const Optional<TensorInfo>& biases)
94 {
95 /*
96 * Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
97 * in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
98 * GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
99 * Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
100 * using a single sketch.
101 * The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
102 * They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
103 * as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
104 * doesn't know which Tensors to use.
105 */
106  GpuWorkloadSketch* sketch = blob->sketch.get();
107  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
108  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
109  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
110 
111  // Build and create tensor infos using the sketch
112  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
113 
114  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
115  //
116  // ACL format for weights for depthwise is:
117  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
118  // - [1, C, H, W] for [N, C, H, W] input/output layout
119  //
120  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
121  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
122  // so we do the permute here for the TensorInfo weights.
123  unsigned int aclDepthMultiplier;
124  TensorInfo weightsPermuted;
125  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
126  auto weightsShape = weightsPermuted.GetShape();
127  weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
128 
129  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
130  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
131 
132  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
133  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
134 
135  // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
136  arm_compute::TensorInfo aclBiasInfo;
137  arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
138 
139  if (descriptor.m_BiasEnabled)
140  {
141  if(!biases.has_value())
142  {
143  throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
144  }
145  aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
146  aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
147 
148  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
149  biasSketchInfoPtr = inputTensorInfos[2];
150  }
151 
152  DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
153 
154  // Validate operator, check status and update reasonIfUnsupported
155  arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
156  inputTensorInfos[0],
157  inputTensorInfos[1],
158  biasSketchInfoPtr,
159  depthwiseConv2dAttributes);
160 
161  const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
162  if (!supported)
163  {
165  "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
166  }
167 
168  // Create the Op within the Sketch using the TensorInfos we have stored
169  arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
170  inputTensorInfos[0],
171  inputTensorInfos[1],
172  biasSketchInfoPtr,
173  depthwiseConv2dAttributes);
174 
175  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
176  GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
177 
178  // Store the TensorInfos within the blob as unique_ptrs to be used later
179  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
180  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
181 }
182 
183 } // namespace armnn
armnn::Convert1HWOTensorInfoToAcl
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function converts a TensorInfo...
Definition: WorkloadUtils.cpp:177
armnn::Optional
Definition: Optional.hpp:270
armnn::DepthwiseConvolution2dDescriptor::m_BiasEnabled
bool m_BiasEnabled
Enable/disable bias.
Definition: Descriptors.hpp:708
WorkloadUtils.hpp
armnn::DepthwiseConvolution2dDescriptor::m_DataLayout
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
Definition: Descriptors.hpp:710
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::TensorInfo::IsConstant
bool IsConstant() const
Definition: Tensor.cpp:513
armnn::GpuFsaPreCompiledBlob::inputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
Definition: GpuFsaBackend.hpp:37
CreateDWConv2dAttributes
arm_compute::experimental::dynamic_fusion::DepthwiseConv2dAttributes CreateDWConv2dAttributes(const DepthwiseConvolution2dDescriptor &descriptor, const unsigned int aclDepthMultiplier)
Utility function used to setup an arm_compute::DepthwiseConv2dAttributes object from given descriptor...
Definition: UtilsGpuFsa.cpp:29
armnn::GpuFsaPreCompiledBlob::sketch
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
Definition: GpuFsaBackend.hpp:34
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::GpuFsaDepthwiseConvolution2dCreateOp
void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
Definition: GpuFsaDepthwiseConvolution2d.cpp:89
GpuFsaDepthwiseConvolution2d.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
ArmComputeTensorUtils.hpp
armnn::GpuFsaPreCompiledBlob::workloadContext
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Definition: GpuFsaBackend.hpp:35
armnn::GpuFsaPreCompiledBlob
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
Definition: GpuFsaBackend.hpp:32
UtilsGpuFsa.hpp
armnn::GpuFsaDepthwiseConvolution2dValidate
arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo &input, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
Definition: GpuFsaDepthwiseConvolution2d.cpp:26
armnn::DepthwiseConvolution2dDescriptor
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
Definition: Descriptors.hpp:659
armnn::GpuFsaPreCompiledBlob::outputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos
Definition: GpuFsaBackend.hpp:38
armnn::OptionalReferenceSwitch< std::is_reference< T >::value, T >::value
const T & value() const
Definition: Optional.hpp:146
armnn::OptionalBase::has_value
bool has_value() const noexcept
Definition: Optional.hpp:53