ArmNN
 24.08
GpuFsaBatchMatMul.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "GpuFsaBatchMatMul.hpp"
7 #include "UtilsGpuFsa.hpp"
8 
10 
11 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
12 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
13 #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h>
14 #include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
15 
16 using namespace arm_compute::experimental::dynamic_fusion;
17 using namespace armnn::armcomputetensorutils;
18 
19 namespace armnn
20 {
21 
23  const TensorInfo& input1,
24  const BatchMatMulDescriptor& descriptor)
25 {
26  // Create a new workload sketch, for validation purposes
27  auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
28  auto workloadContext = GpuWorkloadContext(&compileCtx);
29  GpuWorkloadSketch sketch{ &workloadContext };
30 
31  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
32  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
33 
34  aclInput0Info.set_are_values_constant(input0.IsConstant());
35  aclInput1Info.set_are_values_constant(input1.IsConstant());
36 
37  arm_compute::ITensorInfo* inputInfo0 = workloadContext.create_tensor_info(aclInput0Info);
38  arm_compute::ITensorInfo* inputInfo1 = workloadContext.create_tensor_info(aclInput1Info);
39 
40  MatMulAttributes matMulAttributes{};
41  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
42  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
43  GpuMatMulSettings matmulSettings{};
44  matmulSettings.m0(1);
45  matmulSettings.n0(1);
46  matmulSettings.k0(1);
47 
48  return GpuMatMul::validate_op(sketch, inputInfo0, inputInfo1, matMulAttributes, matmulSettings);
49 }
50 
52  const TensorInfo& input0,
53  const TensorInfo& input1,
54  const BatchMatMulDescriptor& descriptor)
55 {
56  GpuWorkloadSketch* sketch = blob->sketch.get();
57  GpuWorkloadContext* workloadContext = blob->workloadContext.get();
58  std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
59  std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
60 
61  arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0, input0.GetNumDimensions());
62  arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1, input1.GetNumDimensions());
63 
64  aclInput0Info.set_are_values_constant(input0.IsConstant());
65  aclInput1Info.set_are_values_constant(input1.IsConstant());
66 
67  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput0Info));
68  inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInput1Info));
69 
70  MatMulAttributes matMulAttributes{};
71  matMulAttributes.adj_lhs(descriptor.m_TransposeX);
72  matMulAttributes.adj_rhs(descriptor.m_TransposeY);
73  GpuMatMulSettings matmulSettings{};
74  matmulSettings.m0(1);
75  matmulSettings.n0(1);
76  matmulSettings.k0(1);
77 
78  // Validate operator, check status and update reasonIfUnsupported
79  arm_compute::Status aclStatus = GpuMatMul::validate_op(*sketch,
80  inputTensorInfos[0],
81  inputTensorInfos[1],
82  matMulAttributes,
83  matmulSettings);
84 
85  const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
86  if (!supported)
87  {
88  throw BackendCapabilityException("\"GpuFsa\" backend failed during elementwise binary add validation");
89  }
90 
91  arm_compute::ITensorInfo* addOutputInfo = GpuMatMul::create_op(*sketch,
92  inputTensorInfos[0],
93  inputTensorInfos[1],
94  matMulAttributes,
95  matmulSettings);
96 
97  // Temporary fix until fusing attempt is make for GpuFsa backend and Output layer workload is created.
98  outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
99  GpuOutput::create_op(*sketch, addOutputInfo, outputTensorInfos[0]);
100 
101  // Store the TensorInfos within the blob as unique_ptrs to be used later
102  blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
103  blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
104 }
105 
106 } // namespace armnn
armnn::BatchMatMulDescriptor::m_TransposeX
bool m_TransposeX
Transpose the slices of each input tensor. Transpose and Adjoint can not both be set to true for the same tensor at the same time.
Definition: Descriptors.hpp:1612
armnn::GpuFsaBatchMatMulCreateOp
void GpuFsaBatchMatMulCreateOp(GpuFsaPreCompiledBlob *blob, const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
Definition: GpuFsaBatchMatMul.cpp:51
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::TensorInfo::GetNumDimensions
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
armnn::TensorInfo::IsConstant
bool IsConstant() const
Definition: Tensor.cpp:513
armnn::GpuFsaPreCompiledBlob::inputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
Definition: GpuFsaBackend.hpp:37
armnn::BatchMatMulDescriptor::m_TransposeY
bool m_TransposeY
Definition: Descriptors.hpp:1613
armnn::GpuFsaPreCompiledBlob::sketch
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
Definition: GpuFsaBackend.hpp:34
armnn::BatchMatMulDescriptor
A BatchMatMulDescriptor for the BatchMatMul operator.
Definition: Descriptors.hpp:1584
armnn::Status
Status
Definition: Types.hpp:42
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
ArmComputeTensorUtils.hpp
GpuFsaBatchMatMul.hpp
armnn::GpuFsaPreCompiledBlob::workloadContext
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Definition: GpuFsaBackend.hpp:35
armnn::GpuFsaPreCompiledBlob
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
Definition: GpuFsaBackend.hpp:32
UtilsGpuFsa.hpp
armnn::GpuFsaPreCompiledBlob::outputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos
Definition: GpuFsaBackend.hpp:38
armnn::GpuFsaBatchMatMulValidate
arm_compute::Status GpuFsaBatchMatMulValidate(const TensorInfo &input0, const TensorInfo &input1, const BatchMatMulDescriptor &descriptor)
Definition: GpuFsaBatchMatMul.cpp:22