ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
NeonBatchMatMulWorkload.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
#include "NeonBatchMatMulWorkload.hpp"

#include "NeonWorkloadUtils.hpp"

#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/TensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>

#include <arm_compute/function_info/MatMulInfo.h>
17 namespace armnn
18 {
20  const TensorInfo& inputInfoY,
21  const TensorInfo& outputInfo,
22  const BatchMatMulDescriptor& descriptor,
23  const bool isFastMathEnabled,
24  const ActivationDescriptor* activationDescriptor)
25 {
26  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
27  {
28  throw Exception("Support for adjoint not implemented.");
29  }
31  {
32  throw Exception("Only supported the MatMul in the last 2 dimensions");
33  }
34 
35  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
36  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
37  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
38 
39  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
40  aclInputInfoX.set_are_values_constant(false);
41  aclInputInfoY.set_are_values_constant(false);
42 
43  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
44  activationDescriptor);
45 
46  arm_compute::MatMulInfo matMulInfo;
47  matMulInfo.adj_lhs(descriptor.m_TransposeX);
48  matMulInfo.adj_rhs(descriptor.m_TransposeY);
49 
50  arm_compute::CpuMatMulSettings settings;
51  settings.fast_math(isFastMathEnabled);
52 
53  return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
54  activationInfo);
55 }
56 
58  const WorkloadInfo& info,
59  const bool isFastMathEnabled)
61 {
62  if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
63  {
64  throw Exception("Support for adjoint not implemented.");
65  }
68  {
69  throw Exception("Only supported the MatMul in the last 2 dimensions");
70  }
71 
72  m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
73 
74  arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
75  arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
76  arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
77 
78  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
79  inputX.info()->set_are_values_constant(false);
80  inputY.info()->set_are_values_constant(false);
81 
82  const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
83 
84  arm_compute::MatMulInfo matMulInfo;
85  matMulInfo.adj_lhs(descriptor.m_Parameters.m_TransposeX);
86  matMulInfo.adj_rhs(descriptor.m_Parameters.m_TransposeY);
87 
88  arm_compute::CpuMatMulSettings settings;
89  settings.fast_math(isFastMathEnabled);
90 
91  m_MatMulLayer.configure(&inputX, &inputY, &output, matMulInfo, settings, activationInfo);
92 
93  // Report Profiling Details
94  WorkloadInfo detailsInfo;
95  detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
96  detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
97  ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
98  descriptor.m_Parameters,
99  detailsInfo,
100  GetGuid());
101 }
102 
104 {
105  ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonBatchMatMulWorkload_Execute");
106  m_MatMulLayer.run();
107 }
108 } //namespace armnn
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID(label)
Creates a profiling event that uses GetGuid() and GetName() from the calling class.
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:52
QueueDescriptor m_Data
Definition: Workload.hpp:74
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:47
NeonBatchMatMulWorkload(const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info, const bool isFastMathEnabled)
virtual void Execute() const override
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status NeonBatchMatMulValidate(const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Status
enumeration
Definition: Types.hpp:43
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:37
A BatchMatMulDescriptor for the BatchMatMul operator.
bool m_AdjointX
Adjoint the slices of each input tensor Transpose and Adjoint can not both be set to true for the sam...
bool m_TransposeX
Transpose the slices of each input tensor Transpose and Adjoint can not both be set to true for the s...
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout)
std::vector< ITensorHandle * > m_Inputs
std::vector< ITensorHandle * > m_Outputs
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
Contains information about TensorInfos of a layer.
std::vector< TensorInfo > m_OutputTensorInfos
std::vector< TensorInfo > m_InputTensorInfos