ArmNN
 25.11
Loading...
Searching...
No Matches
NeonBatchMatMulWorkload.cpp
Go to the documentation of this file.
1//
2// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
7
9
12
14
15#include <arm_compute/function_info/MatMulInfo.h>
16
17namespace armnn
18{
19arm_compute::Status NeonBatchMatMulValidate(const TensorInfo& inputInfoX,
20 const TensorInfo& inputInfoY,
21 const TensorInfo& outputInfo,
22 const BatchMatMulDescriptor& descriptor,
23 const bool isFastMathEnabled,
24 const ActivationDescriptor* activationDescriptor)
25{
26 if (descriptor.m_AdjointX || descriptor.m_AdjointY )
27 {
28 throw Exception("Support for adjoint not implemented.");
29 }
31 {
32 throw Exception("Only supported the MatMul in the last 2 dimensions");
33 }
34
35 arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
36 arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
37 arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
38
39 // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
40 aclInputInfoX.set_are_values_constant(false);
41 aclInputInfoY.set_are_values_constant(false);
42
43 const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
44 activationDescriptor);
45
46 arm_compute::MatMulInfo matMulInfo;
47 matMulInfo.adj_lhs(descriptor.m_TransposeX);
48 matMulInfo.adj_rhs(descriptor.m_TransposeY);
49
50 arm_compute::CpuMatMulSettings settings;
51 settings.fast_math(isFastMathEnabled);
52
53 return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings,
54 activationInfo);
55}
56
58 const WorkloadInfo& info,
59 const bool isFastMathEnabled)
61{
62 if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
63 {
64 throw Exception("Support for adjoint not implemented.");
65 }
68 {
69 throw Exception("Only supported the MatMul in the last 2 dimensions");
70 }
71
72 m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
73
74 arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
75 arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
76 arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
77
78 // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
79 inputX.info()->set_are_values_constant(false);
80 inputY.info()->set_are_values_constant(false);
81
82 const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
83
84 arm_compute::MatMulInfo matMulInfo;
85 matMulInfo.adj_lhs(descriptor.m_Parameters.m_TransposeX);
86 matMulInfo.adj_rhs(descriptor.m_Parameters.m_TransposeY);
87
88 arm_compute::CpuMatMulSettings settings;
89 settings.fast_math(isFastMathEnabled);
90
91 m_MatMulLayer.configure(&inputX, &inputY, &output, matMulInfo, settings, activationInfo);
92
93 // Report Profiling Details
94 WorkloadInfo detailsInfo;
95 detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
96 detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
97 ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
98 descriptor.m_Parameters,
99 detailsInfo,
100 GetGuid());
101}
102
104{
105 ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonBatchMatMulWorkload_Execute");
106 m_MatMulLayer.run();
107}
108} //namespace armnn
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID(label)
Creates a profiling event that uses GetGuid() and GetName() from the calling class.
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
arm::pipe::ProfilingGuid GetGuid() const final
Definition Workload.hpp:52
Base class for all ArmNN exceptions so that users can filter to just those.
NeonBaseWorkload(const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
NeonBatchMatMulWorkload(const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info, const bool isFastMathEnabled)
virtual void Execute() const override
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status NeonBatchMatMulValidate(const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
DestType PolymorphicDowncast(SourceType *value)
Polymorphic downcast for build in pointers only.
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
An ActivationDescriptor for the ActivationLayer.
A BatchMatMulDescriptor for the BatchMatMul operator.
bool m_AdjointX
Adjoint the slices of each input tensor. Transpose and Adjoint cannot both be set to true for the same tensor.
bool m_TransposeX
Transpose the slices of each input tensor. Transpose and Adjoint cannot both be set to true for the same tensor.
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout)
Contains information about TensorInfos of a layer.
std::vector< TensorInfo > m_InputTensorInfos