Compute Library 23.11 - GEMMMatrixMultiplyNative.cpp
/*
 * Copyright (c) 2019-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/GEMMFixture.h"

namespace arm_compute
{
namespace test
{
namespace validation
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::opencl::kernels;

// Create function for ClGemmMatrixMultiplyNativeKernel
using CLGEMMMatrixMultiplyNative = CLSynthetizeOperator<ClGemmMatrixMultiplyNativeKernel>;

// Fixture for CLGEMMMatrixMultiplyNative
template <typename T>
using CLGEMMMatrixMultiplyNativeFixture = GEMMMatrixMultiplyNativeValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;

// Fixture for CLGEMMMatrixMultiplyNative3D
template <typename T>
using CLGEMMMatrixMultiplyNative3DFixture = GEMMMatrixMultiplyNative3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;

namespace
{
// *INDENT-OFF*
// clang-format off
RelativeTolerance<float> rel_tolerance_f32(0.001f);
constexpr float abs_tolerance_f32(0.0001f);

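// Illustrative sketch, not part of the original file: the framework's actual comparison lives in
// tests/validation/Validation.h. With a relative and an absolute tolerance, a target value is
// typically accepted when its error is within the absolute tolerance, or within the relative
// tolerance scaled by the reference magnitude. The hypothetical helper below approximates that
// rule; the 0.001f literal restates rel_tolerance_f32 so nothing is assumed about the
// RelativeTolerance interface.
constexpr bool example_value_is_close(float reference, float target)
{
    const float abs_err = (reference > target) ? (reference - target) : (target - reference);
    const float abs_ref = (reference < 0.f) ? -reference : reference;
    return (abs_err <= abs_tolerance_f32) || (abs_err <= 0.001f * abs_ref);
}
static_assert(example_value_is_close(100.0f, 100.05f), "an error of 0.05 is within 0.1% of 100");
static_assert(!example_value_is_close(100.0f, 101.0f), "an error of 1.0 exceeds both tolerances");
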
/** Alpha values to test - Precommit */
const auto a_values = framework::dataset::make("alpha", {1.0f, -0.75f} );

/** Beta values to test - Precommit */
const auto beta_values = framework::dataset::make("beta", {-0.75f, 0.0f} );

/** M values to test */
const auto m_values = framework::dataset::make("M", 37);

/** M_W values to test */
const auto m_w_values = framework::dataset::make("M_W", 5);

/** M_H values to test */
const auto m_h_values = framework::dataset::make("M_H", 7);

/** N values to test */
const auto n_values = framework::dataset::make("N", 51);

/** K values to test */
const auto k_values = framework::dataset::make("K", 23);

/** Batch size values to test */
const auto b_values = framework::dataset::make("batch_size", 1, 3);

/** Activation values to test */
const auto act_values = framework::dataset::make("Activation",
{
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
});

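// Illustrative sketch, not part of the original file: LU_BOUNDED_RELU with a = 8 and b = 2 clamps
// the GEMM output to the [b, a] interval, i.e. f(x) = min(a, max(b, x)), while ELU keeps positive
// values and maps negative ones to alpha * (exp(x) - 1). The hypothetical helper below spells out
// the clamping behaviour exercised by the first activation above.
constexpr float example_lu_bounded_relu(float x, float a = 8.f, float b = 2.f)
{
    return (x < b) ? b : ((x > a) ? a : x);
}
static_assert(example_lu_bounded_relu(-3.f) == 2.f, "values below b are clamped to b");
static_assert(example_lu_bounded_relu(10.f) == 8.f, "values above a are clamped to a");
static_assert(example_lu_bounded_relu(5.f) == 5.f, "values inside [b, a] pass through unchanged");
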
/** M0 values to test - Precommit */
const auto m0_values_precommit = framework::dataset::make("M0", { 4, 6 });

/** N0 values to test - Precommit */
const auto n0_values_precommit = framework::dataset::make("N0", { 4 });

/** K0 values to test - Precommit */
const auto k0_values_precommit = framework::dataset::make("K0", { 4 });

/** H0 values to test - Precommit */
const auto h0_values_precommit = framework::dataset::make("H0", 1, 3);

/** M0 values to test - Nightly */
const auto m0_values_nightly = framework::dataset::make("M0", 1, 8);

/** N0 values to test - Nightly */
const auto n0_values_nightly = framework::dataset::make("N0", { 2, 3, 4, 8 });

/** K0 values to test - Nightly */
const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });

/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );

/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
 * of M, M0, N and N0 values.
 * M0 and N0 are kept constant, while the different test cases need to vary M and N.
 *
 * E.g. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in the Y dimension and
 * partial blocks in the X dimension.
 */
const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                     // Large k to force potential out-of-bound reads on input0
                                     framework::dataset::make("K", 315),
                                     // Batch size == 1 to force potential out-of-bound reads on input0
                                     framework::dataset::make("batch_size", 1)),
                                     framework::dataset::make("M0", 4)),
                                     framework::dataset::make("N0", 4)),
                                     framework::dataset::make("K0", 4)),
                                     // Only need to test F32 as F16 shares identical boundary handling logic
                                     framework::dataset::make("DataType", DataType::F32)),
                                     framework::dataset::make("alpha", -0.75f )),
                                     framework::dataset::make("beta", -0.35f )),
                                     broadcast_bias_values),
                                     framework::dataset::make("Activation", ActivationLayerInfo()));

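// Illustrative sketch, not part of the original file: with the fixed M0 = 4 and N0 = 4 block sizes
// above, a GEMM dimension only produces full blocks when it is an exact multiple of its block size;
// otherwise the trailing block is partial and exercises the kernel's boundary handling code.
constexpr bool example_has_partial_block(unsigned int dim, unsigned int block_size)
{
    return (dim % block_size) != 0;
}
// E.g. M = 64, N = 33 with M0 = N0 = 4: only full blocks in Y, a partial block in X.
static_assert(!example_has_partial_block(64, 4), "M = 64 is covered by full 4-row blocks (Y dimension)");
static_assert(example_has_partial_block(33, 4), "N = 33 leaves a partial 4-column block (X dimension)");
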
/** Configuration test */
void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, bool broadcast_bias, DataType data_type, const ActivationLayerInfo &act_info)
{
    const unsigned int M = m_value;
    const unsigned int N = n_value;
    const unsigned int K = k_value;

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0 = m0_value;
    lhs_info.k0 = k0_value;

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0 = n0_value;
    rhs_info.k0 = k0_value;

    GEMMKernelInfo kernel_info;
    kernel_info.m               = M;
    kernel_info.n               = N;
    kernel_info.k               = K;
    kernel_info.broadcast_bias  = broadcast_bias;
    kernel_info.activation_info = act_info;

    const TensorShape lhs_shape(K, M, b_value);
    const TensorShape rhs_shape(N, K, b_value);
    const TensorShape bias_shape(N,
                                 broadcast_bias ? 1 : M,
                                 broadcast_bias ? 1 : b_value);
    const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, data_type),
                                                   TensorInfo(rhs_shape, 1, data_type),
                                                   kernel_info);

    // Create tensors
    CLTensor lhs  = create_tensor<CLTensor>(lhs_shape, data_type);
    CLTensor rhs  = create_tensor<CLTensor>(rhs_shape, data_type);
    CLTensor bias = create_tensor<CLTensor>(bias_shape, data_type);
    CLTensor dst  = create_tensor<CLTensor>(dst_shape, data_type);

    ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);

    // Create and configure function
    CLGEMMMatrixMultiplyNative gemm;
    gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), 1.0f, 1.0f, lhs_info, rhs_info, kernel_info);
}
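
// Illustrative sketch, not part of the original file: the shape relation exercised by
// validate_configuration(), with shapes written innermost dimension first as above.
// lhs is (K, M, batch) and rhs is (N, K, batch), so the destination computed by
// compute_mm_shape() is (N, M, batch); a broadcast bias collapses to a single row (N, 1, 1).
// The helper name below is hypothetical.
inline TensorShape example_native_gemm_dst_shape(unsigned int n, unsigned int m, unsigned int batch)
{
    // E.g. the precommit values M = 37, N = 51, batch_size = 1 give a dst shape of (51, 37, 1).
    return TensorShape(n, m, batch);
}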
} // namespace

TEST_SUITE(CL)
TEST_SUITE(GEMMMatrixMultiplyNative)
TEST_SUITE(Float)
TEST_SUITE(FP32)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
                                                                   m_values,
                                                                   n_values),
                                                                   k_values),
                                                                   framework::dataset::make("batch_size", 1)),
                                                                   m0_values_precommit),
                                                                   n0_values_precommit),
                                                                   k0_values_precommit),
                                                                   broadcast_bias_values),
                                                                   act_values),
               m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, broadcast_bias, act_value)
{
    validate_configuration(m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, broadcast_bias, DataType::F32, act_value);
}

FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                combine(combine(
                        framework::dataset::make("M", 3),
                        framework::dataset::make("N", 1)),
                        boundary_handling_cases))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                combine(combine(
                        framework::dataset::make("M", 64),
                        framework::dataset::make("N", 51)),
                        boundary_handling_cases))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                combine(combine(
                        framework::dataset::make("M", 64),
                        framework::dataset::make("N", 32)),
                        boundary_handling_cases))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                combine(combine(
                        framework::dataset::make("M", 37),
                        framework::dataset::make("N", 32)),
                        boundary_handling_cases))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                   m_values,
                                                                   n_values),
                                                                   k_values),
                                                                   b_values),
                                                                   m0_values_precommit),
                                                                   n0_values_precommit),
                                                                   k0_values_precommit),
                                                                   framework::dataset::make("DataType", DataType::F32)),
                                                                   a_values),
                                                                   beta_values),
                                                                   broadcast_bias_values),
                                                                   act_values))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::DISABLED,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                   m_values,
                                                                   n_values),
                                                                   k_values),
                                                                   b_values),
                                                                   m0_values_nightly),
                                                                   n0_values_nightly),
                                                                   k0_values_nightly),
                                                                   framework::dataset::make("DataType", DataType::F32)),
                                                                   a_values),
                                                                   beta_values),
                                                                   broadcast_bias_values),
                                                                   act_values))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                   m_w_values,
                                                                   m_h_values),
                                                                   n_values),
                                                                   k_values),
                                                                   b_values),
                                                                   m0_values_precommit),
                                                                   n0_values_precommit),
                                                                   k0_values_precommit),
                                                                   framework::dataset::make("DataType", DataType::F32)),
                                                                   a_values),
                                                                   beta_values),
                                                                   act_values))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyNative3DFixture<float>, framework::DatasetMode::DISABLED,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                   m_w_values,
                                                                   m_h_values),
                                                                   n_values),
                                                                   k_values),
                                                                   b_values),
                                                                   m0_values_nightly),
                                                                   n0_values_nightly),
                                                                   k0_values_nightly),
                                                                   framework::dataset::make("DataType", DataType::F32)),
                                                                   a_values),
                                                                   beta_values),
                                                                   act_values))
{
    // Validate output
    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
TEST_SUITE_END() // GEMMMatrixMultiplyNative
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute