// GEMMMatrixMultiplyReshaped.cpp - Compute Library 22.08
/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/PostOps.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/GEMMFixture.h"

namespace arm_compute
{
namespace test
{
namespace validation
{
using namespace arm_compute::opencl::kernels;

// Create function for ClGemmReshapeLhsMatrixKernel
using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>;

// Create function for ClGemmReshapeRhsMatrixKernel
using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;

// Create function for ClGemmMatrixMultiplyReshapedKernel
using CLGEMMMatrixMultiplyReshaped = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedKernel>;

// Fixture for CLGEMMMatrixMultiplyReshaped
template <typename T>
using CLGEMMMatrixMultiplyReshapedFixture = GEMMMatrixMultiplyReshapedValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;

// Fixture for CLGEMMMatrixMultiplyReshaped with post ops
template <typename T>
using CLGEMMMatrixMultiplyReshapedWithPostOpsFixture =
    GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;

// Fixture for CLGEMMMatrixMultiplyReshaped mixed precision
template <typename T>
using CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture =
    GEMMMatrixMultiplyReshapedValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped, true>;

// Fixture for CLGEMMMatrixMultiplyReshaped mixed precision with post ops
template <typename T>
using CLGEMMMatrixMultiplyReshapedMixedPrecisionWithPostOpsFixture =
    GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped, true>;

// Fixture for CLGEMMMatrixMultiplyReshaped3D
template <typename T>
using CLGEMMMatrixMultiplyReshaped3DFixture = GEMMMatrixMultiplyReshaped3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;

// Fixture for CLGEMMMatrixMultiplyReshaped3D mixed precision
template <typename T>
using CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture =
    GEMMMatrixMultiplyReshaped3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped, true>;

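// Illustration (added; not part of the upstream file): the aliases above wrap the
// three kernels so a test can drive reshape-LHS, reshape-RHS and the reshaped
// multiply as plain operator-style functions. A rough usage sketch, with
// hypothetical tensor names and simplified configure() arguments:
//
//   CLGEMMReshapeLHSMatrix       reshape_lhs;
//   CLGEMMReshapeRHSMatrix       reshape_rhs;
//   CLGEMMMatrixMultiplyReshaped gemm;
//   reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
//   reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
//   gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(),
//                  alpha, beta, lhs_info, rhs_info, kernel_info);
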
namespace
{
// *INDENT-OFF*
// clang-format off
RelativeTolerance<float> rel_tolerance_f32(0.001f);
constexpr float          abs_tolerance_f32(0.0001f);

RelativeTolerance<float> rel_tolerance_f16_mixed_precision(0.001f);
constexpr float          abs_tolerance_f16_mixed_precision(0.01f);

RelativeTolerance<float> rel_tolerance_f16(0.001f);
constexpr float          abs_tolerance_f16(0.01f);
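
// Clarifying note (added; not in the upstream file): validate() is invoked below as
// validate(target, reference, rel_tolerance, tolerance_number = 0.f, abs_tolerance);
// a value is only counted as a mismatch when it fails both bounds, roughly
//   |target - reference| > abs_tolerance  AND  |target - reference| > rel_tolerance * |reference|
// which is why each data type defines one tolerance of each kind.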
101 
102 /** M values to test */
103 const auto m_values = framework::dataset::make("M", 17);
104 
105 /** M_W values to test */
106 const auto m_w_values = framework::dataset::make("M_W", 5);
107 
108 /** M_H values to test */
109 const auto m_h_values = framework::dataset::make("M_H", 7);
110 
111 /** N values to test */
112 const auto n_values = framework::dataset::make("N", 21);
113 
114 /** K values to test */
115 const auto k_values = framework::dataset::make("K", 13);
116 
117 /** Batch size values to test */
118 const auto b_values = framework::dataset::make("batch_size", 2, 3);
119 
120 /** Activation values to test */
121 const auto act_values = framework::dataset::make("Activation",
122 {
124 });

/** Alpha values to test - Precommit */
const auto a_values_precommit = framework::dataset::make("alpha", {-0.75f} );

/** Beta values to test - Precommit */
const auto beta_values_precommit = framework::dataset::make("beta", {-0.35f} );

/** M0 values to test - Precommit */
const auto m0_values_precommit = framework::dataset::make("M0", { 4 });

/** N0 values to test - Precommit */
const auto n0_values_precommit = framework::dataset::make("N0", { 4 });

/** K0 values to test - Precommit */
const auto k0_values_precommit = framework::dataset::make("K0", { 4 });

/** V0 values to test - Precommit */
const auto v0_values_precommit = framework::dataset::make("V0", 1, 3);

/** H0 values to test - Precommit */
const auto h0_values_precommit = framework::dataset::make("H0", 1, 3);

/** Alpha values to test - Nightly */
const auto a_values_nightly = framework::dataset::make("alpha", {1.0f} );

/** Beta values to test - Nightly */
const auto beta_values_nightly = framework::dataset::make("beta", {1.0f} );

/** M0 values to test - Nightly */
const auto m0_values_nightly = framework::dataset::make("M0", { 8 });

/** N0 values to test - Nightly */
const auto n0_values_nightly = framework::dataset::make("N0", { 8 });

/** K0 values to test - Nightly */
const auto k0_values_nightly = framework::dataset::make("K0", { 4 });

/** N0 values to test with export to OpenCL image object - Nightly */
const auto n0_export_to_cl_image_values_nightly = framework::dataset::make("N0", { 4, 8, 16 });

/** K0 values to test with export to OpenCL image object - Nightly */
const auto k0_export_to_cl_image_values_nightly = framework::dataset::make("K0", { 4, 8, 16 });

/** V0 values to test - Nightly */
const auto v0_values_nightly = framework::dataset::make("V0", 1, 3);

/** H0 values to test - Nightly */
const auto h0_values_nightly = framework::dataset::make("H0", 1, 3);

/** Interleave values to test with LHS matrix */
const auto i_values_lhs = framework::dataset::make("interleave_lhs", { true, false });

/** Interleave values to test with RHS matrix */
const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, false });

/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );

/** LHS transposed values */
const auto lhs_transpose_values = framework::dataset::make("lhs_transpose", { false, true } );

/** Post Ops */
using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument
experimental::PostOpList<PostOpArgBroadcast> post_ops_1()
{
    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
        std::make_tuple(true, true, false), // If broadcast in dims 0, 1 and 2
        0,
        ConvertPolicy::SATURATE);
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    return post_ops;
}
experimental::PostOpList<PostOpArgBroadcast> post_ops_2()
{
    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
        std::make_tuple(false, true, true), // If broadcast in dims 0, 1 and 2
        1,
        ConvertPolicy::SATURATE);
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    return post_ops;
}
experimental::PostOpList<PostOpArgBroadcast> post_ops_3()
{
    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(
        std::make_tuple(false, false, true), // If broadcast in dims 0, 1 and 2
        1,
        ConvertPolicy::SATURATE);
    return post_ops;
}
// To test that the output of the main op is the first parameter in prelu post op
experimental::PostOpList<PostOpArgBroadcast> post_ops_4()
{
    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    post_ops.push_back_op<experimental::PostOpEltwisePRelu<PostOpArgBroadcast>>(
        std::make_tuple(false, false, true), // If true, broadcast in corresponding dim: 0, 1 or 2
        0,
        ConvertPolicy::SATURATE);
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    return post_ops;
}
// To test that the output of the main op is the second parameter in prelu post op, i.e. it is the alpha_param
experimental::PostOpList<PostOpArgBroadcast> post_ops_5()
{
    experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU});
    post_ops.push_back_op<experimental::PostOpEltwisePRelu<PostOpArgBroadcast>>(
        std::make_tuple(false, false, false), // If true, broadcast in corresponding dim: 0, 1 or 2
        1,
        ConvertPolicy::SATURATE);
    return post_ops;
}
/** Different Post Op Lists */
const auto post_op_lists = framework::dataset::make("post_op_lists", {
    post_ops_1(),
    post_ops_2(),
    post_ops_3(),
    post_ops_4(),
    post_ops_5()
} );
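
// For orientation (comment added; not in the upstream file): each list is appended
// to the GEMM output, so e.g. post_ops_1() computes roughly
//   dst = RELU( RELU(alpha * A * B + beta * bias) + X )
// where X is the extra eltwise-add argument; the tuple says in which dimensions X
// is broadcast and the integer gives the argument slot taken by the previous output.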

bool is_post_op_list_valid(unsigned int m, unsigned int n, unsigned int k, unsigned int batch, DataType data_type, const experimental::PostOpList<ITensorInfo*>& post_ops)
{
    const auto lhs_info = GEMMLHSMatrixInfo(4,4,1,false,true);
    const auto rhs_info = GEMMRHSMatrixInfo(4,4,1,true,true,false);

    // Create TensorInfo for post op arguments
    TensorInfo input0_info(TensorShape(k, m, batch), 1, data_type);
    TensorInfo input1_info(TensorShape(n, k, batch), 1, data_type);
    TensorInfo input2_info(TensorShape(n), 1, data_type);
    TensorInfo output_info(TensorShape(n, m, batch), 1, data_type);

    const TensorInfo reshaped_input0_info = input0_info.clone()->set_tensor_shape(misc::shape_calculator::compute_lhs_reshaped_shape(input0_info, lhs_info));
    const TensorInfo reshaped_input1_info = input1_info.clone()->set_tensor_shape(misc::shape_calculator::compute_rhs_reshaped_shape(input1_info, rhs_info));

    GEMMKernelInfo gemm_info(m, n, k, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                             false /**< reinterpret the input as 3D */,
                             true /**< Flag used to broadcast the bias addition */,
                             false /**< wider accum */,
                             false /**< has pad y */,
                             ActivationLayerInfo() /**< Activation info */,
                             1 /**< Multiplication factor for the width of the 1xW transposed block */,
                             1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                             lhs_info,
                             rhs_info,
                             0 /**< Offset to be added to each element of the matrix A */,
                             0 /**< Offset to be added to each element of the matrix B */,
                             post_ops);
    return bool(ClGemmMatrixMultiplyReshapedKernel::validate(&reshaped_input0_info.clone()->set_is_resizable(true),
                                                             &reshaped_input1_info.clone()->set_is_resizable(true),
                                                             &input2_info.clone()->set_is_resizable(true),
                                                             &output_info.clone()->set_is_resizable(true), 1.f, 1.f,
                                                             lhs_info,
                                                             rhs_info,
                                                             gemm_info));
}
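
// Sketch of how the helper is exercised (added for illustration; the real calls
// appear in the TEST_CASEs below, and these particular values are hypothetical):
//
//   experimental::PostOpList<ITensorInfo*> ops{};
//   TensorInfo arg(TensorShape(21U, 17U, 2U), 1, DataType::F32);
//   ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>(&arg, 0, ConvertPolicy::SATURATE);
//   is_post_op_list_valid(17, 21, 13, 2, DataType::F32, ops); // true when the kernel can fuse the list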

} // namespace

TEST_SUITE(CL)
TEST_SUITE(GEMMMatrixMultiplyReshaped)

// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
               framework::dataset::make("Input0Info", { TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F32),     // OK
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F16),     // OK
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::QASYMM8), // Data type not supported
                                                        TensorInfo(TensorShape(10U, 5U, 2U), 1, DataType::F32),     // Incorrect dimension bias
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F16),     // OK, do not broadcast bias
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F16),     // OK, wider accumulation
                                                        TensorInfo(TensorShape(64U, 5U, 2U), 1, DataType::F16),     // OK, RHS 4,4,2

                                                      }),
               framework::dataset::make("Input1Info",{ TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::QASYMM8),
                                                       TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 6U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(128U, 3U, 2U), 1, DataType::F16),

                                                     })),
               framework::dataset::make("Input2Info",{

                                                     })),
               framework::dataset::make("OutputInfo",{

                                                     })),
               framework::dataset::make("LHSMInfo",{
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                     GEMMLHSMatrixInfo(4,2,4,false,false),
                                                     GEMMLHSMatrixInfo(4,2,4,false,false),
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                     GEMMLHSMatrixInfo(4,4,1,false,true),

                                                   })),
               framework::dataset::make("RHSMInfo",{
                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                     GEMMRHSMatrixInfo(2,2,1,true,false,false),
                                                     GEMMRHSMatrixInfo(2,2,1,true,false,false),
                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                     GEMMRHSMatrixInfo(4,4,2,true,false,false),


                                                   })),


               framework::dataset::make("GEMMInfo",{
                                                     GEMMKernelInfo( 17 /**< M Number of LHS rows */,
                                                                     21 /**< N Number of RHS columns */,
                                                                     13 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                     false /**< reinterpret the input as 3D */,
                                                                     true /**< Flag used to broadcast the bias addition */,
                                                                     false /**< wider accum */,
                                                                     false /**< has pad y */,
                                                                     ActivationLayerInfo() /**< Activation info */,
                                                                     1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                     1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                     0 /**< Offset to be added to each element of the matrix A */,
                                                                     0 /**< Offset to be added to each element of the matrix B */),

                                                     GEMMKernelInfo( 17 /**< M Number of LHS rows */,
                                                                     21 /**< N Number of RHS columns */,
                                                                     13 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                     false /**< reinterpret the input as 3D */,
                                                                     true /**< Flag used to broadcast the bias addition */,
                                                                     false /**< wider accum */,
                                                                     false /**< has pad y */,
                                                                     ActivationLayerInfo() /**< Activation info */,
                                                                     1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                     1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                     0 /**< Offset to be added to each element of the matrix A */,
                                                                     0 /**< Offset to be added to each element of the matrix B */),
                                                     GEMMKernelInfo(),
                                                     GEMMKernelInfo(),
                                                     GEMMKernelInfo(),

                                                     GEMMKernelInfo( 17 /**< M Number of LHS rows */,
                                                                     21 /**< N Number of RHS columns */,
                                                                     13 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                     false /**< reinterpret the input as 3D */,
                                                                     false /**< Flag used to broadcast the bias addition */,
                                                                     false /**< wider accum */,
                                                                     false /**< has pad y */,
                                                                     ActivationLayerInfo() /**< Activation info */,
                                                                     1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                     1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                     0 /**< Offset to be added to each element of the matrix A */,
                                                                     0 /**< Offset to be added to each element of the matrix B */),


                                                     GEMMKernelInfo( 17 /**< M Number of LHS rows */,
                                                                     21 /**< N Number of RHS columns */,
                                                                     13 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                     false /**< reinterpret the input as 3D */,
                                                                     false /**< Flag used to broadcast the bias addition */,
                                                                     true /**< wider accum */,
                                                                     true /**< has pad y */,
                                                                     ActivationLayerInfo() /**< Activation info */,
                                                                     1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                     1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                                     GEMMRHSMatrixInfo(4,4,1,true,true,false),
                                                                     0 /**< Offset to be added to each element of the matrix A */,
                                                                     0 /**< Offset to be added to each element of the matrix B */),

                                                     GEMMKernelInfo( 17 /**< M Number of LHS rows */,
                                                                     21 /**< N Number of RHS columns */,
                                                                     13 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                     false /**< reinterpret the input as 3D */,
                                                                     false /**< Flag used to broadcast the bias addition */,
                                                                     false /**< wider accum */,
                                                                     false /**< has pad y */,
                                                                     ActivationLayerInfo() /**< Activation info */,
                                                                     1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                     1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                     GEMMLHSMatrixInfo(4,4,1,false,true),
                                                                     GEMMRHSMatrixInfo(4,4,2,true,false,false),
                                                                     0 /**< Offset to be added to each element of the matrix A */,
                                                                     0 /**< Offset to be added to each element of the matrix B */),
                                                   })),
               framework::dataset::make("Expected", { true, true, false, false, false, true, true, true })),
               input0_info, input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
    ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                                         &input1_info.clone()->set_is_resizable(true),
                                                                         &input2_info.clone()->set_is_resizable(true),
                                                                         &output_info.clone()->set_is_resizable(true), 1.f, 1.f,
                                                                         lhs_info,
                                                                         rhs_info,
                                                                         gemm_info)) == expected, framework::LogLevel::ERRORS);
}
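
// Note on the framework datasets (explanatory comment, not in the original file):
// zip() pairs the i-th entries of its datasets, so the case above runs exactly one
// configuration per row of the tables; combine(), used by the fixture cases below,
// instead forms the full cross product of all its datasets' values.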
TEST_SUITE(ValidateFusedPostOpsConfigs)
TEST_SUITE(Invalid)
TEST_CASE(UnsupportedPostOpSequence, framework::DatasetMode::ALL)
{
    const auto data_type = DataType::F32;
    const unsigned int m = 17;
    const unsigned int n = 1;
    const unsigned int k = 13;
    const unsigned int batch = 2;
    TensorShape post_op_arg0_shape(n, m, batch);
    TensorInfo post_op_arg_info(post_op_arg0_shape, 1, data_type);
    auto post_op_arg1_info = post_op_arg_info.clone();

    // Unsupported sequence of post ops
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>(
        &post_op_arg_info,
        1,
        ConvertPolicy::SATURATE);
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>(
        post_op_arg1_info.get(),
        0,
        ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
}
TEST_CASE(OutputWidened, framework::DatasetMode::ALL)
{
    // Invalid broadcast: post op tensors "widen" the output tensor
    const auto data_type = DataType::F32;
    const unsigned int m = 17;
    const unsigned int n = 1;
    const unsigned int k = 13;
    const unsigned int batch = 2;
    TensorShape post_op_arg_shape(n + 4, m, batch); // output's X dimension (n) is "widened", which is not allowed
    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
}
TEST_CASE(BroadcastInXDimOnly, framework::DatasetMode::ALL)
{
    // Invalid broadcast: post op tensors broadcast in the first dimension (X) only
    const auto data_type = DataType::F32;
    const unsigned int m = 22;
    const unsigned int n = 16;
    const unsigned int k = 15;
    const unsigned int batch = 3;
    TensorShape post_op_arg_shape(1, m, batch);
    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
}
TEST_SUITE_END() // Invalid
TEST_SUITE(Valid)
TEST_CASE(EmptyPostOpList, framework::DatasetMode::ALL)
{
    const auto data_type = DataType::F32;
    const unsigned int m = 22;
    const unsigned int n = 16;
    const unsigned int k = 15;
    const unsigned int batch = 3;
    experimental::PostOpList<ITensorInfo*> post_ops{};

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
}
TEST_CASE(BroadcastInYDimOnly, framework::DatasetMode::ALL)
{
    const auto data_type = DataType::F32;
    const unsigned int m = 22;
    const unsigned int n = 16;
    const unsigned int k = 15;
    const unsigned int batch = 3;
    TensorShape post_op_arg_shape(n, 1, batch);
    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
}
TEST_CASE(BroadcastInBothXandYDims, framework::DatasetMode::ALL)
{
    const auto data_type = DataType::F32;
    const unsigned int m = 22;
    const unsigned int n = 16;
    const unsigned int k = 15;
    const unsigned int batch = 3;
    TensorShape post_op_arg_shape(1, 1, batch);
    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
}
TEST_CASE(BroadcastInAllDims, framework::DatasetMode::ALL)
{
    const auto data_type = DataType::F32;
    const unsigned int m = 22;
    const unsigned int n = 16;
    const unsigned int k = 15;
    const unsigned int batch = 3;
    TensorShape post_op_arg_shape(1, 1, 1);
    TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
    experimental::PostOpList<ITensorInfo*> post_ops{};
    post_ops.push_back_op<experimental::PostOpEltwiseAdd<ITensorInfo*>>( &post_op_arg_info, 0, ConvertPolicy::SATURATE);

    ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
}
TEST_SUITE_END() // Valid
TEST_SUITE_END() // ValidateFusedPostOpsConfigs
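
// Taken together, the cases above pin down the fusion rule (summary comment added
// for clarity): a fused post-op argument may be broadcast, i.e. have size 1, in Y
// (dim 1), in X and Y, or in all dimensions, but an X-only broadcast and any shape
// that would widen the GEMM output are rejected.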
TEST_SUITE(Float)
TEST_SUITE(FP32)

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_nightly),
                        beta_values_nightly),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_nightly),
                        beta_values_nightly),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}
TEST_SUITE(FusedPostOps)

FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedWithPostOpsFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        framework::dataset::make("interleave_lhs", { false })),
                        framework::dataset::make("interleave_rhs", { false })),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        framework::dataset::make("broadcast_bias", { true } )),
                        lhs_transpose_values),
                        act_values),
                        post_op_lists)
                )
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE_END() // FusedPostOps

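// Background for the next suite (explanatory comment, not in the original file):
// "export_to_cl_image_rhs" makes the kernel read the reshaped RHS through an OpenCL
// image2d instead of a buffer, which requires the cl_khr_image2d_from_buffer
// extension. That is why every fixture below falls back to a skip message when
// validate() fails, and why N0 is restricted to { 4, 8, 16 } in the nightly
// export-to-image datasets defined above.
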
TEST_SUITE(ExportToCLImage)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
               framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // Incorrect k0
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // Incorrect n0

                                                      }),
               framework::dataset::make("Input1Info",{ TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(512U, 8U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(128U, 32U, 2U), 1, DataType::F32),

                                                     })),
               framework::dataset::make("Input2Info",{

                                                     })),
               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F32),

                                                     })),
               framework::dataset::make("LHSMInfo",{
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 8, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 2, 1, false, false),
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, false),

                                                   })),
               framework::dataset::make("RHSMInfo",{
                                                     GEMMRHSMatrixInfo(4, 4, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(4, 8, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(8, 4, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(4, 2, 1, true, false, true),
                                                     GEMMRHSMatrixInfo(2, 4, 1, true, false, true),
                                                   })),
               framework::dataset::make("GEMMInfo",{GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                                    GEMMRHSMatrixInfo(4, 4, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 8, 1, false, true),
                                                                    GEMMRHSMatrixInfo(4, 8, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                                    GEMMRHSMatrixInfo(8, 4, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),

                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 2, 1, false, false),
                                                                    GEMMRHSMatrixInfo(4, 2, 1, true, false, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, false),
                                                                    GEMMRHSMatrixInfo(2, 4, 1, true, false, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */)
                                                   })),
               framework::dataset::make("Expected", { true,
                                                      true,
                                                      true,
                                                      false,
                                                      false })),
               input0_info, input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
    ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                                         &input1_info.clone()->set_is_resizable(true),
                                                                         &input2_info.clone()->set_is_resizable(true),
                                                                         &output_info.clone()->set_is_resizable(true), 1.f, 1.f,
                                                                         lhs_info,
                                                                         rhs_info,
                                                                         gemm_info)) == expected, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }

}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_export_to_cl_image_values_nightly),
                        k0_export_to_cl_image_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_nightly),
                        beta_values_nightly),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_export_to_cl_image_values_nightly),
                        k0_export_to_cl_image_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_nightly),
                        beta_values_nightly),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}
TEST_SUITE(FusedPostOps)

FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedWithPostOpsFixture<float>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        framework::dataset::make("interleave_lhs", { false })),
                        framework::dataset::make("interleave_rhs", { false })),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F32)),
                        a_values_precommit),
                        beta_values_precommit),
                        framework::dataset::make("broadcast_bias", { true } )),
                        lhs_transpose_values),
                        act_values),
                        post_op_lists)
                )
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE_END() // FusedPostOps

TEST_SUITE_END() // ExportToCLImage
TEST_SUITE_END() // FP32

TEST_SUITE(FP16)

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE(FusedPostOps)

FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedWithPostOpsFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        framework::dataset::make("interleave_lhs", { false })),
                        framework::dataset::make("interleave_rhs", { false })),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        framework::dataset::make("broadcast_bias", { true } )),
                        lhs_transpose_values),
                        act_values),
                        post_op_lists)
                )
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE_END() // FusedPostOps

TEST_SUITE(ExportToCLImage)
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
               framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16), // OK or incorrect if cl_khr_image2d_from_buffer not supported
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16), // Incorrect k0
                                                        TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16), // Incorrect n0

                                                      }),
               framework::dataset::make("Input1Info",{ TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(512U, 8U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(128U, 32U, 2U), 1, DataType::F16),

                                                     })),
               framework::dataset::make("Input2Info",{

                                                     })),
               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),
                                                       TensorInfo(TensorShape(64U, 64U, 2U), 1, DataType::F16),

                                                     })),
               framework::dataset::make("LHSMInfo",{
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 8, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                     GEMMLHSMatrixInfo(4, 2, 1, false, false),
                                                     GEMMLHSMatrixInfo(4, 4, 1, false, false),

                                                   })),
               framework::dataset::make("RHSMInfo",{
                                                     GEMMRHSMatrixInfo(4, 4, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(4, 8, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(8, 4, 1, true, true, true),
                                                     GEMMRHSMatrixInfo(4, 2, 1, true, false, true),
                                                     GEMMRHSMatrixInfo(2, 4, 1, true, false, true),
                                                   })),
               framework::dataset::make("GEMMInfo",{GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                                    GEMMRHSMatrixInfo(4, 4, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 8, 1, false, true),
                                                                    GEMMRHSMatrixInfo(4, 8, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, true),
                                                                    GEMMRHSMatrixInfo(8, 4, 1, true, true, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),

                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 2, 1, false, false),
                                                                    GEMMRHSMatrixInfo(4, 2, 1, true, false, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */),
                                                    GEMMKernelInfo( 64 /**< M Number of LHS rows */,
                                                                    64 /**< N Number of RHS columns */,
                                                                    64 /**< K Number of LHS columns or RHS rows */, 0 /**< Depth of the output tensor in case it is reinterpreted as 3D */,
                                                                    false /**< reinterpret the input as 3D */,
                                                                    true /**< Flag used to broadcast the bias addition */,
                                                                    false /**< wider accum */,
                                                                    false /**< has pad y */,
                                                                    ActivationLayerInfo() /**< Activation info */,
                                                                    1 /**< Multiplication factor for the width of the 1xW transposed block */,
                                                                    1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
                                                                    GEMMLHSMatrixInfo(4, 4, 1, false, false),
                                                                    GEMMRHSMatrixInfo(2, 4, 1, true, false, true),
                                                                    0 /**< Offset to be added to each element of the matrix A */,
                                                                    0 /**< Offset to be added to each element of the matrix B */)
                                                   })),
               framework::dataset::make("Expected", { true,
                                                      true,
                                                      true,
                                                      false,
                                                      false })),
               input0_info, input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
{
    ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                                         &input1_info.clone()->set_is_resizable(true),
                                                                         &input2_info.clone()->set_is_resizable(true),
                                                                         &output_info.clone()->set_is_resizable(true), 1.f, 1.f,
                                                                         lhs_info,
                                                                         rhs_info,
                                                                         gemm_info)) == expected, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }

}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_export_to_cl_image_values_nightly),
                        k0_export_to_cl_image_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_export_to_cl_image_values_nightly),
                        k0_export_to_cl_image_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}
TEST_SUITE(FusedPostOps)

FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedWithPostOpsFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        framework::dataset::make("interleave_lhs", { false })),
                        framework::dataset::make("interleave_rhs", { false })),
                        framework::dataset::make("export_to_cl_image_rhs", true)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        framework::dataset::make("broadcast_bias", { true } )),
                        lhs_transpose_values),
                        act_values),
                        post_op_lists)
                )
{
    // Validate output only if validate() is successful
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE_END() // FusedPostOps

TEST_SUITE_END() // ExportToCLImage
TEST_SUITE_END() // FP16

TEST_SUITE(MixedPrecision)

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        broadcast_bias_values),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::NIGHTLY,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_w_values,
                        m_h_values),
                        n_values),
                        k_values),
                        b_values),
                        m0_values_nightly),
                        n0_values_nightly),
                        k0_values_nightly),
                        v0_values_nightly),
                        h0_values_nightly),
                        i_values_lhs),
                        i_values_rhs),
                        framework::dataset::make("export_to_cl_image_rhs", false)),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_nightly),
                        beta_values_nightly),
                        lhs_transpose_values),
                        act_values))
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE(FusedPostOps)

FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedMixedPrecisionWithPostOpsFixture<half>, framework::DatasetMode::ALL,
                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                        m_values,
                        n_values),
                        k_values),
                        b_values),
                        m0_values_precommit),
                        n0_values_precommit),
                        k0_values_precommit),
                        v0_values_precommit),
                        h0_values_precommit),
                        framework::dataset::make("interleave_lhs", { false })),
                        framework::dataset::make("interleave_rhs", { false })),
                        framework::dataset::make("export_to_cl_image_rhs", { true, false })),
                        framework::dataset::make("DataType", DataType::F16)),
                        a_values_precommit),
                        beta_values_precommit),
                        framework::dataset::make("broadcast_bias", { true } )),
                        lhs_transpose_values),
                        act_values),
                        post_op_lists)
                )
{
    // Validate output
    if(validate_result)
    {
        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
    }
    else
    {
        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
        framework::ARM_COMPUTE_PRINT_INFO();
    }
}

TEST_SUITE_END() // FusedPostOps

TEST_SUITE_END() // MixedPrecision
TEST_SUITE_END() // Float
TEST_SUITE_END() // GEMMMatrixMultiplyReshaped
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute