Compute Library
 22.05
GEMMMatrixMultiplyReshapedOnlyRHS.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 #include "arm_compute/core/Types.h"
32 #include "tests/CL/CLAccessor.h"
33 #include "tests/CL/Helper.h"
35 #include "tests/datasets/ShapeDatasets.h"
37 #include "tests/framework/Macros.h"
40 #include "tests/validation/fixtures/GEMMFixture.h"
41 
42 namespace arm_compute
43 {
44 namespace test
45 {
46 namespace validation
47 {
49 using namespace arm_compute::opencl::kernels;
50 
51 // Create function for ClGemmReshapeRhsMatrixKernel
52 using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
53 
54 // Create function for ClGemmMatrixMultiplyReshapedOnlyRhsKernel
56 
57 // Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS
58 template <typename T>
59 using CLGEMMMatrixMultiplyReshapedOnlyRHSFixture = GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
60 
61 // Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS3D
62 template <typename T>
63 using CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture = GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
64 
65 // Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS with post ops
66 template <typename T>
68  GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
69 
70 namespace
71 {
72 // *INDENT-OFF*
73 // clang-format off
74 RelativeTolerance<float> rel_tolerance_f32(0.001f);
75 constexpr float abs_tolerance_f32(0.0001f);
76 
77 RelativeTolerance<float> rel_tolerance_f16(0.001f);
78 constexpr float abs_tolerance_f16(0.01f);
79 
80 /** Alpha values to test */
81 const auto a_values = framework::dataset::make("alpha", {-0.75f} );
82 
83 /** Beta values to test */
84 const auto beta_values = framework::dataset::make("beta", {-0.35f} );
85 
86 /** M values to test */
87 const auto m_values = framework::dataset::make("M", 37);
88 
89 /** M_W values to test */
90 const auto m_w_values = framework::dataset::make("M_W", 5);
91 
92 /** M_H values to test */
93 const auto m_h_values = framework::dataset::make("M_H", 7);
94 
95 /** N values to test */
96 const auto n_values = framework::dataset::make("N", 51);
97 
98 /** K values to test */
99 const auto k_values = framework::dataset::make("K", 23);
100 
101 /** Batch size values to test */
102 const auto b_values = framework::dataset::make("batch_size", 2);
103 
104 /** Activation values to test */
105 const auto act_values = framework::dataset::make("Activation",
106 {
108 });
109 
110 /** M0 values to test - precommit */
111 const auto m0_values_precommit = framework::dataset::make("M0", { 4 });
112 
113 /** N0 values to test - precommit*/
114 const auto n0_values_precommit = framework::dataset::make("N0", { 4 });
115 
116 /** K0 values to test - precommit*/
117 const auto k0_values_precommit = framework::dataset::make("K0", { 4 });
118 
119 /** M0 values to test - nightly */
120 const auto m0_values_nightly = framework::dataset::make("M0", { 8 });
121 
122 /** N0 values to test - nightly */
123 const auto n0_values_nightly = framework::dataset::make("N0", { 16 });
124 
125 /** K0 values to test - nightly */
126 const auto k0_values_nightly = framework::dataset::make("K0", { 16 });
127 
128 /** H0 values to test */
129 const auto h0_values = framework::dataset::make("H0", 1, 3);
130 
131 /** Interleave values to test with RHS matrix */
132 const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, false });
133 
134 /** Transpose values to test with RHS matrix */
135 const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true, false });
136 
137 /** Broadcast bias from vector to matrix */
138 const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
139 
140 /** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
141  * of M, M0, N and N0 values.
142  * M0 and N0 are kept constant, while the different test cases need to vary M and N.
143  *
144  * Eg. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in Y dimension and
145  * partial blocks in X dimension.
146  */
148  // Large k to force potential out-of-bound reads on input0
149  framework::dataset::make("K", 315),
150  // Batch size == 1 to force potential out-of-bound reads on input0
151  framework::dataset::make("batch_size", 1)),
152  framework::dataset::make("M0", 4)),
153  framework::dataset::make("N0", 4)),
154  framework::dataset::make("K0", 4)),
155  framework::dataset::make("H0", 3)),
156  i_values_rhs),
157  t_values_rhs),
158  framework::dataset::make("export_to_cl_image_rhs", {true, false})),
159  // Only need to test F32 as F16 shares identical boundary handling logics
161  framework::dataset::make("alpha", -0.75f )),
162  framework::dataset::make("beta", -0.35f )),
163  broadcast_bias_values),
165 
166 /** Post Ops */
169 {
173  std::make_tuple(true, true, false), // If broadcast in dims 0, 1 and 2
174  0,
177  return post_ops;
178 }
180 {
183  std::make_tuple(false, true, true), // If broadcast in dims 0, 1 and 2
184  1,
187  return post_ops;
188 }
190 {
194  std::make_tuple(false, false, true), // If broadcast in dims 0, 1 and 2
195  1,
197  return post_ops;
198 }
199 // To test that the output of the main op is the first parameter in prelu post op
201 {
205  std::make_tuple(false, false, true), // If true, broadcast in corresponding dim: 0, 1 or 2
206  0,
209  return post_ops;
210 }
211 // To test that the output of the main op is the second parameter in prelu post op i.e. it is the alpha_param
213 {
217  std::make_tuple(false, false, false), // If true, broadcast in corresponding dim: 0, 1 or 2
218  1,
221  return post_ops;
222 }
223 /** Different Post Op Lists */
224 const auto post_op_lists = framework::dataset::make("post_op_lists", {
225  post_ops_1(),
226  post_ops_2(),
227  post_ops_3(),
228  post_ops_4(),
229  post_ops_5()
230  } );
231 
232  bool is_post_op_list_valid(unsigned int m, unsigned int n, unsigned int k, unsigned int batch, DataType data_type, const experimental::PostOpList<ITensorInfo*>& post_ops)
233 {
234  const auto lhs_info = GEMMLHSMatrixInfo(4,4,1,false,true);
235  const auto rhs_info = GEMMRHSMatrixInfo(4,4,1,true,true,false);
236 
237  // Create TensorInfo for post op arguments
238  TensorInfo input0_info(TensorShape(k, m, batch), 1, data_type);
239  TensorInfo input1_info(TensorShape(n, k, batch), 1, data_type);
240  TensorInfo input2_info(TensorShape(n), 1, data_type);
241  TensorInfo output_info(TensorShape(n, m, batch), 1, data_type);
242 
243  const TensorInfo reshaped_input1_info = input1_info.clone()->set_tensor_shape(misc::shape_calculator::compute_rhs_reshaped_shape(input1_info, rhs_info));
244 
245  GEMMKernelInfo gemm_info(m, n, k, 0 /**< Depth of the output tensor in case is reinterpreted as 3D */,
246  false /**< reinterpret the input as 3D */,
247  true /**< Flag used to broadcast the bias addition */,
248  false /**< wider accum */,
249  false /**< has pad y */,
251  1 /**< Multiplication factor for the width of the 1xW transposed block */,
252  1 /**< Multiplication factor for the height of the 4x4 interleaved block */,
253  lhs_info,
254  rhs_info,
255  0 /**< Offset to be added to each element of the matrix A */,
256  0 /**< Offset to be added to each element of the matrix B */,
257  post_ops);
258  return bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(&input0_info.clone()->set_is_resizable(true),
259  &reshaped_input1_info.clone()->set_is_resizable(true),
260  &input2_info.clone()->set_is_resizable(true),
261  &output_info.clone()->set_is_resizable(true),1.f,1.f,
262  lhs_info,
263  rhs_info,
264  gemm_info));
265 }
266 /** Configuration test */
267 bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value,
268  unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value,
269  bool i_value_rhs, bool t_value_rhs, bool export_to_cl_image, bool broadcast_bias, bool input_as_3d, unsigned int depth_output_gemm3d, const ActivationLayerInfo &act_info,
270  DataType dt_input0, DataType dt_input1, DataType dt_input2, DataType dt_output, float alpha, float beta)
271 {
272  const unsigned int M = m_value;
273  const unsigned int N = n_value;
274  const unsigned int K = k_value;
275 
277  lhs_info.m0 = m0_value;
278  lhs_info.k0 = k0_value;
279 
281  rhs_info.n0 = n0_value;
282  rhs_info.k0 = k0_value;
283  rhs_info.h0 = h0_value;
284  rhs_info.interleave = i_value_rhs;
285  rhs_info.transpose = t_value_rhs;
286  rhs_info.export_to_cl_image = export_to_cl_image;
287 
288  GEMMKernelInfo kernel_info;
289  kernel_info.m = M;
290  kernel_info.n = N;
291  kernel_info.k = K;
292  kernel_info.depth_output_gemm3d = depth_output_gemm3d;
293  kernel_info.reinterpret_input_as_3d = input_as_3d;
294  kernel_info.broadcast_bias = broadcast_bias;
295  kernel_info.activation_info = act_info;
296 
297  const TensorShape lhs_shape(K, M, b_value);
298  const TensorShape rhs_shape(N, K, b_value);
299  const TensorShape rhs_shape_reshaped = compute_rhs_reshaped_shape(TensorInfo(rhs_shape, 1, dt_input1),
300  rhs_info);
301 
302  const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, dt_input0),
303  TensorInfo(rhs_shape_reshaped, 1, dt_input1),
304  kernel_info);
305 
306  const TensorShape bias_shape(N,
307  M, // Correct calculation should be: broadcast_bias? 1 : M, it's wrong here on purpose just for validation test
308  broadcast_bias? 1 : b_value);
309 
310  // Create tensor info
311  TensorInfo lhs = TensorInfo(lhs_shape, 1, dt_input0);
312  TensorInfo rhs_reshaped = TensorInfo(rhs_shape_reshaped, 1, dt_input1);
313  TensorInfo bias = TensorInfo(bias_shape, 1, dt_input2);
314  TensorInfo dst = TensorInfo(dst_shape, 1, dt_output);
315 
316  // Create and configure function
318  return bool(gemm.validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info));
319 }
320 
321 } // namespace
322 
323 TEST_SUITE(CL)
324 TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRHS)
325 
326 /** Validate tests
327  *
328  * A series of validation tests on configurations which according to the API specification
329  * the function should fail against.
330  *
331  * Checks performed in order:
332  * - Mismatching data type: input1, input2 and output need to have same data type as input0. Supported data types: F32/F16.
333  * - Unsupported M0: M0 can only be 1,2,3,4,5,6,7,8
334  * - Unsupported N0: N0 can only be 2,3,4,8,16
335  * - Unsupported K0: K0 can only be 2,3,4,8,16
336  * - Unsupported bias addition: bias broadcast mode is 0 if the input or output has to be reinterpreted as 3D
337  * - Incorrect bias dimension when bias broadcast mode is 1 and beta is not 0.0f, should be (n, 1), not (n, m)
338  * - Incorrect input0 dimension when input is reinterpreted as 3D: input0->dimension(1) * input0->dimension(2) != m
339  * - Correct support for creating an OpenCL image object from buffer
340  * - Incorrect support for creating an OpenCL image object from buffer. N0 is 2 but it can only be 4,8 and 16
341  * - Correct F16 support for creating an OpenCL image object from buffer.
342  */
344 framework::dataset::make("batch_size", { 1, 1, 1, 1, 1, 1, 2, 1, 1, 1 }),
345 framework::dataset::make("M0", { 4, 9, 4, 4, 4, 4, 4, 4, 4, 4 })),
346 framework::dataset::make("N0", { 4, 4, 18, 4, 4, 4, 4, 8, 2, 8 })),
347 framework::dataset::make("K0", { 4, 4, 4, 1, 4, 4, 4, 4, 4, 4 })),
348 framework::dataset::make("broadcast_bias", { false, false, false, false, false, true, true, false, false, false })),
349 framework::dataset::make("input_as_3d", { 0, 0, 0, 0, 1, 0, 1, 0, 0, 0 })),
350 framework::dataset::make("depth_output_gemm3d", { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 })),
351 framework::dataset::make("export_to_cl_image", { false, false, false, false, false, false, false, true, true, true })),
356 framework::dataset::make("Beta", { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f , 1.0f})),
357 framework::dataset::make("Expected", { false, false, false, false, false, false, false, true, false, true })),
358 b_value, m0_value, n0_value, k0_value, broadcast_bias, input_as_3d, depth_output_gemm3d, export_to_cl_image, dt_input0, dt_intpu1, dt_input2, dt_output, beta, expected)
359 {
360  bool expected_value = expected;
361 
362  // Change expected to false if the target platform does not support the OpenCL cl_khr_image2d_from_buffer extension
363  if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()) && export_to_cl_image)
364  {
365  expected_value = false;
366  }
367 
368  bool status = validate_configuration(37, 51, 23, b_value, m0_value, n0_value, k0_value, 1, false, false, export_to_cl_image, broadcast_bias, input_as_3d, depth_output_gemm3d, ActivationLayerInfo(), dt_input0, dt_intpu1, dt_input2, dt_output, 1.0f, beta);
369  ARM_COMPUTE_EXPECT(status == expected_value, framework::LogLevel::ERRORS);
370 }
371 
372 TEST_SUITE(ValidateFusedPostOpsConfigs)
373 TEST_SUITE(Invalid)
374 TEST_CASE(UnsupportedPostOpSequence, framework::DatasetMode::ALL)
375 {
376  const auto data_type = DataType::F32;
377  const unsigned int m = 17;
378  const unsigned int n = 1;
379  const unsigned int k = 13;
380  const unsigned int batch = 2;
381  TensorShape post_op_arg0_shape(n, m, batch);
382  TensorInfo post_op_arg_info(post_op_arg0_shape, 1, data_type);
383  auto post_op_arg1_info = post_op_arg_info.clone();
384 
385  // Unsupported sequence of post ops
388  &post_op_arg_info,
389  1,
392  post_op_arg1_info.get(),
393  0,
395 
396  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
397 }
399 {
400  // Invalid broadcast: post op tensors "widen" the output tensor
401  const auto data_type = DataType::F32;
402  const unsigned int m = 17;
403  const unsigned int n = 1;
404  const unsigned int k = 1;
405  const unsigned int batch = 1;
406  TensorShape post_op_arg_shape(n, m, batch + 4); // output's batch dimension is "widened", which is not allowed
407  TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
410 
411  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
412 }
413 TEST_CASE(BroadcastInXDimOnly, framework::DatasetMode::ALL)
414 {
415  // Invalid broadcast: post op tensors broadcast in the first dimension (X) only
416  const auto data_type = DataType::F32;
417  const unsigned int m = 22;
418  const unsigned int n = 16;
419  const unsigned int k = 15;
420  const unsigned int batch = 3;
421  TensorShape post_op_arg_shape(1, m, batch);
422  TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
425 
426  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == false, framework::LogLevel::ERRORS);
427 }
428 TEST_SUITE_END() // Invalid
429 TEST_SUITE(Valid)
430 TEST_CASE(EmptyPostOpList, framework::DatasetMode::ALL)
431 {
432  const auto data_type = DataType::F32;
433  const unsigned int m = 22;
434  const unsigned int n = 16;
435  const unsigned int k = 15;
436  const unsigned int batch = 3;
438 
439  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
440 }
441 TEST_CASE(BroadcastInYDimOnly, framework::DatasetMode::ALL)
442 {
443  const auto data_type = DataType::F32;
444  const unsigned int m = 22;
445  const unsigned int n = 16;
446  const unsigned int k = 15;
447  const unsigned int batch = 3;
448  TensorShape post_op_arg_shape(n, 1, batch);
449  TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
452 
453  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
454 }
455 TEST_CASE(BroadcastInBothXandYDims, framework::DatasetMode::ALL)
456 {
457  const auto data_type = DataType::F32;
458  const unsigned int m = 22;
459  const unsigned int n = 16;
460  const unsigned int k = 15;
461  const unsigned int batch = 3;
462  TensorShape post_op_arg_shape(1, 1, batch);
463  TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
466 
467  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
468 }
469 TEST_CASE(BroadcastInAllDims, framework::DatasetMode::ALL)
470 {
471  const auto data_type = DataType::F32;
472  const unsigned int m = 22;
473  const unsigned int n = 16;
474  const unsigned int k = 15;
475  const unsigned int batch = 3;
476  TensorShape post_op_arg_shape(1, 1, 1);
477  TensorInfo post_op_arg_info(post_op_arg_shape, 1, data_type);
480 
481  ARM_COMPUTE_EXPECT(is_post_op_list_valid(m, n, k, batch, data_type, post_ops) == true, framework::LogLevel::ERRORS);
482 }
483 TEST_SUITE_END() // Valid
484 TEST_SUITE_END() // ValidateFusedPostOpsConfigs
485 TEST_SUITE(Float)
486 TEST_SUITE(FP32)
487 
488 FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
490  framework::dataset::make("M", 3),
491  framework::dataset::make("N", 1)),
492  boundary_handling_cases))
493 {
494  // Validate output
495  if(validate_result)
496  {
497  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
498  }
499  else
500  {
501  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
503  }
504 }
505 
508  framework::dataset::make("M", 64),
509  framework::dataset::make("N", 43)),
510  boundary_handling_cases))
511 {
512  // Validate output
513  if(validate_result)
514  {
515  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
516  }
517  else
518  {
519  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
521  }
522 }
523 
526  framework::dataset::make("M", 64),
527  framework::dataset::make("N", 32)),
528  boundary_handling_cases))
529 {
530  // Validate output
531  if(validate_result)
532  {
533  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
534  }
535  else
536  {
537  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
539  }
540 }
541 
544  framework::dataset::make("M", 37),
545  framework::dataset::make("N", 32)),
546  boundary_handling_cases))
547 {
548  // Validate output
549  if(validate_result)
550  {
551  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
552  }
553  else
554  {
555  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
557  }
558 }
559 
562  m_values,
563  n_values),
564  k_values),
565  b_values),
566  m0_values_precommit),
567  n0_values_precommit),
568  k0_values_precommit),
569  h0_values),
570  i_values_rhs),
571  t_values_rhs),
572  framework::dataset::make("export_to_cl_image_rhs", {false, true})),
574  a_values),
575  beta_values),
576  broadcast_bias_values),
577  act_values))
578 {
579  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
580  if(validate_result)
581  {
582  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
583  }
584  else
585  {
586  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
588  }
589 }
590 
593  m_values,
594  n_values),
595  k_values),
596  b_values),
597  m0_values_nightly),
598  n0_values_nightly),
599  k0_values_nightly),
600  h0_values),
601  i_values_rhs),
602  t_values_rhs),
603  framework::dataset::make("export_to_cl_image_rhs", {false, true})),
605  a_values),
606  beta_values),
607  broadcast_bias_values),
608  act_values))
609 {
610  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
611  if(validate_result)
612  {
613  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
614  }
615  else
616  {
617  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
619  }
620 }
621 
624  m_w_values,
625  m_h_values),
626  n_values),
627  k_values),
628  b_values),
629  m0_values_precommit),
630  n0_values_precommit),
631  k0_values_precommit),
632  h0_values),
633  i_values_rhs),
634  t_values_rhs),
635  framework::dataset::make("export_to_cl_image_rhs", {false, true})),
636  framework::dataset::make("has_pad_y", {false, true})),
638  a_values),
639  beta_values),
640  act_values))
641 {
642  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
643  if(validate_result)
644  {
645  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
646  }
647  else
648  {
649  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
651  }
652 }
653 
656  m_w_values,
657  m_h_values),
658  n_values),
659  k_values),
660  b_values),
661  m0_values_nightly),
662  n0_values_nightly),
663  k0_values_nightly),
664  h0_values),
665  i_values_rhs),
666  t_values_rhs),
667  framework::dataset::make("export_to_cl_image_rhs", {false, true})),
668  framework::dataset::make("has_pad_y", {false, true})),
670  a_values),
671  beta_values),
672  act_values))
673 {
674  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
675  if(validate_result)
676  {
677  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
678  }
679  else
680  {
681  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
683  }
684 }
685 
686 TEST_SUITE(FusedPostOps)
687 
690  m_values,
691  n_values),
692  k_values),
693  b_values),
694  m0_values_precommit),
695  n0_values_precommit),
696  k0_values_precommit),
697  framework::dataset::make("H0", {1})),
698  framework::dataset::make("interleave_rhs", { true })),
699  t_values_rhs),
700  framework::dataset::make("export_to_cl_image_rhs", {false, true})),
702  a_values),
703  beta_values),
704  framework::dataset::make("broadcast_bias", { false } )),
705  act_values),
706  post_op_lists)
707  )
708 {
709  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
710  if(validate_result)
711  {
712  validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
713  }
714  else
715  {
716  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
718  }
719 }
720 
721 TEST_SUITE_END() // FusedPostOps
722 
723 TEST_SUITE_END() // FP32
724 
725 TEST_SUITE(FP16)
728  m_values,
729  n_values),
730  k_values),
731  b_values),
732  m0_values_precommit),
733  n0_values_precommit),
734  k0_values_precommit),
735  h0_values),
736  i_values_rhs),
737  t_values_rhs),
738  framework::dataset::make("export_to_cl_image_rhs", true)),
739  framework::dataset::make("DataType", DataType::F16)),
740  a_values),
741  beta_values),
742  broadcast_bias_values),
743  act_values))
744 {
745  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
746  if(validate_result)
747  {
748  validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
749  }
750  else
751  {
752  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
754  }
755 }
756 
759  m_values,
760  n_values),
761  k_values),
762  b_values),
763  m0_values_nightly),
764  n0_values_nightly),
765  k0_values_nightly),
766  h0_values),
767  i_values_rhs),
768  t_values_rhs),
769  framework::dataset::make("export_to_cl_image_rhs", true)),
771  a_values),
772  beta_values),
773  broadcast_bias_values),
774  act_values))
775 {
776  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
777  if(validate_result)
778  {
779  validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
780  }
781  else
782  {
783  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
785  }
786 }
787 
790  m_w_values,
791  m_h_values),
792  n_values),
793  k_values),
794  b_values),
795  m0_values_precommit),
796  n0_values_precommit),
797  k0_values_precommit),
798  h0_values),
799  i_values_rhs),
800  t_values_rhs),
801  framework::dataset::make("export_to_cl_image_rhs", true)),
802  framework::dataset::make("has_pad_y", {false, true})),
804  a_values),
805  beta_values),
806  act_values))
807 {
808  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
809  if(validate_result)
810  {
811  validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
812  }
813  else
814  {
815  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
817  }
818 }
819 
822  m_w_values,
823  m_h_values),
824  n_values),
825  k_values),
826  b_values),
827  m0_values_nightly),
828  n0_values_nightly),
829  k0_values_nightly),
830  h0_values),
831  i_values_rhs),
832  t_values_rhs),
833  framework::dataset::make("export_to_cl_image_rhs", true)),
834  framework::dataset::make("has_pad_y", {false, true})),
836  a_values),
837  beta_values),
838  act_values))
839 {
840  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
841  if(validate_result)
842  {
843  validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
844  }
845  else
846  {
847  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
849  }
850 }
851 TEST_SUITE(FusedPostOps)
852 
855  m_values,
856  n_values),
857  k_values),
858  b_values),
859  m0_values_precommit),
860  n0_values_precommit),
861  k0_values_precommit),
862  framework::dataset::make("H0", {1})),
863  framework::dataset::make("interleave_rhs", { true })),
864  t_values_rhs),
865  framework::dataset::make("export_to_cl_image_rhs", true)),
867  a_values),
868  beta_values),
869  framework::dataset::make("broadcast_bias", { false } )),
870  act_values),
871  post_op_lists)
872  )
873 {
874  // Validate output only if the target platform supports the OpenCL cl_khr_image2d_from_buffer extension
875  if(validate_result)
876  {
877  validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
878  }
879  else
880  {
881  ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
883  }
884 }
885 
886 TEST_SUITE_END() // FusedPostOps
887 
888 TEST_SUITE_END() // FP16
889 
890 TEST_SUITE_END() // Float
891 TEST_SUITE_END() // GEMMMatrixMultiplyReshapedOnlyRHS
892 TEST_SUITE_END() // CL
893 } // namespace validation
894 } // namespace test
895 } // namespace arm_compute
bool broadcast_bias
Flag used to broadcast the bias addition.
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
Definition: CLHelpers.cpp:370
Shape of a tensor.
Definition: TensorShape.h:39
Definition: PostOps.h:80
experimental::PostOpList< ITensorInfo * > post_ops
GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
Definition: TensorInfo.cpp:282
Descriptor used by the GEMM kernels.
static Status validate(Args &&... args)
Validate input arguments.
Definition: Helper.h:78
unsigned int depth_output_gemm3d
Depth of the output tensor in case is reinterpreted as 3D.
TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
Calculate the matrix multiplication output shape of two tensors.
half_float::half half
16-bit floating point type
Definition: Types.h:48
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
unsigned int h0
Number of horizontal blocks of size (k0xn0) stored on the same output row.
Definition: Types.h:2078
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
GEMM LHS (Left Hand Side) matrix information.
Definition: Types.h:2054
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
unsigned int M
ActivationLayerInfo activation_info
Activation function to perform after the matrix multiplication.
Activation Layer Information class.
Definition: Types.h:1625
bool transpose
True if the (k0xn0) block has to be transposed before been stored.
Definition: Types.h:2079
bool export_to_cl_image
True if the reshaped rhs has to be exported to cl_image.
Definition: Types.h:2081
#define ARM_COMPUTE_TEST_INFO(INFO)
Definition: Asserts.h:65
Copyright (c) 2017-2022 Arm Limited.
1 channel, 1 F16 per channel
CLSynthetizeOperator< opencl::kernels::ClGemmReshapeRhsMatrixKernel > CLGEMMReshapeRHSMatrix
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
Definition: Types.h:2077
unsigned int m
Number of LHS rows.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
unsigned int n
Number of RHS columns.
Definition: PostOps.h:121
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
GEMM RHS (Right Hand Side) matrix information.
Definition: Types.h:2069
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture
unsigned int n0
Number of columns processed by the matrix multiplication.
Definition: Types.h:2076
Accessor implementation for CLTensor objects.
Definition: CLAccessor.h:36
unsigned int N
TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
Calculate the Right Hand Side matrix reshaped shape.
bool reinterpret_input_as_3d
Flag used to reinterpret the input as 3D.
validate(CLAccessor(output_state), expected_output)
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Definition: AbsLayer.cpp:50
Class reprensenting a relative tolerance value.
Definition: Validation.h:97
unsigned int k
Number of LHS columns or RHS rows.
bool interleave
True if the h0 (k0xn0) blocks have to be interleaved in the output row.
Definition: Types.h:2080
Store the tensor&#39;s metadata.
Definition: TensorInfo.h:43
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
Definition: Types.h:2062
unsigned int m0
Number of rows processed by the matrix multiplication.
Definition: Types.h:2061
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHSFixture
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
DataType
Available data types.
Definition: Types.h:79
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference&#39;s output against implementation&#39;s output for flo...
A sequence of PostOps that can be appended to the end of other operators.
Definition: IPostOp.h:119
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
(EXPERIMENTAL_POST_OPS) Implementation of specific IPostOps
Definition: PostOps.h:42
const cl::Device & get_device()
Gets the CL device for which the programs are created.
const int32_t * bias
unsigned int K