35 #include "tests/datasets/ShapeDatasets.h" 40 #include "tests/validation/fixtures/GEMMFixture.h" 68 GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
78 constexpr
float abs_tolerance_f16(0.01f);
164 broadcast_bias_values),
174 std::make_tuple(
true,
true,
false),
184 std::make_tuple(
false,
true,
true),
195 std::make_tuple(
false,
false,
true),
206 std::make_tuple(
false,
false,
true),
218 std::make_tuple(
false,
false,
false),
260 &reshaped_input1_info.
clone()->set_is_resizable(
true),
261 &input2_info.
clone()->set_is_resizable(
true),
262 &output_info.
clone()->set_is_resizable(
true),1.f,1.f,
284 rhs_info.
h0 = h0_value;
309 broadcast_bias? 1 : b_value);
319 return bool(gemm.
validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info));
345 framework::dataset::
make("batch_size", { 1, 1, 1, 1, 1, 1, 2, 1, 1, 1 }),
346 framework::dataset::make(
"M0", { 4, 9, 4, 4, 4, 4, 4, 4, 4, 4 })),
347 framework::dataset::make(
"N0", { 4, 4, 18, 4, 4, 4, 4, 8, 2, 8 })),
348 framework::dataset::make(
"K0", { 4, 4, 4, 1, 4, 4, 4, 4, 4, 4 })),
349 framework::dataset::make(
"broadcast_bias", {
false,
false,
false,
false,
false,
true,
true,
false,
false,
false })),
350 framework::dataset::make(
"input_as_3d", { 0, 0, 0, 0, 1, 0, 1, 0, 0, 0 })),
351 framework::dataset::make(
"depth_output_gemm3d", { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 })),
352 framework::dataset::make(
"export_to_cl_image", {
false,
false,
false,
false,
false,
false,
false,
true,
true,
true })),
353 framework::dataset::make(
"data_type_input0", {
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F16})),
354 framework::dataset::make(
"data_type_input1", {
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F16})),
355 framework::dataset::make(
"data_type_input2", {
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F16})),
356 framework::dataset::make(
"data_type_output", {
DataType::F16,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F32,
DataType::F16})),
357 framework::dataset::make(
"Beta", { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f , 1.0f})),
358 framework::dataset::make(
"Expected", {
false,
false,
false,
false,
false,
false,
false,
true,
false,
true })),
359 b_value,
m0_value,
n0_value,
k0_value,
broadcast_bias, input_as_3d, depth_output_gemm3d,
export_to_cl_image, dt_input0, dt_intpu1, dt_input2, dt_output, beta,
expected)
366 expected_value =
false;
369 bool status = validate_configuration(37, 51, 23,
b_value, m0_value, n0_value, k0_value, 1,
false,
false, export_to_cl_image, broadcast_bias, input_as_3d, depth_output_gemm3d,
ActivationLayerInfo(), dt_input0, dt_intpu1, dt_input2, dt_output, 1.0f, beta);
378 const unsigned int m = 17;
379 const unsigned int n = 1;
380 const unsigned int k = 13;
381 const unsigned int batch = 2;
384 auto post_op_arg1_info = post_op_arg_info.
clone();
393 post_op_arg1_info.get(),
403 const unsigned int m = 17;
404 const unsigned int n = 1;
405 const unsigned int k = 1;
406 const unsigned int batch = 1;
418 const unsigned int m = 22;
419 const unsigned int n = 16;
420 const unsigned int k = 15;
421 const unsigned int batch = 3;
434 const unsigned int m = 22;
435 const unsigned int n = 16;
436 const unsigned int k = 15;
437 const unsigned int batch = 3;
445 const unsigned int m = 22;
446 const unsigned int n = 16;
447 const unsigned int k = 15;
448 const unsigned int batch = 3;
459 const unsigned int m = 22;
460 const unsigned int n = 16;
461 const unsigned int k = 15;
462 const unsigned int batch = 3;
473 const unsigned int m = 22;
474 const unsigned int n = 16;
475 const unsigned int k = 15;
476 const unsigned int batch = 3;
491 framework::dataset::
make("
M", 3),
492 framework::dataset::
make("
N", 1)),
493 boundary_handling_cases))
511 boundary_handling_cases))
529 boundary_handling_cases))
547 boundary_handling_cases))
567 m0_values_precommit),
568 n0_values_precommit),
569 k0_values_precommit),
577 broadcast_bias_values),
608 broadcast_bias_values),
630 m0_values_precommit),
631 n0_values_precommit),
632 k0_values_precommit),
695 m0_values_precommit),
696 n0_values_precommit),
697 k0_values_precommit),
698 framework::dataset::
make("H0", {1})),
733 m0_values_precommit),
734 n0_values_precommit),
735 k0_values_precommit),
739 framework::dataset::
make("export_to_cl_image_rhs", true)),
743 broadcast_bias_values),
774 broadcast_bias_values),
796 m0_values_precommit),
797 n0_values_precommit),
798 k0_values_precommit),
860 m0_values_precommit),
861 n0_values_precommit),
862 k0_values_precommit),
863 framework::dataset::
make("H0", {1})),
bool broadcast_bias
Flag used to broadcast the bias addition.
bool image2d_from_buffer_supported(const cl::Device &device)
Helper function to check whether the cl_khr_image2d_from_buffer extension is supported.
experimental::PostOpList< ITensorInfo * > post_ops
GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsFixture
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
Descriptor used by the GEMM kernels.
static Status validate(Args &&... args)
Validate input arguments.
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
unsigned int depth_output_gemm3d
Depth of the output tensor in case it is reinterpreted as 3D.
TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
Calculate the matrix multiplication output shape of two tensors.
half_float::half half
16-bit floating point type
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
unsigned int h0
Number of horizontal blocks of size (k0xn0) stored on the same output row.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
GEMM LHS (Left Hand Side) matrix information.
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
ActivationLayerInfo activation_info
Activation function to perform after the matrix multiplication.
Activation Layer Information class.
bool transpose
True if the (k0xn0) block has to be transposed before being stored.
bool export_to_cl_image
True if the reshaped rhs has to be exported to cl_image.
#define ARM_COMPUTE_TEST_INFO(INFO)
Copyright (c) 2017-2023 Arm Limited.
1 channel, 1 F16 per channel
CLSynthetizeOperator< opencl::kernels::ClGemmReshapeRhsMatrixKernel > CLGEMMReshapeRHSMatrix
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
unsigned int m
Number of LHS rows.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
Static function to check if given info will lead to a valid configuration.
unsigned int n
Number of RHS columns.
DatasetMode
Possible dataset modes.
Exponential Linear Unit ( )
GEMM RHS (Right Hand Side) matrix information.
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture
unsigned int n0
Number of columns processed by the matrix multiplication.
Accessor implementation for CLTensor objects.
TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
Calculate the Right Hand Side matrix reshaped shape.
bool reinterpret_input_as_3d
Flag used to reinterpret the input as 3D.
validate(CLAccessor(output_state), expected_output)
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
void ARM_COMPUTE_PRINT_INFO()
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Upper Bounded Rectifier ( )
Class representing a relative tolerance value.
unsigned int k
Number of LHS columns or RHS rows.
bool interleave
True if the h0 (k0xn0) blocks have to be interleaved in the output row.
Store the tensor's metadata.
unsigned int k0
Number of partial accumulations performed by the matrix multiplication.
unsigned int m0
Number of rows processed by the matrix multiplication.
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture< CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS > CLGEMMMatrixMultiplyReshapedOnlyRHSFixture
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
DataType
Available data types.
bool export_to_cl_image(const ITensorInfo *tensor)
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference's output against implementation's output for flo...
A sequence of PostOps that can be appended to the end of other operators.
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
(EXPERIMENTAL_POST_OPS) Implementation of specific IPostOps
const cl::Device & get_device()
Gets the CL device for which the programs are created.