37 #include "tests/datasets/LargeGEMMDataset.h"
38 #include "tests/datasets/SmallGEMMDataset.h"
39 #include "tests/datasets/TinyGEMMDataset.h"
44 #include "tests/validation/fixtures/GEMMFixture.h"
45 #include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
46 #include "tests/validation/fixtures/GEMMTranspose1xWFixture.h"
// Tolerances used when comparing NEON GEMM output against the C++ reference.
56 constexpr AbsoluteTolerance<float>
tolerance_f(0.001f);
// FP16 tolerances are only defined when the target supports half-precision
// vector arithmetic.
57 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Relative tolerance for F16: half precision accumulates larger errors,
// so a much looser bound (20%) is used than for F32.
58 RelativeTolerance<half_float::half> rel_tolerance_f16(
half(0.2));
59 const AbsoluteTolerance<float> abs_tolerance_f16(0.2f);
// NOTE(review): original lines 60-64 are elided from this excerpt; this
// second guard presumably opens an FP16-only section — confirm in full file.
65 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Configures a FunctionType kernel/function on a (dim0_value x dim1_value)
// input and reports whether configuration left the input tensor unpadded.
75 template <
typename FunctionType>
76 bool validate_zero_padding(
unsigned int dim0_value,
unsigned int dim1_value)
// Input shape built from the two test dimensions.
78 const TensorShape in_shape(dim0_value, dim1_value);
// NOTE(review): original lines 79-86 are elided here; `in`, `dst` and
// `func` are declared on those missing lines.
87 func.configure(&in, &
dst);
// Zero padding is verified by inspecting the configured input's padding.
89 return in.padding().empty();
// Configures CpuGemmMatrixMultiplyKernel on the given LHS/RHS shapes and
// reports whether both inputs and the output were left unpadded.
93 bool validate_gemm_zero_padding(
const TensorShape shape0,
const TensorShape shape1)
// NOTE(review): original lines 94-100 are elided here; the TensorInfo
// objects `in0`, `in1` and `dst` are declared on those missing lines.
101 cpu::kernels::CpuGemmMatrixMultiplyKernel
gemm;
// alpha = 1.0, is_interleaved = false (non-reshaped path).
102 gemm.configure(&in0, &in1, &
dst, 1.0,
false);
// All three tensors must be padding-free for the test to pass.
104 return in0.padding().empty() && in1.padding().empty() &&
dst.padding().empty();
// Test fragment: runs the stateless cpu::CpuGemm operator twice with
// externally managed tensors/workspace and (on elided lines) compares the
// two results element-wise — presumably a memory-injection test; confirm
// against the full file.
120 auto gemm = std::make_unique<cpu::CpuGemm>();
// 3x3 LHS, 4x3 RHS/C/DST, all F32 (note: TensorShape is WxH, i.e. cols x rows).
121 const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1,
DataType::F32);
122 const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
123 const auto c_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
124 auto dst_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
125 const auto gemm_info = GEMMInfo{};
// Configure once with alpha = beta = 1.
126 gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info);
// Backing tensors for the operator's inputs; allocated up front and shared
// across both runs.
129 auto lhs = create_tensor<Tensor>(lhs_info);
130 auto rhs = create_tensor<Tensor>(rhs_info);
131 auto c = create_tensor<Tensor>(c_info);
132 lhs.allocator()->allocate();
133 rhs.allocator()->allocate();
134 c.allocator()->allocate();
// NOTE(review): `run_pack` and `prep_pack` are built on elided lines
// (135-138); the workspace below is allocated against them.
139 auto mg = MemoryGroup{};
140 auto ws = manage_workspace<Tensor>(
gemm->workspace(), mg, run_pack, prep_pack);
// Helper: allocate a fresh destination, fill the inputs with known
// constants, run the operator (run() call on elided lines) and return dst.
142 auto run_conv = [&]() -> Tensor
144 auto dst = create_tensor<Tensor>(dst_info);
145 dst.allocator()->allocate();
148 library->fill_tensor_value(Accessor(lhs), 1.f);
149 library->fill_tensor_value(Accessor(rhs), 2.f);
150 library->fill_tensor_value(Accessor(c), 3.f);
152 gemm->prepare(prep_pack);
// Two independent executions must produce identical results.
156 auto result_0 = run_conv();
157 auto result_1 = run_conv();
158 for(
size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
// Test fragment: the same scenario as the CpuGemm test above but through the
// public NEGEMM function — tensors are created and configured inside the
// lambda, so each invocation is fully self-contained.
173 auto gemm = std::make_unique<NEGEMM>();
// Identical geometry to the operator-level test: 3x3 LHS, 4x3 RHS/C/DST, F32.
174 const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1,
DataType::F32);
175 const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
176 const auto c_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
177 auto dst_info = TensorInfo(TensorShape(4U, 3U), 1,
DataType::F32);
178 const auto gemm_info = GEMMInfo{};
// Helper: build tensors, (re)configure NEGEMM on them, fill with known
// constants, run (on elided lines) and return the result.
179 auto run_conv = [&]()
181 auto lhs = create_tensor<Tensor>(lhs_info);
182 auto rhs = create_tensor<Tensor>(rhs_info);
183 auto c = create_tensor<Tensor>(c_info);
184 auto dst = create_tensor<Tensor>(dst_info);
// alpha = beta = 1, default GEMMInfo.
185 gemm->configure(&lhs, &rhs, &c, &
dst, 1.f, 1.f, gemm_info);
186 lhs.allocator()->allocate();
187 rhs.allocator()->allocate();
188 c.allocator()->allocate();
189 dst.allocator()->allocate();
190 library->fill_tensor_value(Accessor(lhs), 1.f);
191 library->fill_tensor_value(Accessor(rhs), 2.f);
192 library->fill_tensor_value(Accessor(c), 3.f);
// Two runs must agree element-wise (comparison on elided lines).
196 auto result_0 = run_conv();
197 auto result_1 = run_conv();
198 for(
size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
// Static-validation fragment: checks NEGEMM::validate() for the
// lhs/rhs/output TensorInfo combinations declared on elided lines.
// alpha/beta fixed: plain matrix product, no accumulation.
219 constexpr
float alpha = 1.0;
220 constexpr
float beta = 0.0;
221 const auto gemm_info = GEMMInfo();
// Clones are made resizable because validate() may adjust the shapes;
// bias is nullptr (no C operand in this validation case).
222 bool is_valid = bool(
NEGEMM::validate(&lhs_info.clone()->set_is_resizable(
true), &rhs_info.clone()->set_is_resizable(
true),
nullptr, &
output_info.clone()->set_is_resizable(
true), alpha, beta, gemm_info));
// Kernel-selection fragment: for the "NEON" CPU extension, checks that the
// expected micro-kernel implementations are picked (selection calls are on
// elided lines).
229 combine(framework::dataset::
make("CpuExt", std::
string("NEON")),
235 using namespace cpu::kernels;
// Compare the selected implementation names against the expected ones
// (expected strings are built on elided lines).
246 std::string
actual = selected_impl_mul->name;
255 actual = selected_impl_add->name;
// Transpose1xW sub-suite: wraps the kernel so it can run with a zero
// constant border, then checks zero padding over several N values and
// validates the fixture output against the reference.
262 using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>;
// N values chosen to cover 1, non-multiples and primes (the K dataset is on
// elided lines).
264 framework::dataset::
make("
N", { 1, 23, 63, 101 }),
268 bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value);
295 validate(Accessor(_target), _reference);
// Interleave4x4 sub-suite: mirrors the Transpose1xW suite above but over M.
302 using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>;
305 framework::dataset::
make("
M", { 1, 23, 63, 101 }),
// NOTE(review): this instantiates validate_zero_padding on the raw kernel
// rather than the CpuGemmInterleave4x4 wrapper declared above — presumably
// intentional (the kernel needs no border here), but worth confirming.
309 bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value);
336 validate(Accessor(_target), _reference);
// Fixture alias binding the generic GEMM validation fixture to the NEON
// backend (Tensor/Accessor/NEGEMM), parameterised on the data type.
342 template <typename T>
343 using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
// NOTE(review): the alias following this template header (original line
// 346) is elided from this excerpt.
345 template <typename T>
// Zero-padding check for the matrix-multiply kernel over the shape dataset
// (dataset declaration is on elided lines).
365 bool status = validate_gemm_zero_padding(shape0, shape1);
// Data-type sub-suites (bodies largely elided). The FP16 suite is compiled
// only when the target supports half-precision vector arithmetic.
369 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// Small/large fixture runs exercise both the reshaped and non-reshaped
// weight paths.
391 framework::dataset::
make("ReshapeWeights", {
true,
false })),
403 framework::dataset::
make("ReshapeWeights", {
true,
false })),
// This case pins ReshapeWeights to false only — presumably a
// non-reshaped-specific fixture; confirm against the full file.
423 framework::dataset::
make("ReshapeWeights", {
false })),