Compute Library
 23.11
GEMM.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
34 #include "tests/NEON/Accessor.h"
35 #include "tests/NEON/Helper.h"
37 #include "tests/datasets/LargeGEMMDataset.h"
38 #include "tests/datasets/SmallGEMMDataset.h"
39 #include "tests/datasets/TinyGEMMDataset.h"
41 #include "tests/framework/Macros.h"
44 #include "tests/validation/fixtures/GEMMFixture.h"
45 #include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
46 #include "tests/validation/fixtures/GEMMTranspose1xWFixture.h"
47 
48 namespace arm_compute
49 {
50 namespace test
51 {
52 namespace validation
53 {
54 namespace
55 {
56 constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Absolute tolerance used when comparing the reference output against the implementation output for FP32 data types */
57 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// The FP16 tolerances below are only compiled when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
// They are read-only test parameters, so each of them is declared const/constexpr.
58 const RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Relative tolerance value for comparing reference's output against implementation's output for FP16 data types */
59 const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance value for comparing reference's output against implementation's output for FP16 data types */
60 constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */
61 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
62 /** CNN data types */
// Dataset named "DataType" built with framework::dataset::make.
// NOTE(review): in this listing the initializer list holds no entries and the embedded listing
// numbers skip 66 and 68, so the data-type entries appear to have been dropped — confirm against
// the original file before relying on this dataset.
63 const auto CNNDataTypes = framework::dataset::make("DataType",
64 {
65 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
67 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
69 });
70 
// Dataset consumed by the INTERLEAVE_4X4 fixture tests: an "M" dataset and an "N" dataset, each
// built from (8, 12), joined with operator*.
// NOTE(review): presumably make(name, a, b) is a range and operator* is the cartesian product — confirm.
71 const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12);
// Dataset consumed by the TRANSPOSE_1XW fixture tests: "M" built from (8, 14) joined with "N" built from (7, 14).
72 const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
73 
74 /** Zero padding test
 *
 * Configures a default-constructed @p FunctionType on a two-dimensional U32 input info and a
 * default-constructed output info, then reports whether the input info carries no padding.
 *
 * @tparam FunctionType Type that is default-constructible and exposes configure(&in, &dst)
 *                      taking pointers to the two tensor infos.
 *
 * @param[in] dim0_value First dimension of the input shape.
 * @param[in] dim1_value Second dimension of the input shape.
 *
 * @return True if in.padding().empty() holds after configure, false otherwise.
 *
 * NOTE(review): only the input info's padding is inspected; dst's padding is not checked here.
 * NOTE(review): listing number 82 is absent, so a statement may be missing from this view.
 */
75 template <typename FunctionType>
76 bool validate_zero_padding(unsigned int dim0_value, unsigned int dim1_value)
77 {
78  const TensorShape in_shape(dim0_value, dim1_value);
79  TensorInfo in(in_shape, 1, DataType::U32);
    // dst is left default-constructed and handed to configure() as the output info
80  TensorInfo dst;
81 
83 
84  // Validate zero-padding
85  FunctionType func;
86 
87  func.configure(&in, &dst);
88 
89  return in.padding().empty();
90 }
91 
92 /** Zero padding test for GEMM kernels
 *
 * Configures a cpu::kernels::CpuGemmMatrixMultiplyKernel on two F32 input infos built from the
 * given shapes and a default-constructed destination info, then checks that none of the three
 * infos carries padding.
 *
 * @param[in] shape0 Shape of the first input; taken by const reference to avoid a copy per call.
 * @param[in] shape1 Shape of the second input; taken by const reference to avoid a copy per call.
 *
 * @return True if in0, in1 and dst all report empty padding after configure, false otherwise.
 */
93 bool validate_gemm_zero_padding(const TensorShape &shape0, const TensorShape &shape1)
94 {
95  // Create tensors
96  TensorInfo in0(shape0, 1, DataType::F32);
97  TensorInfo in1(shape1, 1, DataType::F32);
98  TensorInfo dst;
99 
100  // Validate zero-padding
101  cpu::kernels::CpuGemmMatrixMultiplyKernel gemm;
    // Scaling factor written as a float literal so no double-to-float conversion is involved
102  gemm.configure(&in0, &in1, &dst, 1.0f, false);
103 
104  return in0.padding().empty() && in1.padding().empty() && dst.padding().empty();
105 }
106 } // namespace
107 
108 TEST_SUITE(NEON)
109 TEST_SUITE(GEMM)
110 
111 /** Test case for memory injection in @ref cpu::CpuGemm.
112  *
113  * Configure the operator once and inject memory at run-time in multiple executions.
 *
 * The operator is configured from tensor infos only. The source tensors and the workspace are
 * created once, while the destination tensor is created anew for each of the two executions
 * and registered in the run pack before the operator runs.
114  *
115  * Checks performed in order:
116  * - Both runs compute the same output
117  */
118 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
119 {
120  auto gemm = std::make_unique<cpu::CpuGemm>();
121  const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
122  const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
123  const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
124  auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
125  const auto gemm_info = GEMMInfo{};
    // Configure with infos only; both scalar arguments are 1.f
126  gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info);
127 
128  // Source tensors are created and allocated once here; only dst is re-created inside the lambda on every call
129  auto lhs = create_tensor<Tensor>(lhs_info);
130  auto rhs = create_tensor<Tensor>(rhs_info);
131  auto c = create_tensor<Tensor>(c_info);
132  lhs.allocator()->allocate();
133  rhs.allocator()->allocate();
134  c.allocator()->allocate();
135 
    // run_pack carries all three sources; prep_pack carries rhs and c only
136  ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
137  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } };
138 
139  auto mg = MemoryGroup{};
    // NOTE(review): ws is not referenced again; presumably it has to stay alive so the workspace
    // tensors added to the packs remain valid — confirm against manage_workspace
140  auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack);
141 
142  auto run_conv = [&]() -> Tensor
143  {
    // Fresh destination for this execution, registered in run_pack as ACL_DST
144  auto dst = create_tensor<Tensor>(dst_info);
145  dst.allocator()->allocate();
146  run_pack.add_tensor(TensorType::ACL_DST, &dst);
147 
    // Same constant inputs on every call: lhs = 1, rhs = 2, c = 3
148  library->fill_tensor_value(Accessor(lhs), 1.f);
149  library->fill_tensor_value(Accessor(rhs), 2.f);
150  library->fill_tensor_value(Accessor(c), 3.f);
151  // This operator is configured once and captured by this lambda.
152  gemm->prepare(prep_pack);
153  gemm->run(run_pack);
154  return dst;
155  };
156  auto result_0 = run_conv();
157  auto result_1 = run_conv();
    // Element-wise exact comparison of the two outputs through their raw buffers viewed as float
158  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
159  {
160  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
161  }
162 }
163 
164 /** Test case for memory injection in @ref NEGEMM.
165  *
166  * Make sure @ref NEGEMM still works through injecting the memory at configure time using the old API.
 *
 * A single NEGEMM instance is re-configured on every execution with newly created tensors,
 * which are allocated and filled after configure and before run.
167  *
168  * Checks performed in order:
169  * - Both runs compute the same output
170  */
171 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
172 {
173  auto gemm = std::make_unique<NEGEMM>();
174  const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32);
175  const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
176  const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
177  auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32);
178  const auto gemm_info = GEMMInfo{};
179  auto run_conv = [&]()
180  {
    // All four tensors are created per call and passed to configure() before they are allocated
181  auto lhs = create_tensor<Tensor>(lhs_info);
182  auto rhs = create_tensor<Tensor>(rhs_info);
183  auto c = create_tensor<Tensor>(c_info);
184  auto dst = create_tensor<Tensor>(dst_info);
185  gemm->configure(&lhs, &rhs, &c, &dst, 1.f, 1.f, gemm_info);
186  lhs.allocator()->allocate();
187  rhs.allocator()->allocate();
188  c.allocator()->allocate();
189  dst.allocator()->allocate();
    // Same constant inputs on every call: lhs = 1, rhs = 2, c = 3
190  library->fill_tensor_value(Accessor(lhs), 1.f);
191  library->fill_tensor_value(Accessor(rhs), 2.f);
192  library->fill_tensor_value(Accessor(c), 3.f);
193  gemm->run();
194  return dst;
195  };
196  auto result_0 = run_conv();
197  auto result_1 = run_conv();
    // Element-wise exact comparison of the two outputs through their raw buffers viewed as float
198  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
199  {
200  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
201  }
202 }
203 
204 // *INDENT-OFF*
205 // clang-format off
// Data-driven check of NEGEMM::validate over paired LhsInfo/RhsInfo/OutputInfo/Expected entries:
// the S32 entries are paired with Expected = false, the F32 entries with Expected = true.
// NOTE(review): listing numbers 206 and 223 are absent from this view, so the opening of the test
// macro and the statement that consumes is_valid/expected are not visible here — confirm against
// the original file.
207  framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type
208  TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
209  }),
210  framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32),
211  TensorInfo(TensorShape(8U, 27U), 1, DataType::F32),
212  })),
213  framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32),
214  TensorInfo(TensorShape(8U, 13U), 1, DataType::F32),
215  })),
216  framework::dataset::make("Expected", { false, true })),
217  lhs_info, rhs_info, output_info, expected)
218 {
219  constexpr float alpha = 1.0;
220  constexpr float beta = 0.0;
221  const auto gemm_info = GEMMInfo();
    // validate() is called on resizable clones of the infos, with no third input (nullptr)
222  bool is_valid = bool(NEGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info));
224 }
225 // clang-format on
226 // *INDENT-ON*
227 TEST_SUITE(KERNEL_SELECTION)
// Checks the names of the implementations selected for CpuGemmMatrixMultiplyKernel and
// CpuGemmMatrixAdditionKernel for a given CPU extension string and data type.
// NOTE(review): listing numbers 231, 233, 237, 239, 248 and 257 are absent from this view. The
// remaining dataset entries, the test's argument list, the declaration of cpu_isa and whatever
// compares expected with actual are therefore not visible — confirm against the original file.
228 DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
229  combine(framework::dataset::make("CpuExt", std::string("NEON")),
230  framework::dataset::make("DataType", { DataType::F32,
232  })),
234 {
235  using namespace cpu::kernels;
236 
238  cpu_isa.neon = (cpu_ext == "NEON");
240 
    // Matrix-multiply kernel: query the preferred implementation for this data type and ISA
241  const auto *selected_impl_mul = CpuGemmMatrixMultiplyKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
242 
243  ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_mul);
244 
    // Expected name is "<lower-case extension>_<data-type suffix>_gemm_matrix_mul"
245  std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_mul";
246  std::string actual = selected_impl_mul->name;
247 
249 
    // Matrix-addition kernel: same query, expected name ends in "_gemm_matrix_add"
250  const auto *selected_impl_add = CpuGemmMatrixAdditionKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);
251 
252  ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_add);
253 
254  expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_add";
255  actual = selected_impl_add->name;
256 
258 }
259 TEST_SUITE_END() // KERNEL_SELECTION
260 
261 TEST_SUITE(TRANSPOSE_1XW)
// Function-style wrapper type around cpu::kernels::CpuGemmTranspose1xWKernel used by the tests in this suite
262 using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>;
// Calls validate_zero_padding<CpuGemmTranspose1xW> for each paired (N, K) entry.
// NOTE(review): listing number 269 is absent, so the statement that consumes status is not visible here.
263 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
264  framework::dataset::make("N", { 1, 23, 63, 101 }),
265  framework::dataset::make("K", { 1, 47, 29, 27 })),
266  n_value, k_value)
267 {
268  bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value);
270 }
271 
272 TEST_SUITE(U32)
// Transpose-1xW validation fixture instantiated with uint32_t elements
273 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>;
// PRECOMMIT run over data_transpose joined with DataType::U32
274 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32))
275 {
276  // Validate output
277  validate(Accessor(_target), _reference);
278 }
279 TEST_SUITE_END() // U32
280 
281 TEST_SUITE(U16)
// Transpose-1xW validation fixture instantiated with uint16_t elements
282 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>;
// PRECOMMIT run over data_transpose joined with DataType::U16
283 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16))
284 {
285  // Validate output
286  validate(Accessor(_target), _reference);
287 }
288 TEST_SUITE_END() // U16
289 
290 TEST_SUITE(U8)
// Transpose-1xW validation fixture instantiated with uint8_t elements
291 using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>;
// PRECOMMIT run over data_transpose joined with DataType::U8
292 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8))
293 {
294  // Validate output
295  validate(Accessor(_target), _reference);
296 }
297 TEST_SUITE_END() // U8
298 
299 TEST_SUITE_END() // TRANSPOSE_1XW
300 
301 TEST_SUITE(INTERLEAVE_4X4)
// Function-style wrapper type around cpu::kernels::CpuGemmInterleave4x4Kernel used by the fixture tests in this suite
302 using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>;
303 
// Calls validate_zero_padding for each paired (M, K) entry. The template argument here is the
// kernel type itself, not the CpuGemmInterleave4x4 wrapper alias declared above.
// NOTE(review): listing number 310 is absent, so the statement that consumes status is not visible here.
304 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(
305  framework::dataset::make("M", { 1, 23, 63, 101 }),
306  framework::dataset::make("K", { 1, 47, 29, 27 })),
307  m_value, k_value)
308 {
309  bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value);
311 }
312 
313 TEST_SUITE(U32)
// Interleave-4x4 validation fixture instantiated with uint32_t elements
314 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>;
// PRECOMMIT run over data_interleave joined with DataType::U32
315 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32))
316 {
317  // Validate output
318  validate(Accessor(_target), _reference);
319 }
320 TEST_SUITE_END() // U32
321 
322 TEST_SUITE(U16)
// Interleave-4x4 validation fixture instantiated with uint16_t elements
323 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>;
// PRECOMMIT run over data_interleave joined with DataType::U16
324 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16))
325 {
326  // Validate output
327  validate(Accessor(_target), _reference);
328 }
329 TEST_SUITE_END() // U16
330 
331 TEST_SUITE(U8)
// Interleave-4x4 validation fixture instantiated with uint8_t elements
332 using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>;
// PRECOMMIT run over data_interleave joined with DataType::QASYMM8.
// NOTE(review): the suite is named U8 and the fixture element type is uint8_t, yet the dataset
// passes DataType::QASYMM8, whereas the TRANSPOSE_1XW U8 suite passes DataType::U8 — confirm
// that this difference is intentional.
333 FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8))
334 {
335  // Validate output
336  validate(Accessor(_target), _reference);
337 }
338 TEST_SUITE_END() // U8
339 
340 TEST_SUITE_END() // INTERLEAVE_4X4
341 
// GEMM validation fixture bound to Tensor, Accessor and NEGEMM, parameterised on the element type T
342 template <typename T>
343 using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
344 
// Same fixture with six additional boolean template arguments (true, false, false, false, false, true),
// used by the BATCHED_MATMUL test cases.
// NOTE(review): the meaning of each boolean is defined by GEMMValidationFixture, which is not visible here — confirm.
345 template <typename T>
346 using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>;
347 
348 TEST_SUITE(Float)
// Calls validate_gemm_zero_padding for each paired (In0, In1) shape entry; in every pair the
// first dimension of In0 equals the second dimension of In1.
// NOTE(review): listing number 366 is absent, so the statement that consumes status is not visible here.
349 DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U),
350  TensorShape(31U, 1U),
351  TensorShape(31U, 1U),
352  TensorShape(8U, 2U),
353  TensorShape(38U, 12U),
354  TensorShape(32U, 1U)
355  }),
356  framework::dataset::make("In1", { TensorShape(33U, 21U),
357  TensorShape(23U, 31U),
358  TensorShape(23U, 31U),
359  TensorShape(16U, 8U),
360  TensorShape(21U, 38U),
361  TensorShape(17U, 32U)
362  })),
363  shape0, shape1)
364 {
365  bool status = validate_gemm_zero_padding(shape0, shape1);
367 }
368 
// FP16 test cases: compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined. Every case
// validates target against reference with rel_tolerance_f16, tolerance_num and abs_tolerance_f16.
// NOTE(review): listing numbers 373, 383, 388, 393 and 398 are absent from this view, so the tail of
// each dataset expression and the suite-closing macros are not visible — confirm against the original file.
369 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
370 TEST_SUITE(FP16)
// PRECOMMIT run over SmallGEMMDataset with ReshapeWeights in { true, false }
371 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
372  framework::dataset::make("ReshapeWeights", { true, false })),
374 {
375  // Validate output
376  validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
377 }
378 
379 TEST_SUITE(BATCHED_MATMUL)
380 
// PRECOMMIT run over SmallBatchedMatMulDataset with ReshapeWeights fixed to false
381 FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
382  framework::dataset::make("ReshapeWeights", { false })),
384 {
385  // Validate output
386  validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
387 }
389 
// NIGHTLY run over LargeGEMMDataset with ReshapeWeights in { true, false }
390 FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
391  framework::dataset::make("ReshapeWeights", { true, false })),
392 
394 {
395  // Validate output
396  validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
397 }
399 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
400 
// FP32 test cases: every case validates target against reference with the absolute tolerance tolerance_f.
// NOTE(review): listing numbers 405, 413 and 424 are absent from this view, so the tail of each
// dataset expression is not visible — confirm against the original file.
401 TEST_SUITE(FP32)
// PRECOMMIT run over SmallGEMMDataset with ReshapeWeights in { true, false }
402 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
403  framework::dataset::make("ReshapeWeights", { true, false })),
404 
406 {
407  // Validate output
408  validate(Accessor(_target), _reference, tolerance_f);
409 }
// NIGHTLY run over LargeGEMMDataset with ReshapeWeights in { true, false }
410 FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(),
411  framework::dataset::make("ReshapeWeights", { true, false })),
412 
414 {
415  // Validate output
416  validate(Accessor(_target), _reference, tolerance_f);
417 }
418 
419 TEST_SUITE(BATCHED_MATMUL)
420 
421 TEST_SUITE(FP32)
// PRECOMMIT run over SmallBatchedMatMulDataset with ReshapeWeights fixed to false
422 FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
423  framework::dataset::make("ReshapeWeights", { false })),
425 {
426  // Validate output
427  validate(Accessor(_target), _reference, tolerance_f);
428 }
430 
432 
435 
438 } // namespace validation
439 } // namespace test
440 } // namespace arm_compute
Datasets.h
arm_compute::test::validation::TEST_SUITE_END
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
Definition: DequantizationLayer.cpp:111
PaddingCalculator.h
arm_compute::cpuinfo::CpuIsaInfo
CPU ISA (Instruction Set Architecture) information.
Definition: CpuIsaInfo.h:37
arm_compute::test::validation::tolerance_f
constexpr AbsoluteTolerance< float > tolerance_f(0.0001f)
Tolerance value for comparing reference's output against implementation's output for DataType::F32 an...
arm_compute::test::validation::TEST_CASE
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
Definition: ArithmeticAddition.cpp:93
arm_compute::test::validation::actual
std::string actual
Definition: Cast.cpp:234
N
unsigned int N
Definition: CpuGemmAssemblyDispatch.cpp:103
arm_compute::test::validation::FIXTURE_DATA_TEST_CASE
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Definition: AbsLayer.cpp:50
arm_compute::test::validation::DATA_TEST_CASE
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
Definition: ActivationLayer.cpp:100
arm_compute::cpuinfo::CpuIsaInfo::neon
bool neon
Definition: CpuIsaInfo.h:40
arm_compute::test::validation::combine
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
arm_compute::test::validation::CpuGemmTranspose1xWFixture
GEMMTranspose1xWValidationFixture< Tensor, Accessor, CpuGemmTranspose1xW, uint32_t > CpuGemmTranspose1xWFixture
Definition: GEMM.cpp:273
CpuGemmMatrixMultiplyKernel.h
arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::lower_string
std::string lower_string(const std::string &val)
Lower a given string.
Definition: StringUtils.cpp:38
Types.h
arm_compute::test::validation::tolerance_num
constexpr float tolerance_num
Tolerance number.
Definition: Add.cpp:107
arm_compute::cpuinfo::CpuIsaInfo::fp16
bool fp16
Definition: CpuIsaInfo.h:47
CpuGemmTranspose1xWKernel.h
arm_compute::NEGEMM
Basic function to execute GEMM.
Definition: NEGEMM.h:40
arm_compute::test::validation::reference::gemm
template SimpleTensor< half > gemm(const SimpleTensor< half > &a, const SimpleTensor< half > &b, const SimpleTensor< half > &c, float alpha, float beta)
arm_compute::ACL_SRC_0
@ ACL_SRC_0
Definition: Types.h:45
arm_compute::cpu::KernelSelectionType::Preferred
@ Preferred
Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags.
arm_compute::test::Accessor
Accessor implementation for Tensor objects.
Definition: Accessor.h:35
arm_compute::test::validation::cpu_isa
cpuinfo::CpuIsaInfo cpu_isa
Definition: Cast.cpp:226
arm_compute::ACL_SRC_1
@ ACL_SRC_1
Definition: Types.h:46
arm_compute::test::validation::is_valid
bool is_valid
Definition: DirectConv2d.cpp:162
arm_compute::ACL_SRC_2
@ ACL_SRC_2
Definition: Types.h:47
StringUtils.h
arm_compute::test::validation::cpu_ext
cpu_ext
Definition: Cast.cpp:221
arm_compute::test::validation::validate
validate(CLAccessor(output_state), expected_output)
arm_compute::half
half_float::half half
16-bit floating point type
Definition: CoreTypes.h:36
arm_compute::utils::cast::U
U
Definition: SaturateCast.h:65
TensorAllocator.h
arm_compute::test::validation::output_info
output_info
Definition: DirectConvolutionLayer.cpp:547
arm_compute::test::framework::DatasetMode::ALL
@ ALL
arm_compute::DataType::U32
@ U32
unsigned 32-bit number
arm_compute::test::validation::ARM_COMPUTE_EXPECT
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
CpuGemm.h
CpuGemmInterleave4x4Kernel.h
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::test::framework::DatasetMode::NIGHTLY
@ NIGHTLY
Asserts.h
Accessor.h
arm_compute::cpu_impl_dt
std::string cpu_impl_dt(const DataType &data_type)
Returns the suffix string of CPU kernel implementation names based on the given data type.
Definition: DataTypeUtils.h:487
Macros.h
arm_compute::test::framework::DatasetMode::PRECOMMIT
@ PRECOMMIT
Tensor.h
arm_compute::test::framework::dataset::make
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
Definition: ContainerDataset.h:160
arm_compute::test::validation::data_type
data_type
Definition: Cast.cpp:222
Validation.h
arm_compute::test::library
std::unique_ptr< AssetsLibrary > library
Definition: main.cpp:77
MemoryHelpers.h
arm_compute::test::validation::zip
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
arm_compute::test::validation::NEBatchedMatMulFixture
GEMMValidationFixture< Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true > NEBatchedMatMulFixture
Definition: GEMM.cpp:346
M
unsigned int M
Definition: CpuGemmAssemblyDispatch.cpp:102
arm_compute::test::validation::ARM_COMPUTE_EXPECT_EQUAL
ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS)
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::validation::TEST_SUITE
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
arm_compute::DataType::F16
@ F16
16-bit floating-point number
NEGEMM.h
arm_compute::DataType::S32
@ S32
signed 32-bit number
arm_compute::test::validation::expected
expected
Definition: BatchNormalizationLayer.cpp:166
arm_compute::DataType::F32
@ F32
32-bit floating-point number
arm_compute::Tensor
Basic implementation of the tensor interface.
Definition: Tensor.h:37
arm_compute::NEGEMM::validate
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMM.
Definition: NEGEMM.cpp:99
arm_compute::test::validation::ARM_COMPUTE_ERROR_ON_NULLPTR
ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl)
arm_compute::test::framework::DatasetMode
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
arm_compute::DataType
DataType
Available data types.
Definition: CoreTypes.h:83
Helper.h
arm_compute::test::framework::LogLevel::ERRORS
@ ERRORS
arm_compute::test::validation::CpuGemmInterleave4x4Fixture
GEMMInterleave4x4ValidationFixture< Tensor, Accessor, CpuGemmInterleave4x4, uint32_t > CpuGemmInterleave4x4Fixture
Definition: GEMM.cpp:314