Compute Library
 23.05
ConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
35 #include "tests/NEON/Accessor.h"
37 #include "tests/datasets/LargeConvolutionLayerDataset.h"
38 #include "tests/datasets/SmallConvolutionLayerDataset.h"
39 #include "tests/datasets/TinyConvolutionLayerDataset.h"
41 #include "tests/framework/Macros.h"
44 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
45 #include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h"
46 
47 namespace arm_compute
48 {
49 namespace test
50 {
51 namespace validation
52 {
53 namespace detail
54 {
55 template <>
57  Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst,
59  const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
60 {
61  ARM_COMPUTE_UNUSED(weights_info);
62 
63  Conv2dInfo conv_info(info, dilation, act_info, false, num_groups);
64  func.configure(src, weights, bias, dst, conv_info);
65 }
66 } // namespace detail
67 namespace
68 {
69 const RelativeTolerance<float> rel_tolerance_f32(0.01f); /**< Relative tolerance for FP32 types */
70 const RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relative tolerance for FP32 types */
71 const AbsoluteTolerance<float> abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */
72 const AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f); /**< Absolute tolerance for FP32 types */
73 
74 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
75 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
76 constexpr float tolerance_num_f16 = 0.15f;
77 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
78 
79 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
80 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
81 const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */
82 constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */
83 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
84 
85 #ifdef ARM_COMPUTE_ENABLE_SME
86 // TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode.
87 // Temporarily increase the tolerance for quantized data.
88 constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
89 #else // ARM_COMPUTE_ENABLE_SME
90 constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
91 #endif // ARM_COMPUTE_ENABLE_SME
92 
93 /** CNN data types */
94 const auto CNNDataTypes = framework::dataset::make("DataType",
95 {
96 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
98 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
101 });
102 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
103 {
107 });
108 
109 const auto QuantizationData = framework::dataset::make("QuantizationInfo",
110 {
111  QuantizationInfo(0.5f, 10),
112  QuantizationInfo(0.3f, 3),
113  QuantizationInfo(1.f, 10),
114  QuantizationInfo(1.1f, 10),
115 });
116 } // namespace
117 
119 TEST_SUITE(ConvolutionLayer)
120 
121 // *INDENT-OFF*
122 // clang-format off
123 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
124  framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
125  TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
126  TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
127  TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
128  }),
129  framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
130  TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
131  TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
132  TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
133  })),
134  framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
135  TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
136  TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
137  TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
138  })),
139  framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
140  PadStrideInfo(1, 1, 0, 0),
141  PadStrideInfo(2, 1, 0, 0),
142  PadStrideInfo(3, 2, 1, 0)
143  })),
144  framework::dataset::make("FastMath", { true,
145  true,
146  false,
147  false
148  })),
151 {
153  &weights_info.clone()->set_is_resizable(true),
154  &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
156 }
157 // clang-format on
158 // *INDENT-ON*
159 TEST_SUITE_END() // ConvolutionLayer
160 
TEST_SUITE(WinogradLayer)
// Standard Winograd fast-math validation fixture for NEWinogradConvolutionLayer.
template <typename T>
using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
// Same fixture with the mixed-data-layout flags enabled (last two template
// arguments), so tensors are permuted between layouts during the test.
template <typename T>
using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;

// Variant that runs the layer without a bias tensor (third flag = false).
template <typename T>
using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
169 
170 /** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
171  *
172  * Configure the operator once and inject memory at run-time in multiple executions.
173  *
174  * Checks performed in order:
175  * - Both runs compute the same output
176  */
178 {
179  auto winograd = std::make_unique<cpu::CpuWinogradConv2d>();
180  const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
181  const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
182  const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
183  auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
184  const PadStrideInfo pad_info{};
185 
186  winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);
187 
188  // telhs are newly created every call of this lambda function
189  auto a = create_tensor<Tensor>(src_info);
190  auto b = create_tensor<Tensor>(b_info);
191  auto c = create_tensor<Tensor>(w_info);
192  a.allocator()->allocate();
193  b.allocator()->allocate();
194  c.allocator()->allocate();
195 
197  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
198 
199  auto mg = MemoryGroup{};
200  auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
201  auto run_conv = [&]() -> Tensor
202  {
203  auto dst = create_tensor<Tensor>(dst_info);
204  dst.allocator()->allocate();
205 
206  run_pack.add_tensor(TensorType::ACL_DST, &dst);
207  library->fill_tensor_value(Accessor(a), 1.f);
208  library->fill_tensor_value(Accessor(b), 2.f);
209  library->fill_tensor_value(Accessor(c), 3.f);
210 
211  // This operator is configured once and captured by this lambda.
212  winograd->prepare(prep_pack);
213  winograd->run(run_pack);
214  return dst;
215  };
216 
217  auto result_0 = run_conv();
218  auto result_1 = run_conv();
219 
220  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
221  {
222  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
223  }
224 }
225 
226 /** Test case for memory injection in @ref NEWinogradConvolutionLayer.
227  *
228  * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API.
229  *
230  * Checks performed in order:
231  * - Both runs compute the same output
232  */
233 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
234 {
235  auto gemm = std::make_unique<NEWinogradConvolutionLayer>();
236  const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
237  const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
238  const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
239  auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
240  const PadStrideInfo pad_info{};
241 
242  auto run_conv = [&]()
243  {
244  auto src = create_tensor<Tensor>(src_info);
245  auto w = create_tensor<Tensor>(w_info);
246  auto b = create_tensor<Tensor>(b_info);
247  auto dst = create_tensor<Tensor>(dst_info);
248 
249  gemm->configure(&src, &b, &w, &dst, pad_info);
250 
251  src.allocator()->allocate();
252  b.allocator()->allocate();
253  w.allocator()->allocate();
254  dst.allocator()->allocate();
255 
256  library->fill_tensor_value(Accessor(src), 1.f);
257  library->fill_tensor_value(Accessor(b), 2.f);
258  library->fill_tensor_value(Accessor(w), 3.f);
259  gemm->run();
260  return dst;
261  };
262 
263  auto result_0 = run_conv();
264  auto result_1 = run_conv();
265 
266  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
267  {
268  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
269  }
270 }
271 
272 TEST_SUITE(FP32)
273 
274 TEST_SUITE(Conv1x3)
275 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
276  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
277  framework::dataset::make("DataType", { DataType::F32 })),
278  ActivationFunctionsDataset),
280 {
281  // Validate output
282  validate(Accessor(_target), _reference, abs_tolerance_f32);
283 }
286  framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
287  framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
289  framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
290  framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
291  framework::dataset::make("Dilation", Size2D(1U, 1U))),
292  framework::dataset::make("DataType", { DataType::F32 })),
293  ActivationFunctionsDataset),
295 {
296  // Validate output
297  validate(Accessor(_target), _reference, abs_tolerance_f32);
298 }
300  combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
301  framework::dataset::make("DataType", { DataType::F32 })),
302  ActivationFunctionsDataset),
304 {
305  // Validate output
306  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
307 }
308 
309 TEST_SUITE_END() // Conv1x3
310 
311 TEST_SUITE(Conv3x1)
312 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
313  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
314  framework::dataset::make("DataType", { DataType::F32 })),
315  ActivationFunctionsDataset),
317 {
318  // Validate output
319  validate(Accessor(_target), _reference, abs_tolerance_f32);
320 }
322  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
323  framework::dataset::make("DataType", { DataType::F32 })),
324  ActivationFunctionsDataset),
326 {
327  // Validate output
328  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
329 }
330 
331 TEST_SUITE_END() // Conv3x1
332 
333 TEST_SUITE(Conv1x5)
335  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
336  framework::dataset::make("DataType", { DataType::F32 })),
337  ActivationFunctionsDataset),
339 {
340  // Validate output
341  validate(Accessor(_target), _reference, abs_tolerance_f32);
342 }
344  combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
345  framework::dataset::make("DataType", { DataType::F32 })),
346  ActivationFunctionsDataset),
348 {
349  // Validate output
350  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
351 }
352 
353 TEST_SUITE_END() // Conv1x5
354 
355 TEST_SUITE(Conv5x1)
357  combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
358  framework::dataset::make("DataType", { DataType::F32 })),
359  ActivationFunctionsDataset),
361 {
362  // Validate output
363  validate(Accessor(_target), _reference, abs_tolerance_f32);
364 }
366  combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
367  framework::dataset::make("DataType", { DataType::F32 })),
368  ActivationFunctionsDataset),
370 {
371  // Validate output
372  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
373 }
374 
375 TEST_SUITE_END() // Conv5x1
376 
377 TEST_SUITE(Conv7x1)
379  combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
380  framework::dataset::make("DataType", { DataType::F32 })),
381  ActivationFunctionsDataset),
383 {
384  // Validate output
385  validate(Accessor(_target), _reference, abs_tolerance_f32);
386 }
387 
389  combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
390  framework::dataset::make("DataType", { DataType::F32 })),
391  ActivationFunctionsDataset),
393 {
394  // Validate output
395  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
396 }
397 TEST_SUITE_END() // Conv7x1
398 
399 TEST_SUITE(Conv1x7)
401  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
402  framework::dataset::make("DataType", { DataType::F32 })),
403  ActivationFunctionsDataset),
405 {
406  // Validate output
407  validate(Accessor(_target), _reference, abs_tolerance_f32);
408 }
409 
411  combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
412  framework::dataset::make("DataType", { DataType::F32 })),
413  ActivationFunctionsDataset),
415 {
416  // Validate output
417  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
418 }
419 TEST_SUITE_END() // Conv1x7
420 
421 TEST_SUITE(Conv3x3)
423  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
424  framework::dataset::make("DataType", { DataType::F32 })),
425  ActivationFunctionsDataset),
427 
428 {
429  // Validate output
430  validate(Accessor(_target), _reference, abs_tolerance_f32);
431 }
433  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
434  framework::dataset::make("DataType", { DataType::F32 })),
435  ActivationFunctionsDataset),
437 
438 {
439  // Validate output
440  // floating point arithmetic the Winograd results will not be exactly the same as direct convolution, especially for big shapes
441  validate(Accessor(_target), _reference, rel_tolerance_winograd_3x3_f32, 0.f, float(abs_tolerance_f32));
442 }
443 TEST_SUITE_END() // Conv3x3
444 
445 TEST_SUITE(Conv5x5)
447  combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
448  framework::dataset::make("DataType", { DataType::F32 })),
449  ActivationFunctionsDataset),
451 
452 {
453  // Validate output
454  validate(Accessor(_target), _reference, abs_tolerance_f32);
455 }
457  combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
458  framework::dataset::make("DataType", { DataType::F32 })),
459  ActivationFunctionsDataset),
461 
462 {
463  // Validate output
464  validate(Accessor(_target), _reference, abs_tolerance_f32);
465 }
466 
467 TEST_SUITE_END() // Conv5x5
468 
469 FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
470  combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
471  datasets::SmallWinogradConvolutionLayer5x5Dataset()),
472  framework::dataset::make("DataType", { DataType::F32 })),
473  ActivationFunctionsDataset),
474 
476 {
477  // Validate output
478  validate(Accessor(_target), _reference, abs_tolerance_f32);
479 }
480 
481 TEST_SUITE_END() // FP32
482 
483 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
484 TEST_SUITE(FP16)
485 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
486 
487 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
488  framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
489  TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
490  }),
491  framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
492  TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
493  })),
494  framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
495  TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
496  })),
497  framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
498  PadStrideInfo(1, 1, 0, 0)
499  })),
500  framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
501  true // case fp16 and fast_math True then enable Winograd
502  })),
505 {
507  &weights_info.clone()->set_is_resizable(true),
508  &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
510 }
511 
512 TEST_SUITE(Conv3x3)
513 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
514  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
515  framework::dataset::make("DataType", { DataType::F16 })),
516  ActivationFunctionsDataset),
518 
519 {
520  // Validate output
521  validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
522 }
523 
524 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
525  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
526  framework::dataset::make("DataType", { DataType::F16 })),
527  ActivationFunctionsDataset),
529 
530 {
531  // Validate output
532  validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
533 }
534 TEST_SUITE_END() // Conv3x3
535 TEST_SUITE_END() // FP16
536 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
537 TEST_SUITE_END() // WinogradLayer
538 
539 #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
540 TEST_SUITE(FIXED_FORMAT_KERNELS)
541 TEST_SUITE(VariableWeightUtils)
542 
543 // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
544 
545 template <typename ConvolutionClass>
546 using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;
547 
548 template <typename ConvolutionClass>
549 using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
550 
551 // UC2_1
552 
553 FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
556 {
558 }
559 FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
562 {
564 }
565 
566 FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
569 {
571 }
572 
573 FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
576 {
578 }
579 
580 // UC2_2_* tests: the user requests a specific fixed format, and a
581 // kernel that support that fixed format is found.
582 
583 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
586 {
589 }
590 
591 FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
594 {
597 }
598 
599 #if defined(ARM_COMPUTE_ENABLE_BF16)
600 
601 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
604 {
607 }
608 
609 FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
612 {
615 }
616 
617 #endif // ARM_COMPUTE_ENABLE_BF16
618 
619 // UC3_1_* tests: the user queries for ANY fixed format, but there is
620 // no kernel that support the use case specified by the user (for
621 // example, there is no fixed format kernel for the datatype of the
622 // problem).
623 
624 FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
626  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
627 {
629 }
630 
631 FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
633  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
634 {
636 }
637 
638 FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
640  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
641 {
643 }
644 
645 FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
647  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
648 {
650 }
651 
652 // UC3_2_* tests: the user queries for ANY fixed format. The search
653 // succeeded and the fixed format found is prompted back for
654 // consumption by the user. Note that we just test the
655 // _computed_weight_format to be anything but not the formats that are
656 // not fixed formats (ANY and UNSPECIFIED). This is because the weight
657 // format that the runtime produces depends on the size of the vector
658 // units of the hardware where the tests is executed. For example, a
659 // format like OHWIo4 for FP32 data returned for 128-bit NEON hardware
660 // is replaced by OHWIo8 when running on 256-bit SVE.
661 
662 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
664  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
665 {
669 }
670 
671 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
673  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
674 {
677 }
678 
679 #if defined(ARM_COMPUTE_ENABLE_BF16)
680 
681 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
683  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
684 {
689 }
690 
691 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
693  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
694 {
699 }
700 
701 #endif // ARM_COMPUTE_ENABLE_BF16
702 
namespace
{
// Each test case is a tuple of:
//   0: the original weights shape (OHWI),
//   1: the shape expected after packing into the fixed format,
//   2: the fixed weight format to pack for.
using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
auto prepare_weights_shapes = framework::dataset::make("TensorShape",
{
    // OHWIo<interleave_by>i<block_by>
    //
    // OHWI --> O'HWI', where:
    //
    //   O'= smallest multiple of <interleave_by> such that O<=O'
    //   I'= smallest multiple of <block_by> such that I<=I'
    //

    // Change N for OHWIo4
    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
    // // Change N for OHWIo8
    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
    // // Change N for OHWIo4 when H, W and C are not 1
    TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),

    // // Fix N and move HWI around, with different data layouts and formats
    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),

    // // Adding <block_by> on I (=C)
    TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
    TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),

    // ---------
    TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),

});
} // unnamed namespace
765 
766 DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
767  prepare_weights_shapes, shapes)
768 {
769  const TensorShape input_shape = std::get<0>(shapes);
770  const TensorShape expected_shape = std::get<1>(shapes);
771  const arm_compute::WeightFormat wf = std::get<2>(shapes);
772  const DataType DT = DataType::F32;
773  const DataLayout DL = DataLayout::NHWC;
774  const auto TI = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL);
775  const TensorInfo computed_info = ::arm_compute::test::validation::prepare_weights(TI, wf);
776  ARM_COMPUTE_EXPECT_EQUAL(computed_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS);
777 }
778 
779 TEST_SUITE_END() // VariableWeightUtils
780 
TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)

// Variable-weights fixture driving the experimental cpu::CpuGemmConv2d
// operator API directly (fast math disabled).
template <typename ScalarType>
using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;

FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}

#if defined(ARM_COMPUTE_ENABLE_FP16)
FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
}
#endif // ARM_COMPUTE_ENABLE_FP16

#if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant: with BF16 support, enabling fast math lets the GEMM use
// BF16 accumulation paths, hence the separate fixture alias.
template <typename ScalarType>
using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;

FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
#endif // ARM_COMPUTE_ENABLE_BF16

TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
821 
TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)

// Same variable-weights coverage as the Cpu-API suite above, but exercised
// through the public NEGEMMConvolutionLayer run-time interface.
template <typename ScalarType>
using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;

FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}

#if defined(ARM_COMPUTE_ENABLE_FP16)
FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
}
#endif // ARM_COMPUTE_ENABLE_FP16

#if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant through the NE interface (BF16 accumulation paths).
template <typename ScalarType>
using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;

FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
#endif // ARM_COMPUTE_ENABLE_BF16

TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
862 TEST_SUITE_END() // FIXED_FORMAT_KERNELS
863 
864 #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
865 
866 TEST_SUITE(GEMMConvolutionLayer)
867 template <typename T>
868 using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
869 template <typename T>
870 using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
871 
872 /** Test case for memory injection in @ref cpu::CpuGemmConv2d.
873  *
874  * Configure the operator once and inject memory at run-time in multiple executions.
875  *
876  * Checks performed in order:
877  * - Both runs compute the same output
878  */
879 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
880 {
881  auto conv = std::make_unique<cpu::CpuGemmConv2d>();
883  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
884  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
885  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
886  const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
887  WeightsInfo weights_info(false, 3U, 3U, 1U);
888  conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
889 
890  // tensors are newly created every call of this lambda function
891  auto src = create_tensor<Tensor>(src_info);
892  auto weight = create_tensor<Tensor>(weight_info);
893  auto bias = create_tensor<Tensor>(bias_info);
894  src.allocator()->allocate();
895  weight.allocator()->allocate();
896  bias.allocator()->allocate();
897 
898  ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
899  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
900 
901  auto mg = MemoryGroup{};
902  auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
903 
904  auto run_conv = [&]() -> Tensor
905  {
906  auto dst = create_tensor<Tensor>(dst_info);
907  dst.allocator()->allocate();
908  run_pack.add_tensor(TensorType::ACL_DST, &dst);
909 
910  library->fill_tensor_value(Accessor(src), 1.f);
911  library->fill_tensor_value(Accessor(weight), 2.f);
912  library->fill_tensor_value(Accessor(bias), 3.f);
913  // This operator is configured once and captured by this lambda.
914  conv->prepare(prep_pack);
915  conv->run(run_pack);
916  return dst;
917  };
918  auto result_0 = run_conv();
919  auto result_1 = run_conv();
920  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
921  {
922  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
923  }
924 }
925 
926 /** Test case for memory injection in @ref NEGEMMConvolutionLayer.
927  *
928  * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
929  *
930  * Checks performed in order:
931  * - Both runs compute the same output
932  */
933 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
934 {
935  auto conv = std::make_unique<NEGEMMConvolutionLayer>();
937  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
938  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
939  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
940  const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
941  WeightsInfo weights_info(false, 3U, 3U, 1U);
942  auto run_conv = [&]()
943  {
944  auto src = create_tensor<Tensor>(src_info);
945  auto weight = create_tensor<Tensor>(weight_info);
946  auto bias = create_tensor<Tensor>(bias_info);
947  auto dst = create_tensor<Tensor>(dst_info);
948  conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
949  src.allocator()->allocate();
950  weight.allocator()->allocate();
951  bias.allocator()->allocate();
952  dst.allocator()->allocate();
953  library->fill_tensor_value(Accessor(src), 1.f);
954  library->fill_tensor_value(Accessor(weight), 2.f);
955  library->fill_tensor_value(Accessor(bias), 3.f);
956  conv->run();
957  return dst;
958  };
959  auto result_0 = run_conv();
960  auto result_1 = run_conv();
961  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
962  {
963  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
964  }
965 }
966 
967 TEST_SUITE(Float)
968 #if defined(ARM_COMPUTE_ENABLE_BF16)
970 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
971  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
972  ActivationFunctionsDataset))
973 {
974  // Validate output
975  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
976 }
977 TEST_SUITE_END() // BFLOAT16
978 #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
979 
980 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
981 TEST_SUITE(FP16)
982 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
983  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
984 {
985  // Validate output
986  validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
987 }
988 TEST_SUITE_END() // FP16
989 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
990 
991 TEST_SUITE(FP32)
992 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
993  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
994  ActivationFunctionsDataset))
995 {
996  // Validate output
997  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
998 }
1001  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1002  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1003  framework::dataset::make("Bias", TensorShape(2U))),
1004  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1005  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1006  framework::dataset::make("Dilation", Size2D(1, 1))),
1007  framework::dataset::make("ReshapeWeights", { true })),
1010  ActivationFunctionsDataset))
1011 {
1012  // Validate output
1013  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1014 }
1015 TEST_SUITE_END() // FP32
1016 TEST_SUITE_END() // Float
1017 
1018 template <typename T>
1019 using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
1020 template <typename T>
1021 using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
1022 
1023 template <typename T>
1024 using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>;
1025 
1027 {
1031 });
1032 TEST_SUITE(Quantized)
1034 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1035  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1036  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1037 {
1038  // Validate output
1039  validate(Accessor(_target), _reference, tolerance_qasymm8);
1040 }
1041 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
1043  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1044  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1045  framework::dataset::make("Bias", TensorShape(2U))),
1046  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1047  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1048  framework::dataset::make("Dilation", Size2D(1, 1))),
1049  framework::dataset::make("ReshapeWeights", { true })),
1052  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1054 {
1055  // Validate output
1056  validate(Accessor(_target), _reference, tolerance_qasymm8);
1057 }
1058 TEST_SUITE_END() // QASYMM8
1059 
1061 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1062  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1063  framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1064 {
1065  // Validate output
1066  validate(Accessor(_target), _reference, tolerance_qasymm8);
1067 }
1070  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1071  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1072  framework::dataset::make("Bias", TensorShape(2U))),
1073  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1074  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1075  framework::dataset::make("Dilation", Size2D(1, 1))),
1076  framework::dataset::make("ReshapeWeights", { true })),
1079  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1081 {
1082  // Validate output
1083  validate(Accessor(_target), _reference, tolerance_qasymm8);
1084 }
1085 TEST_SUITE_END() // QASYMM8_SIGNED
1086 
1089  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1090  framework::dataset::make("ReshapeWeights", { true })),
1091  framework::dataset::make("DataType", { DataType::QASYMM8 })),
1095  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1096 {
1097  // Validate output
1098  validate(Accessor(_target), _reference, tolerance_qasymm8);
1099 }
1101  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1102  framework::dataset::make("ReshapeWeights", { true })),
1107  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1108 {
1109  // Validate output
1110  validate(Accessor(_target), _reference, tolerance_qasymm8);
1111 }
1112 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1113 TEST_SUITE_END() // Quantized
1114 
1115 TEST_SUITE_END() // GEMMConvolutionLayer
1116 
1117 TEST_SUITE(DirectGEMMConv2d)
1118 template <typename T>
1119 using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1120 
1121 /** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
1122  *
1123  * Configure the operator once and inject memory at run-time in multiple executions.
1124  *
1125  * Checks performed in order:
1126  * - Both runs compute the same output
1127  */
1128 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
1129 {
1130  auto conv = std::make_unique<cpu::CpuGemmDirectConv2d>();
1131  const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1132  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1133  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1134  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1135  const auto conv_info = Conv2dInfo{};
1136  conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
1137 
1138  // tensors are newly created every call of this lambda function
1139  auto src = create_tensor<Tensor>(src_info);
1140  auto weight = create_tensor<Tensor>(weight_info);
1141  auto bias = create_tensor<Tensor>(bias_info);
1142  src.allocator()->allocate();
1143  weight.allocator()->allocate();
1144  bias.allocator()->allocate();
1145 
1146  ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1147  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1148 
1149  auto mg = MemoryGroup{};
1150  auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
1151 
1152  auto run_conv = [&]() -> Tensor
1153  {
1154  auto dst = create_tensor<Tensor>(dst_info);
1155  dst.allocator()->allocate();
1156  run_pack.add_tensor(TensorType::ACL_DST, &dst);
1157 
1158  library->fill_tensor_value(Accessor(src), 1.f);
1159  library->fill_tensor_value(Accessor(weight), 2.f);
1160  library->fill_tensor_value(Accessor(bias), 3.f);
1161  // This operator is configured once and captured by this lambda.
1162  conv->prepare(prep_pack);
1163  conv->run(run_pack);
1164  return dst;
1165  };
1166  auto result_0 = run_conv();
1167  auto result_1 = run_conv();
1168  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1169  {
1170  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1171  }
1172 }
1173 
1174 /** Test case for memory injection in @ref NEGEMMConv2d.
1175  *
1176  * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API.
1177  *
1178  * Checks performed in order:
1179  * - Both runs compute the same output
1180  */
1181 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
1182 {
1183  auto conv = std::make_unique<NEGEMMConv2d>();
1184  const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1185  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1186  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1187  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1188  const auto conv_info = Conv2dInfo{};
1189  auto run_conv = [&]()
1190  {
1191  auto src = create_tensor<Tensor>(src_info);
1192  auto weight = create_tensor<Tensor>(weight_info);
1193  auto bias = create_tensor<Tensor>(bias_info);
1194  auto dst = create_tensor<Tensor>(dst_info);
1195  conv->configure(&src, &weight, &bias, &dst, conv_info);
1196  src.allocator()->allocate();
1197  weight.allocator()->allocate();
1198  bias.allocator()->allocate();
1199  dst.allocator()->allocate();
1200  library->fill_tensor_value(Accessor(src), 1.f);
1201  library->fill_tensor_value(Accessor(weight), 2.f);
1202  library->fill_tensor_value(Accessor(bias), 3.f);
1203  conv->run();
1204  return dst;
1205  };
1206  auto result_0 = run_conv();
1207  auto result_1 = run_conv();
1208  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1209  {
1210  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1211  }
1212 }
1213 
1214 TEST_SUITE(Float)
1215 TEST_SUITE(FP32)
1216 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1217  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
1218 {
1219  // Validate output
1220  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1221 }
1222 TEST_SUITE_END() // FP32
1223 TEST_SUITE_END() // Float
1224 
1225 #ifdef __aarch64__
1226 template <typename T>
1227 using NEDirectGEMMConv2dLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1228 
1229 template <typename T>
1230 using NEDirectGEMMConv2dLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEGEMMConv2d, T, int8_t>;
1231 
1233 {
1237 });
1238 TEST_SUITE(Quantized)
1240 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1241  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1242  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1243 {
1244  // Validate output
1245  validate(Accessor(_target), _reference, tolerance_qasymm8);
1246 }
1247 TEST_SUITE_END() // QASYMM8
1248 
1250 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1251  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1252  framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1253 {
1254  // Validate output
1255  validate(Accessor(_target), _reference, tolerance_qasymm8);
1256 }
1257 TEST_SUITE_END() // QASYMM8_SIGNED
1258 
1260 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1261  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1262  framework::dataset::make("ReshapeWeights", { true })),
1264  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1267  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1268 {
1269  // Validate output
1270  validate(Accessor(_target), _reference, tolerance_qasymm8);
1271 }
1272 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1273 TEST_SUITE_END() // Quantized
1274 #endif // __aarch64__
1275 
1276 TEST_SUITE_END() // DirectGEMMConv2d
1277 
1278 TEST_SUITE_END() // Neon
1279 } // namespace validation
1280 } // namespace test
1281 } // namespace arm_compute
SimpleTensor< float > w
Definition: DFT.cpp:156
Basic function to compute the convolution layer.
Definition: CpuGemmConv2d.h:58
Shape of a tensor.
Definition: TensorShape.h:39
Class reprensenting an absolute tolerance value.
Definition: Validation.h:61
constexpr float tolerance_num_f16
F16 Tolerance number.
Definition: cl_gemm.cpp:75
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
ConvolutionValidationQuantizedFixture< Tensor, Accessor, NEConvolutionLayer, T, true > NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture
SimpleTensor< float > b
Definition: DFT.cpp:157
ConvolutionValidationFixture< Tensor, Accessor, NEConvolutionLayer, T, true > NEGEMMConvolutionLayerMixedDataLayoutFixture
half_float::half half
16-bit floating point type
Definition: Types.h:48
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
ConvolutionValidationQuantizedPerChannelFixture< Tensor, Accessor, NEConvolutionLayer, T, int8_t > NEGEMMConvolutionLayerQuantizedPerChannelFixture
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
WeightFormat
Memory layouts for the weights tensor.
Definition: Types.h:2035
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:134
Activation Layer Information class.
Definition: Types.h:1659
WinogradConvolutionLayerFastMathValidationFixture< Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false > NEWinogradConvolutionLayerNoBiasFixture
Basic function to compute the convolution layer.
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2023 Arm Limited.
1 channel, 1 F16 per channel
Convolution Layer Weights Information class.
Definition: Types.h:2093
1 channel, 1 S32 per channel
16-bit brain floating-point number
Quantization information.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
const auto input_shape
Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros we use to check the validity of given a...
Accessor implementation for Tensor objects.
Definition: Accessor.h:35
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
std::unique_ptr< AssetsLibrary > library
Definition: main.cpp:76
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
quantized, asymmetric fixed-point 8-bit number unsigned
const unsigned int num_groups
Definition: Im2Col.cpp:153
const auto QuantizedActivationFunctionsDataset
Input data sets.
Basic implementation of the tensor interface.
Definition: Tensor.h:37
Padding and stride information class.
Definition: Types.h:671
validate(CLAccessor(output_state), expected_output)
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
Descriptor used by the 2d Convolution function.
Num samples, channels, height, width.
constexpr float tolerance_num
Tolerance number.
Definition: Add.cpp:104
TensorInfo src_info(src_shape, 1, data_type)
quantized, symmetric per channel fixed-point 8-bit number
Convolution using Winograd.
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Definition: AbsLayer.cpp:50
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
bool is_fixed_format_fast_math(const WeightFormat &wf)
Definition: Types.h:2087
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by NEConvolutionLayer.
WinogradConvolutionLayerFastMathValidationFixture< Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true > NEWinogradConvolutionLayerMixedDataLayoutFixture
Class reprensenting a relative tolerance value.
Definition: Validation.h:97
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
Num samples, height, width, channels.
ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS)
Tensor packing service.
Definition: ITensorPack.h:39
Store the tensor&#39;s metadata.
Definition: TensorInfo.h:43
JoinDataset< T, U > concat(T &&dataset1, U &&dataset2)
Helper function to create a JoinDataset.
Definition: JoinDataset.h:160
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
quantized, asymmetric fixed-point 8-bit number signed
Basic function to compute the convolution layer.
Definition: NEGEMMConv2d.h:49
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
Definition: TensorInfo.h:236
DataType
Available data types.
Definition: Types.h:79
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference&#39;s output against implementation&#39;s output for flo...
DataLayout
[DataLayout enum definition]
Definition: Types.h:113
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
Convolution using GEMM.
void configure_conv_function< NEGEMMConv2d, Tensor >(NEGEMMConv2d &func, Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst, const PadStrideInfo &info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
const int32_t * bias