Arm Compute Library 22.08 — annotated source listing of ConvolutionLayer.cpp (NEON validation tests).
NOTE: this is a documentation-extraction of the file; some original lines were dropped (visible as gaps in the embedded line numbers).
1 /*
2  * Copyright (c) 2017-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
35 #include "tests/NEON/Accessor.h"
37 #include "tests/datasets/LargeConvolutionLayerDataset.h"
38 #include "tests/datasets/SmallConvolutionLayerDataset.h"
39 #include "tests/datasets/TinyConvolutionLayerDataset.h"
41 #include "tests/framework/Macros.h"
44 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
45 #include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h"
46 
47 namespace arm_compute
48 {
49 namespace test
50 {
51 namespace validation
52 {
53 namespace detail
54 {
// Specialization of the test framework's convolution configuration hook for this
// backend: bundles the loose (pad/stride info, dilation, activation, groups)
// arguments into a single Conv2dInfo and forwards it to the function's configure().
// NOTE(review): the declaration line of this specialization (original lines 56/58,
// naming the function template and the `func`/`info`/`weights_info` parameters) was
// dropped by the documentation extraction — confirm against the repository source.
55 template <>
57  Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst,
59  const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
60 {
// weights_info is accepted for interface compatibility but unused on this path.
61  ARM_COMPUTE_UNUSED(weights_info);
62 
// Fast-math is explicitly disabled here (the 4th Conv2dInfo argument is `false`).
63  Conv2dInfo conv_info(info, dilation, act_info, false, num_groups);
64  func.configure(src, weights, bias, dst, conv_info);
65 }
66 } // namespace detail
67 namespace
68 {
// File-local tolerances used by the validate() calls below.
69 const RelativeTolerance<float> rel_tolerance_f32(0.01f); /**< Relative tolerance for FP32 types */
70 const RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relative tolerance for FP32 types */
71 const AbsoluteTolerance<float> abs_tolerance_f32(0.002f); /**< Absolute tolerance for FP32 types */
72 const AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f); /**< Absolute tolerance for FP32 types */
73 
74 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
75 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
76 constexpr float tolerance_num_f16 = 0.15f;
77 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
78 
79 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
80 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
81 const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */
82 constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */
83 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
84 constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
85 
86 /** CNN data types */
// NOTE(review): the dataset entries (original lines 90, 92-93) were dropped by the
// extraction; presumably F16 (under the FP16 guard) plus F32/quantized types — verify.
87 const auto CNNDataTypes = framework::dataset::make("DataType",
88 {
89 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
91 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
94 });
// NOTE(review): entries (original lines 97-99) dropped by the extraction as well.
95 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
96 {
100 });
101 
// Quantization parameters exercised by the quantized test variants.
102 const auto QuantizationData = framework::dataset::make("QuantizationInfo",
103 {
104  QuantizationInfo(0.5f, 10),
105  QuantizationInfo(0.3f, 3),
106  QuantizationInfo(1.f, 10),
107  QuantizationInfo(1.1f, 10),
108 });
109 } // namespace
110 
112 TEST_SUITE(ConvolutionLayer)
113 
// Checks that NEConvolutionLayer::get_convolution_method() picks the expected
// backend (e.g. Winograd vs GEMM) for a set of input/weight/output configurations.
114 // *INDENT-OFF*
115 // clang-format off
116 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
117  framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
118  TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
119  TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
120  TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
121  }),
122  framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
123  TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
124  TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
125  TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
126  })),
127  framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
128  TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
129  TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
130  TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
131  })),
132  framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
133  PadStrideInfo(1, 1, 0, 0),
134  PadStrideInfo(2, 1, 0, 0),
135  PadStrideInfo(3, 2, 1, 0)
136  })),
137  framework::dataset::make("FastMath", { true,
138  true,
139  false,
140  false
141  })),
// NOTE(review): the expected-method dataset and the test-case parameter list
// (original lines 142-143) were dropped by the extraction.
144 {
145  ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
146  &weights_info.clone()->set_is_resizable(true),
147  &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
// NOTE(review): the expectation comparing `is_valid` against the expected method
// (original line 148) was dropped by the extraction.
149 }
150 // clang-format on
151 // *INDENT-ON*
152 TEST_SUITE_END() // ConvolutionLayer
153 
154 TEST_SUITE(WinogradLayer)
// Fixture aliases binding the generic Winograd fast-math validation fixture to the
// NEON tensor/accessor/layer types used throughout this suite.
155 template <typename T>
156 using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
// Variant that mixes data layouts during validation (last two template flags true).
157 template <typename T>
158 using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;
159 
// Variant that runs without a bias tensor (third boolean template flag false).
160 template <typename T>
161 using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
162 
163 /** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
164  *
165  * Configure the operator once and inject memory at run-time in multiple executions.
166  *
167  * Checks performed in order:
168  * - Both runs compute the same output
169  */
// NOTE(review): the TEST_CASE(...) macro line (original line 170) was dropped by the extraction.
171 {
172  auto winograd = std::make_unique<cpu::CpuWinogradConv2d>();
173  const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
174  const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
175  const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
176  auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
177  const PadStrideInfo pad_info{};
178 
// NOTE(review): b_info (4-D shape) is passed in the 2nd position and w_info (1-D shape)
// in the 3rd — the `w_`/`b_` naming appears swapped relative to the argument roles;
// confirm against CpuWinogradConv2d::configure(src, weights, biases, dst, ...).
179  winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);
180 
// tensors are created once here (comment fixed from "telhs"); only dst is re-created
// inside the lambda below, exercising run-time memory injection.
181  // tensors backing src/weights/bias, allocated up-front
182  auto a = create_tensor<Tensor>(src_info);
183  auto b = create_tensor<Tensor>(b_info);
184  auto c = create_tensor<Tensor>(w_info);
185  a.allocator()->allocate();
186  b.allocator()->allocate();
187  c.allocator()->allocate();
188 
// NOTE(review): the run_pack declaration (original line 189) was dropped by the extraction;
// presumably an ITensorPack holding ACL_SRC_0..2 — verify against the repository source.
190  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
191 
// Workspace tensors are managed externally and injected into the packs.
192  auto mg = MemoryGroup{};
193  auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
194  auto run_conv = [&]() -> Tensor
195  {
196  auto dst = create_tensor<Tensor>(dst_info);
197  dst.allocator()->allocate();
198 
199  run_pack.add_tensor(TensorType::ACL_DST, &dst);
200  library->fill_tensor_value(Accessor(a), 1.f);
201  library->fill_tensor_value(Accessor(b), 2.f);
202  library->fill_tensor_value(Accessor(c), 3.f);
203 
204  // This operator is configured once and captured by this lambda.
205  winograd->prepare(prep_pack);
206  winograd->run(run_pack);
207  return dst;
208  };
209 
210  auto result_0 = run_conv();
211  auto result_1 = run_conv();
212 
// Both executions must produce bit-identical FP32 outputs.
213  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
214  {
215  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
216  }
217 }
218 
219 /** Test case for memory injection in @ref NEWinogradConvolutionLayer.
220  *
221  * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API.
222  *
223  * Checks performed in order:
224  * - Both runs compute the same output
225  */
226 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
227 {
228  auto gemm = std::make_unique<NEWinogradConvolutionLayer>();
229  const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
230  const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
231  const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
232  auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
233  const PadStrideInfo pad_info{};
234 
235  auto run_conv = [&]()
236  {
237  auto src = create_tensor<Tensor>(src_info);
238  auto w = create_tensor<Tensor>(w_info);
239  auto b = create_tensor<Tensor>(b_info);
240  auto dst = create_tensor<Tensor>(dst_info);
241 
242  gemm->configure(&src, &b, &w, &dst, pad_info);
243 
244  src.allocator()->allocate();
245  b.allocator()->allocate();
246  w.allocator()->allocate();
247  dst.allocator()->allocate();
248 
249  library->fill_tensor_value(Accessor(src), 1.f);
250  library->fill_tensor_value(Accessor(b), 2.f);
251  library->fill_tensor_value(Accessor(w), 3.f);
252  gemm->run();
253  return dst;
254  };
255 
256  auto result_0 = run_conv();
257  auto result_1 = run_conv();
258 
259  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
260  {
261  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
262  }
263 }
264 
265 TEST_SUITE(FP32)
266 
// Per-kernel-shape Winograd FP32 validation. Throughout this section the
// FIXTURE_DATA_TEST_CASE header/trailing-dataset lines that are missing from the
// embedded numbering were dropped by the documentation extraction.
267 TEST_SUITE(Conv1x3)
268 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
269  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
270  framework::dataset::make("DataType", { DataType::F32 })),
271  ActivationFunctionsDataset),
273 {
274  // Validate output
275  validate(Accessor(_target), _reference, abs_tolerance_f32);
276 }
// NOTE(review): header of the mixed-data-layout case (original lines 277-278) dropped.
279  framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
280  framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
282  framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
283  framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
284  framework::dataset::make("Dilation", Size2D(1U, 1U))),
285  framework::dataset::make("DataType", { DataType::F32 })),
286  ActivationFunctionsDataset),
288 {
289  // Validate output
290  validate(Accessor(_target), _reference, abs_tolerance_f32);
291 }
293  combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
294  framework::dataset::make("DataType", { DataType::F32 })),
295  ActivationFunctionsDataset),
297 {
298  // Validate output
299  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
300 }
301 
302 TEST_SUITE_END() // Conv1x3
303 
304 TEST_SUITE(Conv3x1)
305 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
306  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
307  framework::dataset::make("DataType", { DataType::F32 })),
308  ActivationFunctionsDataset),
310 {
311  // Validate output
312  validate(Accessor(_target), _reference, abs_tolerance_f32);
313 }
315  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
316  framework::dataset::make("DataType", { DataType::F32 })),
317  ActivationFunctionsDataset),
319 {
320  // Validate output
321  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
322 }
323 
324 TEST_SUITE_END() // Conv3x1
325 
326 TEST_SUITE(Conv1x5)
328  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
329  framework::dataset::make("DataType", { DataType::F32 })),
330  ActivationFunctionsDataset),
332 {
333  // Validate output
334  validate(Accessor(_target), _reference, abs_tolerance_f32);
335 }
337  combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
338  framework::dataset::make("DataType", { DataType::F32 })),
339  ActivationFunctionsDataset),
341 {
342  // Validate output
343  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
344 }
345 
346 TEST_SUITE_END() // Conv1x5
347 
348 TEST_SUITE(Conv5x1)
350  combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
351  framework::dataset::make("DataType", { DataType::F32 })),
352  ActivationFunctionsDataset),
354 {
355  // Validate output
356  validate(Accessor(_target), _reference, abs_tolerance_f32);
357 }
359  combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
360  framework::dataset::make("DataType", { DataType::F32 })),
361  ActivationFunctionsDataset),
363 {
364  // Validate output
365  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
366 }
367 
368 TEST_SUITE_END() // Conv5x1
369 
370 TEST_SUITE(Conv7x1)
372  combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
373  framework::dataset::make("DataType", { DataType::F32 })),
374  ActivationFunctionsDataset),
376 {
377  // Validate output
378  validate(Accessor(_target), _reference, abs_tolerance_f32);
379 }
380 
382  combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
383  framework::dataset::make("DataType", { DataType::F32 })),
384  ActivationFunctionsDataset),
386 {
387  // Validate output
388  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
389 }
390 TEST_SUITE_END() // Conv7x1
391 
392 TEST_SUITE(Conv1x7)
394  combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
395  framework::dataset::make("DataType", { DataType::F32 })),
396  ActivationFunctionsDataset),
398 {
399  // Validate output
400  validate(Accessor(_target), _reference, abs_tolerance_f32);
401 }
402 
// NOTE(review): this RunLarge case inside the Conv1x7 suite uses the
// LargeWinogradConvolutionLayer7x1Dataset — likely a copy-paste of the Conv7x1
// case above; expected dataset is presumably ...Layer1x7Dataset. Verify upstream.
404  combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
405  framework::dataset::make("DataType", { DataType::F32 })),
406  ActivationFunctionsDataset),
408 {
409  // Validate output
410  validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
411 }
412 TEST_SUITE_END() // Conv1x7
413 
414 TEST_SUITE(Conv3x3)
416  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
417  framework::dataset::make("DataType", { DataType::F32 })),
418  ActivationFunctionsDataset),
420 
421 {
422  // Validate output
423  validate(Accessor(_target), _reference, abs_tolerance_f32);
424 }
426  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
427  framework::dataset::make("DataType", { DataType::F32 })),
428  ActivationFunctionsDataset),
430 
431 {
432  // Validate output
433  // floating point arithmetic the Winograd results will not be exactly the same as direct convolution, especially for big shapes
434  validate(Accessor(_target), _reference, rel_tolerance_winograd_3x3_f32, 0.f, float(abs_tolerance_f32));
435 }
436 TEST_SUITE_END() // Conv3x3
437 
438 TEST_SUITE(Conv5x5)
440  combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
441  framework::dataset::make("DataType", { DataType::F32 })),
442  ActivationFunctionsDataset),
444 
445 {
446  // Validate output
447  validate(Accessor(_target), _reference, abs_tolerance_f32);
448 }
450  combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
451  framework::dataset::make("DataType", { DataType::F32 })),
452  ActivationFunctionsDataset),
454 
455 {
456  // Validate output
457  validate(Accessor(_target), _reference, abs_tolerance_f32);
458 }
459 
460 TEST_SUITE_END() // Conv5x5
461 
// No-bias run over the concatenated 3x3 and 5x5 small datasets.
462 FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
463  combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
464  datasets::SmallWinogradConvolutionLayer5x5Dataset()),
465  framework::dataset::make("DataType", { DataType::F32 })),
466  ActivationFunctionsDataset),
467 
469 {
470  // Validate output
471  validate(Accessor(_target), _reference, abs_tolerance_f32);
472 }
473 
474 TEST_SUITE_END() // FP32
475 
476 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
477 TEST_SUITE(FP16)
// NOTE(review): alias is named "CL..." although it binds the NEON layer — misleading
// name, presumably copied from the CL test file; consider renaming upstream.
478 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
479 
480 TEST_SUITE(Conv3x3)
481 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
482  combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
483  framework::dataset::make("DataType", { DataType::F16 })),
484  ActivationFunctionsDataset),
486 
487 {
488  // Validate output
489  validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
490 }
491 
492 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
493  combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
494  framework::dataset::make("DataType", { DataType::F16 })),
495  ActivationFunctionsDataset),
497 
498 {
499  // Validate output
500  validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
501 }
502 TEST_SUITE_END() // Conv3x3
503 TEST_SUITE_END() // FP16
504 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
505 TEST_SUITE_END() // WinogradLayer
506 
507 #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
508 TEST_SUITE(VariableWeightUtils)
509 
// Throughout this suite, the fixture dataset arguments and the assertion bodies
// (missing embedded line numbers) were dropped by the documentation extraction;
// only the macro headers and braces remain visible here.
510 // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
511 
512 template <typename ConvolutionClass>
513 using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;
514 
515 template <typename ConvolutionClass>
516 using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
517 
518 // UC2_1
519 
520 FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
523 {
525 }
526 FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
529 {
531 }
532 
533 FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
536 {
538 }
539 
540 FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
543 {
545 }
546 
547 // UC2_2_* tests: the user requests a specific fixed format, and a
548 // kernel that support that fixed format is found.
549 
550 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
553 {
556 }
557 
558 FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
561 {
564 }
565 
566 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
569 {
572 }
573 
574 FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
577 {
580 }
581 
582 // UC3_1_* tests: the user queries for ANY fixed format, but there is
583 // no kernel that support the use case specified by the user (for
584 // example, there is no fixed format kernel for the datatype of the
585 // problem).
586 
587 FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
589  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
590 {
592 }
593 
594 FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
596  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
597 {
599 }
600 
601 FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
603  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
604 {
606 }
607 
608 FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
610  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
611 {
613 }
614 
615 // UC3_2_* tests: the user queries for ANY fixed format. The search
616 // succeeded and the fixed format found is prompted back for
617 // consumption by the user. Note that we just test the
618 // _computed_weight_format to be anything but not the formats that are
619 // not fixed formats (ANY and UNSPECIFIED). This is because the weight
620 // format that the runtime produces depends on the size of the vector
621 // units of the hardware where the tests is executed. For example, a
622 // format like OHWIo4 for FP32 data returned for 128-bit NEON hardware
623 // is replaced by OHWIo8 when running on 256-bit SVE.
624 
625 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
627  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
628 {
632 }
633 
634 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
636  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
637 {
640 }
641 
642 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
644  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
645 {
650 }
651 
652 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
654  framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
655 {
660 }
661 
662 namespace
663 {
// Each tuple is (input OHWI shape, expected padded/blocked shape, requested weight format).
664 using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
665 auto prepare_weights_shapes = framework::dataset::make("TensorShape",
666 {
667  // OHWIo<interleave_by>i<block_by>
668  //
669  // OHWI --> O'HWI', where:
670  //
671  // O'= smallest multiple of <interleave_by> such that O<=O'
672  // I'= smallest multiple of <block_by> such that I<=I'
673  //
674 
675  // Change N for OHWIo4
676  TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
677  TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
678  TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
679  TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
680  TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
681  TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
682  TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
683  TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
684  TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
685  // // Change N for OHWIo8
686  TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
687  TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
688  TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
689  TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
690  TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
691  TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
692  TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
693  TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
694  TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
695  // // Change N for OHWIo4 when H, W and C are not 1
696  TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
697  TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
698  TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
699  TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
700  TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
701  TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
702  TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
703  TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
704  TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),
705 
706  // // Fix N and move HWI around, with different data layouts and formats
707  TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
708  TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
709  TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
710  TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
711  TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),
712 
713  // // Adding <block_by> on I (=C)
714  TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
715  TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
716  TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
717 
718  // ---------
719  TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
720  TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
721 
722 });
723 } // unnamed namespace
724 
// Checks that prepare_weights() pads/blocks an OHWI weight shape to the
// expected shape for the requested fixed weight format.
725 DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
726  prepare_weights_shapes, shapes)
727 {
728  const TensorShape input_shape = std::get<0>(shapes);
729  const TensorShape expected_shape = std::get<1>(shapes);
730  const arm_compute::WeightFormat wf = std::get<2>(shapes);
731  const DataType DT = DataType::F32;
732  const DataLayout DL = DataLayout::NHWC;
733  const auto TI = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL);
734  const TensorInfo computed = ::arm_compute::test::validation::prepare_weights(TI, wf);
735  const TensorInfo expected = TensorInfo(expected_shape, 1 /*num_channels, deprecated*/, DT, DL);
// NOTE(review): the expectation comparing `computed` with `expected`
// (original line 736) was dropped by the documentation extraction.
737 }
738 
739 TEST_SUITE_END() // VariableWeightUtils
740 
741 TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)
742 
// Exercises the variable-weights path through the operator-level (cpu::) API.
743 template <typename ScalarType>
744 using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
745 
746 FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
747  combine(combine(datasets::SmallConvolutionLayerDataset(),
748  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
749  framework::dataset::make("ACL Scalar type", { DataType::F32 })))
750 {
751  // Validate output
752  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
753 }
754 
755 FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
756  combine(combine(datasets::SmallConvolutionLayerDataset(),
757  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
758  framework::dataset::make("ACL Scalar type", { DataType::F16 })))
759 {
760  // Validate output
761  validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
762 }
763 
764 #if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant (BF16-enabled builds only).
765 template <typename ScalarType>
766 using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
767 
768 FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
769  combine(combine(datasets::SmallConvolutionLayerDataset(),
770  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
771  framework::dataset::make("ACL Scalar type", { DataType::F32 })))
772 {
773  // Validate output
774  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
775 }
776 #endif // ARM_COMPUTE_ENABLE_BF16
777 
778 TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
779 
780 TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)
781 
// Same variable-weights coverage as above, but through the NE function-level API.
782 template <typename ScalarType>
783 using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
784 
785 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
786  combine(combine(datasets::SmallConvolutionLayerDataset(),
787  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
788  framework::dataset::make("ACL Scalar type", { DataType::F32 })))
789 {
790  // Validate output
791  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
792 }
793 
794 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
795  combine(combine(datasets::SmallConvolutionLayerDataset(),
796  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
797  framework::dataset::make("ACL Scalar type", { DataType::F16 })))
798 {
799  // Validate output
800  validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
801 }
802 
803 #if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant (BF16-enabled builds only).
804 template <typename ScalarType>
805 using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
806 
807 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
808  combine(combine(datasets::SmallConvolutionLayerDataset(),
809  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
810  framework::dataset::make("ACL Scalar type", { DataType::F32 })))
811 {
812  // Validate output
813  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
814 }
815 #endif // ARM_COMPUTE_ENABLE_BF16
816 
817 TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
818 
819 #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
820 
821 TEST_SUITE(GEMMConvolutionLayer)
// Fixture aliases binding the generic convolution validation fixture to the NEON types.
822 template <typename T>
823 using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
// Mixed-data-layout variant (trailing template flag true).
824 template <typename T>
825 using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
826 
827 /** Test case for memory injection in @ref cpu::CpuGemmConv2d.
828  *
829  * Configure the operator once and inject memory at run-time in multiple executions.
830  *
831  * Checks performed in order:
832  * - Both runs compute the same output
833  */
834 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
835 {
836  auto conv = std::make_unique<cpu::CpuGemmConv2d>();
// NOTE(review): the declaration of src_info (used in configure() below) is not visible in this
// rendering (doxygen line 837 was a hyperlink) — confirm against the original source file.
838  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
839  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
840  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
841  const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
842  WeightsInfo weights_info(false, 3U, 3U, 1U);
// Configure once on TensorInfo only; no tensor memory is bound to the operator at this point.
843  conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
844 
845  // Input tensors are created and allocated once here and reused across both runs below
846  auto src = create_tensor<Tensor>(src_info);
847  auto weight = create_tensor<Tensor>(weight_info);
848  auto bias = create_tensor<Tensor>(bias_info);
849  src.allocator()->allocate();
850  weight.allocator()->allocate();
851  bias.allocator()->allocate();
852 
// run_pack carries the inputs for run(); prep_pack carries only the constant tensors for prepare().
853  ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
854  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
855 
// Workspace (auxiliary) tensors requested by the operator are allocated externally and
// injected into the packs — this is the "memory injection" under test.
856  auto mg = MemoryGroup{};
857  auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
858 
859  auto run_conv = [&]() -> Tensor
860  {
// The destination tensor is newly created and allocated on every call.
861  auto dst = create_tensor<Tensor>(dst_info);
862  dst.allocator()->allocate();
// NOTE(review): this stores a pointer to the lambda-local dst in the captured run_pack;
// the pointer dangles once the lambda returns and is only replaced on the next call — confirm intended.
863  run_pack.add_tensor(TensorType::ACL_DST, &dst);
864 
865  library->fill_tensor_value(Accessor(src), 1.f);
866  library->fill_tensor_value(Accessor(weight), 2.f);
867  library->fill_tensor_value(Accessor(bias), 3.f);
868  // This operator is configured once and captured by this lambda.
869  conv->prepare(prep_pack);
870  conv->run(run_pack);
871  return dst;
872  };
873  auto result_0 = run_conv();
874  auto result_1 = run_conv();
// Element-wise comparison of the raw F32 buffers of the two runs; any mismatch is an error.
875  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
876  {
877  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
878  }
879 }
880 
881 /** Test case for memory injection in @ref NEGEMMConvolutionLayer.
882  *
883  * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
884  *
885  * Checks performed in order:
886  * - Both runs compute the same output
887  */
888 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
889 {
890  auto conv = std::make_unique<NEGEMMConvolutionLayer>();
// NOTE(review): the declaration of src_info (used in the lambda below) is not visible in this
// rendering (doxygen line 891 was a hyperlink) — confirm against the original source file.
892  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
893  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
894  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
895  const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
896  WeightsInfo weights_info(false, 3U, 3U, 1U);
// Unlike the MemoryInjection test above, all tensors are created fresh and the function is
// re-configured on every call — exercising the legacy configure-time memory binding path.
897  auto run_conv = [&]()
898  {
899  auto src = create_tensor<Tensor>(src_info);
900  auto weight = create_tensor<Tensor>(weight_info);
901  auto bias = create_tensor<Tensor>(bias_info);
902  auto dst = create_tensor<Tensor>(dst_info);
903  conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
// Allocation happens after configure(), as required by the old run-time API.
904  src.allocator()->allocate();
905  weight.allocator()->allocate();
906  bias.allocator()->allocate();
907  dst.allocator()->allocate();
908  library->fill_tensor_value(Accessor(src), 1.f);
909  library->fill_tensor_value(Accessor(weight), 2.f);
910  library->fill_tensor_value(Accessor(bias), 3.f);
911  conv->run();
912  return dst;
913  };
914  auto result_0 = run_conv();
915  auto result_1 = run_conv();
// Element-wise comparison of the raw F32 buffers of the two runs; any mismatch is an error.
916  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
917  {
918  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
919  }
920 }
921 
922 TEST_SUITE(Float)
923 #if defined(ARM_COMPUTE_ENABLE_BF16)
// BFLOAT16 GEMM convolution over the small dataset (NHWC); the fixture's scalar type is
// float and validation uses the F32 tolerances — presumably the reference runs in F32
// while the target computes in BF16 (TODO confirm against the fixture implementation).
925 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
926  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
927  ActivationFunctionsDataset))
928 {
929  // Validate output
930  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32))
931 }
932 TEST_SUITE_END() // BFLOAT16
933 #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
934 
935 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
936 TEST_SUITE(FP16)
// FP16 GEMM convolution over the small dataset (NCHW), fused with each activation in
// ActivationFunctionsDataset; validated with the F16 tolerances plus tolerance_num.
937 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
938  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
939 {
940  // Validate output
941  validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
942 }
943 TEST_SUITE_END() // FP16
944 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
945 
946 TEST_SUITE(FP32)
// FP32 GEMM convolution over the small dataset in both NCHW and NHWC layouts, fused
// with each activation in ActivationFunctionsDataset.
947 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
948  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
949  ActivationFunctionsDataset))
950 {
951  // Validate output
952  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
953 }
956  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
957  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
959  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
960  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
961  framework::dataset::make("Dilation", Size2D(1, 1))),
962  framework::dataset::make("ReshapeWeights", { true })),
965  ActivationFunctionsDataset))
966 {
967  // Validate output
968  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
969 }
970 TEST_SUITE_END() // FP32
971 TEST_SUITE_END() // Float
972 
973 template <typename T>
974 using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
975 template <typename T>
976 using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
977 
978 template <typename T>
979 using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>;
980 
982 {
986 });
987 TEST_SUITE(Quantized)
// QASYMM8 (unsigned) quantized GEMM convolution over the small dataset, both layouts,
// with a fixed quantization (scale 2/255, offset 10) and quantized activations.
989 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
990  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
991  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
992 {
993  // Validate output
994  validate(Accessor(_target), _reference, tolerance_qasymm8);
995 }
996 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
998  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
999  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1000  framework::dataset::make("Bias", TensorShape(2U))),
1001  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1002  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1003  framework::dataset::make("Dilation", Size2D(1, 1))),
1004  framework::dataset::make("ReshapeWeights", { true })),
1007  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1009 {
1010  // Validate output
1011  validate(Accessor(_target), _reference, tolerance_qasymm8);
1012 }
1013 TEST_SUITE_END() // QASYMM8
1014 
// QASYMM8_SIGNED quantized GEMM convolution over the small dataset, both layouts,
// with a fixed quantization (scale 0.01, offset -10) and quantized activations.
1016 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1017  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1018  framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1019 {
1020  // Validate output
1021  validate(Accessor(_target), _reference, tolerance_qasymm8);
1022 }
1025  framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1026  framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1027  framework::dataset::make("Bias", TensorShape(2U))),
1028  framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1029  framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1030  framework::dataset::make("Dilation", Size2D(1, 1))),
1031  framework::dataset::make("ReshapeWeights", { true })),
1034  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1036 {
1037  // Validate output
1038  validate(Accessor(_target), _reference, tolerance_qasymm8);
1039 }
1040 TEST_SUITE_END() // QASYMM8_SIGNED
1041 
1044  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1045  framework::dataset::make("ReshapeWeights", { true })),
1046  framework::dataset::make("DataType", { DataType::QASYMM8 })),
1050  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1051 {
1052  // Validate output
1053  validate(Accessor(_target), _reference, tolerance_qasymm8);
1054 }
1056  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1057  framework::dataset::make("ReshapeWeights", { true })),
1062  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1063 {
1064  // Validate output
1065  validate(Accessor(_target), _reference, tolerance_qasymm8);
1066 }
1067 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1068 TEST_SUITE_END() // Quantized
1069 
1070 TEST_SUITE_END() // GEMMConvolutionLayer
1071 
1072 TEST_SUITE(DirectGEMMConv2d)
1073 template <typename T>
1074 using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1075 
1076 /** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
1077  *
1078  * Configure the operator once and inject memory at run-time in multiple executions.
1079  *
1080  * Checks performed in order:
1081  * - Both runs compute the same output
1082  */
1083 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
1084 {
1085  auto conv = std::make_unique<cpu::CpuGemmDirectConv2d>();
// All tensors use NHWC here, matching the layout this direct-conv path is tested with below.
1086  const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1087  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1088  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1089  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1090  const auto conv_info = Conv2dInfo{};
// Configure once on TensorInfo only; no tensor memory is bound to the operator at this point.
1091  conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
1092 
1093  // Input tensors are created and allocated once here and reused across both runs below
1094  auto src = create_tensor<Tensor>(src_info);
1095  auto weight = create_tensor<Tensor>(weight_info);
1096  auto bias = create_tensor<Tensor>(bias_info);
1097  src.allocator()->allocate();
1098  weight.allocator()->allocate();
1099  bias.allocator()->allocate();
1100 
// run_pack carries the inputs for run(); prep_pack carries only the constant tensors for prepare().
1101  ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1102  ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1103 
// Workspace (auxiliary) tensors requested by the operator are allocated externally and
// injected into the packs — this is the "memory injection" under test.
1104  auto mg = MemoryGroup{};
1105  auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
1106 
1107  auto run_conv = [&]() -> Tensor
1108  {
// The destination tensor is newly created and allocated on every call.
1109  auto dst = create_tensor<Tensor>(dst_info);
1110  dst.allocator()->allocate();
// NOTE(review): this stores a pointer to the lambda-local dst in the captured run_pack;
// the pointer dangles once the lambda returns and is only replaced on the next call — confirm intended.
1111  run_pack.add_tensor(TensorType::ACL_DST, &dst);
1112 
1113  library->fill_tensor_value(Accessor(src), 1.f);
1114  library->fill_tensor_value(Accessor(weight), 2.f);
1115  library->fill_tensor_value(Accessor(bias), 3.f);
1116  // This operator is configured once and captured by this lambda.
1117  conv->prepare(prep_pack);
1118  conv->run(run_pack);
1119  return dst;
1120  };
1121  auto result_0 = run_conv();
1122  auto result_1 = run_conv();
// Element-wise comparison of the raw F32 buffers of the two runs; any mismatch is an error.
1123  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1124  {
1125  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1126  }
1127 }
1128 
1129 /** Test case for memory injection in @ref NEGEMMConv2d.
1130  *
1131  * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API.
1132  *
1133  * Checks performed in order:
1134  * - Both runs compute the same output
1135  */
1136 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
1137 {
1138  auto conv = std::make_unique<NEGEMMConv2d>();
// All tensors use NHWC here, matching the layout this direct-conv path is tested with below.
1139  const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1140  const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1141  const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1142  auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1143  const auto conv_info = Conv2dInfo{};
// Unlike the MemoryInjection test above, all tensors are created fresh and the function is
// re-configured on every call — exercising the legacy configure-time memory binding path.
1144  auto run_conv = [&]()
1145  {
1146  auto src = create_tensor<Tensor>(src_info);
1147  auto weight = create_tensor<Tensor>(weight_info);
1148  auto bias = create_tensor<Tensor>(bias_info);
1149  auto dst = create_tensor<Tensor>(dst_info);
1150  conv->configure(&src, &weight, &bias, &dst, conv_info);
// Allocation happens after configure(), as required by the old run-time API.
1151  src.allocator()->allocate();
1152  weight.allocator()->allocate();
1153  bias.allocator()->allocate();
1154  dst.allocator()->allocate();
1155  library->fill_tensor_value(Accessor(src), 1.f);
1156  library->fill_tensor_value(Accessor(weight), 2.f);
1157  library->fill_tensor_value(Accessor(bias), 3.f);
1158  conv->run();
1159  return dst;
1160  };
1161  auto result_0 = run_conv();
1162  auto result_1 = run_conv();
// Element-wise comparison of the raw F32 buffers of the two runs; any mismatch is an error.
1163  for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1164  {
1165  ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1166  }
1167 }
1168 
1169 TEST_SUITE(Float)
1170 TEST_SUITE(FP32)
// FP32 direct GEMM conv2d over the small dataset; NHWC only (this path does not
// support NCHW in these tests), fused with each activation in ActivationFunctionsDataset.
1171 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1172  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
1173 {
1174  // Validate output
1175  validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1176 }
1177 TEST_SUITE_END() // FP32
1178 TEST_SUITE_END() // Float
1179 
1180 #ifdef __aarch64__
1181 template <typename T>
1182 using NEDirectGEMMConv2dLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1183 
1184 template <typename T>
1185 using NEDirectGEMMConv2dLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEGEMMConv2d, T, int8_t>;
1186 
1188 {
1192 });
1193 TEST_SUITE(Quantized)
// QASYMM8 (unsigned) direct GEMM conv2d over the small dataset, NHWC only,
// fixed quantization (scale 2/255, offset 10) with quantized activations.
1195 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1196  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1197  framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1198 {
1199  // Validate output
1200  validate(Accessor(_target), _reference, tolerance_qasymm8);
1201 }
1202 TEST_SUITE_END() // QASYMM8
1203 
// QASYMM8_SIGNED direct GEMM conv2d over the small dataset, NHWC only,
// fixed quantization (scale 0.01, offset -10) with quantized activations.
1205 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1206  framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1207  framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1208 {
1209  // Validate output
1210  validate(Accessor(_target), _reference, tolerance_qasymm8);
1211 }
1212 TEST_SUITE_END() // QASYMM8_SIGNED
1213 
1215 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1216  combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1217  framework::dataset::make("ReshapeWeights", { true })),
1219  framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1222  framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1223 {
1224  // Validate output
1225  validate(Accessor(_target), _reference, tolerance_qasymm8);
1226 }
1227 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1228 TEST_SUITE_END() // Quantized
1229 #endif // __aarch64__
1230 
1231 TEST_SUITE_END() // DirectGEMMConv2d
1232 
1233 TEST_SUITE_END() // Neon
1234 } // namespace validation
1235 } // namespace test
1236 } // namespace arm_compute
SimpleTensor< float > w
Definition: DFT.cpp:156
Basic function to compute the convolution layer.
Definition: CpuGemmConv2d.h:58
Shape of a tensor.
Definition: TensorShape.h:39
Class representing an absolute tolerance value.
Definition: Validation.h:61
constexpr float tolerance_num_f16
F16 Tolerance number.
Definition: cl_gemm.cpp:75
ConvolutionValidationQuantizedFixture< Tensor, Accessor, NEConvolutionLayer, T, true > NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture
SimpleTensor< float > b
Definition: DFT.cpp:157
ConvolutionValidationFixture< Tensor, Accessor, NEConvolutionLayer, T, true > NEGEMMConvolutionLayerMixedDataLayoutFixture
half_float::half half
16-bit floating point type
Definition: Types.h:48
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
ConvolutionValidationQuantizedPerChannelFixture< Tensor, Accessor, NEConvolutionLayer, T, int8_t > NEGEMMConvolutionLayerQuantizedPerChannelFixture
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
WeightFormat
Memory layouts for the weights tensor.
Definition: Types.h:1948
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:134
Activation Layer Information class.
Definition: Types.h:1625
WinogradConvolutionLayerFastMathValidationFixture< Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false > NEWinogradConvolutionLayerNoBiasFixture
Basic function to compute the convolution layer.
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
1 channel, 1 F16 per channel
Convolution Layer Weights Information class.
Definition: Types.h:2006
1 channel, 1 S32 per channel
16-bit brain floating-point number
Quantization information.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
const auto input_shape
Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros we use to check the validity of given a...
Accessor implementation for Tensor objects.
Definition: Accessor.h:35
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
std::unique_ptr< AssetsLibrary > library
Definition: main.cpp:76
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
quantized, asymmetric fixed-point 8-bit number unsigned
const unsigned int num_groups
Definition: Im2Col.cpp:153
const auto QuantizedActivationFunctionsDataset
Input data sets.
Basic implementation of the tensor interface.
Definition: Tensor.h:37
Padding and stride information class.
Definition: Types.h:669
validate(CLAccessor(output_state), expected_output)
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &os)
Descriptor used by the 2d Convolution function.
Num samples, channels, height, width.
TensorInfo src_info(src_shape, 1, data_type)
quantized, symmetric per channel fixed-point 8-bit number
Convolution using Winograd.
FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsLayerFixture< half >, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
Definition: AbsLayer.cpp:50
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
bool is_fixed_format_fast_math(const WeightFormat &wf)
Definition: Types.h:2000
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by NEConvolutionLayer.
WinogradConvolutionLayerFastMathValidationFixture< Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true > NEWinogradConvolutionLayerMixedDataLayoutFixture
Class representing a relative tolerance value.
Definition: Validation.h:97
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
Num samples, height, width, channels.
ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS)
Tensor packing service.
Definition: ITensorPack.h:39
Store the tensor's metadata.
Definition: TensorInfo.h:43
JoinDataset< T, U > concat(T &&dataset1, U &&dataset2)
Helper function to create a JoinDataset.
Definition: JoinDataset.h:160
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
quantized, asymmetric fixed-point 8-bit number signed
Basic function to compute the convolution layer.
Definition: NEGEMMConv2d.h:49
zip(zip(framework::dataset::make("Weights", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U, 1U), 1, DataType::F32), }), framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F16), TensorInfo(TensorShape(5U), 1, DataType::F32), })), framework::dataset::make("Expected", { true, false, false}))
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
DataType
Available data types.
Definition: Types.h:79
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference's output against implementation's output for flo...
DataLayout
[DataLayout enum definition]
Definition: Types.h:113
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
Convolution using GEMM.
void configure_conv_function< NEGEMMConv2d, Tensor >(NEGEMMConv2d &func, Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst, const PadStrideInfo &info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
const int32_t * bias