Compute Library
 22.05
DepthwiseConvolutionLayerNative.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 #include "tests/NEON/Accessor.h"
26 #include "tests/NEON/Helper.h"
27 #include "tests/framework/Macros.h"
30 #include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
31 
32 namespace arm_compute
33 {
34 namespace test
35 {
36 namespace validation
37 {
39 
40 // Create function for CpuDepthwiseConvolutionKernel
42 
// Fixture for NEDepthwiseConvolutionLayerKernel: plugs the native CPU depthwise
// convolution operator (CpuDepthwiseConvolutionNative, created above) into the
// generic depthwise-convolution validation fixture with the CPU Tensor/Accessor backend.
template <typename T>
using CpuDepthwiseConvolutionNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, CpuDepthwiseConvolutionNative, T>;
46 
namespace
{
// *INDENT-OFF*
// clang-format off
/** Relative tolerance for comparing the F32 output against the reference. */
RelativeTolerance<float> rel_tolerance_f32(0.001f);
/** Absolute tolerance for comparing the F32 output against the reference. */
constexpr float abs_tolerance_f32(0.0001f);

/** Width values to test - Precommit */
const auto width_values_precommit = framework::dataset::make("width", { 17U } );

/** Width values to test - Nightly */
const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } );

/** Height values to test - Precommit */
const auto height_values_precommit = framework::dataset::make("height", { 19U } );

/** Height values to test - Nightly */
const auto height_values_nightly = framework::dataset::make("height", { 39U, 43U } );

/** Channel values to test - Precommit */
const auto channel_values_precommit = framework::dataset::make("channels", { 15U });

/** Channel values to test - Nightly */
const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U });

/** Batch values to test - Precommit */
const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U });

/** Batch values to test - Nightly */
const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U });

/** Kernel size values to test - Precommit */
const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U) });

/** Kernel size values to test - Nightly */
const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });

/** Depth multiplier values to test - All */
const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", { 1U, 3U });

/** Dilation values to test - All */
const auto dilation_values = framework::dataset::make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });

/** Stride values to test - All */
const auto stride_values = framework::dataset::make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });

/** Padding values to test - All */
const auto padding_valid_values = framework::dataset::make("padding_valid", { true, false });

/** Data type values to test - All */
const auto data_type_values = framework::dataset::make("data_type", { DataType::F32 });

/** Data layout values to test - All */
const auto data_layout_values = framework::dataset::make("data_layout", { DataLayout::NHWC });
} // namespace
102 
104 TEST_SUITE(DepthwiseConvolutionLayerNative)
105 
TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL)
{
    // This test case will ensure that the kernel is not adding implicit padding.
    constexpr uint32_t vector_size = 8;                   // Assumed vector size of the current native kernel
    constexpr auto     depth       = vector_size * 2 + 1; // Mis-aligned depth to force padding if it exists
    constexpr auto     data_layout = DataLayout::NHWC;
    constexpr auto     data_type   = DataType::F32;

    const auto input_size      = Size2D{ 100, 100 };      // random plane size of the input
    const auto kernel_size     = Size2D{ 4, 4 };          // random plane size of the kernel
    const auto pad_stride_info = PadStrideInfo(3, 3);     // arbitrary convolution information (presumably stride 3x3 — confirm against PadStrideInfo's constructor)

    // NHWC layout: the channel (depth) dimension is the fastest-changing one, hence first in the shape.
    TensorShape src_shape{ depth, input_size.x(), input_size.y() };
    TensorShape weights_shape{ depth, kernel_size.x(), kernel_size.y() };
    TensorShape bias_shape{ depth };

    auto src     = create_tensor<Tensor>(src_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto weights = create_tensor<Tensor>(weights_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto biases  = create_tensor<Tensor>(bias_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto dst     = create_tensor<Tensor>(TensorShape(), data_type, 1, QuantizationInfo(), data_layout);

    // NOTE(review): the declaration of 'dwc' (the kernel under test) is not visible in this
    // extract — confirm against the full source file.
    const ConvolutionInfo info{pad_stride_info, 1, ActivationLayerInfo(), Size2D(1, 1)};
    dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info);

    // After configure(), none of the tensors must carry any implicit padding.
    ARM_COMPUTE_EXPECT(src.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(weights.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(biases.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(dst.info()->padding().empty(), framework::LogLevel::ERRORS);
}
136 
TEST_SUITE(KERNEL_SELECTION)
// Checks that the expected micro-kernel implementation is selected for each
// (CPU extension, data type, per-channel data type) combination.
// NOTE(review): several original lines are missing from this extract (additional
// dataset entries, the 'cpu_isa' declaration, the closing argument of
// get_implementation, and the final assertion macros) — confirm against the full file.
DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
               combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
                               framework::dataset::make("DataType", { DataType::F32,
                                                                    })),
                               framework::dataset::make("DataType_per_channel", { DataType::QASYMM8,
                                                                    })),
               cpu_ext, data_type, data_type_per_channel)
{
    using namespace cpu::kernels;

    cpu_isa.neon = (cpu_ext == "NEON");
    cpu_isa.fp16 = (data_type == DataType::F16);

    const auto *selected_impl = CpuDepthwiseConv2dNativeKernel::get_implementation(
                                    DepthwiseConv2dNativeDataTypeISASelectorData{ data_type, data_type_per_channel,cpu_isa },

    // Per-channel quantized kernels embed the per-channel data type in their name.
    std::string per_channel_str = "_";
    if (data_type == DataType::QSYMM8_PER_CHANNEL)
    {
        per_channel_str = "_" + cpu_impl_dt(data_type_per_channel) + "_" ;
    }
    // Note: "deptwiseconv2dnative" (sic) is the registered kernel-name string — do not "fix" the spelling here.
    std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + per_channel_str + "deptwiseconv2dnative";
    std::string actual   = selected_impl->name;

}
TEST_SUITE_END() // KERNEL_SELECTION
174 
TEST_SUITE(Float)
TEST_SUITE(FP32)
// NOTE(review): the FIXTURE_DATA_TEST_CASE(RunSmall, CpuDepthwiseConvolutionNativeFixture<float>, ...)
// header line is missing from this extract; the lines below are the tail of its
// precommit dataset combination — confirm against the full source file.
                                                                    height_values_precommit),
                                                                    channel_values_precommit),
                                                                    batch_values_precommit),
                                                                    kernel_sz_values_precommit),
                                                                    depth_multiplier_values),
                                                                    dilation_values),
                                                                    stride_values),
                                                                    padding_valid_values),
                                                                    data_type_values),
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

// NOTE(review): likewise, the FIXTURE_DATA_TEST_CASE(RunLarge, ...) header line is
// missing here; the lines below are the tail of its nightly dataset combination.
                                                                    height_values_nightly),
                                                                    channel_values_nightly),
                                                                    batch_values_nightly),
                                                                    kernel_sz_values_nightly),
                                                                    depth_multiplier_values),
                                                                    dilation_values),
                                                                    stride_values),
                                                                    padding_valid_values),
                                                                    data_type_values),
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
213 TEST_SUITE_END() // DepthwiseConvolutionLayerNative
214 TEST_SUITE_END() // Neon
215 } // namespace validation
216 } // namespace test
217 } // namespace arm_compute
Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags...
Shape of a tensor.
Definition: TensorShape.h:39
const CpuCastKernel::CastKernel * selected_impl
Definition: Cast.cpp:205
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
As above, but this also sets up a zero border on the input tensor, sized to the kernel's border size.
Definition: Helper.h:109
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:351
Activation Layer Information class.
Definition: Types.h:1625
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
cpuinfo::CpuIsaInfo cpu_isa
Definition: Cast.cpp:207
std::string cpu_impl_dt(const DataType &data_type)
Returns the suffix string of CPU kernel implementation names based on the given data type...
Definition: Utils.h:1245
1 channel, 1 F16 per channel
CPU ISA (Instruction Set Architecture) information.
Definition: CpuIsaInfo.h:37
Quantization information.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
Accessor implementation for Tensor objects.
Definition: Accessor.h:35
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
quantized, asymmetric fixed-point 8-bit number unsigned
Padding and stride information class.
Definition: Types.h:669
validate(CLAccessor(output_state), expected_output)
quantized, symmetric per channel fixed-point 8-bit number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
DepthwiseConvolutionLayerNativeValidationFixture< Tensor, Accessor, CpuDepthwiseConvolutionNative, T > CpuDepthwiseConvolutionNativeFixture
Class representing a relative tolerance value.
Definition: Validation.h:97
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
Interface for the kernel to run a depthwise convolution native on a tensor.
ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl)
Num samples, height, width, channels.
ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS)
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
quantized, asymmetric fixed-point 8-bit number signed
const auto data_layout_values
Data layout to test.
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
DataType
Available data types.
Definition: Types.h:79
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference&#39;s output against implementation&#39;s output for flo...
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture< half >, framework::DatasetMode::ALL, combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW)), ActivationFunctionsDataset))