Compute Library
 22.05
DepthwiseConvolutionLayerNative.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 #include "tests/NEON/Accessor.h"
26 #include "tests/NEON/Helper.h"
27 #include "tests/framework/Macros.h"
30 #include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
31 
32 namespace arm_compute
33 {
34 namespace test
35 {
36 namespace validation
37 {
39 
40 // Create function for CpuDepthwiseConvolutionKernel
42 
// Fixture for NEDepthwiseConvolutionLayerKernel: plugs the native CPU depthwise
// convolution operator (CpuDepthwiseConvolutionNative, created above) into the
// generic depthwise-convolution validation fixture with the CPU Tensor/Accessor backend.
template <typename T>
using CpuDepthwiseConvolutionNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, CpuDepthwiseConvolutionNative, T>;
46 
namespace
{
// *INDENT-OFF*
// clang-format off
/** Relative tolerance for comparing the F32 output against the reference. */
RelativeTolerance<float> rel_tolerance_f32(0.001f);
/** Absolute tolerance for comparing the F32 output against the reference. */
constexpr float abs_tolerance_f32(0.0001f);

/** Width values to test - Precommit */
const auto width_values_precommit = framework::dataset::make("width", { 17U } );

/** Width values to test - Nightly */
const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } );

/** Height values to test - Precommit */
const auto height_values_precommit = framework::dataset::make("height", { 19U } );

/** Height values to test - Nightly */
const auto height_values_nightly = framework::dataset::make("height", { 39U, 43U } );

/** Channel values to test - Precommit */
const auto channel_values_precommit = framework::dataset::make("channels", { 15U });

/** Channel values to test - Nightly */
const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U });

/** Batch values to test - Precommit */
const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U });

/** Batch values to test - Nightly */
const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U });

/** Kernel size values to test - Precommit */
const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U) });

/** Kernel size values to test - Nightly */
const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });

/** Depth multiplier values to test - All */
const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", { 1U, 3U });

/** Dilation values to test - All */
const auto dilation_values = framework::dataset::make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });

/** Stride values to test - All */
const auto stride_values = framework::dataset::make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });

/** Padding values to test - All */
const auto padding_valid_values = framework::dataset::make("padding_valid", { true, false });

/** Data type values to test - All */
const auto data_type_values = framework::dataset::make("data_type", { DataType::F32 });

/** Data layout values to test - All */
const auto data_layout_values = framework::dataset::make("data_layout", { DataLayout::NHWC });
} // namespace
102 
104 TEST_SUITE(DepthwiseConvolutionLayerNative)
105 
TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL)
{
    // This test case will ensure that the kernel is not adding implicit padding.
    constexpr uint32_t vector_size = 8;                   // Assumed vector size of the current native kernel
    constexpr auto     depth       = vector_size * 2 + 1; // Mis-aligned depth to force padding if it exists
    constexpr auto     data_layout = DataLayout::NHWC;
    constexpr auto     data_type   = DataType::F32;

    const auto input_size      = Size2D{ 100, 100 };      // random plane size of the input
    const auto kernel_size     = Size2D{ 4, 4 };          // random plane size of the kernel
    const auto pad_stride_info = PadStrideInfo(3, 3);     // arbitrary convolution information (presumably stride 3x3 — confirm against PadStrideInfo's constructor)

    // NHWC layout: the channel (depth) dimension is the fastest-changing one, hence first in the shape.
    TensorShape src_shape{ depth, input_size.x(), input_size.y() };
    TensorShape weights_shape{ depth, kernel_size.x(), kernel_size.y() };
    TensorShape bias_shape{ depth };

    auto src     = create_tensor<Tensor>(src_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto weights = create_tensor<Tensor>(weights_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto biases  = create_tensor<Tensor>(bias_shape, data_type, 1, QuantizationInfo(), data_layout);
    auto dst     = create_tensor<Tensor>(TensorShape(), data_type, 1, QuantizationInfo(), data_layout);

    // NOTE(review): the declaration of 'dwc' (the kernel under test) is not visible in this
    // extract — confirm against the full source file.
    const ConvolutionInfo info{pad_stride_info, 1, ActivationLayerInfo(), Size2D(1, 1)};
    dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info);

    // After configure(), none of the tensors must carry any implicit padding.
    ARM_COMPUTE_EXPECT(src.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(weights.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(biases.info()->padding().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(dst.info()->padding().empty(), framework::LogLevel::ERRORS);
}
136 
TEST_SUITE(KERNEL_SELECTION)
// Checks that the expected micro-kernel implementation is selected for each
// (CPU extension, data type, per-channel data type) combination.
// NOTE(review): several original lines are missing from this extract (additional
// dataset entries, the 'cpu_isa' declaration, the closing argument of
// get_implementation, and the final assertion macros) — confirm against the full file.
DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL,
               combine(combine(framework::dataset::make("CpuExt", std::string("NEON")),
                               framework::dataset::make("DataType", { DataType::F32,
                                                                    })),
                               framework::dataset::make("DataType_per_channel", { DataType::QASYMM8,
                                                                    })),
               cpu_ext, data_type, data_type_per_channel)
{
    using namespace cpu::kernels;

    cpu_isa.neon = (cpu_ext == "NEON");
    cpu_isa.fp16 = (data_type == DataType::F16);

    const auto *selected_impl = CpuDepthwiseConv2dNativeKernel::get_implementation(
                                    DepthwiseConv2dNativeDataTypeISASelectorData{ data_type, data_type_per_channel,cpu_isa },

    // Per-channel quantized kernels embed the per-channel data type in their name.
    std::string per_channel_str = "_";
    if (data_type == DataType::QSYMM8_PER_CHANNEL)
    {
        per_channel_str = "_" + cpu_impl_dt(data_type_per_channel) + "_" ;
    }
    // Note: "deptwiseconv2dnative" (sic) is the registered kernel-name string — do not "fix" the spelling here.
    std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + per_channel_str + "deptwiseconv2dnative";
    std::string actual   = selected_impl->name;

}
TEST_SUITE_END() // KERNEL_SELECTION
174 
TEST_SUITE(Float)
TEST_SUITE(FP32)
// NOTE(review): the FIXTURE_DATA_TEST_CASE(RunSmall, CpuDepthwiseConvolutionNativeFixture<float>, ...)
// header line is missing from this extract; the lines below are the tail of its
// precommit dataset combination — confirm against the full source file.
                                                                    height_values_precommit),
                                                                    channel_values_precommit),
                                                                    batch_values_precommit),
                                                                    kernel_sz_values_precommit),
                                                                    depth_multiplier_values),
                                                                    dilation_values),
                                                                    stride_values),
                                                                    padding_valid_values),
                                                                    data_type_values),
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

// NOTE(review): likewise, the FIXTURE_DATA_TEST_CASE(RunLarge, ...) header line is
// missing here; the lines below are the tail of its nightly dataset combination.
                                                                    height_values_nightly),
                                                                    channel_values_nightly),
                                                                    batch_values_nightly),
                                                                    kernel_sz_values_nightly),
                                                                    depth_multiplier_values),
                                                                    dilation_values),
                                                                    stride_values),
                                                                    padding_valid_values),
                                                                    data_type_values),
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}

TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
213 TEST_SUITE_END() // DepthwiseConvolutionLayerNative
214 TEST_SUITE_END() // Neon
215 } // namespace validation
216 } // namespace test
217 } // namespace arm_compute
Retrieve the best implementation available for the given Cpu ISA, ignoring the build flags...
Shape of a tensor.
Definition: TensorShape.h:39
const CpuCastKernel::CastKernel * selected_impl
Definition: Cast.cpp:205
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
As above, but this also sets up a zero border on the input tensor, sized to the kernel's border size.
Definition: Helper.h:109
std::enable_if< is_container< T >::value, ContainerDataset< T > >::type make(std::string name, T &&values)
Helper function to create a ContainerDataset.
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:351
Activation Layer Information class.
Definition: Types.h:1625
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
cpuinfo::CpuIsaInfo cpu_isa
Definition: Cast.cpp:207
std::string cpu_impl_dt(const DataType &data_type)
Returns the suffix string of CPU kernel implementation names based on the given data type...
Definition: Utils.h:1245
1 channel, 1 F16 per channel
CPU ISA (Instruction Set Architecture) information.
Definition: CpuIsaInfo.h:37
Quantization information.
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QSYMM16, QuantizationInfo(1.f/32768.f, 0)), })), framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQRT), })), 
framework::dataset::make("Expected", { false, true, true, true, false, false, true, true, false })), input_info, output_info, act_info, expected)
Accessor implementation for Tensor objects.
Definition: Accessor.h:35
DatasetMode
Possible dataset modes.
Definition: DatasetModes.h:40
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
quantized, asymmetric fixed-point 8-bit number unsigned
Padding and stride information class.
Definition: Types.h:669
validate(CLAccessor(output_state), expected_output)
quantized, symmetric per channel fixed-point 8-bit number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
DepthwiseConvolutionLayerNativeValidationFixture< Tensor, Accessor, CpuDepthwiseConvolutionNative, T > CpuDepthwiseConvolutionNativeFixture
Class representing a relative tolerance value.
Definition: Validation.h:97
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
Interface for the kernel to run a depthwise convolution native on a tensor.
ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl)
Num samples, height, width, channels.
ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS)
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
quantized, asymmetric fixed-point 8-bit number signed
const auto data_layout_values
Data layout to test.
TEST_SUITE(QASYMM8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall
DataType
Available data types.
Definition: Types.h:79
constexpr float abs_tolerance_f32(0.0001f)
F32 Absolute tolerance value for comparing reference&#39;s output against implementation&#39;s output for flo...
combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
Definition: AbsLayer.cpp:65
FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture< half >, framework::DatasetMode::ALL, combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW)), ActivationFunctionsDataset))