// Source page: ComputeLibrary/latest/core_2_c_l_2_c_l_helpers_8cpp_source.xhtml

/*

 * Copyright (c) 2016-2023 Arm Limited.

 *

 * SPDX-License-Identifier: MIT

 *

 * Permission is hereby granted, free of charge, to any person obtaining a copy

 * of this software and associated documentation files (the "Software"), to

 * deal in the Software without restriction, including without limitation the

 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

 * sell copies of the Software, and to permit persons to whom the Software is

 * furnished to do so, subject to the following conditions:

 *

 * The above copyright notice and this permission notice shall be included in all

 * copies or substantial portions of the Software.

 *

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

 * SOFTWARE.

 */

#include "arm_compute/core/CL/CLHelpers.h"


#include "arm_compute/core/CL/CLKernelLibrary.h"

#include "arm_compute/core/CL/CLTypes.h"

#include "arm_compute/core/Error.h"

#include "arm_compute/core/Log.h"

#include "arm_compute/core/Types.h"

#include "arm_compute/core/utils/DataTypeUtils.h"


#include "src/gpu/cl/ClCompileContext.h"

#include "src/gpu/cl/ClKernelLibrary.h"


#include <utility>

#include <vector>


namespace arm_compute

{

/** Translate a tensor data type into the name of the matching OpenCL C scalar type.
 *
 * @param[in] dt Data type to translate.
 *
 * @return The OpenCL type name. For a data type without a mapping the error is reported through
 *         ARM_COMPUTE_ERROR and, should that return, an empty string is produced.
 */
std::string get_cl_type_from_data_type(const DataType &dt)
{
    std::string cl_type; // left empty when no mapping exists

    switch (dt)
    {
        // 8-bit types
        case DataType::QASYMM8:
        case DataType::U8:
            cl_type = "uchar";
            break;
        case DataType::QSYMM8_PER_CHANNEL:
        case DataType::QSYMM8:
        case DataType::QASYMM8_SIGNED:
        case DataType::S8:
            cl_type = "char";
            break;
        // 16-bit integer types
        case DataType::QASYMM16:
        case DataType::U16:
            cl_type = "ushort";
            break;
        case DataType::QSYMM16:
        case DataType::S16:
            cl_type = "short";
            break;
        // 32-bit integer types
        case DataType::U32:
            cl_type = "uint";
            break;
        case DataType::S32:
            cl_type = "int";
            break;
        // 64-bit integer types
        case DataType::U64:
            cl_type = "ulong";
            break;
        case DataType::S64:
            cl_type = "long";
            break;
        // Floating point types
        case DataType::F16:
            cl_type = "half";
            break;
        case DataType::F32:
            cl_type = "float";
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported input data type.");
            break;
    }

    return cl_type;
}


/** Return the name of the OpenCL C type that is one step wider than the type used for @p dt.
 *
 * 8-bit types map to 16-bit ones, 16-bit integers to 32-bit, 32-bit integers to 64-bit and
 * F16 to float; signedness is kept.
 *
 * @param[in] dt Data type whose promoted OpenCL type is requested.
 *
 * @return The promoted OpenCL type name. Data types not listed below are reported through
 *         ARM_COMPUTE_ERROR and, should that return, yield an empty string.
 */
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
{
    std::string promoted; // left empty when no promotion is defined

    switch (dt)
    {
        case DataType::QASYMM8:
        case DataType::U8:
            promoted = "ushort";
            break;
        case DataType::QSYMM8_PER_CHANNEL:
        case DataType::QSYMM8:
        case DataType::QASYMM8_SIGNED:
        case DataType::S8:
            promoted = "short";
            break;
        case DataType::QASYMM16:
        case DataType::U16:
            promoted = "uint";
            break;
        case DataType::QSYMM16:
        case DataType::S16:
            promoted = "int";
            break;
        case DataType::U32:
            promoted = "ulong";
            break;
        case DataType::S32:
            promoted = "long";
            break;
        case DataType::F16:
            promoted = "float";
            break;
        default:
            ARM_COMPUTE_ERROR("Cannot get promoted OpenCL type for the input data type.");
            break;
    }

    return promoted;
}


/** Return the name of the unsigned OpenCL C integer type occupying @p element_size bytes.
 *
 * @param[in] element_size Element size in bytes; 1, 2, 4 and 8 are recognised.
 *
 * @return The unsigned OpenCL type name. Any other size is reported through ARM_COMPUTE_ERROR
 *         and, should that return, yields an empty string.
 */
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
{
    if (element_size == 1)
    {
        return "uchar";
    }
    if (element_size == 2)
    {
        return "ushort";
    }
    if (element_size == 4)
    {
        return "uint";
    }
    if (element_size == 8)
    {
        return "ulong";
    }

    ARM_COMPUTE_ERROR("Data type not supported");
    return "";
}


/** Return the name of the signed OpenCL C integer type occupying @p element_size bytes.
 *
 * @param[in] element_size Element size in bytes; 1, 2, 4 and 8 are recognised.
 *
 * @return The signed OpenCL type name. Any other size is reported through ARM_COMPUTE_ERROR
 *         and, should that return, yields an empty string.
 */
std::string get_cl_signed_type_from_element_size(size_t element_size)
{
    if (element_size == 1)
    {
        return "char";
    }
    if (element_size == 2)
    {
        return "short";
    }
    if (element_size == 4)
    {
        return "int";
    }
    if (element_size == 8)
    {
        return "long";
    }

    ARM_COMPUTE_ERROR("Data type not supported");
    return "";
}


/** Return the name of the OpenCL C integer type to use as the condition operand of select()
 *  for elements of type @p dt.
 *
 * The returned type has the same width as the element type. Integer and quantized types keep
 * their signedness; floating point types map to the signed integer of equal width.
 *
 * @param[in] dt Data type of the values being selected.
 *
 * @return The OpenCL type name. Data types not listed below are reported through
 *         ARM_COMPUTE_ERROR and, should that return, yield an empty string.
 */
std::string get_cl_select_type_from_data_type(const DataType &dt)
{
    switch (dt)
    {
        case DataType::U8:
        case DataType::QASYMM8:
            // Unsigned 8-bit types get the unsigned type, consistent with the U16/U32/U64 cases below
            return "uchar";
        case DataType::S8:
        case DataType::QASYMM8_SIGNED:
        case DataType::QSYMM8:
        case DataType::QSYMM8_PER_CHANNEL:
            return "char";
        case DataType::U16:
        case DataType::QASYMM16:
            return "ushort";
        case DataType::F16:
        case DataType::S16:
        case DataType::QSYMM16:
            return "short";
        case DataType::U32:
            return "uint";
        case DataType::F32:
        case DataType::S32:
            return "int";
        case DataType::U64:
            return "ulong";
        case DataType::S64:
            return "long";
        default:
            ARM_COMPUTE_ERROR("Unsupported input data type.");
            return "";
    }
}


/** Return the name of the 32-bit OpenCL C type paired with 8-bit inputs of type @p dt by the
 *  dot8 helpers: "uint" for unsigned inputs, "int" for signed ones.
 *
 * @param[in] dt 8-bit data type of the inputs.
 *
 * @return The accumulator type name. Any other data type is reported through ARM_COMPUTE_ERROR
 *         and, should that return, yields an empty string.
 */
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
{
    std::string acc_type; // left empty for unsupported inputs

    switch (dt)
    {
        case DataType::QASYMM8:
        case DataType::U8:
            acc_type = "uint";
            break;
        case DataType::QSYMM8_PER_CHANNEL:
        case DataType::QSYMM8:
        case DataType::QASYMM8_SIGNED:
        case DataType::S8:
            acc_type = "int";
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
            break;
    }

    return acc_type;
}


/** Return the element width of @p dt in bits, formatted as a decimal string.
 *
 * @param[in] dt Data type to query.
 *
 * @return "8", "16", "32" or "64". Data types not listed below are reported through
 *         ARM_COMPUTE_ERROR and, should that return, yield "0".
 */
std::string get_data_size_from_data_type(const DataType &dt)
{
    std::string bits;

    switch (dt)
    {
        case DataType::QSYMM8_PER_CHANNEL:
        case DataType::QASYMM8_SIGNED:
        case DataType::QASYMM8:
        case DataType::QSYMM8:
        case DataType::S8:
        case DataType::U8:
            bits = "8";
            break;
        case DataType::F16:
        case DataType::QASYMM16:
        case DataType::QSYMM16:
        case DataType::S16:
        case DataType::U16:
            bits = "16";
            break;
        case DataType::F32:
        case DataType::S32:
        case DataType::U32:
            bits = "32";
            break;
        case DataType::S64:
        case DataType::U64:
            bits = "64";
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported input data type.");
            bits = "0";
            break;
    }

    return bits;
}


/** Determine the GPU target of an OpenCL device from its reported name.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return The target that get_target_from_name() derives from the CL_DEVICE_NAME string.
 */
GPUTarget get_target_from_device(const cl::Device &device)
{
    // The device name is the only input used to identify the target
    return get_target_from_name(device.getInfo<CL_DEVICE_NAME>());
}


/** Check whether the device lists the cl_arm_non_uniform_work_group_size extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool arm_non_uniform_workgroup_supported(const cl::Device &device)
{
    const char *const extension = "cl_arm_non_uniform_work_group_size";
    return device_supports_extension(device, extension);
}


/** Check whether the device lists the cl_khr_fp16 extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool fp16_supported(const cl::Device &device)
{
    const char *const extension = "cl_khr_fp16";
    return device_supports_extension(device, extension);
}


/** Check whether the 8-bit integer dot product can be used on the device.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the device lists cl_arm_integer_dot_product_int8, or if its GPU target is one
 *         of the targets covered by the software workaround below; false otherwise.
 */
bool dot8_supported(const cl::Device &device)
{
    const std::string name   = device.getInfo<CL_DEVICE_NAME>();
    const GPUTarget   target = get_target_from_name(name);

    // SW_WORKAROUND: workaround for DDK revision r14p0, to enable cl_arm_integer_dot_product_int8.
    // These targets are treated as supporting the extension even when it is not listed.
    const std::set<GPUTarget> workaround_targets = {GPUTarget::G76};

    if (device_supports_extension(device, "cl_arm_integer_dot_product_int8"))
    {
        return true;
    }

    return workaround_targets.find(target) != workaround_targets.end();
}


/** Check whether the device lists the cl_arm_integer_dot_product_accumulate_int8 extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool dot8_acc_supported(const cl::Device &device)
{
    const char *const extension = "cl_arm_integer_dot_product_accumulate_int8";
    return device_supports_extension(device, extension);
}


/** Classify the OpenCL version reported by a device.
 *
 * The CL_DEVICE_VERSION string is searched for a set of known markers; the first marker found
 * (in the order listed below) decides the result.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return The matching CLVersion, or CLVersion::UNKNOWN if no marker is present.
 */
CLVersion get_cl_version(const cl::Device &device)
{
    const std::string reported = device.getInfo<CL_DEVICE_VERSION>();

    // Checked top to bottom. Any "OpenCL 2*" string maps to CL20 and any "OpenCL 3*" string to CL30.
    const std::pair<const char *, CLVersion> known_versions[] = {
        std::pair<const char *, CLVersion>("OpenCL 3", CLVersion::CL30),
        std::pair<const char *, CLVersion>("OpenCL 2", CLVersion::CL20),
        std::pair<const char *, CLVersion>("OpenCL 1.2", CLVersion::CL12),
        std::pair<const char *, CLVersion>("OpenCL 1.1", CLVersion::CL11),
        std::pair<const char *, CLVersion>("OpenCL 1.0", CLVersion::CL10),
    };

    for (const auto &candidate : known_versions)
    {
        if (reported.find(candidate.first) != std::string::npos)
        {
            return candidate.second;
        }
    }

    return CLVersion::UNKNOWN;
}


/** Check whether a device reports a given OpenCL extension.
 *
 * The CL_DEVICE_EXTENSIONS string is searched for @p extension_name as a whole name, not as a
 * plain substring. This keeps a query for an extension whose name is a prefix of another one
 * (e.g. "cl_khr_command_buffer" vs "cl_khr_command_buffer_mutable_dispatch") from matching
 * the longer name.
 *
 * @param[in] device         OpenCL device to query.
 * @param[in] extension_name Name of the extension to look for. A null or empty name never matches.
 *
 * @return True if the extension is listed by the device, false otherwise.
 */
bool device_supports_extension(const cl::Device &device, const char *extension_name)
{
    if (extension_name == nullptr || *extension_name == '\0')
    {
        return false;
    }

    const std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
    const std::string wanted(extension_name);

    // Characters that can be part of an extension name; anything else is treated as a separator
    const auto is_name_char = [](char c)
    { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || (c == '_'); };

    for (std::string::size_type pos = extensions.find(wanted); pos != std::string::npos;
         pos                        = extensions.find(wanted, pos + 1))
    {
        const std::string::size_type end = pos + wanted.size();

        // Accept the hit only if it is not embedded in a longer extension name
        const bool starts_name = (pos == 0) || !is_name_char(extensions[pos - 1]);
        const bool ends_name   = (end == extensions.size()) || !is_name_char(extensions[end]);
        if (starts_name && ends_name)
        {
            return true;
        }
    }

    return false;
}


/** Check whether a Winograd output-tile / kernel-size combination is in the list of supported
 *  configurations for the given data layout.
 *
 * @param[in] output_tile Output tile size (width, height).
 * @param[in] kernel_size Kernel size (width, height).
 * @param[in] data_layout Data layout; NCHW uses its own table, any other value uses the NHWC table.
 *
 * @return True if the combination appears in the table for @p data_layout, false otherwise.
 */
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
                                             const Size2D &kernel_size,
                                             DataLayout    data_layout)
{
    ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN);

    // A configuration is ((tile width, tile height), (kernel width, kernel height))
    using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;

    const auto config = [](int tile_w, int tile_h, int kernel_w, int kernel_h)
    { return WinogradConfiguration(std::make_pair(tile_w, tile_h), std::make_pair(kernel_w, kernel_h)); };

    const std::vector<WinogradConfiguration> nchw_configs = {
        config(1, 2, 1, 3), config(1, 4, 1, 3), config(2, 1, 3, 1), config(4, 1, 3, 1), config(2, 2, 3, 3),
        config(4, 4, 3, 3), config(4, 4, 5, 5), config(4, 1, 5, 1), config(1, 4, 1, 5),
    };

    const std::vector<WinogradConfiguration> nhwc_configs = {
        config(2, 2, 3, 3), config(1, 4, 1, 3), config(4, 1, 3, 1), config(4, 4, 3, 3), config(4, 4, 5, 5),
        config(4, 1, 5, 1), config(1, 4, 1, 5), config(1, 2, 1, 7), config(2, 1, 7, 1), config(2, 2, 7, 7),
    };

    // The sizes are narrowed to int to match the table entries
    const WinogradConfiguration requested =
        config(static_cast<int>(output_tile.width), static_cast<int>(output_tile.height),
               static_cast<int>(kernel_size.width), static_cast<int>(kernel_size.height));

    // Pick the table for the layout, then look the requested configuration up in it
    const std::vector<WinogradConfiguration> &candidates =
        (data_layout == DataLayout::NCHW) ? nchw_configs : nhwc_configs;

    return std::find(candidates.begin(), candidates.end(), requested) != candidates.end();
}


/** Query the preferred vector width the device reports for elements of type @p dt.
 *
 * 8-bit types use the CHAR query, 16-bit integer types SHORT, 32-bit integer types INT,
 * F16 and F32 FLOAT, and 64-bit integer types LONG.
 *
 * @param[in] device OpenCL device to query.
 * @param[in] dt     Data type of the elements.
 *
 * @return The width reported by the device, or 1 for data types not listed below.
 */
size_t preferred_vector_width(const cl::Device &device, const DataType dt)
{
    size_t width = 1; // fallback for data types without a dedicated query

    switch (dt)
    {
        case DataType::QSYMM8_PER_CHANNEL:
        case DataType::QSYMM8:
        case DataType::QASYMM8_SIGNED:
        case DataType::QASYMM8:
        case DataType::S8:
        case DataType::U8:
            width = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR>();
            break;
        case DataType::QASYMM16:
        case DataType::QSYMM16:
        case DataType::S16:
        case DataType::U16:
            width = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT>();
            break;
        case DataType::S32:
        case DataType::U32:
            width = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT>();
            break;
        case DataType::F32:
        case DataType::F16:
            width = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT>();
            break;
        case DataType::S64:
        case DataType::U64:
            width = device.getInfo<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG>();
            break;
        default:
            break;
    }

    return width;
}


/** Report whether dummy work-items are preferred on the device.
 *
 * @param[in] device OpenCL device; currently not inspected.
 *
 * @return Always true.
 */
bool preferred_dummy_work_items_support(const cl::Device &device)
{
    // TODO (COMPMID-2044): the answer does not depend on the device yet
    ARM_COMPUTE_UNUSED(device);

    constexpr bool always_supported = true;
    return always_supported;
}


/** Check whether the device lists the cl_khr_image2d_from_buffer extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool image2d_from_buffer_supported(const cl::Device &device)
{
    const char *const extension = "cl_khr_image2d_from_buffer";
    return device_supports_extension(device, extension);
}


/** Query the CL_DEVICE_IMAGE_PITCH_ALIGNMENT value of a device.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return The value reported by the device, or 0 if the query fails.
 */
size_t get_cl_image_pitch_alignment(const cl::Device &device)
{
    cl_uint pitch_alignment = 0;

    const cl_int status = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(pitch_alignment),
                                          &pitch_alignment, nullptr);

    // A failed query is reported as "no alignment available"
    if (status != CL_SUCCESS)
    {
        return 0;
    }

    return pitch_alignment;
}


/** Query the CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT flag of a device.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True only if the query succeeds and the device reports CL_TRUE.
 */
bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
{
    cl_bool flag = CL_FALSE;

    const cl_int status =
        clGetDeviceInfo(device(), CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(flag), &flag, nullptr);

    if (status != CL_SUCCESS)
    {
        return false;
    }

    return flag == CL_TRUE;
}


/** Create an OpenCL kernel by name through a compile context.
 *
 * The program name, program source/binary and kernel path are looked up in the
 * opencl::ClKernelLibrary singleton and forwarded to CLCompileContext::create_kernel().
 *
 * @param[in] ctx         Compile context used to create the kernel.
 * @param[in] kernel_name Name of the kernel to create.
 * @param[in] build_opts  Build options forwarded to the compile context.
 *
 * @return The created kernel as a cl::Kernel.
 */
cl::Kernel
create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
{
    auto &library = opencl::ClKernelLibrary::get();

    // Resolve everything the compile context needs from the kernel library
    const std::string program_name = library.program_name(kernel_name);
    auto              program      = library.program(program_name);
    const std::string kernel_path  = library.kernel_path();

    auto built_kernel =
        ctx.create_kernel(kernel_name, program_name, program.program, kernel_path, build_opts, program.is_binary);

    return static_cast<cl::Kernel>(built_kernel);
}


/** Build a one-dimensional local work-size hint from an input dimension.
 *
 * The input dimension is rounded up to a multiple of @p vector_size, divided by 16, and the
 * result is capped at 8.
 *
 * @param[in] input_dimension Size of the input dimension.
 * @param[in] vector_size     Vector size the dimension is padded to.
 *                            NOTE(review): must be non-zero, since it is used as a modulo divisor.
 *
 * @return A cl::NDRange holding min(8, padded dimension / 16).
 */
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
{
    const unsigned int remainder = input_dimension % vector_size;

    // Pad up to the next multiple of vector_size when the dimension is not already aligned
    unsigned int padded_dimension = input_dimension;
    if (remainder != 0)
    {
        padded_dimension += vector_size - remainder;
    }

    const unsigned int thread_count = padded_dimension / 16;

    return cl::NDRange((thread_count < 8U) ? thread_count : 8U);
}


/** Check whether the device reports support for the workgroup batch size modifier (WBSM).
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM query succeeds and the
 *         returned bitfield has a bit in common with the mask tested below; false otherwise.
 */
bool get_wbsm_support_info(const cl::Device &device)
{
    cl_bitfield caps = 0;

    const cl_int status =
        clGetDeviceInfo(device.get(), CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, sizeof(caps), &caps, nullptr);

    // NOTE(review): this mask is the identifier that set_wbsm() passes to clSetKernelExecInfo as a
    // parameter name. Confirm it is also the intended capability bit for this bitfield.
    const bool wbsm_bit_set = (caps & CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM) != 0;

    return (status == CL_SUCCESS) && wbsm_bit_set;
}


/** Set the workgroup batch size modifier hint on a kernel.
 *
 * @param[in,out] kernel    Kernel to configure.
 * @param[in]     wbsm_hint Value passed to clSetKernelExecInfo for
 *                          CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM.
 */
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
{
    const cl_int err = clSetKernelExecInfo(kernel.get(), CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM,
                                           sizeof(wbsm_hint), &wbsm_hint);

    // The status is only consumed by the check below; mark it used in case that check compiles away
    ARM_COMPUTE_UNUSED(err);
    ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
}


bool export_to_cl_image(const ITensorInfo *tensor)

{

    if (tensor->tensor_shape()[0] % 4 != 0)

    {

        return false;

    }


    // If not floating point

    if (!is_data_type_float(tensor->data_type()))

    {

        return false;

    }


    // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform

    if (!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))

    {

        return false;

    }


    // Check cl image pitch alignment

    if (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)

    {

        return false;

    }


    const size_t image_w     = tensor->tensor_shape()[0] / 4;

    const size_t image_h     = tensor->tensor_shape().total_size() / tensor->tensor_shape()[0];

    const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();

    const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();


    if (image_w > max_image_w || image_h > max_image_h)

    {

        return false;

    }


    return true;

}


/** Add the -DUNROLL_WITH_PRAGMA build option if any of the given values exceeds
 *  max_manual_loop_unrolling.
 *
 * The option is added at most once.
 *
 * @param[in,out] built_opts Build options to extend.
 * @param[in]     values     Values compared against max_manual_loop_unrolling.
 */
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values)
{
    bool exceeds_limit = false;

    // Stop scanning at the first value above the limit
    for (auto it = values.begin(); it != values.end() && !exceeds_limit; ++it)
    {
        const int value = *it;
        exceeds_limit   = (value > max_manual_loop_unrolling);
    }

    if (exceeds_limit)
    {
        built_opts.add_option("-DUNROLL_WITH_PRAGMA");
    }
}


/** Check whether the device lists the cl_arm_matrix_multiply extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool arm_matrix_multiply_supported(const cl::Device &device)
{
    const char *const extension = "cl_arm_matrix_multiply";
    return device_supports_extension(device, extension);
}


/** Check whether the device lists the cl_khr_command_buffer extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return The result of device_supports_extension() for that extension name.
 */
bool command_buffer_supported(const cl::Device &device)
{
    const char *const extension = "cl_khr_command_buffer";
    return device_supports_extension(device, extension);
}


/** Check whether the device lists the cl_khr_command_buffer_mutable_dispatch extension.
 *
 * @param[in] device OpenCL device to query.
 *
 * @return True if the extension is reported by the device, false otherwise.
 */
bool command_buffer_mutable_dispatch_supported(const cl::Device &device)
{
    const char *const extension = "cl_khr_command_buffer_mutable_dispatch";
    return device_supports_extension(device, extension);
}


} // namespace arm_compute