ComputeLibrary/v21.02/_c_l_g_e_m_m_default_config_reshaped_bifrost_8cpp_source.xhtml

 /*
  * Copyright (c) 2019-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "src/core/CL/gemm/reshaped/CLGEMMDefaultConfigReshapedBifrost.h"

 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "src/core/CL/gemm/CLGEMMHelpers.h"

 #include <map>
 #include <utility>

 namespace arm_compute
 {
 namespace cl_gemm
 {
 using namespace arm_compute::misc::shape_calculator;

 /** Constructor.
  *
  * @param[in] gpu GPU target, forwarded to the ICLGEMMKernelConfiguration base class.
  */
 CLGEMMDefaultConfigReshapedBifrost::CLGEMMDefaultConfigReshapedBifrost(GPUTarget gpu) : ICLGEMMKernelConfiguration(gpu)
 {
     // No additional state to set up: everything is handled by the base-class constructor.
 }

 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
 {
     using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMDefaultConfigReshapedBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);

     // Configurations for Mali-G76
     static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G76 =
     {
         { DataType::F32, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_f32 },
         { DataType::F16, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_f16 },
         { DataType::QASYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_u8 },
         { DataType::QSYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_u8 },
         { DataType::QASYMM8_SIGNED, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_u8 },
         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultConfigReshapedBifrost::configure_G76_u8 }
     };

     // Configurations for Mali-G52
     static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G52 =
     {
         { DataType::F32, &CLGEMMDefaultConfigReshapedBifrost::configure_G52_f32 },
         { DataType::F16, &CLGEMMDefaultConfigReshapedBifrost::configure_G52_f16 },
         { DataType::QASYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QSYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QASYMM8_SIGNED, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 }
     };

     // Configurations for Mali-G7x
     static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
     {
         { DataType::F32, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_f32 },
         { DataType::F16, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_f16 },
         { DataType::QASYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QSYMM8, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QASYMM8_SIGNED, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 },
         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8 }
     };

     switch(_target)
     {
         case GPUTarget::G76:
             if(gemm_configs_G76.find(data_type) != gemm_configs_G76.end())
             {
                 return (this->*gemm_configs_G76[data_type])(m, n, k, b);
             }
             else
             {
                 ARM_COMPUTE_ERROR("Not supported data type");
             }
         default:
             if(gemm_configs_G7x.find(data_type) != gemm_configs_G7x.end())
             {
                 return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
             }
             else
             {
                 ARM_COMPUTE_ERROR("Not supported data type");
             }
     }
 }

 /** Mali-G7x heuristic for F32.
  *
  * Only @p n drives the decision; @p k and @p b are ignored.
  * The literals passed to configure_lhs_rhs_info are, in order:
  * m0, n0, k0, v0, h0, lhs_interleave, rhs_interleave, lhs_transpose, rhs_transpose.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k, b);

     // n <= 4 gets its own blocking; every other N uses the second configuration
     return (n <= 4) ? configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true)
                     : configure_lhs_rhs_info(m, n, 5, 4, 4, 2, 16, false, true, false, true);
 }

 /** Mali-G7x heuristic for F16.
  *
  * Only @p n drives the decision; @p k and @p b are ignored.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k, b);

     const bool is_narrow_output = (n <= 4);

     if(is_narrow_output)
     {
         // m0=4, n0=2, k0=8, v0=8, h0=2
         return configure_lhs_rhs_info(m, n, 4, 2, 8, 8, 2, true, true, true, false);
     }

     // m0=4, n0=8, k0=4, v0=4, h0=2
     return configure_lhs_rhs_info(m, n, 4, 8, 4, 4, 2, true, true, true, false);
 }

 /** Mali-G7x heuristic for the 8-bit quantized data types.
  *
  * The choice depends on whether dot8_supported() reports the integer dot-product
  * extension for the current device, and on whether @p n is at most 4.
  * @p k and @p b are ignored.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k, b);

     const bool has_dot8         = dot8_supported(CLKernelLibrary::get().get_device());
     const bool is_narrow_output = (n <= 4);

     if(has_dot8)
     {
         // With the dot-product extension both variants use k0=16
         return is_narrow_output ? configure_lhs_rhs_info(m, n, 4, 2, 16, 2, 2, true, false, false, true)
                                 : configure_lhs_rhs_info(m, n, 4, 4, 16, 2, 2, true, false, false, true);
     }

     // Without the dot-product extension
     return is_narrow_output ? configure_lhs_rhs_info(m, n, 4, 2, 8, 2, 2, true, false, false, true)
                             : configure_lhs_rhs_info(m, n, 6, 4, 4, 2, 2, true, true, false, true);
 }

 /** Mali-G52 heuristic for F32.
  *
  * A decision tree over the workload (m * n * b / 20) and the ratios m/n, m/k and n/k
  * picks one of three outcomes:
  *  - a configuration returned directly from configure_lhs_rhs_info (export_to_cl_image = false),
  *  - an image/buffer candidate pair with v0=4, h0=2, resolved by select_lhs_rhs_info,
  *  - an image/buffer candidate pair with v0=2, h0=1, resolved by select_lhs_rhs_info.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair chosen by the tree.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     const float r_mn     = static_cast<float>(m) / static_cast<float>(n);
     const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
     const float r_mk     = static_cast<float>(m) / static_cast<float>(k);
     const float r_nk     = static_cast<float>(n) / static_cast<float>(k);

     // Every image/buffer candidate pair in this tree shares m0=n0=k0=4 and the same
     // interleave/transpose flags; only v0 and h0 vary. The image candidate is built first,
     // then the buffer one, and select_lhs_rhs_info chooses between them.
     const auto select_img_or_buf = [m, n, k, b](unsigned int v0, unsigned int h0)
     {
         const auto candidate_img = configure_lhs_rhs_info(m, n, 4, 4, 4, v0, h0, true, true, false, true, true);
         const auto candidate_buf = configure_lhs_rhs_info(m, n, 4, 4, 4, v0, h0, true, true, false, true, false);

         return select_lhs_rhs_info(candidate_img, candidate_buf, n, k, b, DataType::F32);
     };

     if(workload <= 274.4000f)
     {
         // Only this corner of the tree skips the image/buffer selection
         const bool direct_config = (r_nk <= 0.7461f) && (r_mn <= 21.1667f);
         if(direct_config)
         {
             return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, true, true, false, false);
         }

         return select_img_or_buf(4, 2);
     }

     if(r_mk <= 17.3926f)
     {
         return (workload <= 542.4000f) ? select_img_or_buf(4, 2) : select_img_or_buf(2, 1);
     }

     if(r_nk <= 0.5463f)
     {
         return (workload <= 11767.6001f) ? select_img_or_buf(4, 2) : select_img_or_buf(2, 1);
     }

     return select_img_or_buf(4, 2);
 }

 /** Mali-G52 heuristic for F16.
  *
  * The configuration is chosen from the workload (m * n * b / 20) alone; @p k is ignored.
  * Both configurations pass export_to_cl_image = false.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k);

     const float workload          = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
     const bool  is_small_workload = (workload <= 323.4000f);

     if(is_small_workload)
     {
         return configure_lhs_rhs_info(m, n, 2, 2, 8, 4, 8, false, false, false, true, false);
     }

     return configure_lhs_rhs_info(m, n, 4, 8, 4, 2, 2, true, true, true, false, false);
 }

 /** Mali-G76 heuristic for F32.
  *
  * Two candidates are prepared: one with export_to_cl_image left at its default
  * (chosen from @p n) and one with export_to_cl_image = true (chosen from the
  * (m / 4) * (n / 4) workload estimate). The image candidate is returned only when
  * validate_image2d_support_on_rhs accepts the reshaped RHS and @p n is greater than 4;
  * otherwise the buffer candidate is returned.
  *
  * @p k and @p b are used to describe the RHS tensor shape (n, k, b) handed to
  * compute_rhs_reshaped_shape.
  *
  * @return The selected (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     // Get lhs_info/rhs_info in case of OpenCL buffer
     const auto info_buf = (n <= 4) ? configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true)
                                    : configure_lhs_rhs_info(m, n, 4, 4, 2, 8, 16, false, false, false, true);

     // Get lhs_info/rhs_info in case of OpenCL image
     // Condition on the GPU workload. The product is widened so that very large M and N
     // cannot wrap around in unsigned int and be mistaken for a small workload.
     const bool is_big_workload = (static_cast<unsigned long long>(m / 4) * static_cast<unsigned long long>(n / 4)) >= 2560ULL;
     const auto info_img        = is_big_workload ? configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true)
                                                  : configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);

     // Describe the reshaped RHS so that image2d support can be validated against it
     const TensorInfo  tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
     const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, info_img.second);
     const TensorInfo  tensor_reshaped_info(shape, 1, DataType::F32);

     // In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d
     const bool use_cl_image2d = (n > 4);

     if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)) && use_cl_image2d)
     {
         return info_img;
     }

     return info_buf;
 }

 /** Mali-G76 heuristic for F16.
  *
  * The configuration is chosen from the workload (m * n * b / 20) and the ratio m / k.
  * All configurations pass export_to_cl_image = false.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
     const float r_mk     = static_cast<float>(m) / static_cast<float>(k);

     if(workload <= 1595.2000f)
     {
         // Below the upper workload threshold, only the low-ratio / low-workload corner uses m0=2, n0=4
         const bool low_ratio_and_workload = (r_mk <= 2.1044f) && (workload <= 870.4000f);

         return low_ratio_and_workload ? configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 2, true, false, true, false, false)
                                       : configure_lhs_rhs_info(m, n, 4, 2, 4, 2, 2, false, false, true, false, false);
     }

     return configure_lhs_rhs_info(m, n, 4, 8, 4, 4, 2, true, true, true, false, false);
 }

 /** Mali-G76 heuristic for the 8-bit quantized data types.
  *
  * Only @p n drives the decision; @p k and @p b are ignored.
  *
  * @return The (GEMMLHSMatrixInfo, GEMMRHSMatrixInfo) pair built by configure_lhs_rhs_info.
  */
 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
 {
     ARM_COMPUTE_UNUSED(k, b);

     if(n > 4)
     {
         // m0=4, n0=4, k0=16, v0=2, h0=2
         return configure_lhs_rhs_info(m, n, 4, 4, 16, 2, 2, false, true, false, true);
     }

     // n <= 4: m0=4, n0=2, k0=16, v0=4, h0=1
     return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, false, false, false, true);
 }
 } // namespace cl_gemm
 } // namespace arm_compute
arm_compute::test::validation::shape
shape
Definition: DFT.cpp:115

arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39

arm_compute::GPUTarget::G76

arm_compute::ICLGEMMKernelConfiguration
Basic interface for the GEMM kernel configuration.
Definition: ICLGEMMKernelConfiguration.h:33

arm_compute::dot8_supported
bool dot8_supported(const cl::Device &device)
Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported...
Definition: CLHelpers.cpp:239

arm_compute::test::validation::b
SimpleTensor< float > b
Definition: DFT.cpp:157

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

arm_compute::cl_gemm::validate_image2d_support_on_rhs
Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info)
Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix...
Definition: CLGEMMHelpers.cpp:84

arm_compute::cl_gemm::CLGEMMDefaultConfigReshapedBifrost::configure
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override
Given M, N, K and B, this method returns the GEMMLHSMatrixInfo and GEMMRHSMatrixInfo to be used...
Definition: CLGEMMDefaultConfigReshapedBifrost.cpp:48

arm_compute::Format::F32
1 channel, 1 F32 per channel

arm_compute::CLKernelLibrary::get
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Definition: CLKernelLibrary.cpp:1119

arm_compute::GEMMLHSMatrixInfo
GEMM LHS (Left Hand Side) matrix information.
Definition: Types.h:1968

CLHelpers.h

TensorInfo.h

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: 00_introduction.dox:24

arm_compute::Format::F16
1 channel, 1 F16 per channel

arm_compute::test::validation::data_type
const DataType data_type
Definition: Im2Col.cpp:150

ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152

arm_compute::GEMMRHSMatrixInfo
GEMM RHS (Right Hand Side) matrix information.
Definition: Types.h:1983

arm_compute::DataType::QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned

arm_compute::misc::shape_calculator::compute_rhs_reshaped_shape
TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
Calculate the Right Hand Side matrix reshaped shape.
Definition: ShapeCalculator.h:224

ShapeCalculator.h

arm_compute::cl_gemm::CLGEMMDefaultConfigReshapedBifrost::CLGEMMDefaultConfigReshapedBifrost
CLGEMMDefaultConfigReshapedBifrost(GPUTarget gpu)
Constructor.
Definition: CLGEMMDefaultConfigReshapedBifrost.cpp:43

arm_compute::DataType::QSYMM8
quantized, symmetric fixed-point 8-bit number

arm_compute::DataType::QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number

GPUTarget.h

arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34

CLKernelLibrary.h
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...

arm_compute::cl_gemm::configure_lhs_rhs_info
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image)
Configure GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.
Definition: CLGEMMHelpers.cpp:40

arm_compute::TensorInfo
Store the tensor's metadata.
Definition: TensorInfo.h:45

arm_compute::misc::shape_calculator
Definition: ShapeCalculator.h:40

arm_compute::DataType::QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed

CLGEMMHelpers.h

arm_compute::DataType
DataType
Available data types.
Definition: Types.h:77

arm_compute::cl_gemm::select_lhs_rhs_info
std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > select_lhs_rhs_info(std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_img, std::pair< GEMMLHSMatrixInfo, GEMMRHSMatrixInfo > info_buf, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Select GEMMLHSMatrixInfo and GEMMRHSMatrixInfo.
Definition: CLGEMMHelpers.cpp:53

CLGEMMDefaultConfigReshapedBifrost.h

TensorShape.h