// Mark a code path as impossible; 'why' documents the reason and is ignored.
#define UNREACHABLE(why) __builtin_unreachable()

// Best-effort, human-readable name for the template type parameter.
//
// Relies on GCC/Clang's __PRETTY_FUNCTION__ expansion: kernel classes in
// this codebase are conventionally named "cls_<kernel>", so the pretty
// name is scanned for "cls_" and the text after it is returned, up to the
// ';' or ']' that delimits the template argument in the GCC/Clang formats.
//
// Returns "(unknown)" when no "cls_" marker is found and "(unsupported)"
// on compilers without __PRETTY_FUNCTION__.
template<typename T>
std::string get_type_name() {
#ifdef __GNUC__
    std::string s = __PRETTY_FUNCTION__;

    auto start = s.find("cls_");

    if (start==std::string::npos) {
        return "(unknown)";
    }

    // Skip the "cls_" prefix itself, then copy up to the delimiter.
    for(size_t x = start+4; x<s.size(); x++) {
        if (s[x] == ';' || s[x] == ']') {
            return s.substr(start+4, x-(start+4));
        }
    }

    // No delimiter found: pretty-function format not understood.
    return "(unknown)";
#else // !__GNUC__
    return "(unsupported)";
#endif
}
// Integer ceiling division: smallest integer q with q*b >= a (for a >= 0).
//
// Written as a/b plus a correction term rather than (a + b - 1) / b so the
// intermediate sum cannot overflow/wrap when 'a' is near the maximum value
// of T.  Intended for non-negative sizes and counts; 'b' must be non-zero.
template<typename T>
inline T iceildiv(const T a, const T b) {
    return a / b + (a % b != 0 ? T(1) : T(0));
}
// Output location for a kernel: either a dense block ("direct": base
// pointer plus row stride) or a pointer table ("indirect": per-row
// pointers plus an element offset).  Tagged by is_indirect; only the
// matching member is meaningful.
// NOTE(review): member layout reconstructed from fragments — verify
// against the kernels that consume this struct.
template<typename T>
struct IndirectOutputArg {
    struct {
        T       *base;
        size_t   stride;
    } direct;
    struct {
        T * const *ptr;
        size_t     offset;
    } indirect;
    bool is_indirect;

    // Direct output: contiguous rows spaced 'stride' elements apart.
    IndirectOutputArg(T *base, size_t stride) : is_indirect(false) {
        direct.base = base;
        direct.stride = stride;
    }

    // Indirect output: one pointer per row, with a common element offset.
    IndirectOutputArg(T *const *ptr, size_t offset) : is_indirect(true) {
        indirect.ptr = ptr;
        indirect.offset = offset;
    }

    // Default: empty direct output (null base, zero stride).
    IndirectOutputArg() : is_indirect(false) {
        direct.base = nullptr;
        direct.stride = 0;
    }
};
// Input location for a kernel: either a dense block ("direct": base
// pointer plus row stride) or a two-level pointer table ("indirect")
// addressed from a starting row/column.  Tagged by is_indirect; only the
// matching member is meaningful.
// NOTE(review): member layout reconstructed from fragments — verify
// against the kernels that consume this struct.
template<typename T>
struct IndirectInputArg {
    struct {
        const T *base;
        size_t   stride;
    } direct;
    struct {
        const T * const * const * ptr;
        unsigned int start_row;
        unsigned int start_col;
    } indirect;
    bool is_indirect;

    // Direct input: contiguous rows spaced 'stride' elements apart.
    IndirectInputArg(const T *base, size_t stride) : is_indirect(false) {
        direct.base = base;
        direct.stride = stride;
    }

    // Indirect input: pointer table plus the row/column to start from.
    IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col) : is_indirect(true) {
        indirect.ptr = ptr;
        indirect.start_row = start_row;
        indirect.start_col = start_col;
    }

    // Default: empty direct input (null base, zero stride).
    IndirectInputArg() : is_indirect(false) {
        direct.base = nullptr;
        direct.stride = 0;
    }
};
175 template <
typename T>
177 #if defined(__aarch64__) 188 return vl /
sizeof(T);
189 #else // !defined(__aarch64__) 190 return 16 /
sizeof(T);
191 #endif // defined(__aarch64__) 198 template <
typename T>
202 return get_vector_length<T>();
204 return 16 /
sizeof(T);
// Default activation clamp bounds for type T: the full representable
// range, i.e. "no clamping".
//
// The generic version uses numeric_limits min/max, which is correct for
// integral types.  Floating-point types are specialized below because
// numeric_limits<float>::min() is the smallest *positive* normal value,
// not the most negative representable one; they use +/-infinity instead.
template <typename T>
inline std::tuple<T, T> get_default_activation_values() {
    const T min = static_cast<T>(std::numeric_limits<T>::min());
    const T max = static_cast<T>(std::numeric_limits<T>::max());

    return std::make_tuple(min, max);
}

// float: unbounded range expressed as +/-infinity.
template <>
inline std::tuple<float, float> get_default_activation_values() {
    const float min = static_cast<float>(-std::numeric_limits<float>::infinity());
    const float max = static_cast<float>(std::numeric_limits<float>::infinity());

    return std::make_tuple(min, max);
}

#if defined(__ARM_FP16_ARGS)
// __fp16: bounds derived from float infinity (converts to fp16 infinity).
template <>
inline std::tuple<__fp16, __fp16> get_default_activation_values() {
    const __fp16 min = static_cast<__fp16>(-std::numeric_limits<float>::infinity());
    const __fp16 max = static_cast<__fp16>(std::numeric_limits<float>::infinity());

    return std::make_tuple(min, max);
}
#endif // defined(__ARM_FP16_ARGS)
IndirectOutputArg(T *const *ptr, size_t offset)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
const int32_t * per_channel_left_shifts
bool quant_hybrid_symmetric(const Requantize32 &qp)
T iceildiv(const T a, const T b)
int32_t per_layer_left_shift
std::tuple< float, float > get_default_activation_values()
bool quant_no_left_shift(const Requantize32 &qp)
std::string get_type_name()
unsigned long get_vector_length(VLType vl_type)
IndirectOutputArg(T *base, size_t stride)
bool quant_hybrid_asymmetric(const Requantize32 &qp)