27 #include "arm_gemm.hpp" 32 #define UNREACHABLE(why) __builtin_unreachable() 41 return (a + b - 1) /
b;
75 direct.stride = stride;
85 direct.base =
nullptr;
92 if (qp.per_channel_requant) {
93 return (qp.per_channel_left_shifts ==
nullptr);
95 return (qp.per_layer_left_shift == 0);
118 const T *
const *
const *
ptr;
127 direct.stride = stride;
131 IndirectInputArg(
const T *
const *
const *ptr,
unsigned int start_row,
unsigned int start_col) : is_indirect(true) {
138 direct.base =
nullptr;
146 #ifdef __ARM_FEATURE_SVE 148 inline unsigned long get_vector_length_sz() {
// VEC_LEN_SPEC(sz, opcode) expands to an explicit specialization of
// get_vector_length_sz<sz>().  The generated function executes the single
// instruction named by `opcode` through inline assembly, with one
// general-purpose output register, and returns the value left in that register.
//
// The macro is invoked four times below, pairing sz = 8, 4, 2, 1 with the
// opcodes "cntd", "cntw", "cnth", "cntb" respectively.
// NOTE(review): presumably each opcode yields the number of sz-byte elements in
// one SVE vector -- confirm against the Arm instruction reference.
// NOTE(review): the bare numerals (159, 161, 162, ...) look like listing line
// numbers fused into the text during extraction rather than code -- verify
// against the original header.
159 #define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; } 161 VEC_LEN_SPEC(8,
"cntd")
162 VEC_LEN_SPEC(4,
"cntw")
163 VEC_LEN_SPEC(2,
"cnth")
164 VEC_LEN_SPEC(1,
"cntb")
169 template <
typename T>
171 #ifdef __ARM_FEATURE_SVE 172 return get_vector_length_sz<sizeof(T)>();
174 return 16 /
sizeof(T);
T roundup(const T a, const T b)
IndirectOutputArg(T *const *ptr, size_t offset)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
bool quant_hybrid_symmetric(const Requantize32 &qp)
T iceildiv(const T a, const T b)
bool quant_no_left_shift(const Requantize32 &qp)
unsigned long get_vector_length()
IndirectOutputArg(T *base, size_t stride)
bool quant_hybrid_asymmetric(const Requantize32 &qp)