54 bool is_default =
false;
55 uint64_t cycle_estimate = 0;
58 : method(m), name(n), is_default(d), cycle_estimate(c)
69 std::string filter =
"";
70 unsigned int inner_block_size = 0;
71 unsigned int outer_block_size = 0;
96 : type(type), param1(p1), param2(p2)
118 unsigned int K,
unsigned int Ksections,
unsigned int nbatches,
119 unsigned int nmulti,
bool indirect_input,
Activation act,
const int maxthreads,
120 bool fast_mode =
false,
const GemmConfig *cfg =
nullptr)
121 : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _Ksections(Ksections), _nbatches(nbatches), _nmulti(nmulti), _indirect_input(indirect_input), _act(act), _maxthreads(maxthreads), _fast_mode(fast_mode),
130 const int32_t *bias =
nullptr;
131 size_t bias_multi_stride = 0;
132 int32_t a_offset = 0;
133 int32_t b_offset = 0;
134 int32_t c_offset = 0;
135 bool per_channel_requant =
false;
136 int32_t per_layer_left_shift = 0;
137 int32_t per_layer_right_shift = 0;
138 int32_t per_layer_mul = 0;
139 const int32_t *per_channel_left_shifts =
nullptr;
140 const int32_t *per_channel_right_shifts =
nullptr;
141 const int32_t *per_channel_muls =
nullptr;
149 int32_t a_offset, int32_t b_offset, int32_t c_offset,
150 int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
151 : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(
std::max<int32_t>(requant_shift, 0)),
152 per_layer_right_shift(
std::min<int32_t>(requant_shift, 0)), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
158 int32_t a_offset, int32_t b_offset, int32_t c_offset,
159 const int32_t *requant_left_shifts,
160 const int32_t *requant_right_shifts,
161 const int32_t *requant_muls,
162 int32_t minv, int32_t maxv)
163 : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(true), per_channel_left_shifts(requant_left_shifts),
164 per_channel_right_shifts(requant_right_shifts), per_channel_muls(requant_muls), minval(minv), maxval(maxv)
173 template <
typename Top,
typename Tret>
181 template <
typename Top,
typename Tret,
class OutputStage = Nothing>
184 template <
typename Top,
typename Tret,
class OutputStage = Nothing>
187 template <
typename Top,
typename Tret,
class OutputStage = Nothing>
GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N, unsigned int K, unsigned int Ksections, unsigned int nbatches, unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads, bool fast_mode=false, const GemmConfig *cfg=nullptr)
KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage &={})
KernelDescription() noexcept
GemmConfig(GemmMethod method)
Activation(Type type=Type::None, float p1=0.0f, float p2=0.0f)
Requantize32(const int32_t *bias, size_t bias_multi_stride, int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
std::unique_ptr< GemmCommon< Top, Tret > > UniqueGemmCommon
std::vector< KernelDescription > get_compatible_kernels(const GemmArgs &args, const OutputStage &={})
KernelDescription(GemmMethod m, std::string n, bool d=false, uint64_t c=0)
UniqueGemmCommon< Top, Tret > gemm(const GemmArgs &args, const OutputStage &={})
Requantize32(const int32_t *bias, size_t bias_multi_stride, int32_t a_offset, int32_t b_offset, int32_t c_offset, const int32_t *requant_left_shifts, const int32_t *requant_right_shifts, const int32_t *requant_muls, int32_t minv, int32_t maxv)