21.05
|
Namespaces | |
utils | |
Data Structures | |
class | barrier |
class | convolver |
class | GemmHybrid |
class | GemmHybridIndirect |
class | GemmHybridQuantized |
class | GemmHybridQuantizedInline |
struct | GemmImplementation |
struct | GemmImplementation< Top, Tret, Nothing > |
class | GemmInterleaved |
class | GemmInterleavedPretransposed2d |
class | GemvBatched |
class | GemvPretransposed |
struct | IndirectInputArg |
struct | IndirectOutputArg |
struct | PerformanceParameters |
class | QuantizeWrapper |
class | StdTransformsFixed |
class | StdTransformsSVE |
struct | TransformImpl |
Typedefs | |
using | bfloat16 = arm_compute::bfloat16 |
template<typename strategy , typename To , typename Tr , typename OutputStage = Nothing> | |
using | GemmInterleavedNoMerge = GemmInterleaved< strategy, To, Tr, OutputStage, false > |
template<typename strategy , typename To , typename Tr > | |
using | GemmInterleavedPretransposedNoMergeQuantizedInline = GemmInterleaved< strategy, To, Tr, Requantize32, false > |
template<typename strategy , typename To , typename Tr > | |
using | GemmInterleavedQuantized = GemmInterleaved< strategy, To, Tr, Requantize32 > |
Enumerations | |
enum | VLType { None, SVE } |
Functions | |
template<typename T > | |
void | bias_adder (T *out, unsigned int stride, const T *bias, unsigned int rows, unsigned int cols) |
template<bool DoBias, typename T > | |
void | activator (T *out, unsigned int stride, const T *bias, Activation act, unsigned int rows, unsigned int cols) |
template<> | |
const GemmImplementation< bfloat16, float > * | gemm_implementation_list< bfloat16, float > () |
template UniqueGemmCommon< bfloat16, float > | gemm< bfloat16, float, Nothing > (const GemmArgs &args, const Nothing &) |
template KernelDescription | get_gemm_method< bfloat16, float, Nothing > (const GemmArgs &args, const Nothing &) |
template std::vector< KernelDescription > | get_compatible_kernels< bfloat16, float, Nothing > (const GemmArgs &args, const Nothing &) |
template<> | |
const GemmImplementation< float, float > * | gemm_implementation_list< float, float > () |
template UniqueGemmCommon< float, float > | gemm< float, float, Nothing > (const GemmArgs &args, const Nothing &) |
template KernelDescription | get_gemm_method< float, float, Nothing > (const GemmArgs &args, const Nothing &) |
template std::vector< KernelDescription > | get_compatible_kernels< float, float, Nothing > (const GemmArgs &args, const Nothing &) |
template<typename Top , typename Tret , class OutputStage = Nothing> | |
const GemmImplementation< Top, Tret, OutputStage > * | gemm_implementation_list () |
template<typename Top , typename Tret , class OutputStage > | |
bool | find_implementation (const GemmArgs &args, const OutputStage &os, const GemmImplementation< Top, Tret, OutputStage > *&impl) |
template<typename Top , typename Tret , class OutputStage > | |
std::vector< KernelDescription > | get_compatible_kernels (const GemmArgs &args, const OutputStage &os) |
template<typename Top , typename Tret , class OutputStage > | |
UniqueGemmCommon< Top, Tret > | gemm (const GemmArgs &args, const OutputStage &os) |
template<typename Top , typename Tret , class OutputStage > | |
KernelDescription | get_gemm_method (const GemmArgs &args, const OutputStage &os) |
template<unsigned int height_vectors, unsigned int block, VLType vlt, bool integrate_sums, typename TIn , typename TOut > | |
void | interleave_block (TOut *&out, const TIn *const *in, size_t width, size_t height, size_t row_offset, bool first) |
template<unsigned int height_vectors, unsigned int block, VLType vlt, typename TOut > | |
void | FixupRowSums (TOut *&out, const int32_t row_sum_multiplier) |
template<unsigned int height_vectors, unsigned int block, VLType vlt, typename TIn , typename TOut > | |
void | IndirectInterleave (TOut *out, const TIn *const *const *ptr, unsigned int stringlen, unsigned int rounded_stringlen, const unsigned int y0, const unsigned int ymax, const unsigned int k0, const unsigned int kmax, bool integrate_sums, const int32_t row_sum_multiplier) |
template<unsigned int height_vectors, unsigned int block, VLType vlt, typename TIn , typename TOut > | |
void | ConvolutionInterleave (TOut *out, const TIn *in, size_t in_stride, const convolver< TIn > &conv, const unsigned int rounded_stringlen, const unsigned int y0, const unsigned int ymax, const unsigned int k0, const unsigned int kmax, bool integrate_sums, const int32_t row_sum_multiplier) |
template<unsigned int height_vectors, unsigned int block, VLType vlt, typename TIn , typename TOut > | |
void | Interleave (TOut *out, const TIn *in, size_t in_stride, const unsigned int y0, const unsigned int ymax, const unsigned int k0, const unsigned int kmax, bool integrate_sums, const int32_t row_sum_multiplier) |
template<unsigned int twidth, unsigned int height, bool sve = false, typename Tin , typename Tout > | |
void | MergeResults (Tout *out, const Tin *in, int ldc, int y0, int ymax, int x0, int xmax, const Tout *bias, Activation act, bool append) |
template<typename Tin , typename Tout > | |
void | requantize_block_32 (const Requantize32 &qp, unsigned int width, unsigned int height, const Tin *input, unsigned int in_stride, Tout *output, unsigned int out_stride, const int32_t *row_bias, const int32_t *col_bias, unsigned int start_col) |
template<typename T > | |
void | compute_row_sums (const Requantize32 &qp, unsigned int width, unsigned int height, const T *input, unsigned int in_stride, int32_t *row_bias) |
template<typename T > | |
void | compute_col_sums (const Requantize32 &qp, unsigned int width, unsigned int height, const T *input, unsigned int in_stride, int32_t *col_bias, unsigned int depth, unsigned int multi, unsigned int first_col) |
template<typename T > | |
void | row_sums_indirect (unsigned int num_strings, const unsigned int *string_lengths, IndirectInputArg< T > A_arg, size_t M, int32_t *output_ptr, const Requantize32 *qp) |
template<unsigned int IntBy, unsigned int BlockBy, bool Transposed, VLType vlt = VLType::None, typename TOut , typename TIn > | |
void | Transform (TOut *out, const TIn *const in, const int stride, const int k0, const int kmax, const int x0, const int xmax) |
template<typename T > | |
T | iceildiv (const T a, const T b) |
template<typename T > | |
T | roundup (const T a, const T b) |
bool | quant_no_left_shift (const Requantize32 &qp) |
bool | quant_hybrid_symmetric (const Requantize32 &qp) |
bool | quant_hybrid_asymmetric (const Requantize32 &qp) |
Variables | |
std::mutex | report_mutex |
using bfloat16 = arm_compute::bfloat16 |
Definition at line 30 of file bfloat.hpp.
using GemmInterleavedNoMerge = GemmInterleaved<strategy, To, Tr, OutputStage, false> |
Definition at line 1049 of file gemm_interleaved.hpp.
using GemmInterleavedPretransposedNoMergeQuantizedInline = GemmInterleaved<strategy, To, Tr, Requantize32, false> |
Definition at line 1052 of file gemm_interleaved.hpp.
using GemmInterleavedQuantized = GemmInterleaved<strategy, To, Tr, Requantize32> |
Definition at line 1055 of file gemm_interleaved.hpp.
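void activator (T *out, unsigned int stride, const T *bias, Activation act, unsigned int rows, unsigned int cols) |
inline |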
Definition at line 40 of file bias_adder.hpp.
References bias_adder(), caffe_mnist_image_extractor::cols, tf_frozen_model_extractor::None, and caffe_mnist_image_extractor::rows.
void bias_adder (T *out, unsigned int stride, const T *bias, unsigned int rows, unsigned int cols) |
inline |
Definition at line 31 of file bias_adder.hpp.
References caffe_mnist_image_extractor::cols, and caffe_mnist_image_extractor::rows.
Referenced by activator(), and GemmHybrid< strategy, To, Tr >::execute().
void arm_gemm::compute_col_sums | ( | const Requantize32 & | qp, |
unsigned int | width, | ||
unsigned int | height, | ||
const T * | input, | ||
unsigned int | in_stride, | ||
int32_t * | col_bias, | ||
unsigned int | depth, | ||
unsigned int | multi, | ||
unsigned int | first_col | ||
) |
Referenced by GemmHybridQuantizedInline< strategy, To, Tr >::pretranspose_B_array(), GemmHybridQuantized< strategy, To, Tr >::pretranspose_B_array(), GemmHybridIndirect< strategy, To, Tr, OutputStage, SeparateQuantize >::pretranspose_B_array(), and GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::pretranspose_B_array().
void arm_gemm::compute_row_sums | ( | const Requantize32 & | qp, |
unsigned int | width, | ||
unsigned int | height, | ||
const T * | input, | ||
unsigned int | in_stride, | ||
int32_t * | row_bias | ||
) |
Referenced by GemmHybridQuantized< strategy, To, Tr >::execute().
void ConvolutionInterleave | ( | TOut * | out, |
const TIn * | in, | ||
size_t | in_stride, | ||
const convolver< TIn > & | conv, | ||
const unsigned int | rounded_stringlen, | ||
const unsigned int | y0, | ||
const unsigned int | ymax, | ||
const unsigned int | k0, | ||
const unsigned int | kmax, | ||
bool | integrate_sums, | ||
const int32_t | row_sum_multiplier | ||
) |
Definition at line 224 of file interleave_indirect.cpp.
References offset(), convolver< T >::process_columns(), and SVE.
bool arm_gemm::find_implementation | ( | const GemmArgs & | args, |
const OutputStage & | os, | ||
const GemmImplementation< Top, Tret, OutputStage > *& | impl | ||
) |
Definition at line 166 of file gemm_implementation.hpp.
References GemmTuner::args, GemmImplementation< Top, Tret, OutputStage >::do_cycle_estimate(), and GemmImplementation< Top, Tret, OutputStage >::method.
Referenced by get_compatible_kernels().
void FixupRowSums (TOut *&out, const int32_t row_sum_multiplier) |
inline |
Definition at line 123 of file interleave_indirect.cpp.
References SVE.
UniqueGemmCommon<Top, Tret> arm_gemm::gemm | ( | const GemmArgs & | args, |
const OutputStage & | os | ||
) |
Definition at line 239 of file gemm_implementation.hpp.
References GemmTuner::args, and GemmImplementation< Top, Tret, OutputStage >::do_instantiate().
template UniqueGemmCommon<bfloat16, float> arm_gemm::gemm< bfloat16, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
template UniqueGemmCommon<float, float> arm_gemm::gemm< float, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
const GemmImplementation<Top, Tret, OutputStage>* arm_gemm::gemm_implementation_list | ( | ) |
const GemmImplementation<bfloat16, float>* arm_gemm::gemm_implementation_list< bfloat16, float > | ( | ) |
Definition at line 122 of file gemm_bf16.cpp.
const GemmImplementation<float, float>* arm_gemm::gemm_implementation_list< float, float > | ( | ) |
Definition at line 189 of file gemm_fp32.cpp.
std::vector<KernelDescription> arm_gemm::get_compatible_kernels | ( | const GemmArgs & | args, |
const OutputStage & | os | ||
) |
Definition at line 216 of file gemm_implementation.hpp.
References GemmTuner::args, GemmImplementation< Top, Tret, OutputStage >::do_cycle_estimate(), find_implementation(), and GemmImplementation< Top, Tret, OutputStage >::method.
template std::vector<KernelDescription> arm_gemm::get_compatible_kernels< bfloat16, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
template std::vector<KernelDescription> arm_gemm::get_compatible_kernels< float, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
KernelDescription arm_gemm::get_gemm_method | ( | const GemmArgs & | args, |
const OutputStage & | os | ||
) |
Definition at line 250 of file gemm_implementation.hpp.
References GemmTuner::args, GemmImplementation< Top, Tret, OutputStage >::method, and GemmImplementation< Top, Tret, OutputStage >::name.
template KernelDescription arm_gemm::get_gemm_method< bfloat16, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
template KernelDescription arm_gemm::get_gemm_method< float, float, Nothing > | ( | const GemmArgs & | args, |
const Nothing & | |||
) |
T iceildiv (const T a, const T b) |
inline |
Definition at line 40 of file utils.hpp.
References arm_compute::test::validation::b.
Referenced by NEWinogradConvolutionLayer::configure(), GemmInterleavedPretransposed2d< strategy, To, Tr >::estimate_cycles(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::estimate_cycles(), GemvPretransposed< strategy, To, Tr >::execute(), GemmInterleavedPretransposed2d< strategy, To, Tr >::GemmInterleavedPretransposed2d(), GemmInterleavedPretransposed2d< strategy, To, Tr >::get_B_pretransposed_array_size(), GemvPretransposed< strategy, To, Tr >::get_window_size(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::get_window_size(), and GemmInterleavedPretransposed2d< strategy, To, Tr >::pretranspose_B_array().
void IndirectInterleave | ( | TOut * | out, |
const TIn *const *const * | ptr, | ||
unsigned int | stringlen, | ||
unsigned int | rounded_stringlen, | ||
const unsigned int | y0, | ||
const unsigned int | ymax, | ||
const unsigned int | k0, | ||
const unsigned int | kmax, | ||
bool | integrate_sums, | ||
const int32_t | row_sum_multiplier | ||
) |
Definition at line 153 of file interleave_indirect.cpp.
References SVE.
void Interleave | ( | TOut * | out, |
const TIn * | in, | ||
size_t | in_stride, | ||
const unsigned int | y0, | ||
const unsigned int | ymax, | ||
const unsigned int | k0, | ||
const unsigned int | kmax, | ||
bool | integrate_sums, | ||
const int32_t | row_sum_multiplier | ||
) |
Definition at line 263 of file interleave_indirect.cpp.
References SVE.
void arm_gemm::interleave_block | ( | TOut *& | out, |
const TIn *const * | in, | ||
size_t | width, | ||
size_t | height, | ||
size_t | row_offset, | ||
bool | first | ||
) |
Definition at line 59 of file interleave_indirect.cpp.
References SVE.
void MergeResults | ( | Tout * | out, |
const Tin * | in, | ||
int | ldc, | ||
int | y0, | ||
int | ymax, | ||
int | x0, | ||
int | xmax, | ||
const Tout * | bias, | ||
Activation | act, | ||
bool | append | ||
) |
Definition at line 39 of file mergeresults.cpp.
References tf_frozen_model_extractor::None.
Referenced by StdTransformsSVE< TOperand, TResult, height, width_vectors, block, mmla, integrate_sums >::Merge().
bool quant_hybrid_asymmetric (const Requantize32 &qp) |
inline |
Definition at line 107 of file utils.hpp.
References quant_no_left_shift().
bool quant_hybrid_symmetric (const Requantize32 &qp) |
inline |
Definition at line 101 of file utils.hpp.
References quant_no_left_shift().
bool quant_no_left_shift (const Requantize32 &qp) |
inline |
Definition at line 91 of file utils.hpp.
Referenced by quant_hybrid_asymmetric(), and quant_hybrid_symmetric().
void arm_gemm::requantize_block_32 | ( | const Requantize32 & | qp, |
unsigned int | width, | ||
unsigned int | height, | ||
const Tin * | input, | ||
unsigned int | in_stride, | ||
Tout * | output, | ||
unsigned int | out_stride, | ||
const int32_t * | row_bias, | ||
const int32_t * | col_bias, | ||
unsigned int | start_col | ||
) |
Referenced by GemmHybridQuantized< strategy, To, Tr >::execute().
T roundup (const T a, const T b) |
inline |
Definition at line 45 of file utils.hpp.
References arm_compute::test::validation::b.
Referenced by NEWinogradConvolutionLayer::configure(), GemmHybrid< strategy, To, Tr >::estimate_cycles(), GemmInterleavedPretransposed2d< strategy, To, Tr >::estimate_cycles(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::estimate_cycles(), GemvPretransposed< strategy, To, Tr >::execute(), GemmHybrid< strategy, To, Tr >::execute(), GemmHybridQuantized< strategy, To, Tr >::execute(), GemmHybridQuantizedInline< strategy, To, Tr >::execute(), GemmHybridIndirect< strategy, To, Tr, OutputStage, SeparateQuantize >::execute(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::execute(), GemmHybrid< strategy, To, Tr >::get_B_pretransposed_array_size(), GemmHybridQuantizedInline< strategy, To, Tr >::get_B_pretransposed_array_size(), GemmHybridQuantized< strategy, To, Tr >::get_B_pretransposed_array_size(), GemmHybridIndirect< strategy, To, Tr, OutputStage, SeparateQuantize >::get_B_pretransposed_array_size(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::get_B_pretransposed_array_size(), GemmHybrid< strategy, To, Tr >::pretranspose_B_array(), GemmHybridQuantizedInline< strategy, To, Tr >::pretranspose_B_array(), GemmHybridQuantized< strategy, To, Tr >::pretranspose_B_array(), GemmHybridIndirect< strategy, To, Tr, OutputStage, SeparateQuantize >::pretranspose_B_array(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, ForceThreadColumns >::pretranspose_B_array(), and NEWinogradLayerTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::run().
void arm_gemm::row_sums_indirect | ( | unsigned int | num_strings, |
const unsigned int * | string_lengths, | ||
IndirectInputArg< T > | A_arg, | ||
size_t | M, | ||
int32_t * | output_ptr, | ||
const Requantize32 * | qp | ||
) |
void arm_gemm::Transform | ( | TOut * | out, |
const TIn *const | in, | ||
const int | stride, | ||
const int | k0, | ||
const int | kmax, | ||
const int | x0, | ||
const int | xmax | ||
) |
Definition at line 109 of file transform.hpp.