24.02.1
|
#include <algorithm>
#include <cassert>
#include "arm_gemm.hpp"
#include "bfloat.hpp"
#include "convolver.hpp"
#include "kernel_weight_format.hpp"
#include "kernel_traits.hpp"
#include "mergeresults.hpp"
#include "performance_parameters.hpp"
#include "quantized.hpp"
#include "transform.hpp"
#include "utils.hpp"
Go to the source code of this file.
Data Structures
class GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, FixedFormat, ForceThreadColumns, ForceFloatAccumulate >
Namespaces
arm_gemm
Macros
#define ALLOC_ROUND 64
#define ROUND_UP(x) ((((x) + ALLOC_ROUND-1) / ALLOC_ROUND) * ALLOC_ROUND)
Typedefs
template<typename strategy , typename To , typename Tr , typename OutputStage = Nothing>
using GemmInterleavedNoMerge = GemmInterleaved< strategy, To, Tr, OutputStage, false >
template<typename strategy , typename To , typename Tr , typename OutputStage = Nothing>
using GemmInterleavedFixedFormat = GemmInterleaved< strategy, To, Tr, OutputStage, true, true >
template<typename strategy , typename To , typename Tr >
using GemmInterleavedPretransposedNoMergeQuantizedInline = GemmInterleaved< strategy, To, Tr, Requantize32, false >
template<typename strategy , typename To , typename Tr >
using GemmInterleavedQuantized = GemmInterleaved< strategy, To, Tr, Requantize32 >
#define ALLOC_ROUND 64
Definition at line 47 of file gemm_interleaved.hpp.
#define ROUND_UP(x) ((((x) + ALLOC_ROUND-1) / ALLOC_ROUND) * ALLOC_ROUND)
Definition at line 48 of file gemm_interleaved.hpp.
typedef decltype(strategy::transforms) type
Definition at line 261 of file gemm_interleaved.hpp.
Referenced by AclTensorImport(), GpuKernelComponentGroup::add_component(), acl::detail::as_cenum(), arm_compute::cpu::bilinear_neon_scale(), CommonOptions::CommonOptions(), CLNormalizationLayerKernel::configure(), arm_compute::detail::convolve_3x3(), arm_compute::cpu::kernels::convolve_nchw(), arm_compute::cpu::kernels::convolve_nhwc(), CLFunctionFactory::create(), NEFunctionFactory::create(), SchedulerFactory::create(), ImageLoaderFactory::create(), arm_compute::cpu::directconv3d_float_neon_ndhwc(), arm_compute::cpu::directconv3d_quantized_neon_ndhwc(), arm_compute::cpu::elementwise_arithm_op(), arm_compute::cpu::elementwise_arithm_op_broadcast(), arm_compute::cpu::elementwise_arithmetic_op(), arm_compute::cpu::elementwise_comparison_op(), arm_compute::cpu::elementwise_comparison_quantized_op(), GemmInterleaved< strategy, To, Tr, OutputStage, MergeStep, FixedFormat, ForceThreadColumns, ForceFloatAccumulate >::execute(), arm_compute::test::validation::utils::fill(), AssetsLibrary::fill_boxes(), arm_compute::utils::fill_random_tensor(), arm_compute::test::validation::reference::gemmlowp_matrix_multiply_core(), arm_compute::utils::get_image_type_from_file(), Framework::get_profiler(), CpuTensor::import(), ClTensor::import(), Tensor::import(), TensorInfo::init(), TensorInfo::init_auto_padding(), Graph::nodes(), arm_compute::test::validation::reference::normalization_layer(), compare< AbsoluteTolerance< U > >::operator bool(), compare< RelativeTolerance< U > >::operator bool(), arm_compute::test::framework::operator&(), arm_compute::test::framework::operator|(), arm_compute::test::framework::operator|=(), arm_compute::test::validation::reference::pooling_layer_internal(), PassManager::run_type(), arm_compute::utils::save_to_npy(), arm_compute::detail::single_convolve_3x3_dilation(), arm_compute::string_from_norm_type(), arm_compute::string_from_pooling_type(), arm_compute::cpu::sve2_softmax_logits_1d_quantized(), arm_compute::test::sync_if_necessary(), 
arm_compute::to_string(), NENodeValidator::validate(), CLNodeValidator::validate(), and arm_compute::test::validation::validate_wrap().