21.02
|
Basic function to execute GEMMLowpMatrixMultiplyCore on Neon. More...
#include <NEGEMMLowpMatrixMultiplyCore.h>
Public Member Functions | |
NEGEMMLowpMatrixMultiplyCore (std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr) | |
Constructor. More... | |
NEGEMMLowpMatrixMultiplyCore (const NEGEMMLowpMatrixMultiplyCore &)=delete | |
Prevent instances of this class from being copied (As this class contains pointers) More... | |
NEGEMMLowpMatrixMultiplyCore (NEGEMMLowpMatrixMultiplyCore &&)=default | |
Default move constructor. More... | |
NEGEMMLowpMatrixMultiplyCore & | operator= (const NEGEMMLowpMatrixMultiplyCore &)=delete |
Prevent instances of this class from being copied (As this class contains pointers) More... | |
NEGEMMLowpMatrixMultiplyCore & | operator= (NEGEMMLowpMatrixMultiplyCore &&)=default |
Default move assignment operator. More... | |
~NEGEMMLowpMatrixMultiplyCore () | |
Default destructor. More... | |
void | configure (const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo()) |
Initialise the kernel's inputs, output. More... | |
void | run () override |
Run the kernels contained in the function. More... | |
void | prepare () override |
Prepare the function for executing. More... | |
Public Member Functions inherited from IFunction | |
virtual | ~IFunction ()=default |
Destructor. More... | |
Static Public Member Functions | |
static Status | validate (const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo()) |
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore. More... | |
Basic function to execute GEMMLowpMatrixMultiplyCore on Neon.
This function calls the following Neon kernels if the DOT product instruction is not available:
otherwise if the DOT product instruction is available:
Definition at line 63 of file NEGEMMLowpMatrixMultiplyCore.h.
NEGEMMLowpMatrixMultiplyCore | ( | std::shared_ptr< IMemoryManager > | memory_manager = nullptr , |
IWeightsManager * | weights_manager = nullptr |
||
) |
Constructor.
Definition at line 68 of file NEGEMMLowpMatrixMultiplyCore.cpp.
|
delete |
Prevent instances of this class from being copied (As this class contains pointers)
|
default |
Default move constructor.
|
default |
Default destructor.
void configure | ( | const ITensor * | a, |
const ITensor * | b, | ||
const ITensor * | c, | ||
ITensor * | output, | ||
const GEMMInfo & | gemm_info = GEMMInfo() |
||
) |
Initialise the kernel's inputs, output.
Note: The output type is S32 if gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise.
[in] | a | First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. |
[in] | b | Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. |
[in] | c | Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 |
[out] | output | Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED |
[in] | gemm_info | (Optional) Specifies if the matrix A and/or matrix B have been reshaped and if the reshape of matrix B should be executed only for the first run |
Definition at line 77 of file NEGEMMLowpMatrixMultiplyCore.cpp.
References GEMMInfo::activation_info(), TensorAllocator::allocate(), Tensor::allocator(), ARM_COMPUTE_ERROR, ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, ARM_COMPUTE_UNUSED, arm_compute::test::validation::b, ICloneable< T >::clone(), arm_compute::misc::shape_calculator::compute_interleaved_shape(), arm_compute::misc::shape_calculator::compute_reductionA_shape(), arm_compute::misc::shape_calculator::compute_reductionB_shape(), arm_compute::misc::shape_calculator::compute_transpose1xW_shape(), NEActivationLayer::configure(), ITensorInfo::data_type(), ITensorInfo::dimension(), dt, ActivationLayerInfo::enabled(), GEMMLowpOutputStageInfo::gemmlowp_max_bound, GEMMLowpOutputStageInfo::gemmlowp_min_bound, GEMMLowpOutputStageInfo::gemmlowp_offset, GEMMInfo::gemmlowp_output_stage(), ITensor::info(), Tensor::info(), TensorAllocator::init(), NEGEMMAssemblyDispatch::is_activation_supported(), arm_compute::is_data_type_quantized_asymmetric(), arm_compute::is_data_type_quantized_per_channel(), MemoryGroup::manage(), arm_compute::NONE, UniformQuantizationInfo::offset, arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, ITensorInfo::quantization_info(), arm_compute::QUANTIZE_DOWN_FIXEDPOINT, GEMMInfo::reshape_b_only_on_first_run(), arm_compute::S32, arm_compute::S8, UniformQuantizationInfo::scale, GEMMInfo::set_gemmlowp_output_stage(), ITensorInfo::tensor_shape(), GEMMLowpOutputStageInfo::type, arm_compute::U8, QuantizationInfo::uniform(), and NEGEMMLowpMatrixMultiplyCore::validate().
Referenced by NELSTMLayerQuantized::configure(), main(), and NEQLSTMLayer::NEQLSTMLayer().
|
delete |
Prevent instances of this class from being copied (As this class contains pointers)
|
default |
Default move assignment operator.
|
overridevirtual |
Prepare the function for executing.
Any one off pre-processing step required by the function is handled here
Reimplemented from IFunction.
Definition at line 573 of file NEGEMMLowpMatrixMultiplyCore.cpp.
References TensorAllocator::allocate(), Tensor::allocator(), IWeightsManager::are_weights_managed(), ARM_COMPUTE_ERROR_ON, Window::DimX, Window::DimY, Scheduler::get(), ITensor::is_used(), ITensor::mark_as_unused(), and IScheduler::schedule().
Referenced by NEGEMMConvolutionLayer::prepare(), and NEGEMMLowpMatrixMultiplyCore::run().
|
overridevirtual |
Run the kernels contained in the function.
For Neon kernels:
For OpenCL kernels:
Implements IFunction.
Definition at line 501 of file NEGEMMLowpMatrixMultiplyCore.cpp.
References Window::DimX, Window::DimY, Scheduler::get(), NEGEMMLowpMatrixMultiplyCore::prepare(), NEActivationLayer::run(), and IScheduler::schedule().
Referenced by main(), NELSTMLayerQuantized::run(), NEFullyConnectedLayer::run(), NEQLSTMLayer::run(), and NEGEMMConvolutionLayer::run().
|
static |
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiplyCore.
Note: The output type is S32 if gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise.
[in] | a | First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. |
[in] | b | Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL. |
[in] | c | Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32 |
[in] | output | Output tensor info. Data type supported: S32/QASYMM8/QASYMM8_SIGNED |
[in] | gemm_info | (Optional) Specifies if the matrix A and/or matrix B have been reshaped and if the reshape of matrix B should be executed only for the first run |
Definition at line 305 of file NEGEMMLowpMatrixMultiplyCore.cpp.
References GEMMInfo::activation_info(), ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MSG, ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::auto_init_if_empty(), arm_compute::test::validation::b, ICloneable< T >::clone(), arm_compute::misc::shape_calculator::compute_reductionA_shape(), arm_compute::misc::shape_calculator::compute_reductionB_shape(), ITensorInfo::data_type(), ITensorInfo::dimension(), dt, ActivationLayerInfo::enabled(), GEMMLowpOutputStageInfo::gemmlowp_max_bound, GEMMLowpOutputStageInfo::gemmlowp_min_bound, GEMMLowpOutputStageInfo::gemmlowp_offset, GEMMInfo::gemmlowp_output_stage(), GEMMInfo::is_a_reshaped(), GEMMInfo::is_b_reshaped(), arm_compute::is_data_type_quantized_asymmetric(), arm_compute::is_data_type_quantized_per_channel(), arm_compute::NONE, UniformQuantizationInfo::offset, arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, arm_compute::QSYMM8, arm_compute::QSYMM8_PER_CHANNEL, ITensorInfo::quantization_info(), arm_compute::QUANTIZE_DOWN_FIXEDPOINT, arm_compute::S32, UniformQuantizationInfo::scale, TensorShape::set(), ITensorInfo::tensor_shape(), GEMMLowpOutputStageInfo::type, QuantizationInfo::uniform(), NEConvertQuantizedSignednessKernel::validate(), NEGEMMLowpMatrixMultiplyKernel::validate(), NEActivationLayer::validate(), NEGEMMInterleave4x4Kernel::validate(), NEGEMMLowpOffsetContributionKernel::validate(), NEGEMMTranspose1xWKernel::validate(), NEGEMMAssemblyDispatch::validate(), NEGEMMLowpOffsetContributionOutputStageKernel::validate(), NEGEMMLowpMatrixAReductionKernel::validate(), and NEGEMMLowpMatrixBReductionKernel::validate().
Referenced by NEGEMMLowpMatrixMultiplyCore::configure(), arm_compute::test::validation::DATA_TEST_CASE(), and NELSTMLayerQuantized::validate().