24.02.1
|
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. More...
#include <ClGemmLowpMatrixMultiplyCore.h>
Public Member Functions | |
ClGemmLowpMatrixMultiplyCore () | |
~ClGemmLowpMatrixMultiplyCore () | |
void | configure (const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo()) |
Initialise the kernel's inputs and output. More... | |
void | run (ITensorPack &tensors) override |
Run the kernels contained in the function. More... | |
void | prepare (ITensorPack &constants) override |
Prepare the function for executing. More... | |
experimental::MemoryRequirements | workspace () const override |
Return the memory requirements of the workspace. More... | |
Public Member Functions inherited from ICLOperator | |
ICLOperator (IRuntimeContext *ctx=nullptr) | |
Constructor. More... | |
ICLOperator (const ICLOperator &)=delete | |
Prevent instances of this class from being copied (As this class contains pointers) More... | |
ICLOperator (ICLOperator &&)=default | |
Default move constructor. More... | |
ICLOperator & | operator= (const ICLOperator &)=delete |
Prevent instances of this class from being copied (As this class contains pointers) More... | |
ICLOperator & | operator= (ICLOperator &&)=default |
Default move assignment operator. More... | |
Public Member Functions inherited from IOperator | |
virtual | ~IOperator ()=default |
Destructor. More... | |
Static Public Member Functions | |
static Status | validate (const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo()) |
Static function to check if given info will lead to a valid configuration. More... | |
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
Definition at line 53 of file ClGemmLowpMatrixMultiplyCore.h.
Definition at line 288 of file ClGemmLowpMatrixMultiplyCore.cpp.
|
default |
void configure | ( | const CLCompileContext & | compile_context, |
ITensorInfo * | a, | ||
ITensorInfo * | b, | ||
ITensorInfo * | c, | ||
ITensorInfo * | output, | ||
const GEMMInfo & | gemm_info = GEMMInfo() |
||
) |
Initialise the kernel's inputs and output.
Valid data layouts:
Valid data type configurations:
src0 | src1 | src2 | dst |
---|---|---|---|
QASYMM8 | QASYMM8 | S32 | QASYMM8 |
QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8 |
QASYMM8 | QSYMM8 | S32 | QASYMM8 |
QASYMM8 | QASYMM8 | S32 | S32 |
QASYMM8 | QSYMM8_PER_CHANNEL | S32 | S32 |
QASYMM8 | QSYMM8 | S32 | S32 |
QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED |
QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED |
QASYMM8_SIGNED | QSYMM8 | S32 | QASYMM8_SIGNED |
QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | S32 |
QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | S32 |
QASYMM8_SIGNED | QSYMM8 | S32 | S32 |
[in] | compile_context | The compile context to be used. |
[in] | a | First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED. |
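[in] | b | Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL (see the table of valid data type configurations above for the combinations allowed with a). |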
[in] | c | Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32 |
[out] | output | Output tensor. Data type supported: S32, or QASYMM8/QASYMM8_SIGNED if gemm_info.gemmlowp_output_stage != NONE. |
[in] | gemm_info | (Optional) Specifies whether matrix A and/or matrix B have been reshaped, and whether the reshape of matrix B should be executed only on the first run. |
Definition at line 304 of file ClGemmLowpMatrixMultiplyCore.cpp.
References GEMMKernelInfo::a_offset, ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, ARM_COMPUTE_LOG_PARAMS, arm_compute::test::validation::b, GEMMKernelInfo::b_offset, arm_compute::misc::shape_calculator::compute_reductionA_shape(), arm_compute::misc::shape_calculator::compute_reductionB_shape(), ITensorInfo::data_type(), GEMMKernelInfo::depth_output_gemm3d, GEMMInfo::depth_output_gemm3d(), ITensorInfo::dimension(), GEMMLowpOutputStageInfo::gemmlowp_multipliers, GEMMInfo::gemmlowp_output_stage(), CLScheduler::get(), arm_compute::is_data_type_quantized_per_channel(), arm_compute::is_data_type_quantized_symmetric(), GEMMLowpOutputStageInfo::is_quantized_per_channel, GEMMKernelInfo::k, GEMMKernelInfo::lhs_info, GEMMKernelInfo::m, GEMMKernelInfo::n, arm_compute::NONE, UniformQuantizationInfo::offset, arm_compute::offset_int_vec(), GEMMLowpOutputStageInfo::output_data_type, GEMMKernelInfo::output_stage, arm_compute::experimental::Prepare, arm_compute::QASYMM8, ITensorInfo::quantization_info(), arm_compute::QUANTIZE_DOWN_FIXEDPOINT, GEMMKernelInfo::reinterpret_input_as_3d, GEMMInfo::reinterpret_input_as_3d(), GEMMInfo::reshape_b_only_on_first_run(), arm_compute::RESHAPED_ONLY_RHS, arm_compute::RESHAPED_ONLY_RHS_MMUL, GEMMKernelInfo::rhs_info, arm_compute::S32, TensorInfo::set_data_type(), CLScheduler::target(), TensorInfo::total_size(), GEMMLowpOutputStageInfo::type, QuantizationInfo::uniform(), ClGemmLowpMatrixMultiplyCore::validate(), and arm_compute::WRAP.
|
overridevirtual |
Prepare the function for executing.
Any one-off pre-processing step required by the function is handled here.
[in] | constants | Vector that contains the constant tensors. |
Reimplemented from ICLOperator.
Definition at line 877 of file ClGemmLowpMatrixMultiplyCore.cpp.
References arm_compute::ACL_DST, arm_compute::ACL_SRC, arm_compute::ACL_SRC_1, ARM_COMPUTE_ERROR_ON_NULLPTR, arm_compute::test::validation::b, CLScheduler::enqueue_op(), GEMMLowpOutputStageInfo::gemmlowp_multipliers, GEMMInfo::gemmlowp_output_stage(), GEMMLowpOutputStageInfo::gemmlowp_shifts, CLScheduler::get(), CLAuxTensorHandler::get(), ITensorPack::get_const_tensor(), ITensor::info(), GEMMLowpOutputStageInfo::is_quantized_per_channel, ICLTensor::map(), arm_compute::offset_int_vec(), ITensor::ptr_to_element(), CLScheduler::queue(), ITensorInfo::total_size(), and ICLTensor::unmap().
Referenced by ClGemmLowpMatrixMultiplyCore::run().
|
overridevirtual |
Run the kernels contained in the function.
[in] | tensors | Vector that contains the tensors to operate on. |
Reimplemented from ICLOperator.
Definition at line 759 of file ClGemmLowpMatrixMultiplyCore.cpp.
References arm_compute::ACL_BIAS, arm_compute::ACL_DST, arm_compute::ACL_MULTIPLIERS, arm_compute::ACL_SHIFTS, arm_compute::ACL_SRC, arm_compute::ACL_SRC_0, arm_compute::ACL_SRC_1, arm_compute::ACL_SRC_2, arm_compute::ACL_SRC_DST, arm_compute::ACL_VEC_COL_SUM, arm_compute::ACL_VEC_ROW_SUM, ARM_COMPUTE_ERROR, ARM_COMPUTE_ERROR_ON_NULLPTR, arm_compute::test::validation::b, arm_compute::test::validation::dst, CLScheduler::enqueue_op(), CLScheduler::get(), CLAuxTensorHandler::get(), ITensorPack::get_const_tensor(), ITensorPack::get_tensor(), arm_compute::offset_int_vec(), ClGemmLowpMatrixMultiplyCore::prepare(), arm_compute::RESHAPED_ONLY_RHS, and arm_compute::RESHAPED_ONLY_RHS_MMUL.
|
static |
Static function to check if given info will lead to a valid configuration.
Takes the same arguments as ClGemmLowpMatrixMultiplyCore::configure(), except that no compile context is passed and the tensor infos are const.
Definition at line 557 of file ClGemmLowpMatrixMultiplyCore.cpp.
References GEMMKernelInfo::a_offset, ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MSG, ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::auto_init_if_empty(), arm_compute::test::validation::b, GEMMKernelInfo::b_offset, ICloneable< T >::clone(), arm_compute::misc::shape_calculator::compute_mm_shape(), arm_compute::misc::shape_calculator::compute_reductionA_shape(), arm_compute::misc::shape_calculator::compute_reductionB_shape(), arm_compute::misc::shape_calculator::compute_rhs_reshaped_shape(), ITensorInfo::data_type(), GEMMKernelInfo::depth_output_gemm3d, GEMMInfo::depth_output_gemm3d(), ITensorInfo::dimension(), GEMMLowpOutputStageInfo::gemmlowp_multipliers, GEMMInfo::gemmlowp_output_stage(), CLScheduler::get(), GEMMInfo::is_a_reshaped(), GEMMInfo::is_b_reshaped(), arm_compute::is_data_type_quantized_asymmetric(), arm_compute::is_data_type_quantized_per_channel(), arm_compute::is_data_type_quantized_symmetric(), GEMMLowpOutputStageInfo::is_quantized_per_channel, GEMMKernelInfo::k, GEMMKernelInfo::lhs_info, GEMMKernelInfo::m, GEMMKernelInfo::n, arm_compute::NONE, UniformQuantizationInfo::offset, GEMMLowpOutputStageInfo::output_data_type, GEMMKernelInfo::output_stage, arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, arm_compute::QSYMM8, arm_compute::QSYMM8_PER_CHANNEL, ITensorInfo::quantization_info(), arm_compute::QUANTIZE_DOWN_FIXEDPOINT, GEMMKernelInfo::reinterpret_input_as_3d, GEMMInfo::reinterpret_input_as_3d(), GEMMInfo::reshape_b_only_on_first_run(), GEMMKernelInfo::rhs_info, arm_compute::S32, arm_compute::cl_gemm::auto_heuristics::select_default_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_default_gemm_config_reshaped_only_rhs(), CLScheduler::target(), ITensorInfo::total_size(), GEMMLowpOutputStageInfo::type, QuantizationInfo::uniform(), ClGemmLowpMatrixMultiplyNativeKernel::validate(), ClCastKernel::validate(), 
ClGemmReshapeRhsMatrixKernel::validate(), ClGemmLowpOffsetContributionKernel::validate(), ClGemmLowpOffsetContributionOutputStageKernel::validate(), ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::validate(), ClGemmLowpMatrixAReductionKernel::validate(), ClGemmLowpMatrixBReductionKernel::validate(), arm_compute::test::validation::weights_info, and arm_compute::WRAP.
Referenced by ClGemmLowpMatrixMultiplyCore::configure().
|
overridevirtual |
Return the memory requirements of the workspace.
Reimplemented from ICLOperator.
Definition at line 945 of file ClGemmLowpMatrixMultiplyCore.cpp.