24 #ifndef ARM_COMPUTE_CL_GEMMLOWP_MATRIXMULTIPLY_CORE_H 25 #define ARM_COMPUTE_CL_GEMMLOWP_MATRIXMULTIPLY_CORE_H 41 class ClGemmLowpMatrixMultiplyNativeKernel;
42 class ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel;
43 class ClGemmReshapeRhsMatrixKernel;
44 class ClGemmLowpMatrixAReductionKernel;
45 class ClGemmLowpMatrixBReductionKernel;
46 class ClGemmLowpOffsetContributionKernel;
47 class ClGemmLowpOffsetContributionOutputStageKernel;
123 std::unique_ptr<kernels::ClCastKernel> _weights_to_qasymm8;
124 std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
125 std::unique_ptr<kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
126 std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;
127 std::unique_ptr<kernels::ClGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
128 std::unique_ptr<kernels::ClGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
129 std::unique_ptr<kernels::ClGemmLowpOffsetContributionKernel> _offset_contribution_kernel;
130 std::unique_ptr<kernels::ClGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
141 int32_t _a_offset{ 0 };
142 int32_t _b_offset{ 0 };
143 bool _is_gemm_reshaped{
true };
144 bool _reshape_b_only_on_first_run{
false };
145 bool _run_output_stage{
false };
146 bool _convert_to_qasymm8{
false };
147 bool _run_offset_contribution{
false };
148 bool _is_prepared{
false };
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
Store the tensor's metadata.
Copyright (c) 2017-2021 Arm Limited.
std::vector<MemoryInfo> MemoryRequirements
Store the tensor's metadata.
im2col_func.configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias);
Basic interface for functions which have a single async CL kernel.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)