Compute Library
 20.08
NEGEMMLowpMatrixMultiplyCore.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
25 #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
26 
27 #include "NEActivationLayer.h"
42 
43 #include <memory>
44 
45 namespace arm_compute
46 {
47 class ITensor;
48 
49 /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available:
50  *
51  * -# @ref NEGEMMInterleave4x4Kernel
52  * -# @ref NEGEMMTranspose1xWKernel
53  * -# @ref NEGEMMLowpMatrixMultiplyKernel
54  * -# @ref NEGEMMLowpOffsetContributionKernel
55  * -# @ref NEActivationLayer
56  *
57  * otherwise if the DOT product instruction is available:
58  *
59  * -# @ref NEGEMMLowpOffsetContributionKernel
60  *
61 */
63 {
64 public:
65  /** Constructor */
66  NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
67  /** Prevent instances of this class from being copied (As this class contains pointers) */
69  /** Default move constructor */
71  /** Prevent instances of this class from being copied (As this class contains pointers) */
73  /** Default move assignment operator */
75  /** Initialise the kernel's inputs, output
76  *
77  * @note GEMM_LOWP: low precision GEMM kernel
78  * This kernel performs the following computations:
79  *
80  * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
81  * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
82  * -# Compute the matrix product of the resulting a * b in int32.
83  *
84  * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
85  *
86  * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
87  * @param[in] b Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
88  * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
89  * @param[out] output Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED
90  * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
91  * if the reshape of matrix B should be executed only for the first run
92  */
93  void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info = GEMMInfo());
94  /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyCore
95  *
96  * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
97  *
98  * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
99  * @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
100  * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
101  * @param[in] output Output tensor info. Data type supported: S32/QASYMM8/QASYMM8_SIGNED
102  * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
103  * if the reshape of matrix B should be executed only for the first run
104  *
105  * @return a status
106  */
107  static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
108 
109  // Inherited methods overridden
110  void run() override;
111  void prepare() override;
112 
113 private:
114  MemoryGroup _memory_group;
115  IWeightsManager *_weights_manager;
116  NEGEMMAssemblyDispatch _asm_glue;
118  NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel;
119  NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel;
120  NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
121  NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
122  NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
123  NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
124  NEActivationLayer _activation_func;
125  NEConvertQuantizedSignednessKernel _convert_to_signed_asymm;
126  NEConvertQuantizedSignednessKernel _convert_from_signed_asymm;
127 
128  Tensor _vector_sum_col;
129  Tensor _vector_sum_row;
130  Tensor _tmp_a;
131  Tensor _tmp_b;
132  Tensor _mm_result_s32;
133  Tensor _signed_a;
134  Tensor _signed_output;
135  const ITensor *_original_b;
136  int32_t _a_offset;
137  int32_t _b_offset;
138 
139  bool _run_vector_matrix_multiplication;
140  bool _assembly_path;
141  bool _fused_assembly_path;
142  bool _reshape_b_only_on_first_run;
143  bool _is_prepared;
144  bool _fuse_output_stage;
145  bool _run_activation;
146  bool _flip_signedness;
147 };
148 } // namespace arm_compute
149 #endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
void prepare() override
Prepare the function for executing.
Base class for all functions.
Definition: IFunction.h:30
SimpleTensor< float > b
Definition: DFT.cpp:157
NEGEMMLowpMatrixMultiplyCore(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Status class.
Definition: Error.h:52
Interface for NEON tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2020 Arm Limited.
NEON kernel to interleave the elements of a matrix.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
void run() override
Run the kernels contained in the function.
NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / ele...
NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
Basic implementation of the tensor interface.
Definition: Tensor.h:37
Weights manager interface to handle weights transformations.
NEON kernel used to add the offset contribution after NEGEMMLowpMatrixMultiplyKernel.
Basic function to run NEActivationLayerKernel.
NEON kernel used to add the offset contribution and perform the output stage after NEGEMMLowpMatrixMu...
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of NEGEMMLowpMatrixMultiply...
GEMM information class.
Definition: Types.h:1932
NEGEMMLowpMatrixMultiplyCore & operator=(const NEGEMMLowpMatrixMultiplyCore &)=delete
Prevent instances of this class from being copied (As this class contains pointers)
Basic function to execute GEMMLowpMatrixMultiplyCore on NEON.
NEON kernel to convert asymmetric unsigned to asymmetric signed and vice-versa.