Compute Library
 22.11
CpuGemm.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CPU_GEMM_H
25 #define ARM_COMPUTE_CPU_GEMM_H
26 
27 #include "src/cpu/ICpuOperator.h"
28 
31 #include "arm_compute/core/Types.h"
39 
40 #include <memory>
41 
42 namespace arm_compute
43 {
44 namespace cpu
45 {
46 /** Basic function to execute GEMM. This function calls the following kernels:
47  *
48  * If optimized assembly is available:
49  * -# @ref cpu::CpuGemmAssemblyDispatch
50  * -# @ref cpu::CpuActivation (if alpha != 1.0)
51  * Else:
52  * -# @ref cpu::kernels::CpuGemmInterleave4x4Kernel (if the output tensor is a matrix)
53  * -# @ref cpu::kernels::CpuGemmTranspose1xWKernel (if the output tensor is a matrix)
54  * -# @ref cpu::kernels::CpuGemmMatrixMultiplyKernel
55  * In both cases:
56  * -# @ref cpu::kernels::CpuGemmMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
57  * Else:
58  * -# @ref cpu::CpuAdd (if c != nullptr and is reshaped once and not optimized assembly in place)
59  *
60  * -# @ref cpu::CpuActivation (if activation is specified in GEMMInfo)
61  */
62 class CpuGemm : public ICpuOperator
63 {
64 public:
65  /** Default constructor */
66  CpuGemm() = default;
67  /** Default destructor */
68  ~CpuGemm() = default;
69  /** Configure operator for a given list of arguments
70  *
71  * Valid data layouts:
72  * - All
73  *
74  * Valid data type configurations:
75  * |src0 |src1 |src2 |dst |
76  * |:------------|:-----------|:---------|:--------------|
77  * |F32 |F32 |F32 |F32 |
78  * |F16 |F16 |F16 |F16 |
79  * |BFLOAT16 |BFLOAT16 |BFLOAT16 |FP32 |
80  *
81  * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
82  * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
83  *
84  * @param[in] a First input tensor info (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
85  * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
86  * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
87  * @param[out] d Output tensor info. Data type supported: same as @p a
88  * @param[in] alpha Weight of the matrix product
89  * @param[in] beta Weight of matrix C
90  * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
91  * if the reshape of matrix B should happen only for the first run
92  */
93  void configure(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, ITensorInfo *d,
94  float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
95  /** Static function to check if given info will lead to a valid configuration of @ref CpuGemm.
96  *
97  * Similar to @ref CpuGemm::configure()
98  *
99  * @return a status
100  */
101  static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d,
102  float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
103 
104  /** Indicates whether or not there is an optimal assembly implementation that can be used to process the given parameters.
105  *
106  * This method has the same use of @ref
107  * NEGEMMConvolutionLayer::has_opt_impl, with the only caveat that
108  * the value of arm_compute::WeightFormat need to be passed via the
109  * parameter gemm_info.
110  */
111  static Status has_opt_impl(arm_compute::WeightFormat &weight_format, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d,
112  const GEMMInfo &gemm_info = GEMMInfo());
113 
114  // Inherited methods overridden:
115  void run(ITensorPack &tensors) override;
116  void prepare(ITensorPack &constants) override;
118 
119  /** Indicates if the convolution executes in variable weights mode.
120  *
121  * When ACL executes convolution in variable weights mode, it does
122  * not perform any processing of the weights tensor. Instead, it
123  * utilizes the data as it is given by the user.
124  */
125  bool isVarWeightsKernel() const;
126 
127 private:
128  enum AuxTensorIdx
129  {
130  AsmGemmWorkspace = 0,
131  Pretraspose,
132  InterleavedLHS,
133  TransposedRHS,
134  TempResult,
135  Count
136  };
137 
138  std::unique_ptr<kernels::CpuGemmInterleave4x4Kernel> _interleave_kernel{ nullptr };
139  std::unique_ptr<kernels::CpuGemmTranspose1xWKernel> _transpose_kernel{ nullptr };
140  std::unique_ptr<kernels::CpuGemmMatrixMultiplyKernel> _mm_kernel{ nullptr };
141  std::unique_ptr<CpuGemmAssemblyDispatch> _asm_glue{ nullptr };
142  std::unique_ptr<kernels::CpuGemmMatrixAdditionKernel> _ma_kernel{ nullptr };
143  std::unique_ptr<CpuActivation> _alpha_scale_func{ nullptr };
144  std::unique_ptr<CpuAdd> _add_bias{ nullptr };
145  std::unique_ptr<CpuActivation> _activation_func{ nullptr };
146 
147  TensorInfo _tmp_a{};
148  TensorInfo _tmp_b{};
149  TensorInfo _tmp_d{};
150 
151  bool _run_vector_matrix_multiplication{ false };
152  bool _run_alpha_scale{ false };
153  bool _run_addition{ false };
154  bool _run_bias_addition{ false };
155  bool _run_activation{ false };
156  bool _reshape_b_only_on_first_run{ false };
157  bool _is_prepared{ false };
158 
159  experimental::MemoryRequirements _aux_mem{ Count };
160 };
161 } // namespace cpu
162 } // namespace arm_compute
163 #endif /*ARM_COMPUTE_CPU_GEMM_H */
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
Definition: CpuGemm.cpp:366
Basic interface for functions which have a single async CPU kernel.
Definition: INEOperator.h:43
static Status has_opt_impl(arm_compute::WeightFormat &weight_format, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info=GEMMInfo())
Indicates whether or not there is an optimal assembly implementation that can be used to process the ...
Definition: CpuGemm.cpp:371
SimpleTensor< float > b
Definition: DFT.cpp:157
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CpuGemm.
Definition: CpuGemm.cpp:153
void configure(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Configure operator for a given list of arguments.
Definition: CpuGemm.cpp:60
~CpuGemm()=default
Default destructor.
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Definition: CpuGemm.cpp:344
Store the tensor's metadata.
Definition: ITensorInfo.h:40
WeightFormat
Memory layouts for the weights tensor.
Definition: Types.h:2015
CpuGemm()=default
Default constructor.
Status class.
Definition: Error.h:52
Copyright (c) 2017-2022 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:134
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Definition: CpuGemm.cpp:273
bool isVarWeightsKernel() const
Indicates if the convolution executes in variable weights mode.
Definition: CpuGemm.cpp:379
Basic function to execute GEMM.
Definition: CpuGemm.h:62
Tensor packing service.
Definition: ITensorPack.h:39
Store the tensor's metadata.
Definition: TensorInfo.h:43
GEMM information class.
Definition: Types.h:2339