Compute Library 21.02
NEGEMMLowpReductionKernel.h — source listing
Go to the documentation of this file.
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H
#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H

#include "src/core/NEON/INEKernel.h"

29 namespace arm_compute
30 {
// Forward declarations
class ITensor;
struct GEMMLowpReductionKernelInfo;

35 /** Common interface for all Neon reduction kernels */
37 {
38 public:
39  /** Constructor */
41  /** Prevent instances of this class from being copied (As this class contains pointers)*/
43  /** Prevent instances of this class from being copied (As this class contains pointers)*/
45  /** Allow instances of this class to be moved */
47  /** Allow instances of this class to be moved */
49  /** Default destructor */
50  virtual ~INEGEMMLowpReductionKernel() = default;
51 
52  /** Initialise the kernel's input and output.
53  *
54  * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
55  * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
56  * @param[in] info Kernel metadata:
57  * - k Number of matrix columns/rows depending on the type of reduction.
58  * - is_reshaped True if the matrix has been reshaped.
59  * - scalar Scalar value to multiply each reduced column/row by.
60  * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
61  */
62  virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
63 
64 protected:
65  const ITensor *_input;
66  ITensor *_output;
67  int32_t _k;
68  int32_t _scalar;
69  bool _mul_by_scalar;
70 };
71 
72 /** Neon kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
73  *
74  * @note This stage is needed to handle the offset of matrix product
75  * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
76  */
78 {
79 public:
80  const char *name() const override
81  {
82  return "NEGEMMLowpMatrixAReductionKernel";
83  }
84  /** Default constructor */
86  /** Prevent instances of this class from being copied */
88  /** Prevent instances of this class from being copied */
90  /** Allow instances of this class to be moved */
92  /** Allow instances of this class to be moved */
94  /** Default destructor */
96  /** Initialise the kernel's input and output.
97  *
98  * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
99  * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
100  * @param[in] info Kernel metadata:
101  * - k (num_mtx_a_cols) Number of matrix A columns
102  * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
103  * - scalar Scalar value to multiply each reduced row by.
104  * - mul_byscalar True if each reduced column must be multiplied by a scalar value.
105  */
106  void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
107  /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel
108  *
109  * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
110  * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
111  * @param[in] info Kernel metadata:
112  * - k (num_mtx_a_cols) Number of matrix A columns
113  * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4
114  * - scalar Scalar value to multiply each reduced row by.
115  * - mul_byscalar True if each reduced column must be multiplied by a scalar value.
116  *
117  * @return a status
118  */
119  static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
120 
121  // Inherited methods overridden:
122  void run(const Window &window, const ThreadInfo &info) override;
123 
124 private:
125  /** Execution of the reduction kernel specialized on the input type
126  *
127  * @param[in] window Execution window
128  */
129  template <typename T>
130  void run_internal(const Window &window);
131 };
132 
133 /** Neon kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
134  *
135  * @note This stage is needed to handle the offset of matrix product
136  * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
137  */
139 {
140 public:
141  const char *name() const override
142  {
143  return "NEGEMMLowpMatrixBReductionKernel";
144  }
145  /** Default constructor */
147  /** Prevent instances of this class from being copied (As this class contains pointers) */
149  /** Prevent instances of this class from being copied (As this class contains pointers) */
151  /** Allow instances of this class to be moved */
153  /** Allow instances of this class to be moved */
155  /** Default destructor */
157  /** Initialise the kernel's input and output.
158  *
159  * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
160  * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
161  * @param[in] info Kernel metadata:
162  * - k (num_mtx_b_rows) Number of matrix B rows.
163  * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
164  * - scalar Scalar value to multiply each reduced row by.
165  * - mul_byscalar True if each reduced row must be multiplied by a scalar value.
166  */
167  void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
168  /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel
169  *
170  * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
171  * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
172  * @param[in] info Kernel metadata:
173  * - k (num_mtx_b_rows) Number of matrix B rows.
174  * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW.
175  * - scalar Scalar value to multiply each reduced row by.
176  * - mul_byscalar True if each reduced row must be multiplied by a scalar value.
177  *
178  * @return a status
179  */
180  static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
181 
182  // Inherited methods overridden:
183  void run(const Window &window, const ThreadInfo &info) override;
184 
185 private:
186  /** Execution of the reduction kernel specialized on the input type
187  *
188  * @param[in] window Execution window
189  * @param[in] info Thread-related information
190  */
191  template <typename T>
192  void run_internal(const Window &window, const ThreadInfo &info);
193 };
194 } // namespace arm_compute
195 
196 #endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
Common interface for all kernels implemented in C++.
Definition: ICPPKernel.h:38
Neon kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A...
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Status class.
Definition: Error.h:52
Interface for Neon tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2021 Arm Limited.
const char * name() const override
Name of the kernel.
Common interface for all Neon reduction kernels.
virtual void run(const Window &window, const ThreadInfo &info)
Execute the kernel on the passed window.
Definition: ICPPKernel.h:55
Neon kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B...
const char * name() const override
Name of the kernel.
virtual ~INEGEMMLowpReductionKernel()=default
Default destructor.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
Definition: CPPTypes.h:235
INEGEMMLowpReductionKernel & operator=(const INEGEMMLowpReductionKernel &)=delete
Prevent instances of this class from being copied (As this class contains pointers) ...
virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info)=0
Initialise the kernel's input and output.
Describe a multidimensional execution window.
Definition: Window.h:39
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)