Compute Library 20.08
NEGEMMConvolutionLayer.h
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H
#define ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>
namespace arm_compute
{
class ITensor;

/** Function to reshape the weights. This function calls the following kernel:
 * -# @ref NEWeightsReshapeKernel
 */
class NEConvolutionLayerReshapeWeights : public IFunction
{
public:
    /** Constructor */
    NEConvolutionLayerReshapeWeights();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
    /** Default move constructor */
    NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
    /** Default move assignment operator */
    NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
    /** Set the input and output tensors.
     *
     * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                     Data type supported: All.
     * @param[in]  biases  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                     Data type supported: same as @p weights.
     *                     @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
     * @param[out] output  Destination tensor. Data types supported: same as @p weights.
     */
    void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
    /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
     *
     * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                    Data type supported: All.
     * @param[in] biases  Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                    Data type supported: same as @p weights.
     *                    @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
     * @param[in] output  Destination tensor info. Data types supported: same as @p weights.
     *
     * @return an error status
     */
    static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);

    // Inherited methods overridden:
    void run() override;

private:
    NEWeightsReshapeKernel _weights_reshape_kernel;
};

namespace weights_transformations
{
/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
{
public:
    void configure(const ITensor *input, const ITensor *biases)
    {
        _bias_bit = (biases != nullptr) ? 1 : 0;
        _func.configure(input, biases, &_output);
    }

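    // Runs the reshape once: the destination tensor is allocated lazily on first use,
    // and _reshape_run (from the ITransformWeights base class) marks the cached
    // reshaped weights as valid from then on.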
    void run() override
    {
        _output.allocator()->allocate();
        _func.run();
        _reshape_run = true;
    }

    ITensor *get_weights() override
    {
        return &_output;
    }

    void release() override
    {
        _output.allocator()->free();
    }

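    // Unique id used by the weights manager to tell reshape transforms apart:
    // 0x8 identifies this transform and bit 7 encodes whether a bias was appended,
    // so reshapes with and without a fused bias are cached separately.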
    uint32_t uid() override
    {
        return ((0x8) | (_bias_bit << 7));
    }

    bool is_reshape_run()
    {
        return _reshape_run;
    }

private:
    Tensor                           _output{};
    NEConvolutionLayerReshapeWeights _func{};
    int32_t                          _bias_bit{ 0 };
};
} // namespace weights_transformations

/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
 *
 * -# @ref NEIm2ColKernel
 * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
 * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
 * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
 * -# @ref NECol2ImKernel (if NCHW data layout)
 */
class NEGEMMConvolutionLayer : public IFunction
{
public:
    /** Constructor */
    NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
    /** Default move constructor */
    NEGEMMConvolutionLayer(NEGEMMConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default;
    /** Set the input and output tensors.
     *
     * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
     *                          while every optional dimension from 4 and above represents a batch of inputs.
     *                          Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                          Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                          Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
     *                          Data types supported: Same as @p input.
     * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
     *                          tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
     * @param[in]  dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     * @param[in]  act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  num_groups   (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
     */
    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
     *
     * @param[in] input        Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
     *                         while every optional dimension from 4 and above represents a batch of inputs.
     *                         Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights      Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
     *                         Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in] biases       Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                         Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[in] output       Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
     *                         Data types supported: Same as @p input.
     * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
     * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
     *                         tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
     * @param[in] dilation     (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     * @param[in] act_info     (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] num_groups   (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Configures the appropriate matrix multiply routine
     *
     * @param[in]  input         Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in]  weights       Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in]  biases        Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                           Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[out] output        Output tensor. Data types supported: Same as @p input,
     *                           except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
     * @param[in]  act_info      (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in]  gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
     */
    void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
    /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
     *
     * @param[in] input         Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights       Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
     * @param[in] biases        Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
     *                          Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
     * @param[in] output        Output tensor info. Data types supported: Same as @p input,
     *                          except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
     * @param[in] act_info      (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
     * @param[in] skip_im2col   (Optional) Flag which specifies if im2col has to be skipped, i.e. for a 1x1 convolution with NHWC data layout. (Defaults to false)
     *
     * @return a status
     */
    static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
                              int gemm_3d_depth = 1, bool skip_im2col = false);
    /** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
     *
     * @param[in] input_info    Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] weights_info  Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
     * @param[in] act_info      Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
     * @param[in] gemm_3d_depth Depth of GEMM 3D
     * @param[in] skip_im2col   Flag which specifies if im2col has to be skipped, i.e. for a 1x1 convolution with NHWC data layout
     *
     * @return a status
     */
    static Status validate_gemm3d(const ITensorInfo *input_info, const ITensorInfo *weights_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);

private:
    MemoryGroup                                                         _memory_group;
    IWeightsManager                                                    *_weights_manager;
    NEConvolutionLayerReshapeWeights                                    _reshape_weights;
    weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
    NEIm2ColKernel                                                      _im2col_kernel;
    NEGEMM                                                              _mm_gemm;
    NEGEMMLowpMatrixMultiplyCore                                        _mm_gemmlowp;
    NECol2ImKernel                                                      _col2im_kernel;
    NEReshapeLayer                                                      _reshape_layer;

    const ITensor *_original_weights;

    Tensor _im2col_output;
    Tensor _weights_reshaped;
    Tensor _gemm_output;
    Tensor _tmp_output;

    DataLayout _data_layout;

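    // Flags fixed at configure() time: im2col is skipped for 1x1 convolutions in NHWC
    // layout and col2im is only required for NCHW, as the class-level comment describes.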
    bool _skip_im2col;
    bool _skip_col2im;
    bool _is_quantized;
    bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEGEMMCONVOLUTIONLAYER_H */
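
For orientation, here is a minimal usage sketch of the function declared above. The shapes are illustrative assumptions (an F32 3x3 kernel mapping 16 input channels to 32 output channels on a 64x64 image, default NCHW layout, single batch), not something this header prescribes: configure() wires up the kernels, backing memory is allocated afterwards, and run() triggers the one-off weights preparation before executing the convolution.

    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative tensor shapes; TensorShape is ordered [width, height, IFM(, OFM)].
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U), 1, DataType::F32));

        // Unit stride with padding 1 keeps the 64x64 spatial size for a 3x3 kernel.
        const PadStrideInfo conv_info(1, 1, 1, 1);

        NEGEMMConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, conv_info);

        // Allocate backing memory after configure() so padding requirements are known.
        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src/weights/biases ...

        conv.run(); // first run prepares (reshapes) the weights, then executes
        return 0;
    }

The static validate() overload takes the matching ITensorInfo objects and the same configuration arguments, so a caller can check that a configuration is supported before committing to it.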