Arm Compute Library 21.05 — CLFullyConnectedLayer.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H
25 #define ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H
26 
// NOTE(review): the include block (doc lines 27-36) was dropped by the doc
// extraction; reconstructed from the types used below — confirm against upstream.
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
38 namespace arm_compute
39 {
40 namespace weights_transformations
41 {
42 /** Basic function to manage the reshape weights generated from @ref CLTranspose */
44 {
45 public:
46  //Inherited method override
47  void run() override
48  {
49  _output.allocator()->allocate();
50  _func.run();
51  _reshape_run = true;
52  }
53 
54  //Inherited method override
55  void release() override
56  {
57  _output.allocator()->free();
58  }
59 
60  //Inherited method override
61  ICLTensor *get_weights() override
62  {
63  return &_output;
64  }
65 
66  //Inherited method override
67  uint32_t uid() override
68  {
69  return _uid;
70  }
71 
72  /** Configures the @ref CLTranspose function
73  *
74  * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
75  */
76  void configure(const ICLTensor *input)
77  {
78  configure(CLKernelLibrary::get().get_compile_context(), input);
79  }
80  /** Configures the @ref CLTranspose function
81  *
82  * @param[in] compile_context The compile context to be used.
83  * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
84  */
85  void configure(const CLCompileContext &compile_context, const ICLTensor *input)
86  {
87  _func.configure(compile_context, input, &_output);
88  }
89 
90 private:
91  static constexpr uint32_t _uid = 0x0;
92  CLTensor _output{};
93  CLTranspose _func{};
94 };
95 } // namespace weights_transformations
96 
97 /** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
98  *
99  * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
100  * -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
101  * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
102  *
103  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
104  */
106 {
107 public:
108  /** Constructor */
109  CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
110  /** Prevent instances of this class from being copied (As this class contains pointers) */
112  /** Default move constructor */
114  /** Prevent instances of this class from being copied (As this class contains pointers) */
116  /** Default move assignment operator */
118  /** Set the input and output tensors.
119  *
120  * Valid data layouts:
121  * - NHWC
122  * - NCHW
123  *
124  * Valid data type configurations:
125  * |src0 |src1 |src2 |dst |
126  * |:--------------|:------------------|:------|:--------------|
127  * |F16 |F16 |F16 |F16 |
128  * |F32 |F32 |F32 |F32 |
129  * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
130  * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
131  *
132  * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
133  * @param[in] weights Weights tensor. The weights must be 2 dimensional.
134  * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
135  * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
136  * Data type supported: Same as @p input.
137  * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
138  * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
139  * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
140  * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
141  * Data type supported: Same as @p input.
142  * @param[in] fc_info (Optional) Fully connected layer additional info
143  */
144  void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
146  /** Set the input and output tensors.
147  *
148  * @param[in] compile_context The compile context to be used.
149  * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
150  * @param[in] weights Weights tensor. The weights must be 2 dimensional.
151  * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
152  * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
153  * Data type supported: Same as @p input.
154  * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input.
155  * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
156  * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
157  * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
158  * Data type supported: Same as @p input.
159  * @param[in] fc_info (Optional) Fully connected layer additional info
160  */
161  void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
163  /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
164  *
165  * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
166  * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
167  * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
168  * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
169  * Data type supported: Same as @p input.
170  * @param[in] biases Bias tensor info. Can be nullptr. Data type supported:Same as @p input.
171  * @param[out] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
172  * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
173  * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
174  * Data type supported: Same as @p input.
175  * @param[in] fc_info (Optional) Fully connected layer additional info
176  *
177  * @return a status
178  */
179  static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
181 
182  //Inherited methods override
183  void run() override;
184  void prepare() override;
185 
186 private:
187  void configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
188  void configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
189  void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
190 
191  MemoryGroup _memory_group;
192  IWeightsManager *_weights_manager;
193  CLConvertFullyConnectedWeights _convert_weights;
196  CLFlattenLayer _flatten_layer;
197  CLTranspose _reshape_weights_function;
198  CLGEMM _mm_gemm;
199  CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
200  CLTensor _flatten_output;
201  CLTensor _converted_weights_output;
202  CLTensor _reshape_weights_output;
203  bool _are_weights_converted;
204  bool _are_weights_reshaped;
205  bool _is_fc_after_conv;
206  bool _is_quantized;
207  bool _is_prepared;
208  const ICLTensor *_original_weights;
209 };
210 } // namespace arm_compute
211 #endif /* ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H */
Basic function to compute a Fully Connected layer on OpenCL.
Base class for all functions.
Definition: IFunction.h:30
CLFullyConnectedLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr, IWeightsManager *weights_manager=nullptr)
Constructor.
Basic function to manage the reshape weights generated from CLTranspose.
uint32_t uid() override
Function that returns a unique id of the reshape function.
void configure(const ICLTensor *input)
Configures the CLTranspose function.
Fully connected layer info.
Definition: Types.h:1541
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
Status class.
Definition: Error.h:52
void prepare() override
Prepare the function for executing.
Copyright (c) 2017-2021 Arm Limited.
Basic function to run an opencl::kernels::ClConvertFullyConnectedWeightsKernel.
Basic function to execute flatten.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Set the input and output tensors.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, FullyConnectedLayerInfo fc_info=FullyConnectedLayerInfo())
Static function to check if given info will lead to a valid configuration of CLFullyConnectedLayer.
Basic function to execute GEMM on OpenCL.
Definition: CLGEMM.h:108
void run() override
Run the kernels contained in the function.
Definition: CLTranspose.cpp:66
ICLTensor * get_weights() override
Get a pointer to the transformed weights.
void configure(const CLCompileContext &compile_context, const ICLTensor *input)
Configures the CLTranspose function.
void run() override
Run the kernels contained in the function.
Weights manager interface to handle weights transformations.
CLCompileContext class.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
Basic function to execute an opencl::kernels::ClTransposeKernel.
Definition: CLTranspose.h:39
CLFullyConnectedLayer & operator=(const CLFullyConnectedLayer &)=delete
Prevent instances of this class from being copied (As this class contains pointers)
void free() override
Free allocated OpenCL memory.
Weights tensor transform interface In order to identify the different reshape functions,...
Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
Definition: CLTranspose.cpp:47
Basic implementation of the OpenCL tensor interface.
Definition: CLTensor.h:41