Compute Library
 21.02
NEConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"

#include <cmath>
#include <tuple>
#include <utility>
39 
40 namespace arm_compute
41 {
42 NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) //NOLINT
43  : _memory_manager(std::move(memory_manager)),
44  _function()
45 {
46 }
47 
48 void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
49  const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
50 {
51  // Perform validate step
52  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
53  ARM_COMPUTE_UNUSED(num_groups);
54  ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
55  enable_fast_math, num_groups));
56 
57  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
58  switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math))
59  {
61  {
62  auto f = std::make_unique<NEWinogradConvolutionLayer>(_memory_manager);
63  f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
64  _function = std::move(f);
65  break;
66  }
68  {
69  auto f = std::make_unique<NEGEMMConvolutionLayer>(_memory_manager);
70  f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
71  _function = std::move(f);
72  break;
73  }
75  {
76  auto f = std::make_unique<NEGEMMConv2d>(_memory_manager);
77  f->configure(input, weights, biases, output, info);
78  _function = std::move(f);
79  break;
80  }
82  {
83  auto f = std::make_unique<NEDirectConvolutionLayer>(_memory_manager);
84  f->configure(input, weights, biases, output, conv_info, act_info);
85  _function = std::move(f);
86  break;
87  }
89  {
90  auto f = std::make_unique<NEFFTConvolutionLayer>(_memory_manager);
91  f->configure(input, weights, biases, output, conv_info, act_info);
92  _function = std::move(f);
93  break;
94  }
95  default:
96  ARM_COMPUTE_ERROR("Not supported.");
97  break;
98  }
99 }
100 
102  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
103 {
104  ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon");
105 
106  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
107  switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math))
108  {
110  ARM_COMPUTE_RETURN_ON_ERROR(NEWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math));
111  break;
113  ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info));
114  break;
116  ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConv2d::validate(input, weights, biases, output, info));
117  break;
119  ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info));
120  break;
122  ARM_COMPUTE_RETURN_ON_ERROR(NEFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info));
123  break;
124  default:
125  ARM_COMPUTE_ERROR("Not supported.");
126  break;
127  }
128 
129  return Status{};
130 }
131 
133  const ITensorInfo *output, const PadStrideInfo &conv_info,
134  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
135 {
136  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights);
137  ARM_COMPUTE_UNUSED(weights_info);
138 
142 
143  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, 1);
144 
145  /* Input spatial dims, kernel size, IFM/OFM, conv info*/
146  using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo>;
147  using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
148 
149  const std::vector<ConfigurationMethod> known_configs =
150  {
151  // Alexnet
152  ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::GEMM),
153  // VGG16 / VGG19
154  ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::GEMM),
155  // Mobilenet 224
156  ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM),
157  // Mobilenet 160
158  ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM)
159  };
160 
161  const auto find_config = [&](ConfigurationMethod c)
162  {
163  const ConvolutionConfiguration config = c.first;
164  const PadStrideInfo info = std::get<3>(config);
165 
166  return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
167  && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
168  && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
169  };
170 
171  std::vector<ConfigurationMethod>::const_iterator found;
172  if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
173  {
174  return (*found).second;
175  }
176 
177  if(dilation != Size2D(1U, 1U))
178  {
180  }
181  else
182  {
183  // SRGAN
184  // Output might not be initialized when it is an internal tensor of the layer using the convolution
185  if(input->total_size() > 1e7 && (weights->dimension(idx_h) > 7)
186  && (NEDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
187  {
189  }
190  if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (NEFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
191  {
192  return ConvolutionMethod::FFT;
193  }
194  if(input->dimension(idx_c) < 16)
195  {
197  }
198 
199 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
200  // This heuristics only applies to F16 data type on A55r1
201  if(NEScheduler::get().cpu_info().get_cpu_model() == CPUModel::A55r1 && enable_fast_math && input->data_type() == DataType::F16)
202  {
203  // Exclude known bad winograd configs (and defaults to GEMM)
204  const std::vector<ConvolutionConfiguration> known_bad_winograd_f16_with_fastmath_configs =
205  {
206  // Squeezenet_V1_1 fire2 and fire3
207  ConvolutionConfiguration(Size2D(56U, 56U), Size2D(3U, 3U), Size2D(16U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)),
208  // Squeezenet_V1_1 fire6 and fire7
209  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(48U, 192U), PadStrideInfo(1U, 1U, 1U, 1U)),
210  // Squeezenet_V1_1 fire8 and fire9
211  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(64U, 256U), PadStrideInfo(1U, 1U, 1U, 1U)),
212  };
213  const auto find_conv_config = [&](ConvolutionConfiguration c)
214  {
215  const PadStrideInfo info = std::get<3>(c);
216 
217  return std::get<0>(c) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(c) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
218  && std::get<2>(c) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
219  && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
220  };
221 
222  bool found_bad = std::find_if(known_bad_winograd_f16_with_fastmath_configs.begin(), known_bad_winograd_f16_with_fastmath_configs.end(),
223  find_conv_config)
224  != known_bad_winograd_f16_with_fastmath_configs.end();
225  if(found_bad)
226  {
228  }
229  }
230 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
231  // For 1x1 convolutions run the default GEMM
232  if(weights->dimension(idx_w) == 1 && weights->dimension(idx_h) == 1)
233  {
235  }
236 
237  if(bool(NEWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
238  {
240  }
241  if(bool(NEGEMMConv2d::validate(input, weights, nullptr, output, info)))
242  {
244  }
246  }
247 }
248 
250 {
251  prepare();
252  _function->run();
253 }
254 
256 {
257  _function->prepare();
258 }
259 } // namespace arm_compute
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
CPUInfo & cpu_info()
Get CPU info.
Definition: IScheduler.cpp:42
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEFFTConvolutionLayer.
unsigned int pad_top() const
Get the top padding.
Definition: Types.h:806
Status class.
Definition: Error.h:52
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:138
Activation Layer Information class.
Definition: Types.h:1550
Interface for Neon tensor.
Definition: ITensor.h:36
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEGEMMConvolutionLayer.
Copyright (c) 2017-2021 Arm Limited.
CPUModel get_cpu_model(unsigned int cpuid) const
Gets the cpu model for a given cpuid.
Definition: CPPTypes.cpp:68
1 channel, 1 F16 per channel
Convolution Layer Weights Information class.
Definition: Types.h:1765
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const Conv2dInfo &info)
Static function to check if given info will lead to a valid configuration of NEGEMMConv2d.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEGEMMConvolutionLayer.
NEConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
const unsigned int num_groups
Definition: Im2Col.cpp:153
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:770
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Definition: Types.h:801
Padding and stride information class.
Definition: Types.h:722
Descriptor used by the Convolution function.
Convolution using Winograd.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by NEConvolutionLayer.
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
unsigned int pad_bottom() const
Get the bottom padding.
Definition: Types.h:811
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEDirectConvolutionLayer...
unsigned int pad_left() const
Get the left padding.
Definition: Types.h:796
Convolution using GEMM.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of NEConvolutionLayer.
void prepare() override
Prepare the function for executing.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:94