Arm Compute Library 22.11 — source listing of CpuConv2d.cpp.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
#include "src/cpu/operators/CpuConv2d.h"

#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
#include "src/common/utils/Log.h"
#include "src/cpu/operators/CpuDirectConv2d.h"
#include "src/cpu/operators/CpuGemmConv2d.h"
#include "src/cpu/operators/CpuGemmDirectConv2d.h"
#include "src/cpu/operators/CpuWinogradConv2d.h"
33 
34 namespace arm_compute
35 {
36 namespace cpu
37 {
39  : _function()
40 {
41 }
42 
// Out-of-line defaulted destructor — presumably required because _function holds
// types that are incomplete in the header (TODO confirm against CpuConv2d.h).
CpuConv2d::~CpuConv2d() = default;
44 
46  const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
47 {
48  // Perform validate step
49  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
50  ARM_COMPUTE_UNUSED(num_groups);
51  ARM_COMPUTE_ERROR_THROW_ON(CpuConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info,
52  enable_fast_math, num_groups));
53 
54  ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
55 
56  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
57  switch(CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math))
58  {
60  {
61  auto f = std::make_unique<CpuWinogradConv2d>();
62  f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
63  _function = std::move(f);
64  break;
65  }
67  {
68  auto f = std::make_unique<CpuGemmConv2d>();
69  f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math);
70  _function = std::move(f);
71  break;
72  }
74  {
75  auto f = std::make_unique<CpuGemmDirectConv2d>();
76  f->configure(input, weights, biases, output, info);
77  _function = std::move(f);
78  break;
79  }
81  {
82  auto f = std::make_unique<CpuDirectConv2d>();
83  f->configure(input, weights, biases, output, conv_info, act_info);
84  _function = std::move(f);
85  break;
86  }
87  default:
88  ARM_COMPUTE_ERROR("Not supported.");
89  break;
90  }
91 
92  _aux_mem = _function->workspace();
93 }
94 
95 Status CpuConv2d::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
96  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
97 {
98  ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon");
99 
100  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
101  switch(CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info, enable_fast_math))
102  {
104  ARM_COMPUTE_RETURN_ON_ERROR(CpuWinogradConv2d::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math));
105  break;
107  ARM_COMPUTE_RETURN_ON_ERROR(CpuGemmConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math));
108  break;
110  ARM_COMPUTE_RETURN_ON_ERROR(CpuGemmDirectConv2d::validate(input, weights, biases, output, info));
111  break;
113  ARM_COMPUTE_RETURN_ON_ERROR(CpuDirectConv2d::validate(input, weights, biases, output, conv_info, act_info));
114  break;
115  default:
116  ARM_COMPUTE_ERROR("Not supported.");
117  break;
118  }
119 
120  return Status{};
121 }
122 
124  const ITensorInfo *output, const PadStrideInfo &conv_info,
125  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
126 {
127  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights);
128  ARM_COMPUTE_UNUSED(weights_info);
129 
133 
134  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, 1);
135 
136  /* Input spatial dims, kernel size, IFM/OFM, conv info*/
137  using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo>;
138  using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
139 
140  const std::vector<ConfigurationMethod> known_configs =
141  {
142  // Alexnet
143  ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::GEMM),
144  // VGG16 / VGG19
145  ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::GEMM),
146  // Mobilenet 224
147  ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM),
148  // Mobilenet 160
149  ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM)
150  };
151 
152  const auto find_config = [&](ConfigurationMethod c)
153  {
154  const ConvolutionConfiguration config = c.first;
155  const PadStrideInfo info = std::get<3>(config);
156 
157  return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
158  && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
159  && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
160  };
161 
162  std::vector<ConfigurationMethod>::const_iterator found;
163  if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
164  {
165  return (*found).second;
166  }
167 
168  if(dilation != Size2D(1U, 1U))
169  {
171  }
172  else
173  {
174  // SRGAN
175  // Output might not be initialized when it is an internal tensor of the layer using the convolution
176  if(input->total_size() > 1e7 && (weights->dimension(idx_h) > 7)
177  && (CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
178  {
180  }
181  if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (NEFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
182  {
183  return ConvolutionMethod::FFT;
184  }
185  if(input->dimension(idx_c) < 16)
186  {
188  }
189 
190 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
191  // This heuristics only applies to F16 data type on A55r1
192  if(NEScheduler::get().cpu_info().get_cpu_model() == CPUModel::A55r1 && enable_fast_math && input->data_type() == DataType::F16)
193  {
194  // Exclude known bad winograd configs (and defaults to GEMM)
195  const std::vector<ConvolutionConfiguration> known_bad_winograd_f16_with_fastmath_configs =
196  {
197  // Squeezenet_V1_1 fire2 and fire3
198  ConvolutionConfiguration(Size2D(56U, 56U), Size2D(3U, 3U), Size2D(16U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)),
199  // Squeezenet_V1_1 fire6 and fire7
200  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(48U, 192U), PadStrideInfo(1U, 1U, 1U, 1U)),
201  // Squeezenet_V1_1 fire8 and fire9
202  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(64U, 256U), PadStrideInfo(1U, 1U, 1U, 1U)),
203  };
204  const auto find_conv_config = [&](ConvolutionConfiguration c)
205  {
206  const PadStrideInfo info = std::get<3>(c);
207 
208  return std::get<0>(c) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(c) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
209  && std::get<2>(c) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
210  && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride();
211  };
212 
213  bool found_bad = std::find_if(known_bad_winograd_f16_with_fastmath_configs.begin(), known_bad_winograd_f16_with_fastmath_configs.end(),
214  find_conv_config)
215  != known_bad_winograd_f16_with_fastmath_configs.end();
216  if(found_bad)
217  {
219  }
220  }
221 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
222  // For 1x1 convolutions run the default GEMM
223  if(weights->dimension(idx_w) == 1 && weights->dimension(idx_h) == 1)
224  {
226  }
227 
228  if(bool(CpuWinogradConv2d::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
229  {
231  }
232  if(bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info)))
233  {
235  }
237  }
238 }
239 
241 {
242  prepare(tensors);
243  _function->run(tensors);
244 }
245 
247 {
248  _function->prepare(tensors);
249 }
250 
252 {
253  return _aux_mem;
254 }
255 } // namespace cpu
256 } // namespace arm_compute
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CpuConv2d.
Definition: CpuConv2d.cpp:95
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
CpuConv2d()
Constructor.
Definition: CpuConv2d.cpp:38
Store the tensor&#39;s metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
CPUInfo & cpu_info()
Get CPU info.
Definition: IScheduler.cpp:41
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of NEFFTConvolutionLayer.
unsigned int pad_top() const
Get the top padding.
Definition: Types.h:753
Status class.
Definition: Error.h:52
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:134
Activation Layer Information class.
Definition: Types.h:1639
Copyright (c) 2017-2022 Arm Limited.
CPUModel get_cpu_model(unsigned int cpuid) const
Gets the cpu model for a given cpuid.
Definition: CPPTypes.cpp:119
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:134
1 channel, 1 F16 per channel
Convolution Layer Weights Information class.
Definition: Types.h:2073
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Definition: CpuConv2d.cpp:240
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Definition: CpuConv2d.cpp:246
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &info)
Static function to check if given info will lead to a valid configuration of CpuGemmDirectConv2d.
const unsigned int num_groups
Definition: Im2Col.cpp:153
void configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Definition: CpuConv2d.cpp:45
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
Definition: Types.h:717
unsigned int pad_right() const
Get the right padding.
Definition: Types.h:748
Padding and stride information class.
Definition: Types.h:669
static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by CpuConv2d.
Definition: CpuConv2d.cpp:123
Descriptor used by the 2d Convolution function.
Convolution using Winograd.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CpuWinogradConv2d.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:193
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
Tensor packing service.
Definition: ITensorPack.h:39
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
Definition: CpuConv2d.cpp:251
unsigned int pad_bottom() const
Get the bottom padding.
Definition: Types.h:758
unsigned int pad_left() const
Get the left padding.
Definition: Types.h:743
Convolution using GEMM.
~CpuConv2d()
Default destructor.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:94