Compute Library
 23.11
CpuConv2d.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021, 2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 
29 #include "src/common/utils/Log.h"
35 
36 namespace arm_compute
37 {
38 namespace cpu
39 {
// Default constructor: value-initializes the _function member. No operator is selected here;
// configure() in this file is what assigns _function.
40 CpuConv2d::CpuConv2d() : _function()
41 {
42 }
43 
// Destructor is defaulted out-of-line in this source file rather than in the class declaration.
// NOTE(review): presumably so the operator types held through _function only need to be complete
// in this translation unit — confirm against CpuConv2d.h.
44 CpuConv2d::~CpuConv2d() = default;
45 
// CpuConv2d::configure (fragment): validates the arguments, asks get_convolution_method() which
// implementation to use, then creates and configures the matching operator and stores it in _function.
// NOTE(review): this is a numbered listing with gaps. Listing lines 46, 51, 53, 59, 70, 77, 84 and 91 are
// absent, so the first line of the signature, the declarations of weights_info and act_info (both used
// below), one statement, and the line in front of each switch branch (presumably the case labels) are
// not visible here.
47  ITensorInfo *weights,
48  const ITensorInfo *biases,
49  ITensorInfo *output,
50  const PadStrideInfo &conv_info,
52  const Size2D &dilation,
54  bool enable_fast_math,
55  unsigned int num_groups)
56 {
57  // Perform validate step
58  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
 // Run the static validate() of this class on the full argument set and raise if it reports an error.
60  ARM_COMPUTE_ERROR_THROW_ON(CpuConv2d::validate(input, weights, biases, output, conv_info, weights_info, dilation,
61  act_info, enable_fast_math, num_groups));
62 
63  ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info,
64  enable_fast_math, num_groups);
65 
 // Bundle the convolution parameters; among the visible branches only CpuGemmDirectConv2d consumes `info`.
66  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
 // Dispatch on the method chosen by the heuristic. num_groups is not passed to the heuristic.
67  switch (CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info,
68  enable_fast_math))
69  {
 // Winograd operator: configured without weights_info and dilation.
71  {
72  auto f = std::make_unique<CpuWinogradConv2d>();
73  f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
74  _function = std::move(f);
75  break;
76  }
 // GEMM operator: receives weights_info and dilation as separate arguments.
78  {
79  auto f = std::make_unique<CpuGemmConv2d>();
80  f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math);
81  _function = std::move(f);
82  break;
83  }
 // Direct-GEMM operator: receives the bundled Conv2dInfo descriptor instead of individual arguments.
85  {
86  auto f = std::make_unique<CpuGemmDirectConv2d>();
87  f->configure(input, weights, biases, output, info);
88  _function = std::move(f);
89  break;
90  }
 // Direct convolution operator: only conv_info and act_info are forwarded.
92  {
93  auto f = std::make_unique<CpuDirectConv2d>();
94  f->configure(input, weights, biases, output, conv_info, act_info);
95  _function = std::move(f);
96  break;
97  }
 // Any other method value is reported through ARM_COMPUTE_ERROR.
98  default:
99  ARM_COMPUTE_ERROR("Not supported.");
100  break;
101  }
102 
 // Keep a copy of the selected operator's workspace requirements; workspace() in this file returns it.
103  _aux_mem = _function->workspace();
104 }
105 
// CpuConv2d::validate (fragment): rejects grouped convolutions, then switches on the method chosen by
// get_convolution_method(); the visible fragments show per-method validate() calls inside that switch.
// Returns a default-constructed Status when nothing above returned first.
// NOTE(review): this is a numbered listing with gaps. Listing lines 106, 113, 123-124, 127-128, 131-132
// and 134-135 are absent, so the first line of the signature, the act_info declaration, the case labels
// and the opening part of each per-method validation statement are not visible here.
107  const ITensorInfo *weights,
108  const ITensorInfo *biases,
109  const ITensorInfo *output,
110  const PadStrideInfo &conv_info,
111  const WeightsInfo &weights_info,
112  const Size2D &dilation,
114  bool enable_fast_math,
115  unsigned int num_groups)
116 {
 // Only num_groups == 1 passes; any other value returns an error Status carrying this message.
117  ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon");
118 
 // NOTE(review): `info` is not referenced by any line visible in this listing; it may be used on one of
 // the missing lines (131-132) — confirm.
119  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
120  switch (CpuConv2d::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info,
121  enable_fast_math))
122  {
 // Remainder of the Winograd validation statement; its opening is on missing listing lines 123-124.
125  CpuWinogradConv2d::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math));
126  break;
 // Remainder of another validation statement (arguments end with dilation, act_info, enable_fast_math);
 // its opening is on missing listing lines 127-128.
129  dilation, act_info, enable_fast_math));
130  break;
 // The statements belonging to the next two branches are on missing listing lines 131-132 and 134-135.
133  break;
136  break;
137  default:
138  ARM_COMPUTE_ERROR("Not supported.");
139  break;
140  }
141 
 // Nothing above returned an error: hand back a default-constructed Status.
142  return Status{};
143 }
144 
// CpuConv2d::get_convolution_method (fragment): heuristic that selects the ConvolutionMethod which
// configure() and validate() in this file switch on. It first consults a table of known layer shapes,
// then evaluates a sequence of conditions on dilation, tensor sizes and operator validate() probes.
// NOTE(review): this is a numbered listing with gaps. Listing lines 145, 151, 155, 171, 175, 179-180,
// 184-185, 208, 217, 221, 258, 266, 271, 275 and 277 are absent, so the first line of the signature, the
// act_info declaration, the method stored with each table entry, and the statement inside every branch
// (presumably the return of the chosen method) cannot be read here. The comments below describe only
// the conditions that are visible.
146  const ITensorInfo *weights,
147  const ITensorInfo *output,
148  const PadStrideInfo &conv_info,
149  const WeightsInfo &weights_info,
150  const Size2D &dilation,
152  bool enable_fast_math)
153 {
154  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights);
156 
 // Resolve which tensor dimension holds width, height and channels for the input's data layout.
157  const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
158  const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
159  const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
160 
 // Descriptor with the group count fixed to 1; consumed by the CpuGemmDirectConv2d::validate() probe
 // near the end of the function.
161  const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, 1);
162 
163  /* Input spatial dims, kernel size, IFM/OFM, conv info*/
164  using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo>;
165  using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
166 
 // Table of layer shapes from specific networks, each paired with a fixed method.
 // NOTE(review): the method of every entry and the PadStrideInfo of the two Mobilenet entries sit on
 // missing listing lines.
167  const std::vector<ConfigurationMethod> known_configs = {
168  // Alexnet
169  ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U),
170  PadStrideInfo(1U, 1U, 2U, 2U)),
172  // VGG16 / VGG19
173  ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U),
174  PadStrideInfo(1U, 1U, 1U, 1U)),
176  // Mobilenet 224
177  ConfigurationMethod(
178  ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U),
181  // Mobilenet 160
182  ConfigurationMethod(
183  ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U),
186 
 // Predicate for the table lookup: true when the entry's input width/height, kernel width/height,
 // (weights channel dimension, weights dimension 3) pair, all four paddings and the stride equal those
 // of the current arguments. The local PadStrideInfo `info` shadows the Conv2dInfo `info` declared above.
187  const auto find_config = [&](ConfigurationMethod c)
188  {
189  const ConvolutionConfiguration config = c.first;
190  const PadStrideInfo info = std::get<3>(config);
191 
192  return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) &&
193  std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) &&
194  std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) &&
195  info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() &&
196  info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() &&
197  info.stride() == conv_info.stride();
198  };
199 
 // Search the table in order; a hit returns the method stored with that entry and skips everything below.
200  std::vector<ConfigurationMethod>::const_iterator found;
201  if ((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
202  {
203  return (*found).second;
204  }
205 
 // Non-unit dilation is handled on its own; the statement for this branch is on missing listing line 208.
206  if (dilation != Size2D(1U, 1U))
207  {
209  }
210  else
211  {
212  // SRGAN
213  // Output might not be initialized when it is an internal tensor of the layer using the convolution
 // Condition: input total_size() above 1e7, kernel height above 7, and CpuDirectConv2d::validate()
 // accepting the arguments without a bias. Branch statement is on missing listing line 217.
214  if (input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) &&
215  (CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
216  {
218  }
 // Condition: fewer than 16 input channels. Branch statement is on missing listing line 221.
219  if (input->dimension(idx_c) < 16)
220  {
222  }
223 
224 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
225  // This heuristic only applies to F16 data type on A55r1
226  if (NEScheduler::get().cpu_info().get_cpu_model() == CPUModel::A55r1 && enable_fast_math &&
227  input->data_type() == DataType::F16)
228  {
229  // Exclude known bad winograd configs (and defaults to GEMM)
230  const std::vector<ConvolutionConfiguration> known_bad_winograd_f16_with_fastmath_configs = {
231  // Squeezenet_V1_1 fire2 and fire3
232  ConvolutionConfiguration(Size2D(56U, 56U), Size2D(3U, 3U), Size2D(16U, 64U),
233  PadStrideInfo(1U, 1U, 1U, 1U)),
234  // Squeezenet_V1_1 fire6 and fire7
235  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(48U, 192U),
236  PadStrideInfo(1U, 1U, 1U, 1U)),
237  // Squeezenet_V1_1 fire8 and fire9
238  ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(64U, 256U),
239  PadStrideInfo(1U, 1U, 1U, 1U)),
240  };
 // Same shape/padding/stride comparison as find_config above, applied to a bare configuration tuple.
241  const auto find_conv_config = [&](ConvolutionConfiguration c)
242  {
243  const PadStrideInfo info = std::get<3>(c);
244 
245  return std::get<0>(c) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) &&
246  std::get<1>(c) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) &&
247  std::get<2>(c) == Size2D(weights->dimension(idx_c), weights->dimension(3)) &&
248  info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() &&
249  info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() &&
250  info.stride() == conv_info.stride();
251  };
252 
253  bool found_bad = std::find_if(known_bad_winograd_f16_with_fastmath_configs.begin(),
254  known_bad_winograd_f16_with_fastmath_configs.end(),
255  find_conv_config) != known_bad_winograd_f16_with_fastmath_configs.end();
 // Branch statement for a listed bad configuration is on missing listing line 258.
256  if (found_bad)
257  {
259  }
260  }
261 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
262 
263  // For 1x1 convolutions run the default GEMM
264  if (weights->dimension(idx_w) == 1 && weights->dimension(idx_h) == 1)
265  {
267  }
268 
 // Probe the Winograd operator without a bias; branch statement is on missing listing line 271.
269  if (bool(CpuWinogradConv2d::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
270  {
272  }
 // Probe the direct-GEMM operator with the Conv2dInfo built above; branch statement is on missing
 // listing line 275. The statement reached when neither probe succeeds is on missing listing line 277.
273  if (bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info)))
274  {
276  }
278  }
279 }
280 
// Body of CpuConv2d::run (the signature, listing line 281, is absent from this listing).
// Calls prepare() on every invocation and then runs the operator stored in _function on the same pack.
// NOTE(review): _function is dereferenced without a null check, so configure() presumably must have
// been called first — confirm against callers.
282 {
283  prepare(tensors);
284  _function->run(tensors);
285 }
286 
// Body of CpuConv2d::prepare (the signature, listing line 287, is absent from this listing).
// Forwards the tensor pack unchanged to the prepare() of the operator stored in _function.
288 {
289  _function->prepare(tensors);
290 }
291 
// Body of CpuConv2d::workspace (the signature, listing line 292, is absent from this listing).
// Returns _aux_mem, which configure() in this file fills from the selected operator's workspace().
293 {
294  return _aux_mem;
295 }
296 } // namespace cpu
297 } // namespace arm_compute
arm_compute::experimental::MemoryRequirements
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:123
arm_compute::WeightsInfo
Convolution Layer Weights Information class.
Definition: Types.h:1670
arm_compute::DataLayoutDimension::CHANNEL
@ CHANNEL
channel
arm_compute::test::validation::weights_info
weights_info
Definition: BatchNormalizationLayer.cpp:165
CpuGemmDirectConv2d.h
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:354
arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
arm_compute::cpu::CpuConv2d::CpuConv2d
CpuConv2d()
Constructor.
Definition: CpuConv2d.cpp:40
arm_compute::DataLayoutDimension::WIDTH
@ WIDTH
width
CpuGemmConv2d.h
arm_compute::Conv2dInfo
Descriptor used by the 2d Convolution function.
Definition: FunctionDescriptors.h:57
arm_compute::cpu::CpuConv2d::run
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Definition: CpuConv2d.cpp:281
arm_compute::cpu::CpuDirectConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
Definition: CpuDirectConv2d.cpp:101
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:205
arm_compute::ConvolutionMethod
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:91
arm_compute::ITensorInfo::dimension
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
arm_compute::ActivationLayerInfo
Activation Layer Information class.
Definition: ActivationLayerInfo.h:55
arm_compute::test::validation::act_info
act_info
Definition: DirectConvolutionLayer.cpp:547
arm_compute::cpu::CpuConv2d::configure
void configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Definition: CpuConv2d.cpp:46
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
arm_compute::cpu::CpuGemmDirectConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &info)
Static function to check if given info will lead to a valid configuration of CpuGemmDirectConv2d.
Definition: CpuGemmDirectConv2d.cpp:163
arm_compute::utils::cast::U
U
Definition: SaturateCast.h:65
NEFFTConvolutionLayer.h
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:39
arm_compute::cpu::CpuConv2d::get_convolution_method
static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by CpuConv2d.
Definition: CpuConv2d.cpp:145
arm_compute::DataLayoutDimension::HEIGHT
@ HEIGHT
height
CpuGemm.h
arm_compute::Scheduler::get
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:94
arm_compute::cpu::CpuConv2d::prepare
void prepare(ITensorPack &constants) override
Prepare the function for executing.
Definition: CpuConv2d.cpp:287
arm_compute::CPUModel::A55r1
@ A55r1
arm_compute::Status
Status class.
Definition: Error.h:52
CpuWinogradConv2d.h
arm_compute::ConvolutionMethod::WINOGRAD
@ WINOGRAD
Convolution using Winograd.
arm_compute::ConvolutionMethod::GEMM_CONV2D
@ GEMM_CONV2D
Direct 2D GEMM convolution.
arm_compute::ConvolutionMethod::GEMM
@ GEMM
Convolution using GEMM.
ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:151
arm_compute::PadStrideInfo
Definition: CoreTypes.h:139
arm_compute::ConvolutionMethod::DIRECT
@ DIRECT
Direct convolution.
arm_compute::get_data_layout_dimension_index
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Definition: Helpers.inl:201
NEScheduler.h
arm_compute::DimensionRoundingType::FLOOR
@ FLOOR
Floor rounding.
arm_compute::test::validation::num_groups
const unsigned int num_groups
Definition: Im2Col.cpp:153
ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:245
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::validation::conv_info
conv_info
Definition: DirectConvolutionLayer.cpp:547
arm_compute::DataType::F16
@ F16
16-bit floating-point number
Log.h
arm_compute::cpu::CpuConv2d::workspace
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
Definition: CpuConv2d.cpp:292
arm_compute::cpu::CpuConv2d::~CpuConv2d
~CpuConv2d()
Default destructor.
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:44
arm_compute::test::validation::info
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
CpuConv2d.h
ARM_COMPUTE_LOG_PARAMS
#define ARM_COMPUTE_LOG_PARAMS(...)
Definition: Log.h:35
arm_compute::cpu::CpuGemmConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration.
Definition: CpuGemmConv2d.cpp:646
CpuDirectConv2d.h
arm_compute::cpu::CpuWinogradConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CpuWinogradConv2d.
Definition: CpuWinogradConv2d.cpp:321
arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486
arm_compute::cpu::CpuConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CpuConv2d.
Definition: CpuConv2d.cpp:106