Compute Library 22.05
ClConv2d.cpp
/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/gpu/cl/operators/ClConv2d.h"

#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
#include "src/gpu/cl/operators/ClDirectConv2d.h"
#include "src/gpu/cl/operators/ClGemmConv2d.h"
#include "src/gpu/cl/operators/ClWinogradConv2d.h"

#include "src/common/utils/Log.h"

#include <memory>

namespace
{
/** Get the kernel-size threshold for using the direct convolution method with the NHWC data layout.
 *
 * @note Direct convolution should be executed when the kernel's spatial dimensions are greater than or equal to the value returned by this function
 *
 * @param[in] gpu_target GPU target
 *
 * @return the kernel-size threshold for using the direct convolution method with the NHWC data layout
 */
size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target)
{
    switch(gpu_target)
    {
        case arm_compute::GPUTarget::G76:
        case arm_compute::GPUTarget::G77:
        case arm_compute::GPUTarget::G78:
            return 5;
        case arm_compute::GPUTarget::G71:
        case arm_compute::GPUTarget::G72:
        case arm_compute::GPUTarget::MIDGARD:
        case arm_compute::GPUTarget::BIFROST:
            return 7;
        default:
            return 5;
    }
}
} // namespace

namespace arm_compute
{
namespace opencl
{

ClConv2d::ClConv2d()
    : _operator()
{
}

ClConv2d::~ClConv2d() = default;

void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                         const WeightsInfo &weights_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, ((biases != nullptr) ? biases : nullptr), dst, conv2d_info, weights_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);

    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target()))
    {
        case ConvolutionMethod::WINOGRAD:
        {
            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
            ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0);
            auto f = std::make_unique<ClWinogradConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math);
            _operator = std::move(f);
            break;
        }
        case ConvolutionMethod::DIRECT:
        {
            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
            ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0);
            auto f = std::make_unique<ClDirectConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info);
            _operator = std::move(f);
            break;
        }
        case ConvolutionMethod::GEMM:
        {
            auto f = std::make_unique<ClGemmConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info);
            _operator = std::move(f);
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }
    _aux_mem = _operator->workspace();
}

Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                          const WeightsInfo &weights_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");

    const GPUTarget gpu_target = CLScheduler::get().target();

    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target))
    {
        case ConvolutionMethod::WINOGRAD:
        {
            // Validate Winograd
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported");
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClWinogradConv2d does not support PostOps");
            ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math));
            break;
        }
        case ConvolutionMethod::DIRECT:
        {
            // Validate direct convolution layer
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported");
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClDirectConv2d does not support PostOps");
            ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info));
            break;
        }
        case ConvolutionMethod::GEMM:
        {
            // Validate gemm-based convolution layer
            ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info));
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }

    return Status{};
}

ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                                                   const WeightsInfo &weights_info, const GPUTarget gpu_target)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
    ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
    ARM_COMPUTE_UNUSED(weights_info);

    const PadStrideInfo       conv_info        = conv2d_info.conv_info;
    const ActivationLayerInfo act_info         = conv2d_info.act_info;
    const Size2D              dilation         = conv2d_info.dilation;
    bool                      enable_fast_math = conv2d_info.enable_fast_math;

    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
    const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);

    /* Input spatial dims, kernel size, IFM/OFM, conv info */
    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;

    const std::vector<ConfigurationMethod> known_configs =
    {
        // Alexnet
        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        // VGG16 / VGG19
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        // Mobilenet 224
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        // Mobilenet 160
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        // Mobilenet 224
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
        // Mobilenet 160
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
    };

    const auto find_config = [&](ConfigurationMethod c)
    {
        const ConvolutionConfiguration config      = c.first;
        const PadStrideInfo            info        = std::get<3>(config);
        const DataLayout               data_layout = std::get<4>(config);

        return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
               && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout());
    };

    std::vector<ConfigurationMethod>::const_iterator found;
    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
    {
        return (*found).second;
    }

    if(dilation != Size2D(1U, 1U))
    {
        return ConvolutionMethod::GEMM;
    }
    else
    {
        if(src->data_layout() == DataLayout::NCHW)
        {
            // SRGAN
            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
               && (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)))
            {
                return ConvolutionMethod::DIRECT;
            }
            if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)))
            {
                return ConvolutionMethod::FFT;
            }
            if(src->dimension(idx_c) < 16)
            {
                return ConvolutionMethod::GEMM;
            }
            return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
        }
        else
        {
            const bool   is_direct_valid           = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info));
            const bool   is_wino_valid             = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math));
            const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);

            // SRGAN case
            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
               && is_direct_valid)
            {
                return ConvolutionMethod::DIRECT;
            }

            // Floating-point case: GeMM/Direct/Winograd
            if(is_data_type_float(src->data_type()))
            {
                // Get dst shape
                const TensorShape output_shape       = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), weights->tensor_shape(), conv_info);
                const bool        is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);
                const bool        is_ifm_ge_16       = src->dimension(idx_c) >= 16;
                const bool        is_ofm_lte_8       = weights->dimension(3U) <= 8;
                const bool        workload_gte_8192  = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
                const bool        is_ifm_gt_ofm      = src->dimension(idx_c) > weights->dimension(3U);

                // Run Winograd if valid and IFM >= 16
                if(is_wino_valid && is_ifm_ge_16)
                {
                    return ConvolutionMethod::WINOGRAD;
                }

                // Direct convolution case
                if(is_direct_valid)
                {
                    if((gpu_target == arm_compute::GPUTarget::G71 ||
                        gpu_target == arm_compute::GPUTarget::G72 ||
                        gpu_target == arm_compute::GPUTarget::MIDGARD))
                    {
                        if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)
                        {
                            return ConvolutionMethod::DIRECT;
                        }
                    }
                    else
                    {
                        if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))
                        {
                            return ConvolutionMethod::DIRECT;
                        }
                    }
                }

                // Default case
                return ConvolutionMethod::GEMM;
            }

            // Generic case for quantized. Only GeMM
            return ConvolutionMethod::GEMM;
        }
    }
}

void ClConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);
    _operator->run(tensors);
}

void ClConv2d::prepare(ITensorPack &tensors)
{
    _operator->prepare(tensors);
}

experimental::MemoryRequirements ClConv2d::workspace() const
{
    return _aux_mem;
}
} // namespace opencl
} // namespace arm_compute
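
The listing is easier to follow with a concrete call site in mind: descriptors are checked with validate() and the dispatch heuristic can be queried with get_convolution_method() before any kernels are built. The sketch below only exercises this metadata path; it is a minimal, illustrative example and not code taken from the library documentation. The tensor shapes, the 3x3/stride-1 layer, the default-constructed Conv2dInfo with field assignment, and the use of CLScheduler::default_init() are assumptions chosen for the example, and since ClConv2d is an internal operator (normally driven by the public CLConvolutionLayer runtime function) building it requires the Compute Library source tree on the include path.

// Minimal sketch: query the convolution-method heuristic and validate a hypothetical NHWC F32 layer.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "src/gpu/cl/operators/ClConv2d.h"

#include <iostream>

using namespace arm_compute;

int main()
{
    // Initialise the CL scheduler so that CLScheduler::get().target() reports the actual GPU.
    CLScheduler::get().default_init();

    // NHWC descriptors: shapes are [C, W, H, N] for src/dst and [IFM, Kw, Kh, OFM] for weights (example values).
    TensorInfo src(TensorShape(64U, 56U, 56U, 1U), 1, DataType::F32);
    TensorInfo weights(TensorShape(64U, 3U, 3U, 128U), 1, DataType::F32);
    TensorInfo dst(TensorShape(128U, 56U, 56U, 1U), 1, DataType::F32);
    src.set_data_layout(DataLayout::NHWC);
    weights.set_data_layout(DataLayout::NHWC);
    dst.set_data_layout(DataLayout::NHWC);

    // 3x3 convolution, stride 1, padding 1; dilation/activation/groups keep their defaults.
    Conv2dInfo conv2d_info{};
    conv2d_info.conv_info = PadStrideInfo(1, 1, 1, 1);

    // Ask the heuristic which backend it would dispatch to, then run the full validation path.
    const ConvolutionMethod method = opencl::ClConv2d::get_convolution_method(&src, &weights, &dst, conv2d_info,
                                                                              WeightsInfo(), CLScheduler::get().target());
    const Status            status = opencl::ClConv2d::validate(&src, &weights, nullptr, &dst, conv2d_info, WeightsInfo());

    std::cout << "Selected ConvolutionMethod: " << static_cast<int>(method)
              << ", validate(): " << (bool(status) ? "OK" : status.error_description()) << std::endl;
    return bool(status) ? 0 : 1;
}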