Compute Library
 23.11
CLConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 
32 #include "src/common/utils/Log.h"
33 #include "src/core/CL/ICLKernel.h"
36 #include "support/Cast.h"
37 
38 namespace arm_compute
39 {
41 using namespace arm_compute::experimental;
// Private state of CLConvolutionLayer, reached through `_impl`.
// configure() fills in either `op` (operator path) or `func` (function path);
// run() and prepare() check `func` to decide which of the two to drive.
// NOTE(review): configure() and prepare() also use `_impl->aux_mem_req`, but no
// such member is visible here - listing line 50 is absent from this extract.
// Presumably it is an experimental::MemoryRequirements member; confirm against
// the real source file.
42 struct CLConvolutionLayer::Impl
43 {
// Memory group rebuilt in configure() on the operator path and scoped in run().
44  MemoryGroup memory_group{};
// Memory manager received by the constructor; configure() either passes it to
// the CLFFTConvolutionLayer it creates or moves it into `memory_group`.
45  std::shared_ptr<IMemoryManager> memory_manager{};
// Operator backend (an opencl::ClConv2d is stored here by configure()).
46  std::unique_ptr<opencl::IClOperator> op{nullptr};
// Tensors handed to op->run(): source, weights, biases and destination.
47  ITensorPack run_pack{};
// Tensors handed to op->prepare(): weights and biases.
48  ITensorPack prep_pack{};
// Auxiliary tensors returned by manage_workspace() in configure().
49  WorkspaceData<CLTensor> workspace{};
// Function backend (a CLFFTConvolutionLayer is stored here by configure()).
51  std::unique_ptr<IFunction> func{nullptr};
52 };
53 
// Constructs the layer: allocates the private Impl and stores the given memory
// manager in it for later use by configure().
// memory_manager is taken by value and moved into the Impl; the declaration
// listed in the index defaults it to nullptr, so it may be empty.
54 CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) : _impl(std::make_unique<Impl>())
55 {
56  _impl->memory_manager = std::move(memory_manager);
57 }
58 
60 
// configure() overload that takes no compile context.
// NOTE(review): the start of the signature (listing line 61, presumably
// `void CLConvolutionLayer::configure(ICLTensor *input,`) and the
// `weights_info` / `act_info` parameters (listing lines 66 and 68) are missing
// from this extract; the index entry for CLConvolutionLayer::configure shows
// the full parameter list.
62  const ICLTensor *weights,
63  const ICLTensor *biases,
64  ICLTensor *output,
65  const PadStrideInfo &conv_info,
67  const Size2D &dilation,
69  bool enable_fast_math,
70  unsigned int num_groups)
71 {
// Forward every argument unchanged to the overload that takes a
// CLCompileContext, using the context provided by the CLKernelLibrary singleton.
72  configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info,
73  dilation, act_info, enable_fast_math, num_groups);
74 }
75 
// Configures the layer with an explicit compile context: checks the arguments,
// asks opencl::ClConv2d which convolution method to use, creates the matching
// backend and, on the operator path, sets up the tensor packs and workspace.
// NOTE(review): the `input`, `weights_info` and `act_info` parameters (listing
// lines 77, 82 and 84) are missing from this extract.
76 void CLConvolutionLayer::configure(const CLCompileContext &compile_context,
78  const ICLTensor *weights,
79  const ICLTensor *biases,
80  ICLTensor *output,
81  const PadStrideInfo &conv_info,
83  const Size2D &dilation,
85  bool enable_fast_math,
86  unsigned int num_groups)
87 {
// biases is not part of the null check; it is treated as optional below.
88  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
// NOTE(review): the opening of this call (listing line 89) is missing; the
// arguments match CLConvolutionLayer::validate(), so this is presumably
// ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(... - confirm.
90  input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info,
91  weights_info, dilation, act_info, enable_fast_math, num_groups));
92  ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info,
93  enable_fast_math, num_groups);
94 
// Bundle the convolution parameters into the descriptor used by ClConv2d.
95  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
96 
// Dispatch on the selected convolution method.
// NOTE(review): the remaining switch arguments (listing line 98) and all case
// labels (listing lines 100-103 and 111) are missing from this extract.
97  switch (opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info,
99  {
104  {
// Operator path: ClConv2d is configured from tensor infos only; the actual
// tensors are supplied later through run_pack / prep_pack.
105  auto f = std::make_unique<opencl::ClConv2d>();
106  f->configure(compile_context, input->info(), weights->info(),
107  ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info);
108  _impl->op = std::move(f);
109  break;
110  }
112  {
// Function path: CLFFTConvolutionLayer receives the memory manager and the
// tensors directly. weights_info, dilation and num_groups are not passed on.
113  auto f = std::make_unique<CLFFTConvolutionLayer>(_impl->memory_manager);
114  f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
115  _impl->func = std::move(f);
116  break;
117  }
118  default:
119  ARM_COMPUTE_ERROR("Not supported.");
120  break;
121  }
122 
// Only the operator path needs the packs and workspace set up here.
123  if (_impl->op)
124  {
// The memory manager is moved into the new memory group, so
// _impl->memory_manager is left in a moved-from state after this line.
125  _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
126  _impl->aux_mem_req = _impl->op->workspace();
127  _impl->run_pack = {{ACL_SRC_0, input}, {ACL_SRC_1, weights}, {ACL_SRC_2, biases}, {ACL_DST, output}};
128  _impl->prep_pack = {{ACL_SRC_1, weights}, {ACL_SRC_2, biases}};
// manage_workspace() is given the operator's memory requirements, the memory
// group and both packs; its result is kept so prepare() can release from it.
129  _impl->workspace =
130  manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
131  }
132 }
133 
// Static check of whether the given tensor infos and parameters form a valid
// CLConvolutionLayer configuration. Returns an empty Status{} when every check
// passes; failing checks return early through the ARM_COMPUTE_RETURN_* macros.
// NOTE(review): the start of the signature (listing line 134, presumably
// `Status CLConvolutionLayer::validate(const ITensorInfo *input,`) and the
// `act_info` parameter (listing line 141) are missing from this extract.
135  const ITensorInfo *weights,
136  const ITensorInfo *biases,
137  const ITensorInfo *output,
138  const PadStrideInfo &conv_info,
139  const WeightsInfo &weights_info,
140  const Size2D &dilation,
142  bool enable_fast_math,
143  unsigned int num_groups)
144 {
// biases is not part of the null check and is forwarded as-is below.
145  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
146  ARM_COMPUTE_RETURN_ERROR_ON_MSG(!weights->are_values_constant(), "Dynamic weights are not supported");
// NOTE(review): the macro name and condition for the next message (listing
// line 147) are missing; judging by the message it rejects num_groups != 1
// combined with an NHWC data layout - confirm against the real source.
148  "Grouping (num_groups != 1) with NHWC data layout is not supported");
149 
// The method selection depends on the GPU target reported by the scheduler.
150  const GPUTarget gpu_target = CLScheduler::get().target();
151  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
152 
// NOTE(review): the case labels (listing lines 155-158 and 164) and the
// openings of both wrapped calls (listing lines 160 and 167) are missing.
153  switch (opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target))
154  {
159  {
// Operator path: defer to ClConv2d's own validation.
161  opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info));
162  break;
163  }
165  {
166  // Validate FFT-based convolution layer
168  act_info, enable_fast_math));
169  break;
170  }
171  default:
// NOTE(review): unlike the checks above, which return a Status, this branch
// uses ARM_COMPUTE_ERROR, which the index describes as printing the message
// and throwing std::runtime_error.
172  ARM_COMPUTE_ERROR("Not supported.");
173  break;
174  }
175 
176  return Status{};
177 }
178 
// Static query that reports which ConvolutionMethod would be chosen for the
// given tensor infos and parameters, by delegating to
// opencl::ClConv2d::get_convolution_method().
// NOTE(review): the start of the signature (listing line 179) and the
// `act_info` parameter (listing line 184) are missing from this extract; the
// index entry for CLConvolutionLayer::get_convolution_method shows them.
180  const ITensorInfo *weights,
181  const ITensorInfo *output,
182  const PadStrideInfo &conv_info,
183  const WeightsInfo &weights_info,
185  const GPUTarget gpu_target,
186  const Size2D &dilation,
187  bool enable_fast_math)
188 {
// This function has no num_groups parameter; the literal 1 sits in the
// position where configure()/validate() pass num_groups to Conv2dInfo.
189  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1);
190  return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target);
191 }
192 
// Body of CLConvolutionLayer::run() (the index places its definition at
// listing line 193, which is missing from this extract).
// Prepares the layer, then executes whichever backend configure() created.
194 {
// prepare() is invoked on every run() call.
195  prepare();
196 
// Scope object built from the memory group; it lives until run() returns.
197  MemoryGroupResourceScope scope_mg(_impl->memory_group);
198 
199  if (_impl->func)
200  {
// Function path (set by configure() when it creates a CLFFTConvolutionLayer).
201  _impl->func->run();
202  }
203  else
204  {
// Operator path: the operator receives its tensors through run_pack.
// `op` is dereferenced without a null check, so configure() must have
// populated it if `func` is empty.
205  _impl->op->run(_impl->run_pack);
206  }
207 }
208 
// Body of CLConvolutionLayer::prepare() (the index places its definition at
// listing line 209, which is missing from this extract).
// Runs the preparation stage of whichever backend configure() created.
// NOTE(review): nothing visible here guards against repeated preparation even
// though run() calls prepare() every time; presumably the backends and
// release_temporaries() tolerate repeated calls - confirm.
210 {
211  if (_impl->func)
212  {
213  _impl->func->prepare();
214  }
215  else
216  {
// Operator path: prepare with the weights/biases pack built in configure().
217  _impl->op->prepare(_impl->prep_pack);
218 
219  // Release temporary tensors that are only used in prepare stage
220  release_temporaries(_impl->aux_mem_req, _impl->workspace);
221  }
222 }
223 } // namespace arm_compute
arm_compute::DataLayout::NCHW
@ NCHW
Num samples, channels, height, width.
Cast.h
arm_compute::experimental::MemoryRequirements
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:123
arm_compute::WeightsInfo
Convolution Layer Weights Information class.
Definition: Types.h:1670
arm_compute::ConvolutionMethod::FFT
@ FFT
Convolution using FFT.
ICLTensor.h
arm_compute::opencl::ClConv2d::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Static function to check if given info will lead to a valid configuration of ClConv2d.
Definition: ClConv2d.cpp:134
arm_compute::test::validation::weights_info
weights_info
Definition: BatchNormalizationLayer.cpp:165
arm_compute::CLConvolutionLayer::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Static function to check if given info will lead to a valid configuration of CLConvolutionLayer.
Definition: CLConvolutionLayer.cpp:134
arm_compute::CLConvolutionLayer::CLConvolutionLayer
CLConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
Definition: CLConvolutionLayer.cpp:54
arm_compute::ConvolutionMethod::INDIRECT
@ INDIRECT
Indirect convolution.
CLConvolutionLayer.h
arm_compute::ICLTensor
Interface for OpenCL tensor.
Definition: ICLTensor.h:41
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:354
arm_compute::Size2D
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
arm_compute::opencl::ClConv2d::get_convolution_method
static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info, const GPUTarget gpu_target)
Static function to check if given info will return the convolution called by ClConv2d.
Definition: ClConv2d.cpp:190
arm_compute::CLConvolutionLayer::prepare
void prepare() override
Prepare the function for executing.
Definition: CLConvolutionLayer.cpp:209
arm_compute::ACL_SRC_0
@ ACL_SRC_0
Definition: Types.h:45
arm_compute::ACL_SRC_1
@ ACL_SRC_1
Definition: Types.h:46
arm_compute::CLKernelLibrary::get
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Definition: CLKernelLibrary.cpp:41
arm_compute::ACL_SRC_2
@ ACL_SRC_2
Definition: Types.h:47
CLKernelLibrary.h
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
arm_compute::Conv2dInfo
Descriptor used by the 2d Convolution function.
Definition: FunctionDescriptors.h:57
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:205
arm_compute::ConvolutionMethod
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:91
arm_compute::ActivationLayerInfo
Activation Layer Information class.
Definition: ActivationLayerInfo.h:55
arm_compute::test::validation::act_info
act_info
Definition: DirectConvolutionLayer.cpp:547
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
arm_compute::CLConvolutionLayer::run
void run() override
Run the kernels contained in the function.
Definition: CLConvolutionLayer.cpp:193
arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
ICLKernel.h
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:204
arm_compute::release_temporaries
void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData< TensorType > &workspace)
Utility function to release tensors with lifetime marked as Prepare.
Definition: MemoryHelpers.h:122
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::Status
Status class.
Definition: Error.h:52
arm_compute::CLConvolutionLayer::~CLConvolutionLayer
~CLConvolutionLayer()
Default Destructor.
arm_compute::ConvolutionMethod::WINOGRAD
@ WINOGRAD
Convolution using Winograd.
CLFFTConvolutionLayer.h
arm_compute::ConvolutionMethod::GEMM
@ GEMM
Convolution using GEMM.
arm_compute::CLFFTConvolutionLayer::validate
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
Definition: CLFFTConvolutionLayer.cpp:294
arm_compute::CLConvolutionLayer::configure
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1)
Set the input and output tensors.
Definition: CLConvolutionLayer.cpp:61
arm_compute::PadStrideInfo
Definition: CoreTypes.h:139
arm_compute::CLScheduler::get
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:112
arm_compute::ConvolutionMethod::DIRECT
@ DIRECT
Direct convolution.
arm_compute::CLConvolutionLayer::get_convolution_method
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation=Size2D(1U, 1U), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by CLConvolutionLayer.
Definition: CLConvolutionLayer.cpp:179
arm_compute::CLScheduler::target
GPUTarget target() const
Get the target GPU.
Definition: CLScheduler.cpp:46
MemoryHelpers.h
arm_compute::experimental
Definition: Types.h:83
arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
ShapeCalculator.h
arm_compute::test::validation::num_groups
const unsigned int num_groups
Definition: Im2Col.cpp:153
KernelDescriptors.h
ClConv2d.h
ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:245
arm_compute::MemoryGroupResourceScope
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::test::validation::conv_info
conv_info
Definition: DirectConvolutionLayer.cpp:547
Log.h
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:44
arm_compute::misc::shape_calculator
Definition: ShapeCalculator.h:41
arm_compute::ITensorInfo::are_values_constant
virtual bool are_values_constant() const =0
Flag indicating whether the values of the tensor are constant, meaning that they do not change between kernel/function executions.
ARM_COMPUTE_LOG_PARAMS
#define ARM_COMPUTE_LOG_PARAMS(...)
Definition: Log.h:35
arm_compute::MemoryGroup
Memory group.
Definition: MemoryGroup.h:42
arm_compute::test::validation::input
auto input
Definition: LSTMLayerQuantized.cpp:486