Compute Library
 21.11
CLConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 #include "src/core/CL/ICLKernel.h"
35 
36 #include "src/common/utils/Log.h"
37 #include "support/Cast.h"
38 
39 namespace arm_compute
40 {
42 using namespace arm_compute::experimental;
43 struct CLConvolutionLayer::Impl
44 {
45  MemoryGroup memory_group{};
46  std::shared_ptr<IMemoryManager> memory_manager{};
47  std::unique_ptr<opencl::IClOperator> op{ nullptr };
48  ITensorPack run_pack{};
49  ITensorPack prep_pack{};
50  WorkspaceData<CLTensor> workspace{};
52  std::unique_ptr<IFunction> func{ nullptr };
53 };
54 
55 CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
56  : _impl(std::make_unique<Impl>())
57 {
58  _impl->memory_manager = std::move(memory_manager);
59 }
60 
62 
64  const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups, const experimental::PostOpList<ICLTensor *> &post_ops)
65 {
66  configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups, post_ops);
67 }
68 
69 void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
71  const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups, const experimental::PostOpList<ICLTensor *> &post_ops)
72 {
73  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
74  ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
75  enable_fast_math, num_groups));
76  ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups, post_ops);
77 
78  // Convert post op arguments to ITensorInfo
79  auto transformed_post_ops = experimental::transform_post_op_list_arguments<ICLTensor *, ITensorInfo *>(post_ops, [](auto tensor)
80  {
81  return tensor->info();
82  });
83  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups, transformed_post_ops);
84 
85  switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info,
87  {
91  {
92  auto f = std::make_unique<opencl::ClConv2d>();
93  f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info);
94  _impl->op = std::move(f);
95  break;
96  }
98  {
99  ARM_COMPUTE_ERROR_ON_MSG(post_ops.size() > 0, "CLFFTConvolutionLayer does not support post ops");
100  auto f = std::make_unique<CLFFTConvolutionLayer>(_impl->memory_manager);
101  f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
102  _impl->func = std::move(f);
103  break;
104  }
105  default:
106  ARM_COMPUTE_ERROR("Not supported.");
107  break;
108  }
109 
110  if(_impl->op)
111  {
112  _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
113  _impl->aux_mem_req = _impl->op->workspace();
114  _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } };
115  size_t post_op_tensor_index = 0;
116  for(const auto &op : post_ops.get_list())
117  {
118  for(auto &tensor : op->arguments())
119  {
120  _impl->run_pack.add_const_tensor(experimental::get_post_op_arg_type(post_op_tensor_index++), *tensor);
121  }
122  }
123  _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } };
124  _impl->workspace = manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
125  }
126 }
127 
129  const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups, const experimental::PostOpList<ITensorInfo *> &post_ops)
130 {
131  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
132  ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
133 
134  const GPUTarget gpu_target = CLScheduler::get().target();
135  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups, post_ops);
136 
137  switch(opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target))
138  {
142  {
143  ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info));
144  break;
145  }
147  {
148  // Validate FFT-based convolution layer
149  ARM_COMPUTE_RETURN_ERROR_ON_MSG(post_ops.size() > 0, "CLFFTConvolutionLayer does not support post ops");
150  ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math));
151  break;
152  }
153  default:
154  ARM_COMPUTE_ERROR("Not supported.");
155  break;
156  }
157 
158  return Status{};
159 }
160 
162  const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math)
163 {
164  const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1);
165  return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target);
166 }
167 
169 {
170  prepare();
171 
172  MemoryGroupResourceScope scope_mg(_impl->memory_group);
173 
174  if(_impl->func)
175  {
176  _impl->func->run();
177  }
178  else
179  {
180  _impl->op->run(_impl->run_pack);
181  }
182 }
183 
185 {
186  if(_impl->func)
187  {
188  _impl->func->prepare();
189  }
190  else
191  {
192  _impl->op->prepare(_impl->prep_pack);
193 
194  // Release temporary tensors that are only used in prepare stage
195  release_temporaries(_impl->aux_mem_req, _impl->workspace);
196  }
197 }
198 } // namespace arm_compute
experimental::PostOpList< ITensorInfo * > post_ops
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1, const experimental::PostOpList< ITensorInfo *> &post_ops=experimental::PostOpList< ITensorInfo *> {})
Static function to check if given info will lead to a valid configuration of CLConvolutionLayer.
CLConvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
static CLScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
GPUTarget target() const
Get the target GPU.
Definition: CLScheduler.cpp:45
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
~CLConvolutionLayer()
Default Destructor.
void prepare() override
Prepare the function for executing.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false)
Static function to check if given info will lead to a valid configuration of CLFFTConvolutionLayer.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Status class.
Definition: Error.h:52
ConvolutionMethod
Available ConvolutionMethod.
Definition: Types.h:134
Activation Layer Information class.
Definition: Types.h:1509
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:132
TensorType get_post_op_arg_type(size_t index)
Get post op argument TensorType from post op argument index in a flattened, ordered post op argument ...
Definition: PostOpUtils.h:79
Convolution Layer Weights Information class.
Definition: Types.h:1728
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
const unsigned int num_groups
Definition: Im2Col.cpp:153
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Padding and stride information class.
Definition: Types.h:656
void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData< TensorType > &workspace)
Utility function to release tensors with lifetime marked as Prepare.
Descriptor used by the 2d Convolution function.
Num samples, channels, height, width.
CLCompileContext class.
Convolution using Winograd.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info, const GPUTarget gpu_target)
Static function to check if given info will return the convolution called by ClConv2d.
Definition: ClConv2d.cpp:164
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation=Size2D(1U, 1U), bool enable_fast_math=false)
Static function to check if given info will return the convolution called by CLConvolutionLayer.
size_t size() const
Number of post ops.
Definition: IPostOp.h:150
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
A sequence of PostOps that can be appended to the end of other operators.
Definition: IPostOp.h:119
Convolution using GEMM.
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, const WeightsInfo &weights_info=WeightsInfo())
Static function to check if given info will lead to a valid configuration of ClConv2d.
Definition: ClConv2d.cpp:124
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info=WeightsInfo(), const Size2D &dilation=Size2D(1U, 1U), const ActivationLayerInfo &act_info=ActivationLayerInfo(), bool enable_fast_math=false, unsigned int num_groups=1, const experimental::PostOpList< ICLTensor *> &post_ops=experimental::PostOpList< ICLTensor *> {})
Set the input and output tensors.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.