Compute Library
 21.02
CLConvolution.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
27 #include "arm_compute/core/Error.h"
30 #include "arm_compute/core/Utils.h"
36 
37 #include <utility>
38 
39 using namespace arm_compute;
40 
41 void CLConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
42 {
43  configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
44 }
45 
46 void CLConvolution3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
47  uint8_t constant_border_value)
48 {
49  auto k = std::make_unique<CLConvolution3x3Kernel>();
50  k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
51  _kernel = std::move(k);
52  _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
53 }
54 
55 template <unsigned int matrix_size>
56 CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
57  : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(std::make_unique<CLSeparableConvolutionHorKernel<matrix_size>>()),
58  _kernel_vert(std::make_unique<CLSeparableConvolutionVertKernel<matrix_size>>()), _kernel(std::make_unique<CLConvolutionKernel<matrix_size>>()), _border_handler(std::make_unique<CLFillBorderKernel>())
59 {
60 }
61 
62 template <unsigned int matrix_size>
64 
65 template <unsigned int matrix_size>
66 void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
67  uint8_t constant_border_value)
68 {
69  configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
70 }
71 
72 template <unsigned int matrix_size>
73 void CLConvolutionSquare<matrix_size>::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
74  uint8_t constant_border_value)
75 {
77  ARM_COMPUTE_ERROR_ON(conv == nullptr);
78  std::array<int16_t, matrix_size> conv_col{ 0 };
79  std::array<int16_t, matrix_size> conv_row{ 0 };
80  _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
81 
82  if(_is_separable)
83  {
84  std::pair<DataType, DataType> type_pair = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size);
85  _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, type_pair.first));
86 
87  // Manage intermediate buffers
88  _memory_group.manage(&_tmp);
89 
90  if(scale == 0)
91  {
92  scale = calculate_matrix_scale(conv, matrix_size);
93  }
94 
95  _kernel_hor->configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
96  _kernel_vert->configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
97  _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
98 
99  // Allocate intermediate buffer
100  _tmp.allocator()->allocate();
101  }
102  else
103  {
104  _kernel->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
105  _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
106  }
107 }
108 
109 template <unsigned int matrix_size>
111 {
112  CLScheduler::get().enqueue(*_border_handler);
113 
114  if(_is_separable)
115  {
116  MemoryGroupResourceScope scope_mg(_memory_group);
117 
118  CLScheduler::get().enqueue(*_kernel_hor, false);
119  CLScheduler::get().enqueue(*_kernel_vert);
120  }
121  else
122  {
123  CLScheduler::get().enqueue(*_kernel);
124  }
125 }
126 
130 
131 void CLConvolutionRectangle::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
132 {
133  configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, rows, cols, scale, border_mode, constant_border_value);
134 }
135 
136 void CLConvolutionRectangle::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale,
137  BorderMode border_mode, uint8_t constant_border_value)
138 {
139  border_mode = (border_mode == BorderMode::UNDEFINED) ? BorderMode::CONSTANT : border_mode;
140  auto k = std::make_unique<CLConvolutionRectangleKernel>();
141  k->configure(compile_context, input, output, conv, rows, cols, scale, false);
142  _kernel = std::move(k);
143  _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
144 }
BorderMode
Methods available to handle borders.
Definition: Types.h:265
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value=0)
Initialize the function's source, destination, conv and border_mode.
Kernel for the Horizontal pass of a Separable Convolution.
Definition: CLConvolution.h:42
static CLScheduler & get()
Access the scheduler singleton.
CLConvolutionSquare(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
1 channel, 1 U8 per channel
std::pair< DataType, DataType > data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size)
Calculate accuracy required by the horizontal and vertical convolution computations.
Definition: Utils.h:806
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
~CLConvolutionSquare()
Default destructor.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size)
Calculate the scale of the given square matrix.
Definition: Utils.h:727
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value=0)
Initialize the function's source, destination, conv and border_mode.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
Interface for the kernel to run an arbitrary size convolution on a tensor.
Definition: CLConvolution.h:40
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Kernel for the Vertical pass of a Separable Convolution.
Definition: CLConvolution.h:44
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Basic function to execute square convolution. Currently it supports 5x5, 7x7, 9x9. ...
Definition: CLConvolution.h:92
Interface for filling the border of a kernel.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value=0)
Initialize the function's source, destination, conv and border_mode.
Borders are left undefined.
Store the tensor's metadata.
Definition: TensorInfo.h:45
bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
Separate a 2D convolution into two 1D convolutions.
Definition: Utils.h:667