Compute Library
 21.02
CLFastCornersKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
32 #include "support/StringSupport.h"
33 
34 #include <set>
35 #include <string>
36 
37 using namespace arm_compute;
38 
40  : ICLKernel(), _input(nullptr), _output(nullptr)
41 {
42 }
43 
45 {
46  return BorderSize(3);
47 }
48 
50 {
51  configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, non_max_suppression, border_mode);
52 }
53 
// Configure the FAST corners kernel using an explicit compile context.
//
// Stores the input/output images, builds the "fast_corners" OpenCL kernel (adding
// -DUSE_MAXSUPPRESSION when non_max_suppression is true), sets the threshold as a static
// kernel argument, computes the execution window and builds the config id used for LWS tuning.
//
// compile_context      Compile context forwarded to create_kernel().
// input                Source image; its info() drives the window, the padding and the config id.
// output               Destination image.
// threshold            Value handed to the kernel as a cl_float argument.
// non_max_suppression  When true, the kernel is built with -DUSE_MAXSUPPRESSION.
// border_mode          Only BorderMode::UNDEFINED passes the check below; any other mode
//                      triggers the "Not implemented" error.
//
// NOTE(review): the embedded listing numbers jump from 55 to 60 and from 86 to 89, so some
// statements of this function are not visible here (output_access, used further down, is
// declared on one of them) - confirm against the repository copy before editing the code.
54 void CLFastCornersKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
55 {
60  ARM_COMPUTE_ERROR_ON_MSG(border_mode != BorderMode::UNDEFINED, "Not implemented");
61 
62  _input = input;
63  _output = output;
64 
65  // Create build options
66  std::set<std::string> build_opts;
67 
68  if(non_max_suppression)
69  {
70  build_opts.emplace("-DUSE_MAXSUPPRESSION");
71  }
72 
73  // Create kernel
74  const std::string kernel_name = std::string("fast_corners");
75  _kernel = create_kernel(compile_context, kernel_name, build_opts);
76 
77  // Set static kernel arguments
78  unsigned int idx = 2 * num_arguments_per_2D_tensor(); // Skip the input and output parameters
79  _kernel.setArg<cl_float>(idx, static_cast<float>(threshold));
80 
81  // Configure kernel window
82  constexpr unsigned int num_elems_processed_per_iteration = 1;
83  constexpr unsigned int num_elems_read_per_iteration = 7;
84  constexpr unsigned int num_rows_read_per_iteration = 3;
85 
 // The window is computed with a border of 3 on the input; the third argument asks for the
 // border to be skipped when the border mode is UNDEFINED (the only mode accepted above).
86  Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_mode == BorderMode::UNDEFINED, BorderSize(3));
87 
 // NOTE(review): this rectangle starts border_size().top rows above the processed element but
 // is only num_rows_read_per_iteration (3) rows tall, whereas the window above uses
 // BorderSize(3) - confirm that this matches the rows the OpenCL kernel actually reads.
89  AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
90 
91  update_window_and_padding(win, input_access, output_access);
92 
 // The output valid region is derived from the final window, the input valid region and the border.
93  output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size());
94 
95  ICLKernel::configure_internal(win);
96 
97  // Set config_id for enabling LWS tuning
 // Layout: <kernel>_<data type>_<in dim0>_<in dim1>_<out dim0>_<out dim1>_<nms flag>_<border mode>
98  _config_id = kernel_name;
99  _config_id += "_";
100  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
101  _config_id += "_";
102  _config_id += support::cpp11::to_string(input->info()->dimension(0));
103  _config_id += "_";
104  _config_id += support::cpp11::to_string(input->info()->dimension(1));
105  _config_id += "_";
106  _config_id += support::cpp11::to_string(output->info()->dimension(0));
107  _config_id += "_";
108  _config_id += support::cpp11::to_string(output->info()->dimension(1));
109  _config_id += "_";
110  _config_id += support::cpp11::to_string(non_max_suppression);
111  _config_id += "_";
112  _config_id += lower_string(string_from_border_mode(border_mode));
113 }
114 
// Enqueue the FAST corners kernel on the given queue, one 2D slice of the window at a time.
//
// window  Execution window to process.
// queue   OpenCL command queue the kernel is enqueued on.
//
// NOTE(review): listing lines 117-118 and 120 are not visible here; `slice` is declared on one
// of them (presumably via window.first_slice_window_2D()) - confirm against the repository copy.
115 void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue)
116 {
119 
121 
122  do
123  {
 // Tensor arguments are re-bound for every slice, starting from argument index 0:
 // input first, then output. The threshold argument set in configure() follows them.
124  unsigned int idx = 0;
125  add_2D_tensor_argument(idx, _input, slice);
126  add_2D_tensor_argument(idx, _output, slice);
127  enqueue(queue, *this, slice, lws_hint());
128  }
 // Advance to the next 2D slice; the loop ends when slide_window_slice_2D() returns false.
129  while(window.slide_window_slice_2D(slice));
130 }
131 
133  : ICLKernel(), _input(nullptr), _corners(nullptr), _num_buffer(nullptr)
134 {
135 }
136 
// Convenience overload: configures the kernel with the compile context obtained from the
// CLKernelLibrary singleton and forwards every other argument unchanged to the overload
// that takes an explicit CLCompileContext.
137 void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
138 {
139  configure(CLKernelLibrary::get().get_compile_context(), input, update_number, corners, num_buffers);
140 }
141 
// Configure the copy-to-keypoint-array kernel using an explicit compile context.
//
// Stores the input image, the keypoint array and the counter buffer, builds the
// "copy_to_keypoint" OpenCL kernel (adding -DUPDATE_NUMBER when update_number is true),
// sets the static kernel arguments, computes the execution window and builds the config id
// used for LWS tuning.
//
// compile_context  Compile context forwarded to create_kernel().
// input            Source image; its info() drives the window, the x offset and the config id.
// update_number    When true, the kernel is built with -DUPDATE_NUMBER.
// corners          Destination keypoint array; must not be nullptr.
// num_buffers      OpenCL buffer passed to the kernel as an argument; must not be nullptr.
//
// NOTE(review): the embedded listing numbers jump from 143 to 146 and from 176 to 179, so some
// statements of this function are not visible here - confirm against the repository copy.
142 void CLCopyToArrayKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
143 {
146  ARM_COMPUTE_ERROR_ON(corners == nullptr);
147  ARM_COMPUTE_ERROR_ON(num_buffers == nullptr);
148 
149  _input = input;
150  _corners = corners;
151  _num_buffer = num_buffers;
152 
153  std::set<std::string> build_opts;
154 
155  if(update_number)
156  {
157  build_opts.emplace("-DUPDATE_NUMBER");
158  }
159 
160  // Create kernel
161  const std::string kernel_name = std::string("copy_to_keypoint");
162  _kernel = create_kernel(compile_context, kernel_name, build_opts);
163 
164  // x coordinate of the input valid-region anchor, i.e. how many pixels were skipped in the x dimension by the previous stages
165  unsigned int offset = _input->info()->valid_region().anchor.x();
166 
167  // Set static kernel arguments
168  unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input tensor's arguments (the only 2D tensor bound in run())
 // Argument order after the input tensor: array capacity, x offset, counter buffer, keypoint buffer.
169  _kernel.setArg<unsigned int>(idx++, _corners->max_num_values());
170  _kernel.setArg<cl_uint>(idx++, offset);
171  _kernel.setArg(idx++, *_num_buffer);
172  _kernel.setArg(idx++, _corners->cl_buffer());
173 
174  // Configure kernel window
175  constexpr unsigned int num_elems_processed_per_iteration = 1;
176  Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
179  ICLKernel::configure_internal(win);
180 
181  // Set config_id for enabling LWS tuning
 // Layout: <kernel>_<data type>_<in dim0>_<in dim1>
182  _config_id = kernel_name;
183  _config_id += "_";
184  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
185  _config_id += "_";
186  _config_id += support::cpp11::to_string(input->info()->dimension(0));
187  _config_id += "_";
188  _config_id += support::cpp11::to_string(input->info()->dimension(1));
189 }
190 
// Reset the counter buffer to zero, then enqueue the copy-to-keypoint kernel on the given
// queue, one 2D slice of the window at a time.
//
// window  Execution window to process.
// queue   OpenCL command queue used for both the buffer write and the kernel.
//
// NOTE(review): listing lines 193-194 and 199-200 are not visible here; `slice` is declared on
// one of them (presumably via window.first_slice_window_2D()) - confirm against the repository copy.
191 void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue)
192 {
195 
196  // Initialise the _num_buffer, as it is used as both input and output
 // The write is enqueued with CL_FALSE (non-blocking); zero_init is static, presumably so that
 // the host value is still alive when the transfer actually executes - TODO confirm intent.
197  static const unsigned int zero_init = 0;
198  queue.enqueueWriteBuffer(*_num_buffer, CL_FALSE, 0, sizeof(unsigned int), &zero_init);
199 
201 
202  do
203  {
 // Only the input tensor is re-bound per slice; the remaining arguments were set in configure().
204  unsigned int idx = 0;
205  add_2D_tensor_argument(idx, _input, slice);
206  enqueue(queue, *this, slice, lws_hint());
207  }
 // Advance to the next 2D slice; the loop ends when slide_window_slice_2D() returns false.
208  while(window.slide_window_slice_2D(slice));
209 }
BorderMode
Methods available to handle borders.
Definition: Types.h:265
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:846
unsigned int top
top of the border
Definition: Types.h:375
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
CLCopyToArrayKernel()
Default constructor.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
Definition: Validate.h:856
Container for 2D border size.
Definition: Types.h:273
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:276
1 channel, 1 U8 per channel
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:350
Common interface for all the OpenCL kernels.
Definition: ICLKernel.h:46
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
const std::string & string_from_border_mode(BorderMode border_mode)
Translates a given border mode policy to a string.
Definition: Utils.cpp:224
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:87
Implementation of a rectangular access pattern.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
Interface for OpenCL Array.
Definition: ICLArray.h:35
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
CLFastCornersKernel()
Default constructor.
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
Initialise the kernel.
Implementation of a row access pattern.
std::string kernel_name
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:206
unsigned int left
left of the border
Definition: Types.h:378
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:148
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
Initialise the kernel.
Borders are left undefined.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Wrapper to configure the Khronos OpenCL C++ header.
size_t max_num_values() const
Maximum number of values which can be stored in this array.
Definition: IArray.h:58
__kernel void non_max_suppression(__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_offset_first_element_in_bytes)
This function performs Non maxima suppression over a 3x3 window on a given image. ...
Definition: nonmax.cl:41
unsigned int num_elems_processed_per_iteration
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
SimpleTensor< T > threshold(const SimpleTensor< T > &src, T threshold, T false_value, T true_value, ThresholdType type, T upper)
Definition: Threshold.cpp:35
Describe a multidimensional execution window.
Definition: Window.h:39
Coordinates anchor
Anchor for the start of the valid region.
Definition: Types.h:260
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
BorderSize border_size() const override
The size of the border for that kernel.