Compute Library
 21.02
CLCannyEdgeKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
33 #include "support/StringSupport.h"
34 
35 using namespace arm_compute;
36 
38  : _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
39 {
40 }
41 
// Convenience overload: forwards every argument unchanged to the overload that
// takes a CLCompileContext, passing the compile context returned by
// CLKernelLibrary::get().get_compile_context().
42 void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
43 {
44  configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, norm_type);
45 }
46 
// Configures the gradient-combination kernel.
//
// Visible steps:
//  - stores the four tensor pointers in the corresponding members;
//  - builds the OpenCL compile options DATA_TYPE_IN / DATA_TYPE_OUT;
//  - creates "combine_gradients_L1" when norm_type == 1, otherwise "combine_gradients_L2";
//  - computes the execution window over gx with a step of 4 elements per iteration;
//  - sets the valid region of magnitude and phase from the valid region of gx;
//  - builds the config_id string used for LWS tuning.
//
// NOTE(review): this listing is incomplete. The argument checks whose message strings
// appear right after the opening brace, and the declarations of gx_access, gy_access,
// mag_access and phase_access, are not present in this extract - consult the original file.
47 void CLGradientKernel::configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
48 {
54  "Gx and Gy must have the same pixel size");
56  "Mag must have the same pixel size as Gx and Gy");
57 
58  _gx = gx;
59  _gy = gy;
60  _magnitude = magnitude;
61  _phase = phase;
62 
63  // Create build opts
    // NOTE(review): DATA_TYPE_OUT is derived from gx, not from magnitude or phase - confirm this is intended.
64  std::set<std::string> built_opts;
65  built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(gx->info()->data_type()));
66  built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(gx->info()->data_type()));
67 
68  // Create kernel
    // Any norm_type other than 1 selects the L2 kernel.
69  const std::string kernel_name = (norm_type == 1) ? std::string("combine_gradients_L1") : std::string("combine_gradients_L2");
70  _kernel = create_kernel(compile_context, kernel_name, built_opts);
71 
72  // Configure kernel window
73  constexpr unsigned int num_elems_processed_per_iteration = 4;
74 
75  Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));
76 
81 
82  update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access);
83 
    // Both outputs take their valid region from the gx input.
84  mag_access.set_valid_region(win, _gx->info()->valid_region());
85  phase_access.set_valid_region(win, _gx->info()->valid_region());
86 
87  ICLKernel::configure_internal(win);
88 
89  // Set config_id for enabling LWS tuning
    // Resulting layout: <kernel_name>_<lowercased gx data type>_<gx dim 0>_<gx dim 1>
90  _config_id = kernel_name;
91  _config_id += "_";
92  _config_id += lower_string(string_from_data_type(gx->info()->data_type()));
93  _config_id += "_";
94  _config_id += support::cpp11::to_string(gx->info()->dimension(0));
95  _config_id += "_";
96  _config_id += support::cpp11::to_string(gx->info()->dimension(1));
97 }
98 
// Enqueues the gradient kernel once per 2D slice of `window`.
//
// For each slice the kernel arguments are rebound starting at index 0, in the order
// gx, gy, magnitude, phase, and the kernel is enqueued on `queue` with lws_hint().
// The do/while form means the first slice is always processed before sliding.
//
// NOTE(review): the declaration of `slice` and any statements that precede the loop
// are missing from this listing - presumably the first 2D slice of `window`; confirm
// against the original file.
99 void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue)
100 {
103 
105  do
106  {
107  unsigned int idx = 0;
108  add_2D_tensor_argument(idx, _gx, slice);
109  add_2D_tensor_argument(idx, _gy, slice);
110  add_2D_tensor_argument(idx, _magnitude, slice);
111  add_2D_tensor_argument(idx, _phase, slice);
112  enqueue(queue, *this, slice, lws_hint());
113  }
114  while(window.slide_window_slice_2D(slice));
115 }
116 
118  : _magnitude(nullptr), _phase(nullptr), _output(nullptr)
119 {
120 }
121 
123 {
124  return BorderSize(1);
125 }
126 
// Convenience overload: forwards every argument unchanged to the overload that
// takes a CLCompileContext, passing the compile context returned by
// CLKernelLibrary::get().get_compile_context().
127 void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
128 {
129  configure(CLKernelLibrary::get().get_compile_context(), magnitude, phase, output, lower_thr, border_undefined);
130 }
131 
// Configures the non-maximum suppression kernel.
//
// Visible steps:
//  - stores the magnitude, phase and output tensor pointers;
//  - builds DATA_TYPE_IN from the magnitude data type and DATA_TYPE_OUT from the output data type;
//  - creates the "suppress_non_maximum" kernel and sets lower_thr as the argument that
//    follows the three 2D tensors bound in run();
//  - computes the execution window (one element per iteration), forwarding
//    border_undefined and border_size() to calculate_max_window;
//  - sets the output valid region and builds the config_id string used for LWS tuning.
//
// NOTE(review): this listing is incomplete. The statements directly after the opening
// brace and the declaration of phase_access are not present in this extract - consult
// the original file.
132 void CLEdgeNonMaxSuppressionKernel::configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
133 {
137 
138  _magnitude = magnitude;
139  _phase = phase;
140  _output = output;
141 
142  // Create build opts
143  std::set<std::string> built_opts;
144  built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(magnitude->info()->data_type()));
145  built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
146 
147  // Create kernel
148  const std::string kernel_name = std::string("suppress_non_maximum");
149  _kernel = create_kernel(compile_context, kernel_name, built_opts);
150 
151  // Set minimum threshold argument
    // The threshold is set once here; run() only rebinds the tensor arguments at lower indices.
152  unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
153  _kernel.setArg(idx++, lower_thr);
154 
155  // Configure kernel window
156  constexpr unsigned int num_elems_processed_per_iteration = 1;
157  constexpr unsigned int num_elems_read_written_per_iteration = 3;
158 
159  Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
160 
    // Magnitude is accessed through a 3x3 rectangle offset by the left/top border.
161  AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top,
162  num_elems_read_written_per_iteration, num_elems_read_written_per_iteration);
164  AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);
165 
166  update_window_and_padding(win, mag_access, phase_access, output_access);
167 
168  output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());
169 
170  ICLKernel::configure_internal(win);
171 
172  // Set config_id for enabling LWS tuning
    // Resulting layout: <kernel_name>_<lowercased output data type>_<output dim 0>_<output dim 1>_<border_undefined>
173  _config_id = kernel_name;
174  _config_id += "_";
175  _config_id += lower_string(string_from_data_type(output->info()->data_type()));
176  _config_id += "_";
177  _config_id += support::cpp11::to_string(output->info()->dimension(0));
178  _config_id += "_";
179  _config_id += support::cpp11::to_string(output->info()->dimension(1));
180  _config_id += "_";
181  _config_id += support::cpp11::to_string(border_undefined);
182 }
183 
// Enqueues the non-maximum suppression kernel once per 2D slice of `window`.
//
// For each slice the kernel arguments are rebound starting at index 0, in the order
// magnitude, phase, output, and the kernel is enqueued on `queue` with lws_hint().
//
// NOTE(review): the declaration of `slice` and any statements that precede the loop
// are missing from this listing - presumably the first 2D slice of `window`; confirm
// against the original file.
184 void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue)
185 {
188 
190  do
191  {
192  unsigned int idx = 0;
193  add_2D_tensor_argument(idx, _magnitude, slice);
194  add_2D_tensor_argument(idx, _phase, slice);
195  add_2D_tensor_argument(idx, _output, slice);
196  enqueue(queue, *this, slice, lws_hint());
197  }
198  while(window.slide_window_slice_2D(slice));
199 }
200 
202  : _input(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0), _visited(nullptr), _recorded(nullptr), _l1_stack(nullptr), _l1_stack_counter(nullptr)
203 {
204 }
205 
// Convenience overload: forwards every argument unchanged to the overload that
// takes a CLCompileContext, passing the compile context returned by
// CLKernelLibrary::get().get_compile_context().
206 void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
207  ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
208 {
209  configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, visited, recorded, l1_stack, l1_stack_counter);
210 }
211 
// Configures the edge-trace ("hysteresis") kernel.
//
// Visible steps:
//  - stores the six tensor pointers and both thresholds in the corresponding members;
//  - builds DATA_TYPE_IN from the input data type and DATA_TYPE_OUT from the output data type;
//  - creates the "hysteresis" kernel and sets four constant arguments after the six
//    2D tensors bound in run(): lower threshold, upper threshold, input width, input
//    height, each cast to cl_uint;
//  - computes the execution window over the input (one element per iteration);
//  - sets the valid region of output, visited, recorded, l1_stack and l1_stack_counter
//    from the valid region of the input;
//  - builds the config_id string used for LWS tuning.
//
// Note the parameter order: upper_thr comes before lower_thr in the signature, while the
// kernel receives lower first.
//
// NOTE(review): this listing is incomplete. The statements directly after the opening
// brace and the opening line of the call whose trailing arguments start at
// `output_access,` are not present in this extract - consult the original file.
212 void CLEdgeTraceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
213  ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
214 {
221 
222  _input = input;
223  _output = output;
224  _lower_thr = lower_thr;
225  _upper_thr = upper_thr;
226  _visited = visited;
227  _recorded = recorded;
228  _l1_stack = l1_stack;
229  _l1_stack_counter = l1_stack_counter;
230 
231  // Create build opts
232  std::set<std::string> built_opts;
233  built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
234  built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
235 
236  // Create kernel
237  const std::string kernel_name = std::string("hysteresis");
238  _kernel = create_kernel(compile_context, kernel_name, built_opts);
239 
240  // Set constant kernel args
    // These are set once here; run() only rebinds the six tensor arguments at lower indices.
241  unsigned int width = _input->info()->dimension(0);
242  unsigned int height = _input->info()->dimension(1);
243  unsigned int idx = 6 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
244  _kernel.setArg(idx++, static_cast<cl_uint>(_lower_thr));
245  _kernel.setArg(idx++, static_cast<cl_uint>(_upper_thr));
246  _kernel.setArg(idx++, static_cast<cl_uint>(width));
247  _kernel.setArg(idx++, static_cast<cl_uint>(height));
248 
249  // Configure kernel window
250  constexpr unsigned int num_elems_processed_per_iteration = 1;
251  Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
252 
253  AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);
254  AccessWindowHorizontal visited_access(_visited->info(), 0, num_elems_processed_per_iteration);
255  AccessWindowHorizontal recorded_access(_recorded->info(), 0, num_elems_processed_per_iteration);
256  AccessWindowHorizontal l1_stack_access(_l1_stack->info(), 0, num_elems_processed_per_iteration);
257  AccessWindowHorizontal l1_stack_counter_access(_l1_stack_counter->info(), 0, num_elems_processed_per_iteration);
258 
261  output_access,
262  visited_access,
263  recorded_access,
264  l1_stack_access,
265  l1_stack_counter_access);
266 
267  output_access.set_valid_region(win, _input->info()->valid_region());
268  visited_access.set_valid_region(win, _input->info()->valid_region());
269  recorded_access.set_valid_region(win, _input->info()->valid_region());
270  l1_stack_access.set_valid_region(win, _input->info()->valid_region());
271  l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region());
272 
273  ICLKernel::configure_internal(win);
274 
275  // Set config_id for enabling LWS tuning
    // Resulting layout: <kernel_name>_<lowercased input data type>_<input dim 0>_<input dim 1>_<lowercased output format>_<output dim 0>_<output dim 1>
276  _config_id = kernel_name;
277  _config_id += "_";
278  _config_id += lower_string(string_from_data_type(input->info()->data_type()));
279  _config_id += "_";
280  _config_id += support::cpp11::to_string(input->info()->dimension(0));
281  _config_id += "_";
282  _config_id += support::cpp11::to_string(input->info()->dimension(1));
283  _config_id += "_";
284  _config_id += lower_string(string_from_format(output->info()->format()));
285  _config_id += "_";
286  _config_id += support::cpp11::to_string(output->info()->dimension(0));
287  _config_id += "_";
288  _config_id += support::cpp11::to_string(output->info()->dimension(1));
289 }
290 
// Enqueues the edge-trace kernel once per 2D slice of `window`.
//
// For each slice the kernel arguments are rebound starting at index 0, in the order
// input, output, visited, recorded, l1_stack, l1_stack_counter, and the kernel is
// enqueued on `queue` with lws_hint().
//
// NOTE(review): the declaration of `slice` and any statements that precede the loop
// are missing from this listing - presumably the first 2D slice of `window`; confirm
// against the original file.
291 void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue)
292 {
295 
297  do
298  {
299  unsigned int idx = 0;
300  add_2D_tensor_argument(idx, _input, slice);
301  add_2D_tensor_argument(idx, _output, slice);
302  add_2D_tensor_argument(idx, _visited, slice);
303  add_2D_tensor_argument(idx, _recorded, slice);
304  add_2D_tensor_argument(idx, _l1_stack, slice);
305  add_2D_tensor_argument(idx, _l1_stack_counter, slice);
306 
307  enqueue(queue, *this, slice, lws_hint());
308  }
309  while(window.slide_window_slice_2D(slice));
310 }
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
unsigned int top
top of the border
Definition: Types.h:375
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
Initialise the kernel's sources, destination and border mode.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
Definition: Types.h:273
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:276
1 channel, 1 U8 per channel
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
SimpleTensor< uint8_t > phase(const SimpleTensor< T > &gx, const SimpleTensor< T > &gy, PhaseType phase_type)
Definition: Phase.cpp:35
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
1 channel, 1 U16 per channel
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:350
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
1 channel, 1 S32 per channel
Implementation of a rectangular access pattern.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
virtual Format format() const =0
Colour format of the image.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
1 channel, 1 U32 per channel
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
Implementation of a row access pattern.
std::string kernel_name
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:37
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Definition: Utils.h:106
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:206
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
unsigned int left
left of the border
Definition: Types.h:378
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
1 channel, 1 S16 per channel
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
Initialise the kernel's sources, destinations and border mode.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:148
void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
Initialise the kernel's source, destination and border mode.
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Wrapper to configure the Khronos OpenCL C++ header.
unsigned int num_elems_processed_per_iteration
Describe a multidimensional execution window.
Definition: Window.h:39
SimpleTensor< T > magnitude(const SimpleTensor< T > &gx, const SimpleTensor< T > &gy, MagnitudeType magnitude_type)
Definition: Magnitude.cpp:35
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
BorderSize border_size() const override
The size of the border for that kernel.
const std::string & string_from_format(Format format)
Convert a tensor format into a string.
Definition: Utils.cpp:76