Compute Library
 20.11
CLLKTrackerKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
35 
36 #include <cmath>
37 
38 using namespace arm_compute;
39 
// Convenience overload: fetches the compile context from the CLKernelLibrary
// singleton and forwards every argument unchanged to the CLCompileContext
// overload of configure().
40 void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
41  ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
42  bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
43 {
44  configure(CLKernelLibrary::get().get_compile_context(), old_points, new_points_estimates, old_points_internal, new_points_internal, use_initial_estimate, level, num_levels, pyramid_scale);
45 }
46 
// Configure the LK tracker init kernel for one level.
//
// compile_context       Compile context used to create the OpenCL kernel.
// old_points            Must not be null. Its buffer is bound as a kernel argument only when
//                       level == num_levels - 1; its num_values() always sizes the window.
// new_points_estimates  Read only when level == num_levels - 1 and use_initial_estimate
//                       is true; must not be null in that case.
// old_points_internal   Must not be null. Always bound as a kernel argument.
// new_points_internal   Must not be null. Always bound as a kernel argument.
// use_initial_estimate  On the last level, selects the "init_level_max_initial_estimate"
//                       kernel instead of "init_level_max".
// level                 Level being configured; the kernel receives pyramid_scale^level.
// num_levels            Total number of levels; level == num_levels - 1 is the special case.
// pyramid_scale         Base of the per-level scale factor.
47 void CLLKTrackerInitKernel::configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
48  ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
49  bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
50 
51 {
52  ARM_COMPUTE_ERROR_ON(old_points == nullptr);
53  ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
54  ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
    // new_points_estimates is dereferenced further down, but only on the last level and only
    // when an initial estimate is requested. Validate it up front, before the kernel is
    // created, and restrict the check to that case so other callers are unaffected.
    ARM_COMPUTE_ERROR_ON(use_initial_estimate && (level == (num_levels - 1)) && (new_points_estimates == nullptr));
55 
56  const float scale = std::pow(pyramid_scale, level);
57 
58  // Create kernel
    // The base name "init_level" gains a "_max" / "_max_initial_estimate" suffix on the last level.
59  std::string kernel_name = "init_level";
60  if(level == (num_levels - 1))
61  {
62  kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
63  }
64  _kernel = create_kernel(compile_context, kernel_name);
65 
66  // Set static kernel arguments
    // The argument list depends on the kernel variant: the last-level variants take
    // old_points (and optionally new_points_estimates) ahead of the common arguments.
67  unsigned int idx = 0;
68  if(level == (num_levels - 1))
69  {
70  _kernel.setArg(idx++, old_points->cl_buffer());
71  if(use_initial_estimate)
72  {
73  _kernel.setArg(idx++, new_points_estimates->cl_buffer());
74  }
75  }
76  _kernel.setArg(idx++, old_points_internal->cl_buffer());
77  _kernel.setArg(idx++, new_points_internal->cl_buffer());
78  _kernel.setArg<cl_float>(idx++, scale);
79 
80  // Configure kernel window
    // One work-item per entry of old_points along X.
81  Window window;
82  window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
    // NOTE(review): this listing skips one source line here (numbering jumps 82 -> 84) - confirm against the original file.
84  ICLKernel::configure_internal(window);
85 }
86 
// Enqueue this kernel on `queue` over `window`, using lws_hint() as the
// local work-group size. All kernel arguments were already bound in configure().
87 void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
88 {
    // NOTE(review): this listing skips two source lines here (numbering jumps 88 -> 91);
    // presumably the kernel/window validation macros - confirm against the original file.
91 
92  enqueue(queue, *this, window, lws_hint());
93 }
94 
96 {
97  configure(CLKernelLibrary::get().get_compile_context(), new_points_internal, new_points);
98 }
99 
// Configure the LK tracker "finalize" kernel.
//
// compile_context      Compile context used to create the OpenCL kernel.
// new_points_internal  Must not be null. Bound as kernel argument 0; its num_values()
//                      sizes the execution window.
// new_points           Must not be null. Bound as kernel argument 1.
100 void CLLKTrackerFinalizeKernel::configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
101 
102 {
103  ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
104  ARM_COMPUTE_ERROR_ON(new_points == nullptr);
105 
106  // Create kernel
107  _kernel = create_kernel(compile_context, "finalize");
108 
109  // Set static kernel arguments
110  unsigned int idx = 0;
111  _kernel.setArg(idx++, new_points_internal->cl_buffer());
112  _kernel.setArg(idx++, new_points->cl_buffer());
113 
114  // Configure kernel window
     // One work-item per entry of new_points_internal along X.
115  Window window;
116  window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
     // NOTE(review): this listing skips one source line here (numbering jumps 116 -> 118) - confirm against the original file.
118  ICLKernel::configure_internal(window);
119 }
120 
// Enqueue this kernel on `queue` over `window`, using lws_hint() as the
// local work-group size. All kernel arguments were already bound in configure().
121 void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
122 {
     // NOTE(review): this listing skips two source lines here (numbering jumps 122 -> 125);
     // presumably the kernel/window validation macros - confirm against the original file.
125 
126  enqueue(queue, *this, window, lws_hint());
127 }
128 
130  : _old_input(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr)
131 {
132 }
133 
// Convenience overload: fetches the compile context from the CLKernelLibrary
// singleton and forwards every argument unchanged to the CLCompileContext
// overload of configure().
134 void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
135  ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
136  ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
137  size_t window_dimension, size_t level)
138 {
139  configure(CLKernelLibrary::get().get_compile_context(), old_input, old_scharr_gx, old_scharr_gy, old_points_internal, new_points_internal, coeff_table, old_ival, window_dimension, level);
140 }
141 
// Configure the "lktracker_stage0" kernel.
//
// compile_context      Compile context used to create the OpenCL kernel.
// old_input            Stored in _old_input; bound as a 2D tensor argument in run().
// old_scharr_gx        Stored in _old_scharr_gx; bound as a 2D tensor argument in run().
// old_scharr_gy        Stored in _old_scharr_gy; bound as a 2D tensor argument in run().
// old_points_internal  Must not be null. Bound as a kernel argument.
// new_points_internal  Must not be null. Bound as a kernel argument; its num_values()
//                      sizes the execution window.
// coeff_table          Must not be null. Bound as a kernel argument.
// old_ival             Must not be null. Bound as a kernel argument.
// window_dimension     Passed to the kernel as window size, together with its square and half.
// level                Only compared against 0 to derive the level0 flag.
//
// NOTE(review): several source lines are missing from this listing inside this function
// (embedded numbering has gaps), so some statements below appear as fragments. Comments
// only describe what is visible; confirm the rest against the original file.
142 void CLLKTrackerStage0Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
143  ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
144  ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
145  size_t window_dimension, size_t level)
146 
147 {
     // NOTE(review): three source lines are missing here (147 -> 151); presumably validation
     // of the tensor arguments, which are dereferenced below - confirm.
151  ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
152  ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
153  ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
154  ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
155 
     // Keep the tensors; they are bound at run() time rather than here.
156  _old_input = old_input;
157  _old_scharr_gx = old_scharr_gx;
158  _old_scharr_gy = old_scharr_gy;
159 
160  // Configure kernel window
     // One work-item per entry of new_points_internal along X.
161  Window window;
162  window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
164 
     // Fragment: the call these three valid regions are passed to is on a missing line;
     // its result is presumably the `valid_region` used below - confirm.
166  old_input->info()->valid_region(),
167  old_scharr_gx->info()->valid_region(),
168  old_scharr_gy->info()->valid_region());
169 
     // Fragment: static access windows over the valid region for the gradient tensors;
     // the enclosing call and the old_input access window are on missing lines.
173  AccessWindowStatic(old_scharr_gx->info(), valid_region.start(0), valid_region.start(1),
175  AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
176  valid_region.end(0), valid_region.end(1)));
177 
178  ICLKernel::configure_internal(window);
179 
180  // Initialize required variables
     // level0 is 1 only for level 0. The size_t window_dimension is narrowed to int here.
181  const int level0 = (level == 0) ? 1 : 0;
182  const int window_size = window_dimension;
183  const int window_size_squared = window_dimension * window_dimension;
184  const int window_size_half = window_dimension / 2;
185  const float eig_const = 1.0f / (2.0f * window_size_squared);
     // Limits derived from the valid region: upper X, upper Y, and the X start.
186  const cl_float3 border_limits =
187  {
188  {
189  // -1 because we load 2 values at once for bilinear interpolation
190  static_cast<cl_float>(valid_region.end(0) - window_size - 1),
191  static_cast<cl_float>(valid_region.end(1) - window_size - 1),
192  static_cast<cl_float>(valid_region.start(0))
193  }
194  };
195 
196  // Create kernel
197  _kernel = create_kernel(compile_context, "lktracker_stage0");
198 
199  // Set arguments
     // Start after the slots of the three 2D tensors, which run() fills from index 0.
200  unsigned int idx = 3 * num_arguments_per_2D_tensor();
201  _kernel.setArg(idx++, old_points_internal->cl_buffer());
202  _kernel.setArg(idx++, new_points_internal->cl_buffer());
203  _kernel.setArg(idx++, coeff_table->cl_buffer());
204  _kernel.setArg(idx++, old_ival->cl_buffer());
205  _kernel.setArg<cl_int>(idx++, window_size);
206  _kernel.setArg<cl_int>(idx++, window_size_squared);
207  _kernel.setArg<cl_int>(idx++, window_size_half);
208  _kernel.setArg<cl_float3>(idx++, border_limits);
209  _kernel.setArg<cl_float>(idx++, eig_const);
210  _kernel.setArg<cl_int>(idx++, level0);
211 }
212 
// Bind the three tensors stored by configure() as the leading kernel arguments
// (starting at index 0), then enqueue the kernel on `queue` over `window` using
// lws_hint() as the local work-group size.
213 void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue)
214 {
     // NOTE(review): this listing skips two source lines here (numbering jumps 214 -> 217);
     // presumably the kernel/window validation macros - confirm against the original file.
217 
218  // Set static tensor arguments. Setting here as allocation might be deferred.
219  unsigned int idx = 0;
220  add_2D_tensor_argument(idx, _old_input, window);
221  add_2D_tensor_argument(idx, _old_scharr_gx, window);
222  add_2D_tensor_argument(idx, _old_scharr_gy, window);
223 
224  enqueue(queue, *this, window, lws_hint());
225 }
226 
228  : _new_input(nullptr)
229 {
230 }
231 
// Convenience overload: fetches the compile context from the CLKernelLibrary
// singleton and forwards every argument unchanged to the CLCompileContext
// overload of configure().
232 void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
233  Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
234 {
235  configure(CLKernelLibrary::get().get_compile_context(), new_input, new_points_internal, coeff_table, old_ival, termination, epsilon, num_iterations, window_dimension, level);
236 }
237 
// Configure the "lktracker_stage1" kernel.
//
// compile_context      Compile context used to create the OpenCL kernel.
// new_input            Stored in _new_input; bound as a 2D tensor argument in run().
// new_points_internal  Must not be null. Bound as a kernel argument; its num_values()
//                      sizes the execution window.
// coeff_table          Must not be null. Bound as a kernel argument.
// old_ival             Must not be null. Bound as a kernel argument.
// termination          TERM_CRITERIA_EPSILON replaces num_iterations with 1000;
//                      TERM_CRITERIA_EPSILON or TERM_CRITERIA_BOTH sets the term_epsilon flag.
// epsilon              Passed to the kernel unchanged.
// num_iterations       Iteration count passed to the kernel (see termination).
// window_dimension     Passed to the kernel as window size, together with its square and half.
// level                Only compared against 0 to derive the level0 flag.
//
// NOTE(review): several source lines are missing from this listing inside this function
// (embedded numbering has gaps), so one statement below appears as a fragment. Comments
// only describe what is visible; confirm the rest against the original file.
238 void CLLKTrackerStage1Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table,
239  ICLOldValArray *old_ival,
240  Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
241 
242 {
     // NOTE(review): one source line is missing here (242 -> 244); presumably validation of
     // new_input, which is dereferenced below - confirm.
244  ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
245  ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
246  ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
247 
     // Keep the tensor; it is bound at run() time rather than here.
248  _new_input = new_input;
249 
250  // Configure kernel window
     // One work-item per entry of new_points_internal along X.
251  Window window;
252  window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
254 
255  const ValidRegion &valid_region = new_input->info()->valid_region();
256 
     // Fragment: tail of a call whose opening lines are missing from this listing.
259  valid_region.end(0), valid_region.end(1)));
260 
261  ICLKernel::configure_internal(window);
262 
263  // Initialize required variables
     // level0 is 1 only for level 0. The size_t window_dimension is narrowed to int here.
264  const int level0 = (level == 0) ? 1 : 0;
265  const int window_size = window_dimension;
266  const int window_size_squared = window_dimension * window_dimension;
267  const int window_size_half = window_dimension / 2;
268  const float eig_const = 1.0f / (2.0f * window_size_squared);
     // Limits derived from the valid region: upper X, upper Y, and the X start.
269  const cl_float3 border_limits =
270  {
271  {
272  // -1 because we load 2 values at once for bilinear interpolation
273  static_cast<cl_float>(valid_region.end(0) - window_size - 1),
274  static_cast<cl_float>(valid_region.end(1) - window_size - 1),
275  static_cast<cl_float>(valid_region.start(0))
276  }
277  };
278 
279  // Set maximum number of iterations used for convergence
     // With epsilon-only termination the caller's num_iterations is ignored in favour of this cap.
280  const size_t max_iterations = 1000;
281  num_iterations = (termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : num_iterations;
282 
     // 1 when the epsilon criterion is part of the termination condition, 0 otherwise.
283  const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
284 
285  // Create kernel
286  _kernel = create_kernel(compile_context, "lktracker_stage1");
287 
288  // Set static kernel arguments
     // Start after the slots of the single 2D tensor, which run() fills from index 0.
289  unsigned int idx = num_arguments_per_2D_tensor();
290  _kernel.setArg(idx++, new_points_internal->cl_buffer());
291  _kernel.setArg(idx++, coeff_table->cl_buffer());
292  _kernel.setArg(idx++, old_ival->cl_buffer());
293  _kernel.setArg<cl_int>(idx++, window_size);
294  _kernel.setArg<cl_int>(idx++, window_size_squared);
295  _kernel.setArg<cl_int>(idx++, window_size_half);
296  _kernel.setArg<cl_int>(idx++, num_iterations);
297  _kernel.setArg<cl_float>(idx++, epsilon);
298  _kernel.setArg<cl_float3>(idx++, border_limits);
299  _kernel.setArg<cl_float>(idx++, eig_const);
300  _kernel.setArg<cl_int>(idx++, level0);
301  _kernel.setArg<cl_int>(idx++, term_epsilon);
302 }
303 
// Bind the tensor stored by configure() as the leading kernel argument
// (starting at index 0), then enqueue the kernel on `queue` over `window` using
// lws_hint() as the local work-group size.
304 void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue)
305 {
     // NOTE(review): this listing skips two source lines here (numbering jumps 305 -> 308);
     // presumably the kernel/window validation macros - confirm against the original file.
308 
309  // Set static tensor arguments. Setting here as allocation might be deferred.
310  unsigned int idx = 0;
311  add_2D_tensor_argument(idx, _new_input, window);
312 
313  enqueue(queue, *this, window, lws_hint());
314 }
void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
Initialise the kernel input and output.
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:264
1 channel, 1 U8 per channel
Terminate when within epsilon of a threshold.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
const ValidRegion valid_region
Definition: Scale.cpp:221
Terminate on whichever of the other conditions occurs first.
Copyright (c) 2017-2020 Arm Limited.
void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
Initialise the kernel input and output.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
Implementation of a static rectangular access pattern.
void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
Initialise the kernel input and output.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
Interface for OpenCL Array.
Definition: ICLArray.h:35
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
size_t num_values() const
Number of values currently stored in the array.
Definition: IArray.h:68
int start(unsigned int d) const
Return the start of the valid region for the given dimension d.
Definition: Types.h:234
std::string kernel_name
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
ValidRegion intersect_valid_regions(const Ts &... regions)
Intersect multiple valid regions.
Definition: WindowHelpers.h:74
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
Definition: ICLKernel.h:194
Termination
Termination criteria.
Definition: Types.h:414
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
1 channel, 1 S16 per channel
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, size_t window_dimension, size_t level)
Initialise the kernel input and output.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:136
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
int end(unsigned int d) const
Return the end of the valid region for the given dimension d.
Definition: Types.h:240
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Container for valid region of a window.
Definition: Types.h:188
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Describe a multidimensional execution window.
Definition: Window.h:39
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205