39 : _input(nullptr), _detection_windows(), _num_detection_windows(nullptr)
46 configure(
CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, num_detection_windows, detection_window_stride, threshold, idx_class);
50 const Size2D &detection_window_stride,
65 _detection_windows = detection_windows;
66 _num_detection_windows = num_detection_windows;
68 const unsigned int num_bins_per_descriptor_x = ((detection_window_size.
width - block_size.
width) / block_stride.
width + 1) * input->
info()->
num_channels();
69 const unsigned int num_blocks_per_descriptor_y = (detection_window_size.
height - block_size.
height) / block_stride.
height + 1;
73 std::stringstream args_str;
74 args_str <<
"-DNUM_BLOCKS_PER_DESCRIPTOR_Y=" << num_blocks_per_descriptor_y <<
" ";
75 args_str <<
"-DNUM_BINS_PER_DESCRIPTOR_X=" << num_bins_per_descriptor_x <<
" ";
76 args_str <<
"-DTHRESHOLD=" << threshold <<
" ";
77 args_str <<
"-DMAX_NUM_DETECTION_WINDOWS=" << detection_windows->
max_num_values() <<
" ";
78 args_str <<
"-DIDX_CLASS=" << idx_class <<
" ";
79 args_str <<
"-DDETECTION_WINDOW_WIDTH=" << detection_window_size.
width <<
" ";
80 args_str <<
"-DDETECTION_WINDOW_HEIGHT=" << detection_window_size.
height <<
" ";
81 args_str <<
"-DDETECTION_WINDOW_STRIDE_WIDTH=" << detection_window_stride.
width <<
" ";
82 args_str <<
"-DDETECTION_WINDOW_STRIDE_HEIGHT=" << detection_window_stride.
height <<
" ";
85 std::set<std::string> build_opts = {};
86 build_opts.insert(args_str.str());
89 const std::string
kernel_name = std::string(
"hog_detector");
90 _kernel =
create_kernel(compile_context, kernel_name, build_opts);
95 _kernel.setArg(idx++, detection_windows->
cl_buffer());
96 _kernel.setArg(idx++, *_num_detection_windows);
100 const size_t num_blocks_x = valid_region.
shape[0];
101 const size_t num_blocks_y = valid_region.
shape[1];
104 const size_t num_blocks_per_detection_window_x = detection_window_size.
width / block_stride.
width;
105 const size_t num_blocks_per_detection_window_y = detection_window_size.
height / block_stride.
height;
107 const size_t window_step_x = detection_window_stride.
width / block_stride.
width;
108 const size_t window_step_y = detection_window_stride.
height / block_stride.
height;
115 constexpr
unsigned int num_elems_read_per_iteration = 1;
116 const unsigned int num_rows_read_per_iteration = num_blocks_per_descriptor_y;
120 ICLKernel::configure_internal(win);
140 unsigned int idx = 0;
Window first_slice_window_2D() const
First 2D slice of the window.
const Window & window() const
The maximum window the kernel can be executed on.
const Size2D & detection_window_size() const
The detection window size in pixels.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
TensorShape shape
Shape of the valid region.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Describe one of the image's dimensions with a start, end and step.
std::string lower_string(const std::string &val)
Lower a given string.
void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold=0.0f, uint16_t idx_class=0)
Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect.
const ValidRegion valid_region
auto floor_to_multiple(S value, T divisor) -> decltype((value/divisor) *divisor)
Computes the largest number smaller than or equal to value that is a multiple of divisor.
const Size2D & block_stride() const
The block stride in pixels.
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
Implementation of a rectangular access pattern.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Interface for OpenCL Array.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
const Size2D & block_size() const
The block size in pixels.
CLHOGDetectorKernel()
Default constructor.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
void run(const Window &window, cl::CommandQueue &queue)
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
Interface for OpenCL tensor.
size_t width
Width of the image region or rectangle.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Class for specifying the size of an image or rectangle.
size_t max_num_values() const
Maximum number of values which can be stored in this array.
virtual const HOGInfo * info() const =0
Interface to be implemented by the child class to return the HOG's metadata.
Container for valid region of a window.
virtual const cl::Buffer & cl_buffer() const =0
Interface to be implemented by the child class to return a reference to the OpenCL buffer containing ...
size_t descriptor_size() const
The size of HOG descriptor.
SimpleTensor< T > threshold(const SimpleTensor< T > &src, T threshold, T false_value, T true_value, ThresholdType type, T upper)
Describe a multidimensional execution window.
virtual size_t num_channels() const =0
The number of channels for each tensor element.
Interface for OpenCL HOG data-object.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)