39 : _input(nullptr), _detection_windows(), _hog_descriptor(nullptr), _bias(0.0f), _threshold(0.0f), _idx_class(0), _num_bins_per_descriptor_x(0), _num_blocks_per_descriptor_y(0), _block_stride_width(0),
40 _block_stride_height(0), _detection_window_width(0), _detection_window_height(0), _max_num_detection_windows(0), _mutex()
57 _detection_windows = detection_windows;
59 _idx_class = idx_class;
63 _num_blocks_per_descriptor_y = (detection_window_size.
height - block_size.
height) / block_stride.
height + 1;
64 _block_stride_width = block_stride.
width;
65 _block_stride_height = block_stride.
height;
66 _detection_window_width = detection_window_size.
width;
67 _detection_window_height = detection_window_size.
height;
74 const size_t num_blocks_x = valid_region.
shape[0];
75 const size_t num_blocks_y = valid_region.shape[1];
78 const size_t num_blocks_per_detection_window_x = detection_window_size.
width / block_stride.
width;
79 const size_t num_blocks_per_detection_window_y = detection_window_size.
height / block_stride.
height;
81 const size_t window_step_x = detection_window_stride.
width / block_stride.
width;
82 const size_t window_step_y = detection_window_stride.
height / block_stride.
height;
89 constexpr
unsigned int num_elems_read_per_iteration = 1;
90 const unsigned int num_rows_read_per_iteration = _num_blocks_per_descriptor_y;
94 INEKernel::configure(win);
110 const auto *in_row_ptr =
reinterpret_cast<const float *
>(in.ptr());
113 float32x4_t score_f32 = vdupq_n_f32(0.0f);
119 for(
size_t yb = 0; yb < _num_blocks_per_descriptor_y; ++yb, in_row_ptr += in_step_y)
123 const int32_t offset_y = yb * _num_bins_per_descriptor_x;
125 for(; xb < static_cast<int32_t>(_num_bins_per_descriptor_x) - 16; xb += 16)
128 const float32x4x4_t a_f32 =
131 vld1q_f32(&in_row_ptr[xb + 0]),
132 vld1q_f32(&in_row_ptr[xb + 4]),
133 vld1q_f32(&in_row_ptr[xb + 8]),
134 vld1q_f32(&in_row_ptr[xb + 12])
139 const float32x4x4_t b_f32 =
142 vld1q_f32(&_hog_descriptor[xb + 0 + offset_y]),
143 vld1q_f32(&_hog_descriptor[xb + 4 + offset_y]),
144 vld1q_f32(&_hog_descriptor[xb + 8 + offset_y]),
145 vld1q_f32(&_hog_descriptor[xb + 12 + offset_y])
150 score_f32 = vmlaq_f32(score_f32, a_f32.val[0], b_f32.val[0]);
151 score_f32 = vmlaq_f32(score_f32, a_f32.val[1], b_f32.val[1]);
152 score_f32 = vmlaq_f32(score_f32, a_f32.val[2], b_f32.val[2]);
153 score_f32 = vmlaq_f32(score_f32, a_f32.val[3], b_f32.val[3]);
156 for(; xb < static_cast<int32_t>(_num_bins_per_descriptor_x); ++xb)
158 const float a = in_row_ptr[xb];
159 const float b = _hog_descriptor[xb + offset_y];
165 score += vgetq_lane_f32(score_f32, 0);
166 score += vgetq_lane_f32(score_f32, 1);
167 score += vgetq_lane_f32(score_f32, 2);
168 score += vgetq_lane_f32(score_f32, 3);
170 if(score > _threshold)
172 if(_detection_windows->
num_values() < _max_num_detection_windows)
175 win.
x = (
id.x() * _block_stride_width);
176 win.
y = (
id.y() * _block_stride_height);
177 win.
width = _detection_window_width;
178 win.
height = _detection_window_height;
NEHOGDetectorKernel()
Default constructor.
const Window & window() const
The maximum window the kernel can be executed on.
Interface for HOG data-object.
const Size2D & detection_window_size() const
The detection window size in pixels.
uint16_t x
Top-left x coordinate.
TensorShape shape
Shape of the valid region.
float score
Confidence value for the detection window.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Describe one of the image's dimensions with a start, end and step.
std::unique_lock< Mutex > unique_lock
Wrapper of unique_lock data-object.
const ValidRegion valid_region
Interface for Neon tensor.
auto floor_to_multiple(S value, T divisor) -> decltype((value/divisor) *divisor)
Computes the largest number smaller or equal to value that is a multiple of divisor.
const Size2D & block_stride() const
The block stride in pixels.
Copyright (c) 2017-2021 Arm Limited.
size_t height
Height of the image region or rectangle.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
Implementation of a rectangular access pattern.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t,...)
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
size_t num_values() const
Number of values currently stored in the array.
uint16_t width
Width of the detection window.
const Size2D & block_size() const
The block size in pixels.
virtual float * descriptor() const =0
Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor...
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
uint16_t idx_class
Index of the class.
uint16_t height
Height of the detection window.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
size_t width
Width of the image region or rectangle.
Class for specifying the size of an image or rectangle.
Detection window used for the object detection.
uint16_t y
Top-left y coordinate.
size_t max_num_values() const
Maximum number of values which can be stored in this array.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function...
virtual const Strides & strides_in_bytes() const =0
The strides in bytes for accessing each dimension of the tensor.
virtual const HOGInfo * info() const =0
Interface to be implemented by the child class to return the HOG's metadata.
Container for valid region of a window.
void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold=0.0f, uint16_t idx_class=0)
Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window...
bool push_back(const T &val)
Append the passed argument to the end of the array if there is room.
Iterator updated by execute_window_loop for each window element.
size_t descriptor_size() const
The size of HOG descriptor.
SimpleTensor< T > threshold(const SimpleTensor< T > &src, T threshold, T false_value, T true_value, ThresholdType type, T upper)
Describe a multidimensional execution window.
virtual size_t num_channels() const =0
The number of channels for each tensor element.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)