// SoftNMS: score-decaying non-maximum suppression for a single class.
//
// NOTE(review): the leading numbers on each line look like line numbers of the
// original listing; the gaps between them suggest that some lines (braces, a few
// declarations, loop headers, case labels) are missing from this view.
//
// proposals : tensor holding the boxes; for box i the four coordinates of class
//             `class_id` are read at Coordinates(class_id * 4 + k, i), k = 0..3.
// scores_in : per-class score table, indexed [class][box]. Passed by non-const
//             reference and MODIFIED in place (scores are multiplied by a weight).
// inds      : candidate box indices, taken by value and consumed locally.
// info      : supplies nms(), soft_nms_method(), soft_nms_sigma() and
//             soft_nms_min_score_thres().
// class_id  : class whose boxes and scores are processed.
// The function builds `keep`, the list of selected box indices (the return
// statement itself is not visible here).
37 std::vector<int> SoftNMS(
const ITensor *proposals, std::vector<std::vector<T>> &scores_in, std::vector<int> inds,
const BoxNMSLimitInfo &
info,
int class_id)
39 std::vector<int> keep;
// Number of boxes = extent of dimension 1 of the proposals tensor.
40 const int proposals_width = proposals->info()->dimension(1);
// Box corners and areas are cached up front for every box.
42 std::vector<T> x1(proposals_width);
43 std::vector<T> y1(proposals_width);
44 std::vector<T> x2(proposals_width);
45 std::vector<T> y2(proposals_width);
46 std::vector<T> areas(proposals_width);
48 for(
int i = 0; i < proposals_width; ++i)
50 x1[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
51 y1[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
52 x2[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i)));
53 y2[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i)));
// The +1.0 terms presumably treat the corners as inclusive pixel coordinates - TODO confirm.
54 areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0);
// Scan the remaining candidates for the one with the highest current score.
// `max_pos` is declared and updated on lines that are not shown here.
63 for(
unsigned int i = 1; i < inds.size(); ++i)
65 if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)])
// The best-scoring candidate is selected and recorded.
70 int element = inds.at(max_pos);
71 keep.push_back(element);
// NOTE(review): the FIRST entry is erased although the selection was made at
// max_pos - presumably inds[0] and inds[max_pos] are swapped on a line not
// shown here; confirm.
75 inds.erase(inds.begin());
// Survivors of this round are collected here and become the next `inds`.
77 std::vector<int> sorted_indices_temp;
// Intersection rectangle between candidate `idx` and the selected `element`
// (`idx` comes from a loop whose header is not visible).
80 const auto xx1 = std::max(x1[idx], x1[element]);
81 const auto yy1 = std::max(y1[idx], y1[element]);
82 const auto xx2 = std::min(x2[idx], x2[element]);
83 const auto yy2 = std::min(y2[idx], y2[element]);
// Width/height are clamped to 0 so disjoint boxes give an empty intersection.
85 const auto w = std::max((xx2 - xx1 + 1.f), 0.f);
86 const auto h = std::max((yy2 - yy1 + 1.f), 0.f);
87 const auto inter =
w * h;
// Overlap ratio: intersection area divided by union area.
88 const auto ovr = inter / (areas[element] + areas[idx] - inter);
// The decay weight depends on the configured method; the case labels are not
// visible, only the three formulas below.
92 switch(info.soft_nms_method())
// Variant 1: above the NMS threshold the score is scaled by (1 - overlap).
95 weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f;
// Variant 2: smooth decay exp(-overlap^2 / sigma), applied for any overlap.
98 weight = std::exp(-1.f * ovr * ovr / info.soft_nms_sigma());
// Variant 3: hard suppression - weight 0 above the NMS threshold, 1 otherwise.
101 weight = (ovr > info.nms()) ? 0.f : 1.f;
// The decayed score is written back into the caller's table.
108 scores_in[class_id][idx] *= weight;
// Candidates whose decayed score falls below the minimum threshold are dropped.
109 if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres())
111 sorted_indices_temp.push_back(idx);
// Continue the next round with the surviving candidates only.
114 inds = sorted_indices_temp;
// NonMaximaSuppression: overlap-threshold suppression for a single class.
//
// NOTE(review): the leading numbers look like line numbers of the original
// listing; gaps indicate lines (braces and some statements) missing from this view.
//
// proposals      : tensor holding the boxes; for box i the coordinates of class
//                  `class_id` are read at Coordinates(class_id * 4 + k, i), k = 0..3.
// sorted_indices : candidate box indices, taken by value and consumed locally.
//                  The first entry is treated as the current reference box, so
//                  the caller presumably passes them best-score-first - confirm.
// info           : supplies nms(), suppress_size(), min_size(), im_width() and
//                  im_height().
// class_id       : class whose boxes are processed.
// `keep` is the result list; the statements that append to it and return it are
// not visible here.
120 template <
typename T>
121 std::vector<int> NonMaximaSuppression(
const ITensor *proposals, std::vector<int> sorted_indices,
const BoxNMSLimitInfo &info,
int class_id)
123 std::vector<int> keep;
// Number of boxes = extent of dimension 1 of the proposals tensor.
125 const int proposals_width = proposals->info()->dimension(1);
// Box corners and areas are cached up front for every box.
127 std::vector<T> x1(proposals_width);
128 std::vector<T> y1(proposals_width);
129 std::vector<T> x2(proposals_width);
130 std::vector<T> y2(proposals_width);
131 std::vector<T> areas(proposals_width);
133 for(
int i = 0; i < proposals_width; ++i)
135 x1[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
136 y1[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
137 x2[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i)));
138 y2[i] = *
reinterpret_cast<T *
>(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i)));
// The +1.0 terms presumably treat the corners as inclusive pixel coordinates - TODO confirm.
139 areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0);
// Each pass handles the front candidate and filters the rest against it.
142 while(!sorted_indices.empty())
// Reference box for this pass.
144 int i = sorted_indices.at(0);
// `sorted_indices_temp` is the candidate list without its first entry;
// `new_indices` will hold POSITIONS inside that shortened list.
147 std::vector<int> sorted_indices_temp = sorted_indices;
148 std::vector<int> new_indices;
149 sorted_indices_temp.erase(sorted_indices_temp.begin());
151 for(
unsigned int j = 0; j < sorted_indices_temp.size(); ++j)
// Intersection rectangle between candidate j and the reference box.
// The intermediates are stored as float regardless of T.
153 const float xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]);
154 const float yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]);
155 const float xx2 = std::min(x2[sorted_indices_temp.at(j)], x2[i]);
156 const float yy2 = std::min(y2[sorted_indices_temp.at(j)], y2[i]);
// Width/height are clamped to 0 so disjoint boxes give an empty intersection.
158 const float w = std::max((xx2 - xx1 + 1.f), 0.f);
159 const float h = std::max((yy2 - yy1 + 1.f), 0.f);
160 const float inter = w * h;
// Overlap ratio: intersection area divided by union area.
161 const float ovr = inter / (areas[i] + areas[sorted_indices_temp.at(j)] - inter);
// Centre of the intersection rectangle.
162 const float ctr_x = xx1 + (w / 2);
163 const float ctr_y = yy1 + (h / 2);
// Size filter, only active when info.suppress_size() is true. Note that it is
// evaluated on the INTERSECTION rectangle (w, h, ctr_x, ctr_y), not on the
// candidate box itself.
166 const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && ctr_x < info.im_width() && ctr_y < info.im_height());
// A candidate survives when its overlap does not exceed the NMS threshold and
// it passes the size filter.
167 if(ovr <= info.nms() && keep_size)
169 new_indices.push_back(j);
// Translate surviving positions back into box indices. The +1 compensates for
// the first entry that was removed from `sorted_indices_temp`.
173 const unsigned int new_indices_size = new_indices.size();
174 std::vector<int> new_sorted_indices(new_indices_size);
175 for(
unsigned int i = 0; i < new_indices_size; ++i)
177 new_sorted_indices[i] = sorted_indices[new_indices[i] + 1];
// The survivors form the candidate list of the next pass.
179 sorted_indices = new_sorted_indices;
// Member-initializer list: every tensor pointer member listed here starts as nullptr.
// The list ends with a comma, so it continues on a line that is not shown.
187 : _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr),
// Typed implementation of the kernel: per batch and per class it selects boxes
// (SoftNMS or sort + NonMaximaSuppression), optionally limits the number of
// detections, and writes scores/classes/boxes (and optionally kept indices).
//
// NOTE(review): the leading numbers look like line numbers of the original
// listing; many lines are missing from this view (function signature, braces,
// conditions, the definitions of `num_classes`, `scores_count`, `ret_idx`,
// `cur_out_idx`, `cur_out_scores`, `cur_out_classes`, ...). Comments below only
// describe what the visible lines show.
197 template <
typename T>
// Without a batch-splits tensor a single batch is assumed.
200 const int batch_size = _batch_splits_in ==
nullptr ? 1 : _batch_splits_in->
info()->
dimension(0);
// Bookkeeping: kept count per batch, kept box indices per class, running total.
203 std::vector<int> total_keep_per_batch(batch_size);
204 std::vector<std::vector<int>> keeps(num_classes);
205 int total_keep_count = 0;
// Local score table indexed [class][score index]; the statements that fill it
// inside the two loops below are not visible.
207 std::vector<std::vector<T>> in_scores(num_classes, std::vector<T>(scores_count));
208 for(
int i = 0; i < scores_count; ++i)
210 for(
int j = 0; j < num_classes; ++j)
// Offset of the current batch; advanced by total_keep_count at the end of each batch.
217 int cur_start_idx = 0;
218 for(
int b = 0;
b < batch_size; ++
b)
// Box count of batch b, read from the batch-splits tensor as a T and converted
// to int; 1 when no batch-splits tensor is given.
220 const int num_boxes = _batch_splits_in ==
nullptr ? 1 :
static_cast<int>(*
reinterpret_cast<T *
>(_batch_splits_in->
ptr_to_element(
Coordinates(
b))));
// Class 0 is skipped unless it is the only class (presumably class 0 is the
// background class - confirm).
222 const int j_start = (num_classes == 1 ? 0 : 1);
223 for(
int j = j_start; j < num_classes; ++j)
// Copy of the scores of class j; `inds` is filled on lines not shown.
225 std::vector<T> cur_scores(scores_count);
226 std::vector<int> inds;
227 for(
int i = 0; i < scores_count; ++i)
229 const T score = in_scores[j][i];
230 cur_scores[i] = score;
// Path 1: SoftNMS (which rescales in_scores in place). The condition choosing
// between the two paths is not visible - presumably soft-NMS being enabled.
239 keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j);
// Path 2: order candidates by descending score, then run plain NMS.
243 std::sort(inds.data(), inds.data() + inds.size(),
244 [&cur_scores](
int lhs,
int rhs)
246 return cur_scores[lhs] > cur_scores[rhs];
249 keeps[j] = NonMaximaSuppression<T>(_boxes_in, inds, _info, j);
251 total_keep_count += keeps[j].size();
// Helper: gathers the scores of all kept boxes and sorts them ascending.
// Captures in_scores and keeps by reference, total_keep_count by value.
// NOTE(review): the gather starts at class 1 while the selection loop above
// starts at j_start, which is 0 when there is a single class - confirm this is
// intended.
257 auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]()
259 std::vector<T> ret(total_keep_count);
262 for(
unsigned int i = 1; i < keeps.size(); ++i)
264 auto &cur_keep = keeps[i];
265 for(
auto &ckv : cur_keep)
267 ret[ret_idx++] = in_scores[i][ckv];
271 std::sort(ret.data(), ret.data() + ret.size());
// Detection limit: with scores sorted ascending, the element at
// size - detections_per_im() is the detections_per_im()-th highest score and is
// used as the cut-off.
// NOTE(review): this index is only valid when detections_per_im() is between 1
// and the number of kept scores; the guarding condition is presumably on a line
// not shown - confirm.
276 auto all_scores_sorted = get_all_scores_sorted();
277 const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.
detections_per_im()];
// Only kept boxes whose score reaches the cut-off are retained (classes 1 and up).
278 for(
int j = 1; j < num_classes; ++j)
280 auto &cur_keep = keeps[j];
281 std::vector<int> new_keeps_j;
282 for(
auto &k : cur_keep)
284 if(in_scores[j][k] >= image_thresh)
286 new_keeps_j.push_back(k);
289 keeps[j] = new_keeps_j;
// Record how many boxes were kept for this batch.
294 total_keep_per_batch[
b] = total_keep_count;
// Output stage: for every processed class write the kept scores and the class
// index (as T). `box_column` is the offset used for the box output, 4 values
// per box; the box-copy statements themselves are not visible.
298 for(
int j = j_start; j < num_classes; ++j)
300 auto &cur_keep = keeps[j];
303 const int box_column = (cur_start_idx + cur_out_idx) * 4;
305 for(
unsigned int k = 0; k < cur_keep.size(); ++k)
307 cur_out_scores[k] = in_scores[j][cur_keep[k]];
308 cur_out_classes[k] =
static_cast<T
>(j);
319 cur_out_idx += cur_keep.size();
// Optional output: the kept box indices, stored as T, for all classes (from 0).
322 if(_keeps !=
nullptr)
325 for(
int j = 0; j < num_classes; ++j)
327 for(
unsigned int i = 0; i < keeps[j].size(); ++i)
329 *
reinterpret_cast<T *
>(_keeps->
ptr_to_element(
Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i));
332 cur_out_idx += keeps[j].size();
// Move the output offset past everything written for this batch.
337 cur_start_idx += total_keep_count;
// Optional output: per-batch values; the loop body is not visible (presumably
// it writes total_keep_per_batch[b] - confirm).
340 if(_batch_splits_out !=
nullptr)
342 for(
int b = 0;
b < batch_size; ++
b)
// Fragment of configure(): validates the arguments and stores the tensor pointers.
// NOTE(review): the signature, most validation statements and the window setup
// are on lines missing from this view.
// Number of classes = extent of dimension 0 of the scores input.
355 const unsigned int num_classes = scores_in->
info()->
dimension(0);
// Per the message, keeps_size is mandatory whenever keeps is requested (the
// enclosing condition is not visible).
366 ARM_COMPUTE_ERROR_ON_MSG(keeps_size ==
nullptr,
"keeps_size cannot be nullptr if keeps has to be provided as output");
// Additional checks apply only when the optional batch-splits tensors are given
// (their bodies are not visible).
372 if(batch_splits_in !=
nullptr)
376 if(batch_splits_out !=
nullptr)
// Keep the tensor pointers for use at run time. Some assignments (for example
// for the classes and keeps members) appear to be on lines not shown.
381 _scores_in = scores_in;
382 _boxes_in = boxes_in;
383 _batch_splits_in = batch_splits_in;
384 _scores_out = scores_out;
385 _boxes_out = boxes_out;
387 _batch_splits_out = batch_splits_out;
389 _keeps_size = keeps_size;
// Register the execution window `win` (computed on a line not shown) with the base kernel.
395 IKernel::configure(win);
// Fragment of run(): calls the typed implementation, instantiated for float and
// for half. The selection between the two is not visible - presumably a switch
// on the input tensor's data type.
408 run_nmslimit<float>();
411 run_nmslimit<half>();
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Initialise the kernel's input and output tensors.
float score_thresh() const
Get the score threshold.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
BoxWithNonMaximaSuppressionLimit Information class.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
bool is_parallelisable() const override
Indicates whether or not the kernel is parallelisable.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w)
1 channel, 1 U32 per channel
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Class to describe a number of elements in each dimension.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
CPPBoxWithNonMaximaSuppressionLimitKernel()
Default constructor.
void swap(Window &lhs, Window &rhs)
Describe a multidimensional execution window.
bool soft_nms_enabled() const
Check if soft NMS is enabled.
int detections_per_im() const
Get the number of detections.