// Fragment of an argument-validation routine: its opening line and most of its body are not visible here.
// The visible parameters are tensor-info descriptors for the confidence input, the prior-box input and
// the output, plus a DetectionOutputLayerInfo passed by value.
40 const ITensorInfo *input_conf,
41 const ITensorInfo *input_priorbox,
42 const ITensorInfo *output,
43 DetectionOutputLayerInfo
info)
51 "The priorbox input tensor should be [C3, 2, N].");
// The prior count is dimension 0 of the prior-box shape divided by 4 (integer division).
55 const int num_priors = input_priorbox->tensor_shape()[0] / 4;
// The two messages below state that dimension 0 of the location and confidence inputs must be
// consistent with num_priors; the compared expressions themselves are on lines not visible here.
57 input_loc->tensor_shape()[0],
58 "Number of priors must match number of location predictions.");
60 input_conf->tensor_shape()[0],
61 "Number of priors must match number of confidence predictions.");
// Presumably guards the output checks so they only run for an already-sized output — body not visible, confirm.
64 if (output->total_size() != 0)
// max_size = keep_top_k * dimension 1 of input_loc, or keep_top_k * 1 when input_loc has a single dimension.
66 const unsigned int max_size =
67 info.keep_top_k() * (input_loc->num_dimensions() > 1 ? input_loc->dimension(1) : 1);
// Ordering predicate for (score, payload) pairs: sorts by score, highest first.
//
// pair1, pair2  Pairs whose float `first` member is the score; the `second` member (type T) is ignored.
// Returns true when pair1's score is strictly greater than pair2's, so equal scores are treated as equivalent.
//
// NOTE(review): T is used as a type parameter but its `template` declaration is not visible in this view.
78 bool SortScorePairDescend(
const std::pair<float, T> &pair1,
const std::pair<float, T> &pair2)
80 return pair1.first > pair2.first;
// Unpacks the flat location-prediction tensor into per-image, per-label lists of 4-value boxes.
//
// input_loc                 Tensor read one float at a time via ptr_to_element(Coordinates(offset)).
// num_loc_classes           Number of location classes stored per prior.
// share_location            When true every class maps to the single label -1; otherwise label == class index.
// all_location_predictions  Output, indexed [image][label][prior][0..3]. The outer vector is indexed up to
//                           num - 1 without being resized here, so the caller must have sized it already.
//
// NOTE(review): `num` and `num_priors` are used below but their declarations are not visible in this view —
// presumably they are parameters on signature lines missing from this extraction; confirm.
94 void retrieve_all_loc_predictions(
const ITensor *input_loc,
97 const int num_loc_classes,
98 const bool share_location,
99 std::vector<LabelBBox> &all_location_predictions)
// Pass 1: prepare storage. A label that is not yet present gets a vector of num_priors boxes; the
// ERROR_ON line checks that a label's vector holds exactly num_priors boxes. The branch structure
// joining these statements is not visible in this view.
101 for (
int i = 0; i < num; ++i)
103 for (
int c = 0; c < num_loc_classes; ++c)
105 int label = share_location ? -1 : c;
106 if (all_location_predictions[i].find(label) == all_location_predictions[i].
end())
108 all_location_predictions[i][label].resize(num_priors);
112 ARM_COMPUTE_ERROR_ON(all_location_predictions[i][label].size() !=
static_cast<size_t>(num_priors));
// Pass 2: copy the values. The flat offset is image-major, then prior, then class, with four
// consecutive floats per box (base_ptr .. base_ptr + 3).
117 for (
int i = 0; i < num; ++i)
119 for (
int p = 0; p < num_priors; ++p)
121 for (
int c = 0; c < num_loc_classes; ++c)
123 const int label = share_location ? -1 : c;
124 const int base_ptr = i * num_priors * num_loc_classes * 4 + p * num_loc_classes * 4 + c * 4;
// With share_location every class c writes to the same label (-1), so the last class processed wins.
126 all_location_predictions[i][label][p][0] =
127 *
reinterpret_cast<float *
>(input_loc->ptr_to_element(Coordinates(base_ptr)));
128 all_location_predictions[i][label][p][1] =
129 *
reinterpret_cast<float *
>(input_loc->ptr_to_element(Coordinates(base_ptr + 1)));
130 all_location_predictions[i][label][p][2] =
131 *
reinterpret_cast<float *
>(input_loc->ptr_to_element(Coordinates(base_ptr + 2)));
132 all_location_predictions[i][label][p][3] =
133 *
reinterpret_cast<float *
>(input_loc->ptr_to_element(Coordinates(base_ptr + 3)));
// Reads every confidence score from the input tensor and regroups it per image and per class.
//
// input_conf             Tensor read one float at a time via ptr_to_element(Coordinates(offset)).
// num_priors             Number of priors per image.
// num_classes            Number of classes per prior.
// all_confidence_scores  Output, indexed [image][class] -> vector of num_priors scores. The outer vector is
//                        indexed up to num - 1 without being resized here, so the caller must have sized it.
//
// NOTE(review): `num` is used below but its declaration is not visible in this view — presumably a
// parameter on a signature line missing from this extraction; confirm.
149 void retrieve_all_conf_scores(
const ITensor *input_conf,
151 const int num_priors,
152 const int num_classes,
153 std::vector<
std::map<
int, std::vector<float>>> &all_confidence_scores)
// Staging buffer holding all scores in [image][class][prior] order.
155 std::vector<float> tmp_buffer;
156 tmp_buffer.resize(num * num_priors * num_classes);
// Transpose: the source offset is [image][prior][class], the destination offset is [image][class][prior].
157 for (
int i = 0; i < num; ++i)
159 for (
int c = 0; c < num_classes; ++c)
161 for (
int p = 0; p < num_priors; ++p)
163 tmp_buffer[i * num_classes * num_priors + c * num_priors + p] = *
reinterpret_cast<float *
>(
164 input_conf->ptr_to_element(Coordinates(i * num_classes * num_priors + p * num_classes + c)));
// Hand each contiguous run of num_priors scores to its (image, class) entry.
168 for (
int i = 0; i < num; ++i)
170 for (
int c = 0; c < num_classes; ++c)
// NOTE(review): the resize is immediately overwritten by assign(), which replaces the whole contents.
172 all_confidence_scores[i][c].resize(num_priors);
// NOTE(review): for the last image and class the end pointer is &tmp_buffer[size()]; indexing a vector
// at size() is outside operator[]'s contract — tmp_buffer.data() + offset would avoid that.
173 all_confidence_scores[i][c].assign(&tmp_buffer[i * num_classes * num_priors + c * num_priors],
174 &tmp_buffer[i * num_classes * num_priors + c * num_priors + num_priors]);
// Reads the prior boxes and their variances from the prior-box tensor.
//
// input_priorbox       Tensor read one float at a time via ptr_to_element(Coordinates(offset)).
// num_priors           Number of priors to read.
// all_prior_bboxes     Output: entry i receives the four floats at offsets i*4 .. i*4+3.
// all_prior_variances  Output: entry i receives the four floats at offsets (num_priors+i)*4 .. +3,
//                      i.e. the variances are stored after the num_priors*4 box values.
//
// Both output vectors are indexed up to num_priors - 1 without being resized here, so the caller must
// have sized them beforehand.
189 void retrieve_all_priorbox(
const ITensor *input_priorbox,
190 const int num_priors,
191 std::vector<BBox> &all_prior_bboxes,
192 std::vector<std::array<float, 4>> &all_prior_variances)
// Box values: four consecutive floats per prior.
194 for (
int i = 0; i < num_priors; ++i)
196 all_prior_bboxes[i] = {{*
reinterpret_cast<float *
>(input_priorbox->ptr_to_element(Coordinates(i * 4))),
197 *
reinterpret_cast<float *
>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 1))),
198 *
reinterpret_cast<float *
>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 2))),
199 *
reinterpret_cast<float *
>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 3)))}};
// Variance values: `var` is a scratch array that is fully overwritten for each prior before being copied.
202 std::array<float, 4> var({{0, 0, 0, 0}});
203 for (
int i = 0; i < num_priors; ++i)
205 for (
int j = 0; j < 4; ++j)
207 var[j] = *
reinterpret_cast<float *
>(input_priorbox->ptr_to_element(Coordinates((num_priors + i) * 4 + j)));
209 all_prior_variances[i] = var;
// Turns one encoded box prediction into a decoded box, using its prior box and the prior's variances.
//
// prior_bbox                  Prior box; elements [0],[1] and [2],[3] are used as opposite corners
//                             (width = [2]-[0], height = [3]-[1]).
// prior_variance              Per-coordinate scale applied to the prediction when the variance is NOT
//                             already encoded in the target.
// variance_encoded_in_target  When true the raw prediction is used as-is; otherwise it is multiplied by
//                             the matching prior_variance entry.
// clip_bbox                   Not referenced by any visible line; presumably controls the final loop — confirm.
//
// NOTE(review): several signature lines (including the declarations of `bbox` and `decode_bbox`) and the
// construct that selects between the three decoding variants below are not visible in this view.
224 void DecodeBBox(
const BBox &prior_bbox,
225 const std::array<float, 4> &prior_variance,
227 const bool variance_encoded_in_target,
228 const bool clip_bbox,
// Variant 1: each predicted value is an absolute offset added to the matching prior coordinate.
238 decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]);
239 decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]);
240 decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]);
241 decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]);
// Variant 2: the prediction encodes a centre shift (relative to prior size) and a log-scale size.
247 const float prior_width = prior_bbox[2] - prior_bbox[0];
248 const float prior_height = prior_bbox[3] - prior_bbox[1];
// NOTE(review): `/ 2.` is a double literal, so these two divisions are evaluated in double and narrowed
// back to float, unlike the `/ 2.f` used further down.
254 const float prior_center_x = (prior_bbox[0] + prior_bbox[2]) / 2.;
255 const float prior_center_y = (prior_bbox[1] + prior_bbox[3]) / 2.;
// Centre = offset * prior size + prior centre; size = exp(offset) * prior size.
257 const float decode_bbox_center_x =
258 (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width + prior_center_x;
259 const float decode_bbox_center_y =
260 (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height + prior_center_y;
261 const float decode_bbox_width =
262 (variance_encoded_in_target ? std::exp(bbox[2]) : std::exp(prior_variance[2] * bbox[2])) * prior_width;
263 const float decode_bbox_height =
264 (variance_encoded_in_target ? std::exp(bbox[3]) : std::exp(prior_variance[3] * bbox[3])) * prior_height;
// Convert centre/size back to the two-corner representation.
266 decode_bbox[0] = (decode_bbox_center_x - decode_bbox_width / 2.f);
267 decode_bbox[1] = (decode_bbox_center_y - decode_bbox_height / 2.f);
268 decode_bbox[2] = (decode_bbox_center_x + decode_bbox_width / 2.f);
269 decode_bbox[3] = (decode_bbox_center_y + decode_bbox_height / 2.f);
// Variant 3: each predicted value is a corner offset expressed as a fraction of the prior's width/height.
275 const float prior_width = prior_bbox[2] - prior_bbox[0];
276 const float prior_height = prior_bbox[3] - prior_bbox[1];
// The assignment targets of the next four expressions are on lines not visible in this view.
283 prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width;
285 prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height;
287 prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]) * prior_width;
289 prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]) * prior_height;
// Visits every decoded coordinate by mutable reference; the loop body is not visible here
// (presumably the clip_bbox clamping — confirm).
299 for (
auto &d_bbox : decode_bbox)
// Greedy non-maximum suppression over one set of boxes.
//
// bboxes           Candidate boxes; elements [0],[1] and [2],[3] are used as opposite corners.
// scores           One score per box, same indexing as bboxes.
// score_threshold  Only boxes with a score strictly above this value are considered.
// nms_threshold    Initial overlap limit (copied into adaptive_threshold).
// indices          Output: indices of the boxes that are kept, appended in processing order.
//
// NOTE(review): `top_k`, `eta`, `keep`, `overlap` and `bbox2_size` are used below but declared on lines
// not visible in this view; the braces and any else/early-exit lines are missing as well.
317 void ApplyNMSFast(
const std::vector<BBox> &bboxes,
318 const std::vector<float> &scores,
319 const float score_threshold,
320 const float nms_threshold,
323 std::vector<int> &indices)
// Candidate list of (score, box index) pairs.
328 std::list<std::pair<float, int>> score_index_vec;
331 for (
size_t i = 0; i < scores.size(); ++i)
333 if (scores[i] > score_threshold)
// The size_t index is narrowed to int when the pair is converted to the list's element type.
335 score_index_vec.emplace_back(std::make_pair(scores[i], i));
// Highest score first.
340 score_index_vec.sort(SortScorePairDescend<int>);
// Keep only the top_k best candidates; a top_k of -1 or lower disables the truncation.
343 const int score_index_vec_size = score_index_vec.size();
344 if (top_k > -1 && top_k < score_index_vec_size)
346 score_index_vec.resize(top_k);
350 float adaptive_threshold = nms_threshold;
// Consume candidates from best to worst, comparing each against the boxes kept so far.
353 while (!score_index_vec.empty())
355 const int idx = score_index_vec.front().second;
357 for (
int kept_idx : indices)
// Intersection rectangle: all zeros when the two boxes are disjoint on either axis, otherwise
// the max of the low corners and the min of the high corners.
362 BBox intersect_bbox = std::array<float, 4>({0, 0, 0, 0});
363 if (bboxes[kept_idx][0] > bboxes[idx][2] || bboxes[kept_idx][2] < bboxes[idx][0] ||
364 bboxes[kept_idx][1] > bboxes[idx][3] || bboxes[kept_idx][3] < bboxes[idx][1])
366 intersect_bbox = std::array<float, 4>({{0, 0, 0, 0}});
370 intersect_bbox = std::array<float, 4>(
371 {{std::max(bboxes[idx][0], bboxes[kept_idx][0]), std::max(bboxes[idx][1], bboxes[kept_idx][1]),
372 std::min(bboxes[idx][2], bboxes[kept_idx][2]),
373 std::min(bboxes[idx][3], bboxes[kept_idx][3])}});
376 float intersect_width = intersect_bbox[2] - intersect_bbox[0];
377 float intersect_height = intersect_bbox[3] - intersect_bbox[1];
// Overlap = intersection area / union area. It is only computed for a strictly positive intersection;
// the value used for inverted boxes (high corner below low corner) is on lines not visible here.
380 if (intersect_width > 0 && intersect_height > 0)
382 float intersect_size = intersect_width * intersect_height;
383 float bbox1_size = (bboxes[idx][2] < bboxes[idx][0] || bboxes[idx][3] < bboxes[idx][1])
385 : (bboxes[idx][2] - bboxes[idx][0]) *
386 (bboxes[idx][3] - bboxes[idx][1]);
388 (bboxes[kept_idx][2] < bboxes[kept_idx][0] || bboxes[kept_idx][3] < bboxes[kept_idx][1])
390 : (bboxes[kept_idx][2] - bboxes[kept_idx][0]) *
391 (bboxes[kept_idx][3] - bboxes[kept_idx][1]);
392 overlap = intersect_size / (bbox1_size + bbox2_size - intersect_size);
// A candidate stays eligible while its overlap with the kept box does not exceed the current limit.
394 keep = (overlap <= adaptive_threshold);
// Presumably executed only when `keep` is still true — the guarding condition is not visible; confirm.
403 indices.push_back(idx);
// Drop the candidate that was just processed.
405 score_index_vec.erase(score_index_vec.begin());
// Adaptive NMS: after a kept box, tighten the limit by eta while eta < 1 and the limit is above 0.5.
406 if (keep && eta < 1.f && adaptive_threshold > 0.5f)
408 adaptive_threshold *= eta;
// Member-initializer list fragment; the constructor header and some initializers are not visible here.
// The three input tensor pointers start out null and the listed containers are value-initialized.
415 : _input_loc(nullptr),
416 _input_conf(nullptr),
417 _input_priorbox(nullptr),
422 _all_location_predictions(),
423 _all_confidence_scores(),
425 _all_prior_variances(),
426 _all_decode_bboxes(),
// Fragment of a configuration routine; its header and several statements are not visible in this view.
// The initializer of max_size is on a line missing from this extraction.
444 const unsigned int max_size =
// Remember the input tensors for later use.
452 _input_loc = input_loc;
453 _input_conf = input_conf;
454 _input_priorbox = input_priorbox;
// Size the working containers once: per-image containers get _num entries, per-prior containers
// get _num_priors entries.
460 _all_location_predictions.resize(_num);
461 _all_confidence_scores.resize(_num);
462 _all_prior_bboxes.resize(_num_priors);
463 _all_prior_variances.resize(_num_priors);
464 _all_decode_bboxes.resize(_num);
// Pre-size the decoded-box storage of each image to _num_priors boxes per label; how `label` is
// chosen is on lines not visible here.
466 for (
int i = 0; i < _num; ++i)
476 _all_decode_bboxes[i][label].resize(_num_priors);
479 _all_indices.resize(_num);
// Fragment of the execution routine; its header, many braces and branches, and its tail are not
// visible in this view. Visible stages: (1) unpack the inputs, (2) decode boxes, (3) per-class NMS
// with an optional cross-class re-ranking, (4) walk the kept detections to produce the output.
500 _all_location_predictions);
// Stage 1: unpack confidences and prior boxes into the member containers.
503 retrieve_all_conf_scores(_input_conf, _num, _num_priors, _info.
num_classes(), _all_confidence_scores);
506 retrieve_all_priorbox(_input_priorbox, _num_priors, _all_prior_bboxes, _all_prior_variances);
// Stage 2: decode every prior of every image. Clipping is hard-wired off here.
509 const bool clip_bbox =
false;
510 for (
int i = 0; i < _num; ++i)
521 "Could not find location predictions for label %d.", label);
523 const std::vector<BBox> &label_loc_preds = _all_location_predictions[i].find(label)->second;
525 const int num_bboxes = _all_prior_bboxes.size();
528 for (
int j = 0; j < num_bboxes; ++j)
// The decoded box is written straight into _all_decode_bboxes[i][label][j]; the middle arguments of
// this call are on a line not visible here.
530 DecodeBBox(_all_prior_bboxes[j], _all_prior_variances[j], _info.
code_type(),
532 _all_decode_bboxes[i][label][j]);
// Stage 3: per-image NMS, one class at a time.
539 for (
int i = 0; i < _num; ++i)
541 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
542 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
// Kept box indices per class for this image.
544 std::map<int, std::vector<int>> indices;
// Classes lacking either scores or decoded boxes are detected here; the action taken is not visible.
554 if (conf_scores.find(c) == conf_scores.end() || decode_bboxes.find(label) == decode_bboxes.end())
558 const std::vector<float> &scores = conf_scores.find(c)->second;
559 const std::vector<BBox> &bboxes = decode_bboxes.find(label)->second;
562 _info.
top_k(), indices[c]);
// Running total of detections kept across all classes of this image.
564 num_det += indices[c].size();
// Cross-class re-ranking: gather (score, (label, box index)) for every kept detection. The condition
// that selects this path is not visible — presumably a keep_top_k limit being exceeded; confirm.
570 std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
571 for (
auto const &it : indices)
573 const int label = it.first;
574 const std::vector<int> &label_indices = it.second;
576 if (conf_scores.find(label) == conf_scores.end())
581 const std::vector<float> &scores = conf_scores.find(label)->second;
582 for (
auto idx : label_indices)
585 score_index_pairs.emplace_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
// Highest score first across all classes.
590 std::sort(score_index_pairs.begin(), score_index_pairs.end(), SortScorePairDescend<std::pair<int, int>>);
// Rebuild the per-label index map from the ranked list (any truncation step is not visible here).
595 std::map<int, std::vector<int>> new_indices;
// NOTE(review): iterating by value copies each pair; `const auto &` would avoid the copy.
596 for (
auto score_index_pair : score_index_pairs)
598 int label = score_index_pair.second.first;
599 int idx = score_index_pair.second.second;
600 new_indices[label].push_back(idx);
602 _all_indices[i] = new_indices;
// Alternative path: keep the NMS result unchanged and count every detection.
607 _all_indices[i] = indices;
608 num_to_add = num_det;
610 num_kept += num_to_add;
// Stage 4: walk the kept detections of every image.
617 for (
int i = 0; i < _num; ++i)
619 const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
620 const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
621 for (
auto &it : _all_indices[i])
623 const int label = it.first;
// NOTE(review): find(label)->second is dereferenced here BEFORE the end() check two lines below; if
// `label` is absent from conf_scores this dereferences the end iterator (undefined behaviour).
// The lookup should happen after the check.
624 const std::vector<float> &scores = conf_scores.find(label)->second;
626 if (conf_scores.find(label) == conf_scores.end() || decode_bboxes.find(loc_label) == decode_bboxes.end())
632 const std::vector<BBox> &bboxes = decode_bboxes.find(loc_label)->second;
633 const std::vector<int> &indices = it.second;
// Per-detection loop; its body lies beyond the end of this view.
635 for (
auto idx : indices)