Compute Library
 21.11
CPPDetectionOutputLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
30 
31 #include "src/common/utils/Log.h"
32 
33 #include <list>
34 
35 namespace arm_compute
36 {
37 namespace
38 {
39 Status validate_arguments(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, DetectionOutputLayerInfo info)
40 {
41  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
43  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_loc, input_conf, input_priorbox);
44  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_loc->num_dimensions() > 2, "The location input tensor should be [C1, N].");
45  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_conf->num_dimensions() > 2, "The location input tensor should be [C2, N].");
46  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_priorbox->num_dimensions() > 3, "The priorbox input tensor should be [C3, 2, N].");
47 
48  ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.eta() <= 0.f && info.eta() > 1.f, "Eta should be between 0 and 1");
49 
50  const int num_priors = input_priorbox->tensor_shape()[0] / 4;
51  ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_loc_classes() * 4)) != input_loc->tensor_shape()[0], "Number of priors must match number of location predictions.");
52  ARM_COMPUTE_RETURN_ERROR_ON_MSG(static_cast<size_t>((num_priors * info.num_classes())) != input_conf->tensor_shape()[0], "Number of priors must match number of confidence predictions.");
53 
54  // Validate configured output
55  if(output->total_size() != 0)
56  {
57  const unsigned int max_size = info.keep_top_k() * (input_loc->num_dimensions() > 1 ? input_loc->dimension(1) : 1);
58  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), TensorShape(7U, max_size));
60  }
61 
62  return Status{};
63 }
64 
65 /** Function used to sort pair<float, T> in descend order based on the score (first) value.
66  */
67 template <typename T>
68 bool SortScorePairDescend(const std::pair<float, T> &pair1,
69  const std::pair<float, T> &pair2)
70 {
71  return pair1.first > pair2.first;
72 }
73 
74 /** Get location predictions from input_loc.
75  *
76  * @param[in] input_loc The input location prediction.
77  * @param[in] num The number of images.
78  * @param[in] num_priors number of predictions per class.
79  * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
80  * and is equal to number of classes needed to predict otherwise.
81  * @param[in] share_location If true, all classes share the same location prediction.
82  * @param[out] all_location_predictions All the location predictions.
83  *
84  */
85 void retrieve_all_loc_predictions(const ITensor *input_loc, const int num,
86  const int num_priors, const int num_loc_classes,
87  const bool share_location, std::vector<LabelBBox> &all_location_predictions)
88 {
89  for(int i = 0; i < num; ++i)
90  {
91  for(int c = 0; c < num_loc_classes; ++c)
92  {
93  int label = share_location ? -1 : c;
94  if(all_location_predictions[i].find(label) == all_location_predictions[i].end())
95  {
96  all_location_predictions[i][label].resize(num_priors);
97  }
98  else
99  {
100  ARM_COMPUTE_ERROR_ON(all_location_predictions[i][label].size() != static_cast<size_t>(num_priors));
101  break;
102  }
103  }
104  }
105  for(int i = 0; i < num; ++i)
106  {
107  for(int p = 0; p < num_priors; ++p)
108  {
109  for(int c = 0; c < num_loc_classes; ++c)
110  {
111  const int label = share_location ? -1 : c;
112  const int base_ptr = i * num_priors * num_loc_classes * 4 + p * num_loc_classes * 4 + c * 4;
113  //xmin, ymin, xmax, ymax
114  all_location_predictions[i][label][p][0] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr)));
115  all_location_predictions[i][label][p][1] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 1)));
116  all_location_predictions[i][label][p][2] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 2)));
117  all_location_predictions[i][label][p][3] = *reinterpret_cast<float *>(input_loc->ptr_to_element(Coordinates(base_ptr + 3)));
118  }
119  }
120  }
121 }
122 
123 /** Get confidence predictions from input_conf.
124  *
125  * @param[in] input_loc The input location prediction.
126  * @param[in] num The number of images.
127  * @param[in] num_priors Number of predictions per class.
128  * @param[in] num_loc_classes Number of location classes. It is 1 if share_location is true,
129  * and is equal to number of classes needed to predict otherwise.
130  * @param[out] all_location_predictions All the location predictions.
131  *
132  */
133 void retrieve_all_conf_scores(const ITensor *input_conf, const int num,
134  const int num_priors, const int num_classes,
135  std::vector<std::map<int, std::vector<float>>> &all_confidence_scores)
136 {
137  std::vector<float> tmp_buffer;
138  tmp_buffer.resize(num * num_priors * num_classes);
139  for(int i = 0; i < num; ++i)
140  {
141  for(int c = 0; c < num_classes; ++c)
142  {
143  for(int p = 0; p < num_priors; ++p)
144  {
145  tmp_buffer[i * num_classes * num_priors + c * num_priors + p] =
146  *reinterpret_cast<float *>(input_conf->ptr_to_element(Coordinates(i * num_classes * num_priors + p * num_classes + c)));
147  }
148  }
149  }
150  for(int i = 0; i < num; ++i)
151  {
152  for(int c = 0; c < num_classes; ++c)
153  {
154  all_confidence_scores[i][c].resize(num_priors);
155  all_confidence_scores[i][c].assign(&tmp_buffer[i * num_classes * num_priors + c * num_priors],
156  &tmp_buffer[i * num_classes * num_priors + c * num_priors + num_priors]);
157  }
158  }
159 }
160 
161 /** Get prior boxes from input_priorbox.
162  *
163  * @param[in] input_priorbox The input location prediction.
164  * @param[in] num_priors Number of priors.
165  * @param[in] num_loc_classes number of location classes. It is 1 if share_location is true,
166  * and is equal to number of classes needed to predict otherwise.
167  * @param[out] all_prior_bboxes If true, all classes share the same location prediction.
168  * @param[out] all_location_predictions All the location predictions.
169  *
170  */
171 void retrieve_all_priorbox(const ITensor *input_priorbox,
172  const int num_priors,
173  std::vector<BBox> &all_prior_bboxes,
174  std::vector<std::array<float, 4>> &all_prior_variances)
175 {
176  for(int i = 0; i < num_priors; ++i)
177  {
178  all_prior_bboxes[i] =
179  {
180  {
181  *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4))),
182  *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 1))),
183  *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 2))),
184  *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates(i * 4 + 3)))
185  }
186  };
187  }
188 
189  std::array<float, 4> var({ { 0, 0, 0, 0 } });
190  for(int i = 0; i < num_priors; ++i)
191  {
192  for(int j = 0; j < 4; ++j)
193  {
194  var[j] = *reinterpret_cast<float *>(input_priorbox->ptr_to_element(Coordinates((num_priors + i) * 4 + j)));
195  }
196  all_prior_variances[i] = var;
197  }
198 }
199 
200 /** Decode a bbox according to a prior bbox.
201  *
202  * @param[in] prior_bbox The input prior bounding boxes.
203  * @param[in] prior_variance The corresponding input variance.
204  * @param[in] code_type The detection output code type used to decode the results.
205  * @param[in] variance_encoded_in_target If true, the variance is encoded in target.
206  * @param[in] clip_bbox If true, the results should be between 0.f and 1.f.
207  * @param[in] bbox The input bbox to decode
208  * @param[out] decode_bbox The decoded bboxes.
209  *
210  */
211 void DecodeBBox(const BBox &prior_bbox, const std::array<float, 4> &prior_variance,
212  const DetectionOutputLayerCodeType code_type, const bool variance_encoded_in_target,
213  const bool clip_bbox, const BBox &bbox, BBox &decode_bbox)
214 {
215  // if the variance is encoded in target, we simply need to add the offset predictions
216  // otherwise we need to scale the offset accordingly.
217  switch(code_type)
218  {
220  {
221  decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]);
222  decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]);
223  decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]);
224  decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]);
225 
226  break;
227  }
229  {
230  const float prior_width = prior_bbox[2] - prior_bbox[0];
231  const float prior_height = prior_bbox[3] - prior_bbox[1];
232 
233  // Check if the prior width and height are right
234  ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
235  ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
236 
237  const float prior_center_x = (prior_bbox[0] + prior_bbox[2]) / 2.;
238  const float prior_center_y = (prior_bbox[1] + prior_bbox[3]) / 2.;
239 
240  const float decode_bbox_center_x = (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width + prior_center_x;
241  const float decode_bbox_center_y = (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height + prior_center_y;
242  const float decode_bbox_width = (variance_encoded_in_target ? std::exp(bbox[2]) : std::exp(prior_variance[2] * bbox[2])) * prior_width;
243  const float decode_bbox_height = (variance_encoded_in_target ? std::exp(bbox[3]) : std::exp(prior_variance[3] * bbox[3])) * prior_height;
244 
245  decode_bbox[0] = (decode_bbox_center_x - decode_bbox_width / 2.f);
246  decode_bbox[1] = (decode_bbox_center_y - decode_bbox_height / 2.f);
247  decode_bbox[2] = (decode_bbox_center_x + decode_bbox_width / 2.f);
248  decode_bbox[3] = (decode_bbox_center_y + decode_bbox_height / 2.f);
249 
250  break;
251  }
253  {
254  const float prior_width = prior_bbox[2] - prior_bbox[0];
255  const float prior_height = prior_bbox[3] - prior_bbox[1];
256 
257  // Check if the prior width and height are greater than 0
258  ARM_COMPUTE_ERROR_ON(prior_width <= 0.f);
259  ARM_COMPUTE_ERROR_ON(prior_height <= 0.f);
260 
261  decode_bbox[0] = prior_bbox[0] + (variance_encoded_in_target ? bbox[0] : prior_variance[0] * bbox[0]) * prior_width;
262  decode_bbox[1] = prior_bbox[1] + (variance_encoded_in_target ? bbox[1] : prior_variance[1] * bbox[1]) * prior_height;
263  decode_bbox[2] = prior_bbox[2] + (variance_encoded_in_target ? bbox[2] : prior_variance[2] * bbox[2]) * prior_width;
264  decode_bbox[3] = prior_bbox[3] + (variance_encoded_in_target ? bbox[3] : prior_variance[3] * bbox[3]) * prior_height;
265 
266  break;
267  }
268  default:
269  ARM_COMPUTE_ERROR("Unsupported Detection Output Code Type.");
270  }
271 
272  if(clip_bbox)
273  {
274  for(auto &d_bbox : decode_bbox)
275  {
276  d_bbox = utility::clamp(d_bbox, 0.f, 1.f);
277  }
278  }
279 }
280 
281 /** Do non maximum suppression given bboxes and scores.
282  *
283  * @param[in] bboxes The input bounding boxes.
284  * @param[in] scores The corresponding input confidence.
285  * @param[in] score_threshold The threshold used to filter detection results.
286  * @param[in] nms_threshold The threshold used in non maximum suppression.
287  * @param[in] eta Adaptation rate for nms threshold.
288  * @param[in] top_k If not -1, keep at most top_k picked indices.
289  * @param[out] indices The kept indices of bboxes after nms.
290  *
291  */
292 void ApplyNMSFast(const std::vector<BBox> &bboxes,
293  const std::vector<float> &scores, const float score_threshold,
294  const float nms_threshold, const float eta, const int top_k,
295  std::vector<int> &indices)
296 {
297  ARM_COMPUTE_ERROR_ON_MSG(bboxes.size() != scores.size(), "bboxes and scores have different size.");
298 
299  // Get top_k scores (with corresponding indices).
300  std::list<std::pair<float, int>> score_index_vec;
301 
302  // Generate index score pairs.
303  for(size_t i = 0; i < scores.size(); ++i)
304  {
305  if(scores[i] > score_threshold)
306  {
307  score_index_vec.emplace_back(std::make_pair(scores[i], i));
308  }
309  }
310 
311  // Sort the score pair according to the scores in descending order
312  score_index_vec.sort(SortScorePairDescend<int>);
313 
314  // Keep top_k scores if needed.
315  const int score_index_vec_size = score_index_vec.size();
316  if(top_k > -1 && top_k < score_index_vec_size)
317  {
318  score_index_vec.resize(top_k);
319  }
320 
321  // Do nms.
322  float adaptive_threshold = nms_threshold;
323  indices.clear();
324 
325  while(!score_index_vec.empty())
326  {
327  const int idx = score_index_vec.front().second;
328  bool keep = true;
329  for(int kept_idx : indices)
330  {
331  if(keep)
332  {
333  // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
334  BBox intersect_bbox = std::array<float, 4>({ 0, 0, 0, 0 });
335  if(bboxes[kept_idx][0] > bboxes[idx][2] || bboxes[kept_idx][2] < bboxes[idx][0] || bboxes[kept_idx][1] > bboxes[idx][3] || bboxes[kept_idx][3] < bboxes[idx][1])
336  {
337  intersect_bbox = std::array<float, 4>({ { 0, 0, 0, 0 } });
338  }
339  else
340  {
341  intersect_bbox = std::array<float, 4>({ {
342  std::max(bboxes[idx][0], bboxes[kept_idx][0]),
343  std::max(bboxes[idx][1], bboxes[kept_idx][1]),
344  std::min(bboxes[idx][2], bboxes[kept_idx][2]),
345  std::min(bboxes[idx][3], bboxes[kept_idx][3])
346  }
347  });
348  }
349 
350  float intersect_width = intersect_bbox[2] - intersect_bbox[0];
351  float intersect_height = intersect_bbox[3] - intersect_bbox[1];
352 
353  float overlap = 0.f;
354  if(intersect_width > 0 && intersect_height > 0)
355  {
356  float intersect_size = intersect_width * intersect_height;
357  float bbox1_size = (bboxes[idx][2] < bboxes[idx][0]
358  || bboxes[idx][3] < bboxes[idx][1]) ?
359  0.f :
360  (bboxes[idx][2] - bboxes[idx][0]) * (bboxes[idx][3] - bboxes[idx][1]); //BBoxSize(bboxes[idx]);
361  float bbox2_size = (bboxes[kept_idx][2] < bboxes[kept_idx][0]
362  || bboxes[kept_idx][3] < bboxes[kept_idx][1]) ?
363  0.f :
364  (bboxes[kept_idx][2] - bboxes[kept_idx][0]) * (bboxes[kept_idx][3] - bboxes[kept_idx][1]); // BBoxSize(bboxes[kept_idx]);
365  overlap = intersect_size / (bbox1_size + bbox2_size - intersect_size);
366  }
367  keep = (overlap <= adaptive_threshold);
368  }
369  else
370  {
371  break;
372  }
373  }
374  if(keep)
375  {
376  indices.push_back(idx);
377  }
378  score_index_vec.erase(score_index_vec.begin());
379  if(keep && eta < 1.f && adaptive_threshold > 0.5f)
380  {
381  adaptive_threshold *= eta;
382  }
383  }
384 }
385 } // namespace
386 
388  : _input_loc(nullptr), _input_conf(nullptr), _input_priorbox(nullptr), _output(nullptr), _info(), _num_priors(), _num(), _all_location_predictions(), _all_confidence_scores(), _all_prior_bboxes(),
389  _all_prior_variances(), _all_decode_bboxes(), _all_indices()
390 {
391 }
392 
393 void CPPDetectionOutputLayer::configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox,
394  ITensor *output, DetectionOutputLayerInfo info)
395 {
396  ARM_COMPUTE_ERROR_ON_NULLPTR(input_loc, input_conf, input_priorbox, output);
397  ARM_COMPUTE_LOG_PARAMS(input_loc, input_conf, input_priorbox, output, info);
398 
399  // Output auto initialization if not yet initialized
400  // Since the number of bboxes to kept is unknown before nms, the shape is set to the maximum
401  // The maximum is keep_top_k * input_loc_size[1]
402  // Each row is a 7 dimension std::vector, which stores [image_id, label, confidence, xmin, ymin, xmax, ymax]
403  const unsigned int max_size = info.keep_top_k() * (input_loc->info()->num_dimensions() > 1 ? input_loc->info()->dimension(1) : 1);
404  auto_init_if_empty(*output->info(), input_loc->info()->clone()->set_tensor_shape(TensorShape(7U, max_size)));
405 
406  // Perform validation step
407  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_loc->info(), input_conf->info(), input_priorbox->info(), output->info(), info));
408 
409  _input_loc = input_loc;
410  _input_conf = input_conf;
411  _input_priorbox = input_priorbox;
412  _output = output;
413  _info = info;
414  _num_priors = input_priorbox->info()->dimension(0) / 4;
415  _num = (_input_loc->info()->num_dimensions() > 1 ? _input_loc->info()->dimension(1) : 1);
416 
417  _all_location_predictions.resize(_num);
418  _all_confidence_scores.resize(_num);
419  _all_prior_bboxes.resize(_num_priors);
420  _all_prior_variances.resize(_num_priors);
421  _all_decode_bboxes.resize(_num);
422 
423  for(int i = 0; i < _num; ++i)
424  {
425  for(int c = 0; c < _info.num_loc_classes(); ++c)
426  {
427  const int label = _info.share_location() ? -1 : c;
428  if(label == _info.background_label_id())
429  {
430  // Ignore background class.
431  continue;
432  }
433  _all_decode_bboxes[i][label].resize(_num_priors);
434  }
435  }
436  _all_indices.resize(_num);
437 
438  Coordinates coord;
439  coord.set_num_dimensions(output->info()->num_dimensions());
440  output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
441 }
442 
443 Status CPPDetectionOutputLayer::validate(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, DetectionOutputLayerInfo info)
444 {
445  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_loc, input_conf, input_priorbox, output, info));
446  return Status{};
447 }
448 
450 {
451  // Retrieve all location predictions.
452  retrieve_all_loc_predictions(_input_loc, _num, _num_priors, _info.num_loc_classes(), _info.share_location(), _all_location_predictions);
453 
454  // Retrieve all confidences.
455  retrieve_all_conf_scores(_input_conf, _num, _num_priors, _info.num_classes(), _all_confidence_scores);
456 
457  // Retrieve all prior bboxes.
458  retrieve_all_priorbox(_input_priorbox, _num_priors, _all_prior_bboxes, _all_prior_variances);
459 
460  // Decode all loc predictions to bboxes
461  const bool clip_bbox = false;
462  for(int i = 0; i < _num; ++i)
463  {
464  for(int c = 0; c < _info.num_loc_classes(); ++c)
465  {
466  const int label = _info.share_location() ? -1 : c;
467  if(label == _info.background_label_id())
468  {
469  // Ignore background class.
470  continue;
471  }
472  ARM_COMPUTE_ERROR_ON_MSG_VAR(_all_location_predictions[i].find(label) == _all_location_predictions[i].end(), "Could not find location predictions for label %d.", label);
473 
474  const std::vector<BBox> &label_loc_preds = _all_location_predictions[i].find(label)->second;
475 
476  const int num_bboxes = _all_prior_bboxes.size();
477  ARM_COMPUTE_ERROR_ON(_all_prior_variances[i].size() != 4);
478 
479  for(int j = 0; j < num_bboxes; ++j)
480  {
481  DecodeBBox(_all_prior_bboxes[j], _all_prior_variances[j], _info.code_type(), _info.variance_encoded_in_target(), clip_bbox, label_loc_preds[j], _all_decode_bboxes[i][label][j]);
482  }
483  }
484  }
485 
486  int num_kept = 0;
487 
488  for(int i = 0; i < _num; ++i)
489  {
490  const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
491  const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
492 
493  std::map<int, std::vector<int>> indices;
494  int num_det = 0;
495  for(int c = 0; c < _info.num_classes(); ++c)
496  {
497  if(c == _info.background_label_id())
498  {
499  // Ignore background class
500  continue;
501  }
502  const int label = _info.share_location() ? -1 : c;
503  if(conf_scores.find(c) == conf_scores.end() || decode_bboxes.find(label) == decode_bboxes.end())
504  {
505  ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
506  }
507  const std::vector<float> &scores = conf_scores.find(c)->second;
508  const std::vector<BBox> &bboxes = decode_bboxes.find(label)->second;
509 
510  ApplyNMSFast(bboxes, scores, _info.confidence_threshold(), _info.nms_threshold(), _info.eta(), _info.top_k(), indices[c]);
511 
512  num_det += indices[c].size();
513  }
514 
515  int num_to_add = 0;
516  if(_info.keep_top_k() > -1 && num_det > _info.keep_top_k())
517  {
518  std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
519  for(auto const &it : indices)
520  {
521  const int label = it.first;
522  const std::vector<int> &label_indices = it.second;
523 
524  if(conf_scores.find(label) == conf_scores.end())
525  {
526  ARM_COMPUTE_ERROR_VAR("Could not find predictions for label %d.", label);
527  }
528 
529  const std::vector<float> &scores = conf_scores.find(label)->second;
530  for(auto idx : label_indices)
531  {
532  ARM_COMPUTE_ERROR_ON(idx > static_cast<int>(scores.size()));
533  score_index_pairs.emplace_back(std::make_pair(scores[idx], std::make_pair(label, idx)));
534  }
535  }
536 
537  // Keep top k results per image.
538  std::sort(score_index_pairs.begin(), score_index_pairs.end(), SortScorePairDescend<std::pair<int, int>>);
539  score_index_pairs.resize(_info.keep_top_k());
540 
541  // Store the new indices.
542 
543  std::map<int, std::vector<int>> new_indices;
544  for(auto score_index_pair : score_index_pairs)
545  {
546  int label = score_index_pair.second.first;
547  int idx = score_index_pair.second.second;
548  new_indices[label].push_back(idx);
549  }
550  _all_indices[i] = new_indices;
551  num_to_add = _info.keep_top_k();
552  }
553  else
554  {
555  _all_indices[i] = indices;
556  num_to_add = num_det;
557  }
558  num_kept += num_to_add;
559  }
560 
561  //Update the valid region of the ouput to mark the exact number of detection
562  _output->info()->set_valid_region(ValidRegion(Coordinates(0, 0), TensorShape(7, num_kept)));
563 
564  int count = 0;
565  for(int i = 0; i < _num; ++i)
566  {
567  const std::map<int, std::vector<float>> &conf_scores = _all_confidence_scores[i];
568  const LabelBBox &decode_bboxes = _all_decode_bboxes[i];
569  for(auto &it : _all_indices[i])
570  {
571  const int label = it.first;
572  const std::vector<float> &scores = conf_scores.find(label)->second;
573  const int loc_label = _info.share_location() ? -1 : label;
574  if(conf_scores.find(label) == conf_scores.end() || decode_bboxes.find(loc_label) == decode_bboxes.end())
575  {
576  // Either if there are no confidence predictions
577  // or there are no location predictions for current label.
578  ARM_COMPUTE_ERROR_VAR("Could not find predictions for the label %d.", label);
579  }
580  const std::vector<BBox> &bboxes = decode_bboxes.find(loc_label)->second;
581  const std::vector<int> &indices = it.second;
582 
583  for(auto idx : indices)
584  {
585  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7)))) = i;
586  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 1)))) = label;
587  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 2)))) = scores[idx];
588  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 3)))) = bboxes[idx][0];
589  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 4)))) = bboxes[idx][1];
590  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 5)))) = bboxes[idx][2];
591  *(reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(count * 7 + 6)))) = bboxes[idx][3];
592 
593  ++count;
594  }
595  }
596  }
597 }
598 } // namespace arm_compute
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
int num_classes() const
Get num classes.
Definition: Types.h:984
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
Definition: ITensor.h:63
Shape of a tensor.
Definition: TensorShape.h:39
bool share_location() const
Get share location.
Definition: Types.h:989
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox, ITensor *output, DetectionOutputLayerInfo info=DetectionOutputLayerInfo())
Configure the detection output layer CPP kernel.
std::map< int, std::vector< BBox > > LabelBBox
Definition: Types.h:923
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_VAR(msg,...)
Print the given message then throw an std::runtime_error.
Definition: Error.h:346
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor&#39;s metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg,...)
Definition: Error.h:457
float eta() const
Get eta.
Definition: Types.h:1014
Status class.
Definition: Error.h:52
DetectionOutputLayerCodeType
Available Detection Output code types.
Definition: Types.h:926
Interface for CPU tensor.
Definition: ITensor.h:36
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
Definition: Validate.h:284
int top_k() const
Get top K.
Definition: Types.h:1029
Copyright (c) 2017-2021 Arm Limited.
virtual void set_valid_region(const ValidRegion &valid_region)=0
Set the valid region of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
DataType clamp(const DataType &n, const DataType &lower=std::numeric_limits< RangeType >::lowest(), const DataType &upper=std::numeric_limits< RangeType >::max())
Performs clamping among a lower and upper value.
Definition: Utility.h:101
bool variance_encoded_in_target() const
Get if variance encoded in target.
Definition: Types.h:999
float nms_threshold() const
Get nms threshold.
Definition: Types.h:1009
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
Coordinates of an item.
Definition: Coordinates.h:37
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
std::array< float, 4 > BBox
Definition: Types.h:921
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor&#39;s metadata.
DetectionOutputLayerCodeType code_type() const
Get detection output code type.
Definition: Types.h:994
void end(TokenStream &in, bool &valid)
Definition: MLGOParser.cpp:290
int keep_top_k() const
Get the number of total bounding boxes to be kept per image.
Definition: Types.h:1004
Detection Output layer info.
Definition: Types.h:935
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void map(T &tensor, bool blocking)
Maps a tensor if needed.
Definition: Utils.h:212
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
static Status validate(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, DetectionOutputLayerInfo info=DetectionOutputLayerInfo())
Static function to check if given info will lead to a valid configuration of CPPDetectionOutputLayer...
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
void set_num_dimensions(size_t num_dimensions)
Set number of dimensions.
Definition: Dimensions.h:149
Container for valid region of a window.
Definition: Types.h:184
int background_label_id() const
Get background label ID.
Definition: Types.h:1019
float confidence_threshold() const
Get confidence threshold.
Definition: Types.h:1024
int num_loc_classes() const
Get number of location classes.
Definition: Types.h:1034