42 const ITensorInfo *input_class_score,
43 const ITensorInfo *input_anchors,
44 ITensorInfo *output_boxes,
45 ITensorInfo *output_classes,
46 ITensorInfo *output_scores,
47 ITensorInfo *num_detection,
48 DetectionPostProcessLayerInfo
info,
49 const unsigned int kBatchSize,
50 const unsigned int kNumCoordBox)
57 "The location input tensor shape should be [4, N, kBatchSize].");
58 if (input_box_encoding->num_dimensions() > 2)
61 input_box_encoding->dimension(2) != kBatchSize,
62 "The third dimension of the input box_encoding tensor should be equal to %d.", kBatchSize);
65 "The first dimension of the input box_encoding tensor should be equal to %d.",
68 input_class_score->dimension(0) != (
info.num_classes() + 1),
69 "The first dimension of the input class_prediction should be equal to the number of classes plus one.");
72 "The anchors input tensor shape should be [4, N, kBatchSize].");
73 if (input_anchors->num_dimensions() > 2)
76 "The first dimension of the input anchors tensor should be equal to %d.",
80 (input_box_encoding->dimension(1) != input_anchors->dimension(1)),
81 "The second dimension of the inputs should be the same.");
83 "The num_detection output tensor shape should be [M].");
85 "The intersection over union should be positive and less than 1.");
87 "The number of max classes per detection should be positive.");
89 const unsigned int num_detected_boxes =
info.max_detections() *
info.max_classes_per_detection();
92 if (output_boxes->total_size() != 0)
95 TensorShape(4U, num_detected_boxes, 1U));
98 if (output_classes->total_size() != 0)
101 TensorShape(num_detected_boxes, 1U));
104 if (output_scores->total_size() != 0)
107 TensorShape(num_detected_boxes, 1U));
110 if (num_detection->total_size() != 0)
// Converts one box from centre/size encoding to corner encoding and stores the
// four resulting floats at the current position of decoded_it.
//
// box_centersize  Encoded box. Index 0 is divided by scale_value_y, index 1 by
//                 scale_value_x, index 2 by scale_value_h and index 3 by
//                 scale_value_w, i.e. the elements are consumed as [y, x, h, w].
// anchor          Anchor box. anchor[0] / anchor[1] are added as centre offsets,
//                 anchor[2] / anchor[3] are used as size multipliers.
// decoded_it      Iterator whose ptr() is reinterpreted as float* for the output.
// info            Supplies the four scale values.
// NOTE(review): the return type / qualifiers of this function are not part of
// this excerpt.
120 DecodeBoxCorner(
BBox &box_centersize,
BBox &anchor, Iterator &decoded_it, DetectionPostProcessLayerInfo
info)
122 const float half_factor = 0.5f;
// Box centre: scaled offset multiplied by the anchor size, shifted by the
// anchor centre.
125 const float y_center = box_centersize[0] /
info.scale_value_y() * anchor[2] + anchor[0];
126 const float x_center = box_centersize[1] /
info.scale_value_x() * anchor[3] + anchor[1];
// Half extent along y: 0.5 * exp(h / scale_h) * anchor[2]. The declaration this
// expression initialises is not visible here; presumably half_h, used below.
128 half_factor *
static_cast<float>(std::exp(box_centersize[2] /
info.scale_value_h())) * anchor[2];
// Half extent along x: 0.5 * exp(w / scale_w) * anchor[3]; presumably half_w.
130 half_factor *
static_cast<float>(std::exp(box_centersize[3] /
info.scale_value_w())) * anchor[3];
// Output order is [x_center - half_w, y_center - half_h, x_center + half_w,
// y_center + half_h], written to four consecutive floats.
133 auto decoded_ptr =
reinterpret_cast<float *
>(decoded_it.ptr());
134 *(decoded_ptr) = x_center - half_w;
135 *(1 + decoded_ptr) = y_center - half_h;
136 *(2 + decoded_ptr) = x_center + half_w;
137 *(3 + decoded_ptr) = y_center + half_h;
// Walks the box-encoding and anchor tensors in lock-step and decodes every box
// into decoded_boxes by calling DecodeBoxCorner.
//
// input_box_encoding  Encoded boxes; its tensor shape defines the iteration window.
// input_anchors       Anchors, iterated with the same window.
// info                Forwarded unchanged to DecodeBoxCorner.
// decoded_boxes       Destination tensor, iterated with the same window.
//
// Three lambdas are visible, one each for qasymm8_t, qasymm8_signed_t and float
// element types. The code that selects between them is not part of this excerpt.
147 void DecodeCenterSizeBoxes(
const ITensor *input_box_encoding,
148 const ITensor *input_anchors,
149 DetectionPostProcessLayerInfo
info,
150 Tensor *decoded_boxes)
// Quantization parameters of both inputs. Their use is not visible in this
// excerpt; presumably they feed the dequantisation in the 8-bit paths.
152 const QuantizationInfo &qi_box = input_box_encoding->info()->quantization_info();
153 const QuantizationInfo &qi_anchors = input_anchors->info()->quantization_info();
154 BBox box_centersize{{}};
// One window shared by all three iterators: it spans the box-encoding shape and
// advances four elements at a time along dimension 0 and one along dimension 1.
158 win.use_tensor_dimensions(input_box_encoding->info()->tensor_shape());
159 win.set_dimension_step(0U, 4U);
160 win.set_dimension_step(1U, 1U);
161 Iterator box_it(input_box_encoding, win);
162 Iterator anchor_it(input_anchors, win);
163 Iterator decoded_it(decoded_boxes, win);
// Path for qasymm8_t inputs. The lines that fill box_centersize / anchor from
// these pointers are not visible in this excerpt.
169 [&](
const Coordinates &)
171 const auto box_ptr =
reinterpret_cast<const qasymm8_t *
>(box_it.ptr());
172 const auto anchor_ptr =
reinterpret_cast<const qasymm8_t *
>(anchor_it.ptr());
180 DecodeBoxCorner(box_centersize, anchor, decoded_it,
info);
182 box_it, anchor_it, decoded_it);
// Path for qasymm8_signed_t inputs; same structure as the path above.
188 [&](
const Coordinates &)
190 const auto box_ptr =
reinterpret_cast<const qasymm8_signed_t *
>(box_it.ptr());
191 const auto anchor_ptr =
reinterpret_cast<const qasymm8_signed_t *
>(anchor_it.ptr());
200 DecodeBoxCorner(box_centersize, anchor, decoded_it,
info);
202 box_it, anchor_it, decoded_it);
// Path for float inputs: the four consecutive floats at each iterator position
// are copied directly into box_centersize and anchor, with no conversion.
208 [&](
const Coordinates &)
210 const auto box_ptr =
reinterpret_cast<const float *
>(box_it.ptr());
211 const auto anchor_ptr =
reinterpret_cast<const float *
>(anchor_it.ptr());
212 box_centersize =
BBox({*box_ptr, *(box_ptr + 1), *(2 + box_ptr), *(3 + box_ptr)});
213 anchor =
BBox({*anchor_ptr, *(anchor_ptr + 1), *(2 + anchor_ptr), *(3 + anchor_ptr)});
214 DecodeBoxCorner(box_centersize, anchor, decoded_it,
info);
216 box_it, anchor_it, decoded_it);
// Writes the final detections into the four output tensors.
//
// decoded_boxes               Decoded boxes, addressed as (coordinate, box index).
// result_idx_boxes_after_nms  Box index of each candidate.
// result_scores_after_nms     Score of each candidate.
// result_classes_after_nms    Class of each candidate.
// sorted_indices              Order in which candidates are emitted; entry i
//                             selects the candidate written to output slot i.
// num_output                  Number of valid detections to write.
// max_detections              Upper bound used for the zero-fill loop.
// output_boxes                Receives four floats per detection.
// output_classes              Receives the class, stored as float.
// output_scores               Receives the score.
// num_detection               Element 0 receives num_output, stored as float.
220 void SaveOutputs(
const Tensor *decoded_boxes,
221 const std::vector<int> &result_idx_boxes_after_nms,
222 const std::vector<float> &result_scores_after_nms,
223 const std::vector<int> &result_classes_after_nms,
224 std::vector<unsigned int> &sorted_indices,
225 const unsigned int num_output,
226 const unsigned int max_detections,
227 ITensor *output_boxes,
228 ITensor *output_classes,
229 ITensor *output_scores,
230 ITensor *num_detection)
// Valid detections. The declaration of i is not visible in this excerpt; the
// loop has no init clause, so i keeps whatever value it holds on entry.
234 for (; i < num_output; ++i)
236 const unsigned int box_in_idx = result_idx_boxes_after_nms[sorted_indices[i]];
// Coordinates are copied with elements 0/1 and 2/3 swapped. With decoded boxes
// stored as [xmin, ymin, xmax, ymax] (the order DecodeBoxCorner writes), the
// output order becomes [ymin, xmin, ymax, xmax].
237 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(0, i)))) =
238 *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(1, box_in_idx))));
239 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(1, i)))) =
240 *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(0, box_in_idx))));
241 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(2, i)))) =
242 *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(3, box_in_idx))));
243 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(3, i)))) =
244 *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(2, box_in_idx))));
// Class index is converted to float because the output buffer is written as float.
245 *(
reinterpret_cast<float *
>(output_classes->ptr_to_element(Coordinates(i)))) =
246 static_cast<float>(result_classes_after_nms[sorted_indices[i]]);
247 *(
reinterpret_cast<float *
>(output_scores->ptr_to_element(Coordinates(i)))) =
248 result_scores_after_nms[sorted_indices[i]];
// Remaining slots up to max_detections are zero-filled. No re-initialisation of
// i is visible, so this loop appears to continue where the previous one stopped.
250 for (; i < max_detections; ++i)
252 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(1, i)))) = 0.0f;
253 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(0, i)))) = 0.0f;
254 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(3, i)))) = 0.0f;
255 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(2, i)))) = 0.0f;
256 *(
reinterpret_cast<float *
>(output_classes->ptr_to_element(Coordinates(i)))) = 0.0f;
257 *(
reinterpret_cast<float *
>(output_scores->ptr_to_element(Coordinates(i)))) = 0.0f;
// Number of valid detections, implicitly converted from unsigned int to float.
259 *(
reinterpret_cast<float *
>(num_detection->ptr_to_element(Coordinates(0)))) = num_output;
// Member-initialiser list of a constructor whose signature is not part of this
// excerpt (some initialisers between the visible ones are missing as well).
// The memory manager argument is moved into the memory group.
264 : _memory_group(std::move(memory_manager)),
// All tensor pointers start out null.
266 _input_box_encoding(nullptr),
267 _input_scores(nullptr),
268 _input_anchors(nullptr),
269 _output_boxes(nullptr),
270 _output_classes(nullptr),
271 _output_scores(nullptr),
272 _num_detection(nullptr),
// Counters are value-initialised and score dequantisation is off by default.
275 _num_classes_with_background(),
276 _num_max_detected_boxes(),
277 _dequantize_scores(false),
282 _input_scores_to_use(nullptr)
// NOTE(review): body fragment of a configuration routine; its signature and
// several statements are not part of this excerpt.
// Log the arguments that were passed in.
297 ARM_COMPUTE_LOG_PARAMS(input_box_encoding, input_scores, input_anchors, output_boxes, output_classes, output_scores,
298 num_detection,
info);
// Upper bound on emitted boxes: max_detections * max_classes_per_detection.
300 _num_max_detected_boxes =
info.max_detections() *
info.max_classes_per_detection();
// Argument list of a call whose opening is not visible: passes the info object
// of every tensor plus info, _kBatchSize and _kNumCoordBox.
312 input_box_encoding->
info(), input_scores->
info(), input_anchors->
info(), output_boxes->
info(),
313 output_classes->
info(), output_scores->
info(), num_detection->
info(),
info, _kBatchSize, _kNumCoordBox));
// Keep the caller's tensor pointers in the corresponding members.
315 _input_box_encoding = input_box_encoding;
316 _input_scores = input_scores;
317 _input_anchors = input_anchors;
318 _output_boxes = output_boxes;
319 _output_classes = output_classes;
320 _output_scores = output_scores;
321 _num_detection = num_detection;
// Dimension 0 of the scores tensor is stored as the class count; per the member
// name this count includes the background class.
324 _num_classes_with_background = _input_scores->
info()->
dimension(0);
// The next fragments dereference the info of internal tensors; the statements
// they belong to are not visible in this excerpt.
331 *_decoded_scores.
info(),
335 *_selected_indices.
info(),
// Classes kept per box: the smaller of max_classes_per_detection and num_classes.
338 const unsigned int num_classes_per_box = std::min(
info.max_classes_per_detection(),
info.num_classes());
340 *_class_scores.
info(),
// Use the internal _decoded_scores tensor when dequantisation is enabled,
// otherwise read the caller's scores tensor directly.
344 _input_scores_to_use = _dequantize_scores ? &_decoded_scores : _input_scores;
// Register the four intermediate tensors with the memory group.
347 _memory_group.
manage(&_decoded_boxes);
348 _memory_group.
manage(&_decoded_scores);
349 _memory_group.
manage(&_selected_indices);
350 _memory_group.
manage(&_class_scores);
// Configure the NMS member: the output limit is detection_per_class when regular
// NMS is requested and max_detections otherwise, followed by the score
// threshold and the IoU threshold.
351 _nms.
configure(&_decoded_boxes, &_class_scores, &_selected_indices,
352 info.use_regular_nms() ?
info.detection_per_class() :
info.max_detections(),
353 info.nms_score_threshold(),
info.iou_threshold());
// NOTE(review): body fragment of a validation routine; its signature and the
// openings of both calls below are not part of this excerpt.
// Fixed constants used for validation: batch size 1, four coordinates per box.
371 constexpr
unsigned int kBatchSize = 1;
372 constexpr
unsigned int kNumCoordBox = 4;
// Tail of a call receiving &_selected_indices_info, max_detections, the score
// threshold and the IoU threshold (callee not visible).
379 &_selected_indices_info,
info.max_detections(),
380 info.nms_score_threshold(),
info.iou_threshold()));
// Partial argument list forwarding the outputs, info and kBatchSize; the call
// continues beyond what is visible here.
382 output_classes, output_scores, num_detection,
info, kBatchSize,
// NOTE(review): body fragment of the execution routine; its signature, the
// declaration of max_detections, the branch that separates the two selection
// strategies and several loop bodies are not part of this excerpt.
390 const unsigned int num_classes = _info.
num_classes();
// Decode every box into _decoded_boxes.
393 DecodeCenterSizeBoxes(_input_box_encoding, _input_anchors, _info, &_decoded_boxes);
// Optional score dequantisation: two class-by-box loop nests are visible (loop
// bodies not shown); presumably one per quantised data type.
396 if (_dequantize_scores)
400 for (
unsigned int idx_c = 0; idx_c < _num_classes_with_background; ++idx_c)
402 for (
unsigned int idx_b = 0; idx_b < _num_boxes; ++idx_b)
413 for (
unsigned int idx_c = 0; idx_c < _num_classes_with_background; ++idx_c)
415 for (
unsigned int idx_b = 0; idx_b < _num_boxes; ++idx_b)
// First strategy: candidates are collected class by class into parallel vectors
// (box index, class, score), then ranked through sorted_indices.
429 std::vector<int> result_idx_boxes_after_nms;
430 std::vector<int> result_classes_after_nms;
431 std::vector<float> result_scores_after_nms;
432 std::vector<unsigned int> sorted_indices;
434 for (
unsigned int c = 0; c < num_classes; ++c)
// Per-box loop for class c (body not visible).
437 for (
unsigned int i = 0; i < _num_boxes; ++i)
// The initialiser of selected_index is not visible. A value of -1 is handled
// specially; the action taken is not shown.
449 const auto selected_index =
451 if (selected_index == -1)
// Record the selected box, its score (read from the _class_scores buffer as
// float) and the current class.
456 result_idx_boxes_after_nms.emplace_back(selected_index);
457 result_scores_after_nms.emplace_back(
458 (
reinterpret_cast<float *
>(_class_scores.
buffer()))[selected_index]);
459 result_classes_after_nms.emplace_back(c);
// Emit at most max_detections of the collected candidates.
464 const auto num_selected = result_scores_after_nms.size();
465 const auto num_output = std::min<unsigned int>(max_detections, num_selected);
// Fill sorted_indices with 0..num_selected-1, then partially sort it so that
// the first num_output entries reference the highest scores in descending order.
468 sorted_indices.resize(num_selected);
469 std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
470 std::partial_sort(sorted_indices.data(), sorted_indices.data() + num_output,
471 sorted_indices.data() + num_selected,
472 [&](
unsigned int first,
unsigned int second)
473 { return result_scores_after_nms[first] > result_scores_after_nms[second]; });
// Write the ranked detections (the last argument of this call is not visible).
475 SaveOutputs(&_decoded_boxes, result_idx_boxes_after_nms, result_scores_after_nms, result_classes_after_nms,
476 sorted_indices, num_output, max_detections, _output_boxes, _output_classes, _output_scores,
// Second strategy: for every box keep its best num_classes_per_box classes.
// The initialiser of num_classes_per_box is not visible.
482 const unsigned int num_classes_per_box =
// Parallel vectors: entry k holds score, box index and class of candidate k.
484 std::vector<float> max_scores;
485 std::vector<int> box_indices;
486 std::vector<int> max_score_classes;
488 for (
unsigned int b = 0;
b < _num_boxes; ++
b)
// Gather the scores of box b, one entry per class (the value expression is not
// visible).
490 std::vector<float> box_scores;
491 for (
unsigned int c = 0; c < num_classes; ++c)
493 box_scores.emplace_back(
// NOTE(review): no sizing of max_score_indices is visible before std::iota
// writes num_classes() entries through data(); confirm that the vector is
// resized in the lines missing from this excerpt.
497 std::vector<unsigned int> max_score_indices;
499 std::iota(max_score_indices.data(), max_score_indices.data() + _info.
num_classes(), 0);
// Bring the num_classes_per_box best-scoring class indices to the front, in
// descending score order.
500 std::partial_sort(max_score_indices.data(), max_score_indices.data() + num_classes_per_box,
501 max_score_indices.data() + num_classes,
502 [&](
unsigned int first,
unsigned int second)
503 { return box_scores[first] > box_scores[second]; });
// Append each of the top classes as a candidate (any filtering between reading
// the score and appending it is not visible).
505 for (
unsigned int i = 0; i < num_classes_per_box; ++i)
507 const float score_to_add = box_scores[max_score_indices[i]];
510 max_scores.emplace_back(score_to_add);
511 box_indices.emplace_back(
b);
512 max_score_classes.emplace_back(max_score_indices[i]);
// Collect up to max_detections selected candidates (loop body not visible).
518 std::vector<unsigned int> selected_indices;
519 for (
unsigned int i = 0; i < max_detections; ++i)
// Emit at most max_detections of them.
530 const auto num_output = std::min<unsigned int>(_info.
max_detections(), selected_indices.size());
532 SaveOutputs(&_decoded_boxes, box_indices, max_scores, max_score_classes, selected_indices, num_output,
533 max_detections, _output_boxes, _output_classes, _output_scores, _num_detection);