// Checks whether the given tensor metadata and layer info describe a valid
// detection post-process configuration and reports the result as a Status.
//
// NOTE(review): this listing omits lines of the original function (braces and
// several checks are not visible), so only the statements shown are described.
//
// Parameters, as visible in the signature:
//   input_box_encoding, input_class_score, input_anchors
//       metadata of the three input tensors.
//   output_boxes, output_classes, output_scores, num_detection
//       metadata of the four output tensors.
//   info          layer parameters (passed by value).
//   kBatchSize    required size of dimension 2 of input_box_encoding when that
//                 tensor has more than two dimensions.
//   kNumCoordBox  required size of dimension 0 of input_box_encoding.
39 Status validate_arguments(
const ITensorInfo *input_box_encoding,
const ITensorInfo *input_class_score,
const ITensorInfo *input_anchors,
40 ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection,
41 DetectionPostProcessLayerInfo
info,
const unsigned int kBatchSize,
const unsigned int kNumCoordBox)
// The third dimension is only constrained when box_encoding has more than two dimensions.
47 if(input_box_encoding->num_dimensions() > 2)
49 ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(input_box_encoding->dimension(2) != kBatchSize,
"The third dimension of the input box_encoding tensor should be equal to %d.", kBatchSize);
// Dimension 0 of box_encoding must hold exactly kNumCoordBox values.
51 ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(input_box_encoding->dimension(0) != kNumCoordBox,
"The first dimension of the input box_encoding tensor should be equal to %d.", kNumCoordBox);
53 "The first dimension of the input class_prediction should be equal to the number of classes plus one.");
// Guard on the anchors' dimensionality; the statement it guards is not visible in this listing.
56 if(input_anchors->num_dimensions() > 2)
61 || (input_box_encoding->dimension(1) != input_anchors->dimension(1)),
62 "The second dimension of the inputs should be the same.");
// Rejects iou_threshold <= 0 and iou_threshold > 1, i.e. accepts the range (0, 1].
// NOTE(review): exactly 1.0 passes this check although the message says
// "less than 1" - confirm which of the two is intended.
64 ARM_COMPUTE_RETURN_ERROR_ON_MSG((info.iou_threshold() <= 0.0f) || (info.iou_threshold() > 1.0f),
"The intersection over union should be positive and less than 1.");
// Product of max_detections and max_classes_per_detection; presumably the
// expected extent of the output tensors - its uses are not visible here.
67 const unsigned int num_detected_boxes = info.max_detections() * info.max_classes_per_detection();
// Each output is inspected only when its total size is non-zero; the checks
// performed inside these branches are not visible in this listing.
70 if(output_boxes->total_size() != 0)
75 if(output_classes->total_size() != 0)
80 if(output_scores->total_size() != 0)
85 if(num_detection->total_size() != 0)
// Converts one centre-size encoded box into corner form and writes the four
// resulting floats at the current position of decoded_it.
//
//   box_centersize  encoded box; indices 0..3 are used as the y, x, h and w terms.
//   anchor          anchor box; indices 0/1 are added as centre offsets and
//                   indices 2/3 multiply the y/h and x/w terms respectively.
//   decoded_it      iterator whose current element receives the decoded box.
//   info            supplies the y/x/h/w scale values (passed by value).
//
// NOTE(review): braces and some original lines are missing from this listing.
94 inline void DecodeBoxCorner(
BBox &box_centersize,
BBox &anchor, Iterator &decoded_it, DetectionPostProcessLayerInfo info)
96 const float half_factor = 0.5f;
// Centre: scaled encoded offset, multiplied by the anchor size term, plus the anchor centre term.
99 const float y_center = box_centersize[0] / info.scale_value_y() * anchor[2] + anchor[0];
100 const float x_center = box_centersize[1] / info.scale_value_x() * anchor[3] + anchor[1];
// Half extents: exp() of the scaled encoded size, multiplied by the anchor size term, halved.
101 const float half_h = half_factor *
static_cast<float>(std::exp(box_centersize[2] / info.scale_value_h())) * anchor[2];
102 const float half_w = half_factor *
static_cast<float>(std::exp(box_centersize[3] / info.scale_value_w())) * anchor[3];
// Output order is (x_min, y_min, x_max, y_max).
105 auto decoded_ptr =
reinterpret_cast<float *
>(decoded_it.ptr());
106 *(decoded_ptr) = x_center - half_w;
107 *(1 + decoded_ptr) = y_center - half_h;
108 *(2 + decoded_ptr) = x_center + half_w;
109 *(3 + decoded_ptr) = y_center + half_h;
// Decodes every encoded box of input_box_encoding against the matching anchor
// of input_anchors and stores the result in decoded_boxes, using
// DecodeBoxCorner for each box.
//
// Three variants of the per-box code are visible, reading the inputs as
// qasymm8_t, qasymm8_signed_t and float respectively. The conditions that pick
// a variant, and the dequantisation steps of the two quantised variants, are
// not visible in this listing (presumably selected by the input data type -
// TODO confirm).
119 void DecodeCenterSizeBoxes(
const ITensor *input_box_encoding,
const ITensor *input_anchors, DetectionPostProcessLayerInfo info, Tensor *decoded_boxes)
// Quantisation parameters of both inputs; their uses are not visible in this listing.
121 const QuantizationInfo &qi_box = input_box_encoding->info()->quantization_info();
122 const QuantizationInfo &qi_anchors = input_anchors->info()->quantization_info();
123 BBox box_centersize{ {} };
// The window spans the box-encoding shape and advances 4 elements at a time
// along dimension 0 and 1 along dimension 1. The declaration of `win` is not
// visible here.
127 win.use_tensor_dimensions(input_box_encoding->info()->tensor_shape());
128 win.set_dimension_step(0U, 4U);
129 win.set_dimension_step(1U, 1U);
// All three tensors are traversed with the same window.
130 Iterator box_it(input_box_encoding, win);
131 Iterator anchor_it(input_anchors, win);
132 Iterator decoded_it(decoded_boxes, win);
// Variant reading the inputs as qasymm8_t.
138 const auto box_ptr =
reinterpret_cast<const qasymm8_t *
>(box_it.ptr());
139 const auto anchor_ptr =
reinterpret_cast<const qasymm8_t *
>(anchor_it.ptr());
146 DecodeBoxCorner(box_centersize, anchor, decoded_it, info);
148 box_it, anchor_it, decoded_it);
// Variant reading the inputs as qasymm8_signed_t.
154 const auto box_ptr =
reinterpret_cast<const qasymm8_signed_t *
>(box_it.ptr());
155 const auto anchor_ptr =
reinterpret_cast<const qasymm8_signed_t *
>(anchor_it.ptr());
162 DecodeBoxCorner(box_centersize, anchor, decoded_it, info);
164 box_it, anchor_it, decoded_it);
// Variant reading the inputs as float: the four consecutive values are used as-is.
170 const auto box_ptr =
reinterpret_cast<const float *
>(box_it.ptr());
171 const auto anchor_ptr =
reinterpret_cast<const float *
>(anchor_it.ptr());
172 box_centersize =
BBox({ *box_ptr, *(box_ptr + 1), *(2 + box_ptr), *(3 + box_ptr) });
173 anchor =
BBox({ *anchor_ptr, *(anchor_ptr + 1), *(2 + anchor_ptr), *(3 + anchor_ptr) });
174 DecodeBoxCorner(box_centersize, anchor, decoded_it, info);
176 box_it, anchor_it, decoded_it);
// Writes the selected detections to the output tensors.
//
// For the first num_output entries, the box, class and score addressed through
// sorted_indices are copied out; the remaining entries up to max_detections
// are zero-filled; finally num_output is stored in element 0 of num_detection.
//
//   decoded_boxes               decoded boxes, indexed by box index.
//   result_idx_boxes_after_nms  box index of each selected candidate.
//   result_scores_after_nms     score of each selected candidate.
//   result_classes_after_nms    class of each selected candidate.
//   sorted_indices              order in which candidates are emitted.
//   num_output                  number of candidates to emit.
//   max_detections              number of output slots to fill in total.
//
// NOTE(review): braces and the declaration of the loop counter `i` are not
// visible in this listing; `i` carries over from the first loop to the second.
180 void SaveOutputs(
const Tensor *decoded_boxes,
const std::vector<int> &result_idx_boxes_after_nms,
const std::vector<float> &result_scores_after_nms,
const std::vector<int> &result_classes_after_nms,
181 std::vector<unsigned int> &sorted_indices,
const unsigned int num_output,
const unsigned int max_detections, ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores,
182 ITensor *num_detection)
// Emit the selected detections.
186 for(; i < num_output; ++i)
188 const unsigned int box_in_idx = result_idx_boxes_after_nms[sorted_indices[i]];
// Coordinates are swapped pairwise while copying: output 0 <- decoded 1,
// output 1 <- decoded 0, output 2 <- decoded 3, output 3 <- decoded 2.
189 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(0, i)))) = *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(1, box_in_idx))));
190 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(1, i)))) = *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(0, box_in_idx))));
191 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(2, i)))) = *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(3, box_in_idx))));
192 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(3, i)))) = *(
reinterpret_cast<float *
>(decoded_boxes->ptr_to_element(Coordinates(2, box_in_idx))));
// The class index is stored as a float.
193 *(
reinterpret_cast<float *
>(output_classes->ptr_to_element(Coordinates(i)))) =
static_cast<float>(result_classes_after_nms[sorted_indices[i]]);
194 *(
reinterpret_cast<float *
>(output_scores->ptr_to_element(Coordinates(i)))) = result_scores_after_nms[sorted_indices[i]];
// Zero-fill the unused slots from num_output up to max_detections.
196 for(; i < max_detections; ++i)
198 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(1, i)))) = 0.0f;
199 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(0, i)))) = 0.0f;
200 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(3, i)))) = 0.0f;
201 *(
reinterpret_cast<float *
>(output_boxes->ptr_to_element(Coordinates(2, i)))) = 0.0f;
202 *(
reinterpret_cast<float *
>(output_classes->ptr_to_element(Coordinates(i)))) = 0.0f;
203 *(
reinterpret_cast<float *
>(output_scores->ptr_to_element(Coordinates(i)))) = 0.0f;
// The detection count is written through a float pointer, so it is stored as a float value.
205 *(
reinterpret_cast<float *
>(num_detection->ptr_to_element(Coordinates(0)))) = num_output;
// Member initializer list; the constructor's signature line is not visible in
// this listing. The memory manager is moved into the memory group, every
// tensor pointer starts as nullptr, _dequantize_scores starts as false, and
// the remaining members are value-initialised.
210 : _memory_group(
std::move(memory_manager)), _nms(), _input_box_encoding(nullptr), _input_scores(nullptr), _input_anchors(nullptr), _output_boxes(nullptr), _output_classes(nullptr),
211 _output_scores(nullptr), _num_detection(nullptr), _info(), _num_boxes(), _num_classes_with_background(), _num_max_detected_boxes(), _dequantize_scores(false), _decoded_boxes(), _decoded_scores(),
212 _selected_indices(), _class_scores(), _input_scores_to_use(nullptr)
// Fragment of a member function body whose signature line is not visible in
// this listing (presumably configure() - TODO confirm against the full file).
//
// The first lines are the tail of a call that receives the tensor infos
// together with _kBatchSize and _kNumCoordBox; the start of that call is not
// visible.
229 num_detection->
info(),
230 info, _kBatchSize, _kNumCoordBox));
// Remember the caller's tensors for later use.
232 _input_box_encoding = input_box_encoding;
233 _input_scores = input_scores;
234 _input_anchors = input_anchors;
235 _output_boxes = output_boxes;
236 _output_classes = output_classes;
237 _output_scores = output_scores;
238 _num_detection = num_detection;
// Dimension 0 of the scores tensor is taken as the number of classes including background.
241 _num_classes_with_background = _input_scores->
info()->
dimension(0);
// Use the internal dequantised copy of the scores when dequantisation is enabled, else the input itself.
250 _input_scores_to_use = _dequantize_scores ? &_decoded_scores : _input_scores;
// Register the four internal tensors with the memory group.
253 _memory_group.
manage(&_decoded_boxes);
254 _memory_group.
manage(&_decoded_scores);
255 _memory_group.
manage(&_selected_indices);
256 _memory_group.
manage(&_class_scores);
// Fragment of a function body whose signature line is not visible in this
// listing (presumably the static validate() - TODO confirm).
// The fixed values 1 and 4 are passed to validate_arguments as the batch size
// and the number of coordinates per box.
269 constexpr
unsigned int kBatchSize = 1;
270 constexpr
unsigned int kNumCoordBox = 4;
// Returns early with the error if argument validation fails.
277 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_box_encoding, input_class_score, input_anchors, output_boxes, output_classes, output_scores, num_detection, info, kBatchSize, kNumCoordBox));
// Fragment of a member function body whose signature line is not visible in
// this listing (presumably run() - TODO confirm). Braces, branch conditions
// and several statements are missing, so only the visible statements are
// described.
284 const unsigned int num_classes = _info.
num_classes();
// Decode all boxes into _decoded_boxes first.
287 DecodeCenterSizeBoxes(_input_box_encoding, _input_anchors, _info, &_decoded_boxes);
// When score dequantisation is enabled, two loop nests over
// (class incl. background, box) are visible; their bodies and the condition
// choosing between them are not (presumably one per quantised type - TODO confirm).
290 if(_dequantize_scores)
294 for(
unsigned int idx_c = 0; idx_c < _num_classes_with_background; ++idx_c)
296 for(
unsigned int idx_b = 0; idx_b < _num_boxes; ++idx_b)
305 for(
unsigned int idx_c = 0; idx_c < _num_classes_with_background; ++idx_c)
307 for(
unsigned int idx_b = 0; idx_b < _num_boxes; ++idx_b)
// First output path: candidates are collected class by class.
// Parallel vectors: entry k holds the box index, class and score of candidate k.
319 std::vector<int> result_idx_boxes_after_nms;
320 std::vector<int> result_classes_after_nms;
321 std::vector<float> result_scores_after_nms;
322 std::vector<unsigned int> sorted_indices;
// Per-class processing; the loop bodies are only partly visible.
324 for(
unsigned int c = 0; c < num_classes; ++c)
327 for(
unsigned int i = 0; i < _num_boxes; ++i)
// -1 is treated specially; the action taken is not visible (presumably it marks an unused slot - TODO confirm).
339 if(selected_index == -1)
// Record the selected box, its score read from _class_scores as float, and the class c.
344 result_idx_boxes_after_nms.emplace_back(selected_index);
345 result_scores_after_nms.emplace_back((reinterpret_cast<float *>(_class_scores.
buffer()))[selected_index]);
346 result_classes_after_nms.emplace_back(c);
// Never emit more than max_detections entries (max_detections is declared on a line not visible here).
351 const auto num_selected = result_scores_after_nms.size();
352 const auto num_output = std::min<unsigned int>(max_detections, num_selected);
// Order candidate indices so that the first num_output are sorted by descending score.
355 sorted_indices.resize(num_selected);
356 std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
357 std::partial_sort(sorted_indices.data(),
358 sorted_indices.data() + num_output,
359 sorted_indices.data() + num_selected,
360 [&](
unsigned int first,
unsigned int second)
363 return result_scores_after_nms[first] > result_scores_after_nms[second];
366 SaveOutputs(&_decoded_boxes, result_idx_boxes_after_nms, result_scores_after_nms, result_classes_after_nms, sorted_indices,
367 num_output, max_detections, _output_boxes, _output_classes, _output_scores, _num_detection);
// Second output path: candidates are collected box by box. The condition
// selecting between the two paths is not visible in this listing.
373 std::vector<float> max_scores;
374 std::vector<int> box_indices;
375 std::vector<int> max_score_classes;
377 for(
unsigned int b = 0;
b < _num_boxes; ++
b)
// Scores of box b, one per class; the statement filling the vector is not visible.
379 std::vector<float> box_scores;
380 for(
unsigned int c = 0; c < num_classes; ++c)
// Rank the classes of this box so that the first num_classes_per_box indices are in descending score order.
// NOTE(review): max_score_indices must already hold num_classes elements
// before std::iota writes through data(); the sizing statement is not visible
// here - confirm in the full file.
385 std::vector<unsigned int> max_score_indices;
387 std::iota(max_score_indices.data(), max_score_indices.data() + _info.
num_classes(), 0);
388 std::partial_sort(max_score_indices.data(),
389 max_score_indices.data() + num_classes_per_box,
390 max_score_indices.data() + num_classes,
391 [&](
unsigned int first,
unsigned int second)
393 return box_scores[first] > box_scores[second];
// Append the top classes of this box as candidates (score, box index, class).
396 for(
unsigned int i = 0; i < num_classes_per_box; ++i)
398 const float score_to_add = box_scores[max_score_indices[i]];
400 max_scores.emplace_back(score_to_add);
401 box_indices.emplace_back(
b);
402 max_score_classes.emplace_back(max_score_indices[i]);
// Indices of the candidates to emit; the loop body that fills the vector is not visible.
408 std::vector<unsigned int> selected_indices;
409 for(
unsigned int i = 0; i < max_detections; ++i)
// Emit at most max_detections of the selected candidates.
420 const auto num_output = std::min<unsigned int>(_info.
max_detections(), selected_indices.size());
422 SaveOutputs(&_decoded_boxes, box_indices, max_scores, max_score_classes, selected_indices,
423 num_output, max_detections, _output_boxes, _output_classes, _output_scores, _num_detection);
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
uint8_t * ptr_to_element(const Coordinates &id) const
Return a pointer to the element at the passed coordinates.
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
unsigned int max_detections() const
Get max detections.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg,...)
If the condition is true, an error is returned.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static Status validate(const ITensorInfo *input_box_encoding, const ITensorInfo *input_class_score, const ITensorInfo *input_anchors, ITensorInfo *output_boxes, ITensorInfo *output_classes, ITensorInfo *output_scores, ITensorInfo *num_detection, DetectionPostProcessLayerInfo info=DetectionPostProcessLayerInfo())
Static function to check if given info will lead to a valid configuration of CPPDetectionPostProcessLayer.
1 channel, 1 F32 per channel
CPPDetectionPostProcessLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Interface for CPU tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...)
bool use_regular_nms() const
Get if use regular nms.
Copyright (c) 2017-2021 Arm Limited.
float iou_threshold() const
Get intersection over union threshold.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
ITensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
void run() override
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
float nms_score_threshold() const
Get nms threshold.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
std::array< float, 4 > BBox
bool dequantize_scores() const
Get dequantize_scores value.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
unsigned int max_classes_per_detection() const
Get max_classes per detection.
static Status validate(const ITensorInfo *bboxes, const ITensorInfo *scores, const ITensorInfo *indices, unsigned int max_output_size, const float score_threshold, const float nms_threshold)
Static function to check if given arguments will lead to a valid configuration of CPPNonMaximumSuppression.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void configure(const ITensor *input_box_encoding, const ITensor *input_score, const ITensor *input_anchors, ITensor *output_boxes, ITensor *output_classes, ITensor *output_scores, ITensor *num_detection, DetectionPostProcessLayerInfo info=DetectionPostProcessLayerInfo())
Configure the detection output layer CPP function.
uint8_t qasymm8_t
8 bit quantized asymmetric scalar value
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
unsigned int detection_per_class() const
Get detection per class.
int8_t qasymm8_signed_t
8 bit signed quantized asymmetric scalar value
Detection Output layer info.
unsigned int num_classes() const
Get num classes.
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
void configure(const ITensor *bboxes, const ITensor *scores, ITensor *indices, unsigned int max_output_size, const float score_threshold, const float nms_threshold)
Configure the function to perform non maximal suppression.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
void run() override final
Run the kernels contained in the function.
quantized, asymmetric fixed-point 8-bit number signed