38 : _memory_group(memory_manager),
49 _cpp_nms(memory_manager),
54 _deltas_flattened_f32(),
60 _all_proposals_quantized(),
62 _classes_nms_unused(),
63 _proposals_4_roi_values(),
64 _all_proposals_to_use(nullptr),
65 _num_valid_proposals(nullptr),
91 const int total_num_anchors = num_anchors * feat_width * feat_height;
92 const int pre_nms_topN =
info.pre_nms_topN();
93 const int post_nms_topN =
info.post_nms_topN();
94 const size_t values_per_roi =
info.values_per_roi();
101 _memory_group.
manage(&_all_anchors);
102 _compute_anchors_kernel->configure(compile_context, anchors, &_all_anchors,
ComputeAnchorsInfo(feat_width, feat_height,
info.spatial_scale()));
104 const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
108 _memory_group.
manage(&_deltas_flattened);
111 _memory_group.
manage(&_deltas_permuted);
113 _flatten_deltas.
configure(compile_context, &_deltas_permuted, &_deltas_flattened);
118 _flatten_deltas.
configure(compile_context, deltas, &_deltas_flattened);
121 const TensorShape flatten_shape_scores(1, total_num_anchors);
125 _memory_group.
manage(&_scores_flattened);
128 _memory_group.
manage(&_scores_permuted);
130 _flatten_scores.
configure(compile_context, &_scores_permuted, &_scores_flattened);
135 _flatten_scores.
configure(compile_context, scores, &_scores_flattened);
138 CLTensor *anchors_to_use = &_all_anchors;
139 CLTensor *deltas_to_use = &_deltas_flattened;
144 _memory_group.
manage(&_all_anchors_f32);
145 _memory_group.
manage(&_deltas_flattened_f32);
147 _dequantize_anchors->configure(compile_context, &_all_anchors, &_all_anchors_f32);
149 anchors_to_use = &_all_anchors_f32;
151 _dequantize_deltas->configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
153 deltas_to_use = &_deltas_flattened_f32;
156 _memory_group.
manage(&_all_proposals);
158 _bounding_box_kernel->configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
162 _all_proposals_to_use = &_all_proposals;
165 _memory_group.
manage(&_all_proposals_quantized);
168 _quantize_all_proposals->configure(compile_context, &_all_proposals, &_all_proposals_quantized);
170 _all_proposals_to_use = &_all_proposals_quantized;
176 const int scores_nms_size = std::min<int>(std::min<int>(post_nms_topN, pre_nms_topN), total_num_anchors);
177 const float min_size_scaled =
info.min_size() *
info.im_scale();
178 _memory_group.
manage(&_classes_nms_unused);
179 _memory_group.
manage(&_keeps_nms_unused);
191 _scores_out = scores_out;
192 _num_valid_proposals = num_valid_proposals;
194 _memory_group.
manage(&_proposals_4_roi_values);
195 _cpp_nms.
configure(&_scores_flattened, _all_proposals_to_use,
nullptr, scores_out, &_proposals_4_roi_values, &_classes_nms_unused,
nullptr, &_keeps_nms_unused, num_valid_proposals,
196 BoxNMSLimitInfo(0.0f,
info.nms_thres(), scores_nms_size,
false,
NMSType::LINEAR, 0.5f, 0.001f,
true, min_size_scaled,
info.im_width(),
info.im_height()));
203 _pad_kernel->configure(compile_context, &_proposals_4_roi_values, proposals,
PaddingList{ { 1, 0 } });
219 const int num_images = scores->
dimension(3);
220 const int total_num_anchors = num_anchors * feat_width * feat_height;
221 const int values_per_roi =
info.values_per_roi();
234 TensorInfo all_anchors_info(anchors->
clone()->set_tensor_shape(
TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(
true));
237 TensorInfo deltas_permuted_info = deltas->
clone()->set_tensor_shape(
TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(
true);
238 TensorInfo scores_permuted_info = scores->
clone()->set_tensor_shape(
TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(
true);
250 TensorInfo deltas_flattened_info(deltas->
clone()->set_tensor_shape(
TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(
true));
253 TensorInfo scores_flattened_info(scores->
clone()->set_tensor_shape(
TensorShape(1, total_num_anchors)).set_is_resizable(
true));
254 TensorInfo proposals_4_roi_values(deltas->
clone()->set_tensor_shape(
TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(
true));
258 TensorInfo *proposals_4_roi_values_to_use = &proposals_4_roi_values;
259 TensorInfo proposals_4_roi_values_quantized(deltas->
clone()->set_tensor_shape(
TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(
true));
274 proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized;
319 void CLGenerateProposalsLayer::run_cpp_nms_kernel()
322 _scores_flattened.
map(
true);
323 _all_proposals_to_use->
map(
true);
329 _keeps_nms_unused.
map(
true);
330 _classes_nms_unused.
map(
true);
336 _keeps_nms_unused.
unmap();
337 _classes_nms_unused.
unmap();
343 _scores_flattened.
unmap();
344 _all_proposals_to_use->
unmap();
358 _permute_deltas.
run();
359 _permute_scores.
run();
361 _flatten_deltas.
run();
362 _flatten_scores.
run();
366 _dequantize_anchors->run();
367 _dequantize_deltas->run();
375 _quantize_all_proposals->run();
379 run_cpp_nms_kernel();
void configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info)
Set the input and output tensors.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value=PixelValue(), PaddingMode mode=PaddingMode::CONSTANT)
Static function to check if given info will lead to a valid configuration of CLPadLayerKernel.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLDequantizationLayer.
Generate Proposals Information class.
void map(cl::CommandQueue &q, bool blocking=true)
Enqueue a map operation of the allocated buffer on the given queue.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t,...)
quantized, symmetric fixed-point 16-bit number
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
std::vector< PaddingInfo > PaddingList
List of padding information.
static CLScheduler & get()
Access the scheduler singleton.
BoxWithNonMaximaSuppressionLimit Information class.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
quantized, asymmetric fixed-point 16-bit number
Basic function to run opencl::ClDequantization that dequantizes an input tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
1 channel, 1 F16 per channel
void run() override
Run the kernels contained in the function.
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Quantization information.
static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info)
Static function to check if given info will lead to a valid configuration of CLComputeAllAnchorsKernel.
1 channel, 1 U32 per channel
quantized, asymmetric fixed-point 8-bit number unsigned
Interface for the PadLayer function.
void unmap(cl::CommandQueue &q)
Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Num samples, channels, height, width.
Strides of an item in bytes.
static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info)
Static function to check if given info will lead to a valid configuration of CLGenerateProposalsLayer.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Memory group resources scope handling class.
Interface for OpenCL tensor.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
ComputeAnchors information class.
Num samples, height, width, channels.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
CLGenerateProposalsLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and outputs.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
Basic function to simulate a quantization layer.
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
Interface for Compute All Anchors kernel.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLQuantizationLayer.
~CLGenerateProposalsLayer()
Default destructor.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Configure the BoxWithNonMaximaSuppressionLimit CPP kernel.
Basic implementation of the OpenCL tensor interface.