35 void dequantize_tensor(
const ITensor *
input, ITensor *output)
37 const UniformQuantizationInfo
qinfo = input->info()->quantization_info().
uniform();
41 window.use_tensor_dimensions(input->info()->tensor_shape());
42 Iterator input_it(input, window);
43 Iterator output_it(output, window);
50 *
reinterpret_cast<float *
>(output_it.ptr()) =
dequantize(*reinterpret_cast<const uint8_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
57 *
reinterpret_cast<float *
>(output_it.ptr()) =
dequantize_qasymm8_signed(*reinterpret_cast<const int8_t *>(input_it.ptr()), qinfo);
64 *
reinterpret_cast<float *
>(output_it.ptr()) =
dequantize(*reinterpret_cast<const uint16_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
73 void quantize_tensor(
const ITensor *input, ITensor *output)
75 const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
76 const DataType data_type = output->info()->data_type();
79 window.use_tensor_dimensions(input->info()->tensor_shape());
80 Iterator input_it(input, window);
81 Iterator output_it(output, window);
88 *
reinterpret_cast<uint8_t *
>(output_it.ptr()) =
quantize_qasymm8(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
95 *
reinterpret_cast<int8_t *
>(output_it.ptr()) =
quantize_qasymm8_signed(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
102 *
reinterpret_cast<uint16_t *
>(output_it.ptr()) =
quantize_qasymm16(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
104 input_it, output_it);
// Constructor (fragment): takes ownership of the optional memory manager for the
// memory group and default-constructs the kernel and intermediate F32 tensors.
// NOTE(review): this extract is missing lines (original lines 115-124, 126-128, ...);
// the member-initializer list shown here is only partial.
113 : _memory_group(
std::move(memory_manager)),
114 _box_with_nms_limit_kernel(),
125 _batch_splits_in_f32(),
129 _batch_splits_out_f32(),
// configure() fragment. NOTE(review): many interior lines (braces, branch
// conditions, allocator calls) are missing from this extract; comments below
// describe only what the visible statements do.
140 ARM_COMPUTE_LOG_PARAMS(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
// Remember the user-provided tensors so run() can convert to/from F32.
144 _scores_in = scores_in;
145 _boxes_in = boxes_in;
146 _batch_splits_in = batch_splits_in;
147 _scores_out = scores_out;
148 _boxes_out = boxes_out;
150 _batch_splits_out = batch_splits_out;
// Register the intermediate F32 tensors with the memory group so their
// backing memory is managed/reused across runs.
156 _memory_group.
manage(&_scores_in_f32);
157 _memory_group.
manage(&_boxes_in_f32);
158 _memory_group.
manage(&_scores_out_f32);
159 _memory_group.
manage(&_boxes_out_f32);
160 _memory_group.
manage(&_classes_f32);
// Optional tensors are only managed when the caller actually supplied them.
163 if(batch_splits_in !=
nullptr)
165 _memory_group.
manage(&_batch_splits_in_f32);
171 if(batch_splits_out !=
nullptr)
173 _memory_group.
manage(&_batch_splits_out_f32);
178 _memory_group.
manage(&_keeps_f32);
// Configure the kernel on the F32 intermediates; optional outputs are passed
// as nullptr when not supplied by the caller.
// NOTE(review): two kernel configurations appear below — presumably one for the
// quantized path (F32 intermediates) and one acting directly on the user
// tensors; the selecting branch is missing from this extract — confirm.
182 _box_with_nms_limit_kernel.
configure(&_scores_in_f32, &_boxes_in_f32, (batch_splits_in !=
nullptr) ? &_batch_splits_in_f32 :
nullptr,
183 &_scores_out_f32, &_boxes_out_f32, &_classes_f32,
184 (batch_splits_out !=
nullptr) ? &_batch_splits_out_f32 :
nullptr, (keeps !=
nullptr) ? &_keeps_f32 :
nullptr,
189 _box_with_nms_limit_kernel.
configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
// Trailing condition fragments; their bodies (original lines 197-202, 204+)
// are missing from this extract.
196 if(_batch_splits_in !=
nullptr)
203 if(batch_splits_out !=
nullptr)
// run() fragment. NOTE(review): the kernel-scheduling call and surrounding
// braces are missing from this extract; visible statements show the
// quantized path only.
// Convert quantized inputs to the F32 intermediates before the kernel runs.
242 dequantize_tensor(_scores_in, &_scores_in_f32);
243 dequantize_tensor(_boxes_in, &_boxes_in_f32);
244 if(_batch_splits_in !=
nullptr)
246 dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32);
// Convert the F32 results back into the user's quantized output tensors;
// optional outputs are converted only when the caller supplied them.
254 quantize_tensor(&_scores_out_f32, _scores_out);
255 quantize_tensor(&_boxes_out_f32, _boxes_out);
256 quantize_tensor(&_classes_f32, _classes);
257 if(_batch_splits_out !=
nullptr)
259 quantize_tensor(&_batch_splits_out_f32, _batch_splits_out);
261 if(_keeps !=
nullptr)
263 quantize_tensor(&_keeps_f32, _keeps);
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Initialise the kernel's input and output tensors.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
BoxWithNonMaximaSuppressionLimit Information class.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run() override
Run the kernels contained in the function.
1 channel, 1 F32 per channel
Store the tensor's metadata.
quantized, asymmetric fixed-point 16-bit number
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Interface for CPU tensor.
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Memory group resources scope handling class.
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
const QuantizationInfo qinfo
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
static Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out=nullptr, const ITensorInfo *keeps=nullptr, const ITensorInfo *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Static function to check if the given info will lead to a valid configuration of CPPBoxWithNonMaximaSuppressionLimit.
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_LOG_PARAMS(...)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function on each element.
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
quantized, asymmetric fixed-point 8-bit number signed
DataType
Available data types.
uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit asymmetric quantization scheme.
static IScheduler & get()
Access the scheduler singleton.
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Configure the BoxWithNonMaximaSuppressionLimit CPP kernel.