Compute Library
 20.08
CPPBoxWithNonMaximaSuppressionLimit.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 
29 namespace arm_compute
30 {
31 namespace
32 {
33 void dequantize_tensor(const ITensor *input, ITensor *output)
34 {
35  const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
36  const DataType data_type = input->info()->data_type();
37 
38  Window window;
39  window.use_tensor_dimensions(input->info()->tensor_shape());
40  Iterator input_it(input, window);
41  Iterator output_it(output, window);
42 
43  switch(data_type)
44  {
45  case DataType::QASYMM8:
46  execute_window_loop(window, [&](const Coordinates &)
47  {
48  *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint8_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
49  },
50  input_it, output_it);
51  break;
53  execute_window_loop(window, [&](const Coordinates &)
54  {
55  *reinterpret_cast<float *>(output_it.ptr()) = dequantize_qasymm8_signed(*reinterpret_cast<const int8_t *>(input_it.ptr()), qinfo);
56  },
57  input_it, output_it);
58  break;
59  case DataType::QASYMM16:
60  execute_window_loop(window, [&](const Coordinates &)
61  {
62  *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint16_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
63  },
64  input_it, output_it);
65  break;
66  default:
67  ARM_COMPUTE_ERROR("Unsupported data type");
68  }
69 }
70 
71 void quantize_tensor(const ITensor *input, ITensor *output)
72 {
73  const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
74  const DataType data_type = output->info()->data_type();
75 
76  Window window;
77  window.use_tensor_dimensions(input->info()->tensor_shape());
78  Iterator input_it(input, window);
79  Iterator output_it(output, window);
80 
81  switch(data_type)
82  {
83  case DataType::QASYMM8:
84  execute_window_loop(window, [&](const Coordinates &)
85  {
86  *reinterpret_cast<uint8_t *>(output_it.ptr()) = quantize_qasymm8(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
87  },
88  input_it, output_it);
89  break;
91  execute_window_loop(window, [&](const Coordinates &)
92  {
93  *reinterpret_cast<int8_t *>(output_it.ptr()) = quantize_qasymm8_signed(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
94  },
95  input_it, output_it);
96  break;
97  case DataType::QASYMM16:
98  execute_window_loop(window, [&](const Coordinates &)
99  {
100  *reinterpret_cast<uint16_t *>(output_it.ptr()) = quantize_qasymm16(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
101  },
102  input_it, output_it);
103  break;
104  default:
105  ARM_COMPUTE_ERROR("Unsupported data type");
106  }
107 }
108 } // namespace
109 
111  : _memory_group(std::move(memory_manager)),
112  _box_with_nms_limit_kernel(),
113  _scores_in(),
114  _boxes_in(),
115  _batch_splits_in(),
116  _scores_out(),
117  _boxes_out(),
118  _classes(),
119  _batch_splits_out(),
120  _keeps(),
121  _scores_in_f32(),
122  _boxes_in_f32(),
123  _batch_splits_in_f32(),
124  _scores_out_f32(),
125  _boxes_out_f32(),
126  _classes_f32(),
127  _batch_splits_out_f32(),
128  _keeps_f32(),
129  _is_qasymm8(false)
130 {
131 }
132 
133 void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
134  ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
135 {
136  ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
137 
138  _is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8 || scores_in->info()->data_type() == DataType::QASYMM8_SIGNED;
139 
140  _scores_in = scores_in;
141  _boxes_in = boxes_in;
142  _batch_splits_in = batch_splits_in;
143  _scores_out = scores_out;
144  _boxes_out = boxes_out;
145  _classes = classes;
146  _batch_splits_out = batch_splits_out;
147  _keeps = keeps;
148 
149  if(_is_qasymm8)
150  {
151  // Manage intermediate buffers
152  _memory_group.manage(&_scores_in_f32);
153  _memory_group.manage(&_boxes_in_f32);
154  _memory_group.manage(&_scores_out_f32);
155  _memory_group.manage(&_boxes_out_f32);
156  _memory_group.manage(&_classes_f32);
157  _scores_in_f32.allocator()->init(scores_in->info()->clone()->set_data_type(DataType::F32));
158  _boxes_in_f32.allocator()->init(boxes_in->info()->clone()->set_data_type(DataType::F32));
159  if(batch_splits_in != nullptr)
160  {
161  _memory_group.manage(&_batch_splits_in_f32);
162  _batch_splits_in_f32.allocator()->init(batch_splits_in->info()->clone()->set_data_type(DataType::F32));
163  }
164  _scores_out_f32.allocator()->init(scores_out->info()->clone()->set_data_type(DataType::F32));
165  _boxes_out_f32.allocator()->init(boxes_out->info()->clone()->set_data_type(DataType::F32));
166  _classes_f32.allocator()->init(classes->info()->clone()->set_data_type(DataType::F32));
167  if(batch_splits_out != nullptr)
168  {
169  _memory_group.manage(&_batch_splits_out_f32);
170  _batch_splits_out_f32.allocator()->init(batch_splits_out->info()->clone()->set_data_type(DataType::F32));
171  }
172  if(keeps != nullptr)
173  {
174  _memory_group.manage(&_keeps_f32);
175  _keeps_f32.allocator()->init(keeps->info()->clone()->set_data_type(DataType::F32));
176  }
177 
178  _box_with_nms_limit_kernel.configure(&_scores_in_f32, &_boxes_in_f32, (batch_splits_in != nullptr) ? &_batch_splits_in_f32 : nullptr,
179  &_scores_out_f32, &_boxes_out_f32, &_classes_f32,
180  (batch_splits_out != nullptr) ? &_batch_splits_out_f32 : nullptr, (keeps != nullptr) ? &_keeps_f32 : nullptr,
181  keeps_size, info);
182  }
183  else
184  {
185  _box_with_nms_limit_kernel.configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
186  }
187 
188  if(_is_qasymm8)
189  {
190  _scores_in_f32.allocator()->allocate();
191  _boxes_in_f32.allocator()->allocate();
192  if(_batch_splits_in != nullptr)
193  {
194  _batch_splits_in_f32.allocator()->allocate();
195  }
196  _scores_out_f32.allocator()->allocate();
197  _boxes_out_f32.allocator()->allocate();
198  _classes_f32.allocator()->allocate();
199  if(batch_splits_out != nullptr)
200  {
201  _batch_splits_out_f32.allocator()->allocate();
202  }
203  if(keeps != nullptr)
204  {
205  _keeps_f32.allocator()->allocate();
206  }
207  }
208 }
209 
210 Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes,
211  const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
212 {
213  ARM_COMPUTE_UNUSED(batch_splits_in, batch_splits_out, keeps, keeps_size, info);
214  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
216 
217  const bool is_qasymm8 = scores_in->data_type() == DataType::QASYMM8 || scores_in->data_type() == DataType::QASYMM8_SIGNED;
218  if(is_qasymm8)
219  {
223  const UniformQuantizationInfo boxes_qinfo = boxes_in->quantization_info().uniform();
224  ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
225  ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0);
226  }
227 
228  return Status{};
229 }
230 
232 {
233  // Acquire all the temporaries
234  MemoryGroupResourceScope scope_mg(_memory_group);
235 
236  if(_is_qasymm8)
237  {
238  dequantize_tensor(_scores_in, &_scores_in_f32);
239  dequantize_tensor(_boxes_in, &_boxes_in_f32);
240  if(_batch_splits_in != nullptr)
241  {
242  dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32);
243  }
244  }
245 
246  Scheduler::get().schedule(&_box_with_nms_limit_kernel, Window::DimY);
247 
248  if(_is_qasymm8)
249  {
250  quantize_tensor(&_scores_out_f32, _scores_out);
251  quantize_tensor(&_boxes_out_f32, _boxes_out);
252  quantize_tensor(&_classes_f32, _classes);
253  if(_batch_splits_out != nullptr)
254  {
255  quantize_tensor(&_batch_splits_out_f32, _batch_splits_out);
256  }
257  if(_keeps != nullptr)
258  {
259  quantize_tensor(&_keeps_f32, _keeps);
260  }
261  }
262 }
263 } // namespace arm_compute
const std::vector< int32_t > & offset() const
Offset vector accessor.
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Initialise the kernel's input and output tensors.
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
BoxWithNonMaximaSuppressionLimit Information class.
Definition: Types.h:593
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
1 channel, 1 F32 per channel
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Quantization info when assuming per layer quantization.
quantized, asymmetric fixed-point 16-bit number
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Interface for NEON tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2020 Arm Limited.
1 channel, 1 F16 per channel
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: Tensor.cpp:48
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
Definition: MemoryGroup.h:79
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
quantized, asymmetric fixed-point 8-bit number unsigned
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
const std::vector< float > & scale() const
Scale vector accessor.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
Definition: Validate.h:610
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
const QuantizationInfo qinfo
Definition: Im2Col.cpp:150
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function.
Definition: Helpers.inl:128
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
quantized, asymmetric fixed-point 8-bit number signed
DataType
Available data types.
Definition: Types.h:77
uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a 16-bit asymmetric quantization scheme.
Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes, const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:95
void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out=nullptr, ITensor *keeps=nullptr, ITensor *keeps_size=nullptr, const BoxNMSLimitInfo info=BoxNMSLimitInfo())
Configure the BoxWithNonMaximaSuppressionLimit CPP kernel.