23.08
|
Go to the documentation of this file.
53 if(
dst->tensor_shape().total_size() > 0)
64 inline void store_result(T *ptr,
const float32x4x4_t &v)
70 inline void store_result<float>(
float *ptr,
const float32x4x4_t &v)
78 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
80 inline void store_result<float16_t>(float16_t *ptr,
const float32x4x4_t &v)
82 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
83 wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3])));
88 inline void store_result(T *ptr,
const float32x4x2_t &v)
94 inline void store_result<float>(
float *ptr,
const float32x4x2_t &v)
100 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
102 inline void store_result<float16_t>(float16_t *ptr,
const float32x4x2_t &v)
104 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
108 template <
typename TOut,
typename TIn>
109 void run_dequantization_qasymm8(
const ITensor *
input, ITensor *output,
const Window &window)
111 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
115 const int window_step_x = 16;
116 const auto window_start_x =
static_cast<int>(window.x().start());
117 const auto window_end_x =
static_cast<int>(window.x().end());
120 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
121 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
124 Iterator in(
input, win_collapsed);
125 Iterator out(output, win_collapsed);
129 const auto in_ptr =
reinterpret_cast<const TIn *
>(in.ptr());
130 const auto out_ptr =
reinterpret_cast<TOut *
>(out.ptr());
132 int x = window_start_x;
133 for(; x <= (window_end_x - window_step_x); x += window_step_x)
138 store_result(
reinterpret_cast<TOut *
>(out_ptr + x), vdeq);
142 for(; x < window_end_x; ++x)
144 auto val = *(in_ptr + x);
151 template <
typename T>
152 void run_dequantization_qsymm8_per_channel_nchw(
const ITensor *
input, ITensor *output,
const Window &window)
154 const auto scale =
input->info()->quantization_info().scale();
156 const int window_step_x = 16;
157 const auto window_start_x =
static_cast<int>(window.x().start());
158 const auto window_end_x =
static_cast<int>(window.x().end());
165 Iterator in(
input, win);
166 Iterator out(output, win);
170 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
171 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
173 int x = window_start_x;
174 for(; x <= (window_end_x - window_step_x); x += window_step_x)
179 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
183 for(; x < window_end_x; ++x)
185 int8_t val = *(in_ptr + x);
192 template <
typename T>
193 void run_dequantization_qsymm8_per_channel_nhwc(
const ITensor *
input, ITensor *output,
const Window &window)
195 const auto scale =
input->info()->quantization_info().scale();
197 const int window_step_x = 16;
198 const auto window_start_x =
static_cast<int>(window.x().start());
199 const auto window_end_x =
static_cast<int>(window.x().end());
206 Iterator in(
input, win);
207 Iterator out(output, win);
211 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
212 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
214 int x = window_start_x;
215 for(; x <= (window_end_x - window_step_x); x += window_step_x)
217 const float32x4x4_t vscale =
229 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
233 for(; x < window_end_x; ++x)
235 int8_t val = *(in_ptr + x);
242 template <
typename T>
243 void run_dequantization_qsymm8(
const ITensor *
input, ITensor *output,
const Window &window)
245 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
248 const int window_step_x = 16;
249 const auto window_start_x =
static_cast<int>(window.x().start());
250 const auto window_end_x =
static_cast<int>(window.x().end());
253 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
254 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
257 Iterator in(
input, win_collapsed);
258 Iterator out(output, win_collapsed);
262 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
263 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
265 int x = window_start_x;
266 for(; x <= (window_end_x - window_step_x); x += window_step_x)
271 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
275 for(; x < window_end_x; ++x)
277 int8_t val = *(in_ptr + x);
284 template <
typename T>
285 void run_dequantization_qsymm16(
const ITensor *
input, ITensor *output,
const Window &window)
287 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
290 const int window_step_x = 8;
291 const auto window_start_x =
static_cast<int>(window.x().start());
292 const auto window_end_x =
static_cast<int>(window.x().end());
295 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
296 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
299 Iterator in(
input, win_collapsed);
300 Iterator out(output, win_collapsed);
304 const auto in_ptr =
reinterpret_cast<const int16_t *
>(in.ptr());
305 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
307 int x = window_start_x;
308 for(; x <= (window_end_x - window_step_x); x += window_step_x)
313 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
317 for(; x < window_end_x; ++x)
319 int16_t val = *(in_ptr + x);
326 template <
typename T>
327 void run_dequantization_core(
const ITensor *
input, ITensor *output,
const Window &window)
329 switch(
input->info()->data_type())
332 run_dequantization_qasymm8<T, uint8_t>(
input, output, window);
335 run_dequantization_qasymm8<T, int8_t>(
input, output, window);
338 input->info()->data_layout() ==
DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc<T>(
input, output, window) : run_dequantization_qsymm8_per_channel_nchw<T>(
input, output, window);
341 run_dequantization_qsymm8<T>(
input, output, window);
344 run_dequantization_qsymm16<T>(
input, output, window);
362 ICpuKernel::configure(win);
380 switch(
dst->info()->data_type())
385 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
396 return "CpuDequantizeKernel";
Class to describe a number of elements in each dimension.
const char * name() const override
Name of the kernel.
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
SimpleTensor< float > src
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
@ NHWC
Num samples, height, width, channels.
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
Dequantize a neon vector holding 8 16-bit quantized values.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
@ QSYMM8
quantized, symmetric fixed-point 8-bit number
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void configure(const ITensorInfo *src, ITensorInfo *dst)
Set input, output tensors.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
uint8x16_t vloadq(const uint8_t *ptr)
Includes all wrapper headers at once.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
@ QSYMM16
quantized, symmetric fixed-point 16-bit number
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
const std::vector< float > & scale() const
Scale vector accessor.
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit symmetric quantization scheme.
static float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
Copyright (c) 2017-2023 Arm Limited.
@ F16
16-bit floating-point number
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Store the tensor's metadata.
@ F32
32-bit floating-point number
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function on each element.
const QuantizationInfo qinfo
const std::vector< int32_t > & offset() const
Offset vector accessor.