24.02.1
|
Go to the documentation of this file.
56 if (
dst->tensor_shape().total_size() > 0)
67 inline void store_result(T *ptr,
const float32x4x4_t &v)
73 inline void store_result<float>(
float *ptr,
const float32x4x4_t &v)
81 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
83 inline void store_result<float16_t>(float16_t *ptr,
const float32x4x4_t &v)
85 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
86 wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3])));
91 inline void store_result(T *ptr,
const float32x4x2_t &v)
97 inline void store_result<float>(
float *ptr,
const float32x4x2_t &v)
103 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
105 inline void store_result<float16_t>(float16_t *ptr,
const float32x4x2_t &v)
107 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
111 template <
typename TOut,
typename TIn>
112 void run_dequantization_qasymm8(
const ITensor *
input, ITensor *output,
const Window &window)
114 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
118 const int window_step_x = 16;
119 const auto window_start_x =
static_cast<int>(window.x().start());
120 const auto window_end_x =
static_cast<int>(window.x().end());
123 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
124 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
127 Iterator in(
input, win_collapsed);
128 Iterator out(output, win_collapsed);
132 [&](
const Coordinates &)
134 const auto in_ptr =
reinterpret_cast<const TIn *
>(in.ptr());
135 const auto out_ptr =
reinterpret_cast<TOut *
>(out.ptr());
137 int x = window_start_x;
138 for (; x <= (window_end_x - window_step_x); x += window_step_x)
143 store_result(
reinterpret_cast<TOut *
>(out_ptr + x), vdeq);
147 for (; x < window_end_x; ++x)
149 auto val = *(in_ptr + x);
156 template <
typename T>
157 void run_dequantization_qsymm8_per_channel_nchw(
const ITensor *
input, ITensor *output,
const Window &window)
159 const auto scale =
input->info()->quantization_info().scale();
161 const int window_step_x = 16;
162 const auto window_start_x =
static_cast<int>(window.x().start());
163 const auto window_end_x =
static_cast<int>(window.x().end());
170 Iterator in(
input, win);
171 Iterator out(output, win);
175 [&](
const Coordinates &
id)
177 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
178 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
180 int x = window_start_x;
181 for (; x <= (window_end_x - window_step_x); x += window_step_x)
186 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
190 for (; x < window_end_x; ++x)
192 int8_t val = *(in_ptr + x);
199 template <
typename T>
200 void run_dequantization_qsymm8_per_channel_nhwc(
const ITensor *
input, ITensor *output,
const Window &window)
202 const auto scale =
input->info()->quantization_info().scale();
204 const int window_step_x = 16;
205 const auto window_start_x =
static_cast<int>(window.x().start());
206 const auto window_end_x =
static_cast<int>(window.x().end());
213 Iterator in(
input, win);
214 Iterator out(output, win);
218 [&](
const Coordinates &)
220 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
221 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
223 int x = window_start_x;
224 for (; x <= (window_end_x - window_step_x); x += window_step_x)
233 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
237 for (; x < window_end_x; ++x)
239 int8_t val = *(in_ptr + x);
246 template <
typename T>
247 void run_dequantization_qsymm8(
const ITensor *
input, ITensor *output,
const Window &window)
249 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
252 const int window_step_x = 16;
253 const auto window_start_x =
static_cast<int>(window.x().start());
254 const auto window_end_x =
static_cast<int>(window.x().end());
257 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
258 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
261 Iterator in(
input, win_collapsed);
262 Iterator out(output, win_collapsed);
266 [&](
const Coordinates &)
268 const auto in_ptr =
reinterpret_cast<const int8_t *
>(in.ptr());
269 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
271 int x = window_start_x;
272 for (; x <= (window_end_x - window_step_x); x += window_step_x)
277 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
281 for (; x < window_end_x; ++x)
283 int8_t val = *(in_ptr + x);
290 template <
typename T>
291 void run_dequantization_qsymm16(
const ITensor *
input, ITensor *output,
const Window &window)
293 const UniformQuantizationInfo &
qinfo =
input->info()->quantization_info().uniform();
296 const int window_step_x = 8;
297 const auto window_start_x =
static_cast<int>(window.x().start());
298 const auto window_end_x =
static_cast<int>(window.x().end());
301 Window win_collapsed = window.collapse_if_possible(window,
Window::DimZ);
302 win_collapsed.set(
Window::DimX, Window::Dimension(0, 1, 1));
305 Iterator in(
input, win_collapsed);
306 Iterator out(output, win_collapsed);
310 [&](
const Coordinates &)
312 const auto in_ptr =
reinterpret_cast<const int16_t *
>(in.ptr());
313 const auto out_ptr =
reinterpret_cast<T *
>(out.ptr());
315 int x = window_start_x;
316 for (; x <= (window_end_x - window_step_x); x += window_step_x)
321 store_result<T>(
reinterpret_cast<T *
>(out_ptr + x), vdeq);
325 for (; x < window_end_x; ++x)
327 int16_t val = *(in_ptr + x);
334 template <
typename T>
335 void run_dequantization_core(
const ITensor *
input, ITensor *output,
const Window &window)
337 switch (
input->info()->data_type())
340 run_dequantization_qasymm8<T, uint8_t>(
input, output, window);
343 run_dequantization_qasymm8<T, int8_t>(
input, output, window);
347 ? run_dequantization_qsymm8_per_channel_nhwc<T>(
input, output, window)
348 : run_dequantization_qsymm8_per_channel_nchw<T>(
input, output, window);
351 run_dequantization_qsymm8<T>(
input, output, window);
354 run_dequantization_qsymm16<T>(
input, output, window);
372 ICpuKernel::configure(win);
390 switch (
dst->info()->data_type())
395 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
406 return "CpuDequantizeKernel";
Class to describe a number of elements in each dimension.
const char * name() const override
Name of the kernel.
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
SimpleTensor< float > src
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
@ NHWC
Num samples, height, width, channels.
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale)
Dequantize a neon vector holding 8 16-bit quantized values.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
@ QSYMM8
quantized, symmetric fixed-point 8-bit number
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
void configure(const ITensorInfo *src, ITensorInfo *dst)
Set input, output tensors.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
uint8x16_t vloadq(const uint8_t *ptr)
Includes all wrapper headers at once.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
@ QSYMM16
quantized, symmetric fixed-point 16-bit number
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
const std::vector< float > & scale() const
Scale vector accessor.
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of an Image.
float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
Dequantize a value given a 16-bit symmetric quantization scheme.
static float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
Dequantize a value given an 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
void vstore(uint8_t *ptr, uint8x8_t val)
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
@ F16
16-bit floating-point number
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
float dequantize(uint8_t value, float scale, int32_t offset)
Dequantize a value given an 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Store the tensor's metadata.
@ F32
32-bit floating-point number
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
const QuantizationInfo qinfo
const std::vector< int32_t > & offset() const
Offset vector accessor.