Compute Library
 22.11
qasymm8_signed.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
27 
28 #include <cmath>
29 #include <cstddef>
30 
31 #include "src/core/NEON/SVEAsymm.h"
32 #include "src/core/NEON/SVEMath.h"
33 #include <arm_sve.h>
34 
35 namespace arm_compute
36 {
37 namespace cpu
38 {
39 void sve2_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
40 {
41  const auto window_start_x = static_cast<int>(window.x().start());
42  const auto window_end_x = static_cast<int>(window.x().end());
44 
45  Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
46  win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
47 
48  Iterator input(src, win_collapsed);
49  Iterator output(dst, win_collapsed);
50 
51  const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
52  const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
53  const auto va = svdup_n_s8(quantize_qasymm8_signed(act_info.a(), qi_in));
54  const auto vb = svdup_n_s8(quantize_qasymm8_signed(act_info.b(), qi_in));
55  const auto const_0 = quantize_qasymm8_signed(0.f, qi_in);
56  const auto vconst_0 = svdup_n_s8(const_0);
57  const auto vconst_1 = svdup_n_f32(1.f);
58  const auto va_f32 = svdup_n_f32(act_info.a());
59  const auto vb_f32 = svdup_n_f32(act_info.b());
60  const auto const_6_f32 = svdup_n_f32(6.f);
61  const auto const_0_f32 = svdup_n_f32(0.f);
62  const auto const_3_f32 = svdup_n_f32(3.f);
63  const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
64 
65  // Initialise scale/offset for re-quantization
66  bool requant = true;
67  if(qi_in.scale == qi_out.scale && qi_in.offset == qi_out.offset)
68  {
69  requant = false;
70  }
71  float s = qi_in.scale / qi_out.scale;
72  float o = -qi_in.offset * s + qi_out.offset;
73  auto vs = svdup_n_f32(s);
74  auto vo = svdup_n_f32(o);
75 
76  // Initialise scale/offset for re-quantization with int32_t
77  const auto voffset_in = svdup_n_s32(qi_in.offset);
78  int32_t s_s32 = round(s * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
79  int32_t o_s32 = round(o * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
80  const auto vs_s32 = svdup_n_s32(s_s32);
81  const auto vo_s32 = svdup_n_s32(o_s32);
82 
83  // Initialise scale/offset for re-quantization for leaky relu
84  int32_t s_leaky_s32 = round(s * act_info.a() * (1 << 8), arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
85  int32_t o_leaky_s32 = round((-qi_in.offset * s * act_info.a() + qi_out.offset) * (1 << 8),
87  const auto vs_leaky_s32 = svdup_n_s32(s_leaky_s32);
88  const auto vo_leaky_s32 = svdup_n_s32(o_leaky_s32);
89 
90  execute_window_loop(win_collapsed, [&](const Coordinates &)
91  {
92  const auto input_ptr = reinterpret_cast<const int8_t *>(input.ptr());
93  const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
94 
95  svint8_t tmp;
96 
97  int x = window_start_x;
98  svbool_t pg = svwhilelt_b8(x, window_end_x);
99  do
100  {
101  const auto vin = svld1_s8(pg, input_ptr + x);
103  {
104  // Perform activation
105  tmp = svmax_s8_z(pg, vconst_0, vin);
106  // Re-quantize to new output space
107  tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
108  }
110  {
111  // Perform activation
112  tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vconst_0, vin));
113  // Re-quantize to new output space
114  tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
115  }
117  {
118  // Perform activation
119  tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vb, vin));
120  // Re-quantize to new output space
121  tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
122  }
124  {
125  // De-quantize
126  const auto vin_deq = svdequantize_z(pg, vin, qi_in);
127  // Perform activation
128  const svfloat32x4_t tmp_dep = svcreate4_f32(
129  svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 0))))),
130  svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 1))))),
131  svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 2))))),
132  svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 3))))));
133  // Re-quantize to new output space
134  tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
135  }
137  {
138  // De-quantize
139  const auto vin_deq = svdequantize_z(pg, vin, qi_in);
140  // Perform activation
141  const svfloat32x4_t tmp_dep = svcreate4_f32(
142  svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 0), vb_f32))),
143  svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 1), vb_f32))),
144  svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 2), vb_f32))),
145  svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 3), vb_f32))));
146  // Re-quantize to new output space
147  tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
148  }
150  {
151  // De-quantize
152  const auto vin_deq = svdequantize_z(pg, vin, qi_in);
153  // Perform activation
154  const svfloat32x4_t tmp_dep = svcreate4_f32(
155  svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 0), const_3_f32))))),
156  svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
157  svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
158  svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))));
159  // Re-quantize to new output space
160  tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
161  }
163  {
164  svbool_t p0, p1, p2, p3;
165  svint32x4_t tmp_dep;
166 
167  // Expand to int32
168  const svint32x4_t vin_s32 = svcreate4_s32(
169  svmovlb_s32(svmovlb_s16(vin)),
170  svmovlt_s32(svmovlb_s16(vin)),
171  svmovlb_s32(svmovlt_s16(vin)),
172  svmovlt_s32(svmovlt_s16(vin)));
173 
174  // Compare elements to input offset
175  if(qi_in.scale >= 0)
176  {
177  p0 = svcmplt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
178  p1 = svcmplt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
179  p2 = svcmplt_s32(pg, svget4_s32(vin_s32, 2), voffset_in);
180  p3 = svcmplt_s32(pg, svget4_s32(vin_s32, 3), voffset_in);
181  }
182  else
183  {
184  p0 = svcmpgt_s32(pg, svget4_s32(vin_s32, 0), voffset_in);
185  p1 = svcmpgt_s32(pg, svget4_s32(vin_s32, 1), voffset_in);
186  p2 = svcmpgt_s32(pg, svget4_s32(vin_s32, 2), voffset_in);
187  p3 = svcmpgt_s32(pg, svget4_s32(vin_s32, 3), voffset_in);
188  }
189 
190  // Multiply negative elements and requantize if necessary
191  if(requant)
192  {
193  tmp_dep = svcreate4_s32(
194  svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p0, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 0), svsel(p0, vs_leaky_s32, vs_s32)), 8),
195  svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p1, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 1), svsel(p1, vs_leaky_s32, vs_s32)), 8),
196  svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p2, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 2), svsel(p2, vs_leaky_s32, vs_s32)), 8),
197  svasr_n_s32_m(pg, svmla_s32_m(pg, svsel(p3, vo_leaky_s32, vo_s32), svget4_s32(vin_s32, 3), svsel(p3, vs_leaky_s32, vs_s32)), 8));
198  }
199  else
200  {
201  tmp_dep = svcreate4_s32(
202  svasr_n_s32_m(p0, svmad_s32_m(p0, svget4_s32(vin_s32, 0), vs_leaky_s32, vo_leaky_s32), 8),
203  svasr_n_s32_m(p1, svmad_s32_m(p1, svget4_s32(vin_s32, 1), vs_leaky_s32, vo_leaky_s32), 8),
204  svasr_n_s32_m(p2, svmad_s32_m(p2, svget4_s32(vin_s32, 2), vs_leaky_s32, vo_leaky_s32), 8),
205  svasr_n_s32_m(p3, svmad_s32_m(p3, svget4_s32(vin_s32, 3), vs_leaky_s32, vo_leaky_s32), 8));
206  }
207 
208  // Convert uint32 vectors to uint16 vectors (with saturation)
209  const auto v_low_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 0)), svget4_s32(tmp_dep, 1));
210  const auto v_high_s16 = svqxtnt_s32(svqxtnb_s32(svget4_s32(tmp_dep, 2)), svget4_s32(tmp_dep, 3));
211 
212  // convert uint16 vectors to uint8 vectors (with saturation)
213  tmp = svqxtnt_s16(svqxtnb_s16(v_low_s16), v_high_s16);
214  }
215  else
216  {
217  ARM_COMPUTE_ERROR("Unsupported activation function");
218  }
219 
220  svst1_s8(pg, output_ptr + x, tmp);
221 
222  x += svcntb();
223  pg = svwhilelt_b8(x, window_end_x);
224 
225  }
226  while(svptest_any(svptrue_b8(), pg));
227  },
228  input, output);
229 }
230 } // namespace cpu
231 } // namespace arm_compute
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
float a() const
Get the alpha value.
Definition: Types.h:1684
Quantization info when assuming per layer quantization.
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:79
void sve2_qasymm8_signed_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
Activation Layer Information class.
Definition: Types.h:1639
Interface for CPU tensor.
Definition: ITensor.h:36
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
ActivationFunction
Available activation functions.
Definition: Types.h:1643
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
Coordinates of an item.
Definition: Coordinates.h:37
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
Definition: Helpers.inl:139
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
int round(float x, RoundingPolicy rounding_policy)
Return a rounded value of x.
Definition: Rounding.cpp:35
Rounds to nearest value; half rounds to nearest even.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:77
ActivationFunction activation() const
Get the type of activation function.
Definition: Types.h:1679
float b() const
Get the beta value.
Definition: Types.h:1689
Includes all wrapper headers at once.
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:102
Iterator updated by execute_window_loop for each window element.
Definition: Helpers.h:46
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:97
Describe a multidimensional execution window.
Definition: Window.h:39
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:159