Compute Library
 21.08
add.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_WRAPPER_ADD_H
25 #define ARM_COMPUTE_WRAPPER_ADD_H
26 
27 #include <arm_neon.h>
28 
29 namespace arm_compute
30 {
31 namespace wrapper
32 {
/**
 * VADD: Vector add.
 *
 * VADD_IMPL emits one inline overload of vadd() that takes two vectors of
 * type vec_t and returns whatever the intrinsic intrin##_##suffix returns for
 * them. The first macro argument is accepted but never referenced by the
 * expansion.
 */
#define VADD_IMPL(unused_t, vec_t, intrin, suffix)    \
    inline vec_t vadd(const vec_t &a, const vec_t &b) \
    {                                                 \
        return intrin##_##suffix(a, b);               \
    }

// Overloads forwarding to the vadd_* intrinsics
VADD_IMPL(uint8x8_t, uint8x8_t, vadd, u8)
VADD_IMPL(int8x8_t, int8x8_t, vadd, s8)
VADD_IMPL(uint16x4_t, uint16x4_t, vadd, u16)
VADD_IMPL(int16x4_t, int16x4_t, vadd, s16)
VADD_IMPL(uint32x2_t, uint32x2_t, vadd, u32)
VADD_IMPL(int32x2_t, int32x2_t, vadd, s32)
VADD_IMPL(uint64x1_t, uint64x1_t, vadd, u64)
VADD_IMPL(int64x1_t, int64x1_t, vadd, s64)
VADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
VADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

// Overloads forwarding to the vaddq_* intrinsics
VADD_IMPL(uint8x16_t, uint8x16_t, vaddq, u8)
VADD_IMPL(int8x16_t, int8x16_t, vaddq, s8)
VADD_IMPL(uint16x8_t, uint16x8_t, vaddq, u16)
VADD_IMPL(int16x8_t, int16x8_t, vaddq, s16)
VADD_IMPL(uint32x4_t, uint32x4_t, vaddq, u32)
VADD_IMPL(int32x4_t, int32x4_t, vaddq, s32)
VADD_IMPL(uint64x2_t, uint64x2_t, vaddq, u64)
VADD_IMPL(int64x2_t, int64x2_t, vaddq, s64)
VADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
VADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VADD_IMPL
65 
/**
 * VQADD: Vector saturating add.
 *
 * VQADD_IMPL emits one inline overload of vqadd() that takes two vectors of
 * type vec_t and forwards them to the intrinsic intrin##_##suffix. The first
 * macro argument is accepted but never referenced by the expansion.
 *
 * There is no notion of saturation for floating point, so the floating-point
 * overloads below are wired to the plain vadd / vaddq intrinsics.
 */
#define VQADD_IMPL(unused_t, vec_t, intrin, suffix)    \
    inline vec_t vqadd(const vec_t &a, const vec_t &b) \
    {                                                  \
        return intrin##_##suffix(a, b);                \
    }

// Integer overloads forward to vqadd_*; floating point forwards to vadd_*
VQADD_IMPL(uint8x8_t, uint8x8_t, vqadd, u8)
VQADD_IMPL(int8x8_t, int8x8_t, vqadd, s8)
VQADD_IMPL(uint16x4_t, uint16x4_t, vqadd, u16)
VQADD_IMPL(int16x4_t, int16x4_t, vqadd, s16)
VQADD_IMPL(uint32x2_t, uint32x2_t, vqadd, u32)
VQADD_IMPL(int32x2_t, int32x2_t, vqadd, s32)
VQADD_IMPL(uint64x1_t, uint64x1_t, vqadd, u64)
VQADD_IMPL(int64x1_t, int64x1_t, vqadd, s64)
VQADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
VQADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

// Integer overloads forward to vqaddq_*; floating point forwards to vaddq_*
VQADD_IMPL(uint8x16_t, uint8x16_t, vqaddq, u8)
VQADD_IMPL(int8x16_t, int8x16_t, vqaddq, s8)
VQADD_IMPL(uint16x8_t, uint16x8_t, vqaddq, u16)
VQADD_IMPL(int16x8_t, int16x8_t, vqaddq, s16)
VQADD_IMPL(uint32x4_t, uint32x4_t, vqaddq, u32)
VQADD_IMPL(int32x4_t, int32x4_t, vqaddq, s32)
VQADD_IMPL(uint64x2_t, uint64x2_t, vqaddq, u64)
VQADD_IMPL(int64x2_t, int64x2_t, vqaddq, s64)
VQADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
VQADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VQADD_IMPL
99 
/**
 * VADDW: Vector widening add.
 *
 * VADDW_IMPL emits one inline overload of vaddw() whose first operand (and
 * return value) has type wide_t and whose second operand has type narrow_t.
 * The call is forwarded unchanged to the intrinsic intrin##_##suffix.
 */
#define VADDW_IMPL(wide_t, narrow_t, intrin, suffix)       \
    inline wide_t vaddw(const wide_t &a, const narrow_t &b) \
    {                                                      \
        return intrin##_##suffix(a, b);                    \
    }

VADDW_IMPL(uint16x8_t, uint8x8_t, vaddw, u8)
VADDW_IMPL(int16x8_t, int8x8_t, vaddw, s8)
VADDW_IMPL(uint32x4_t, uint16x4_t, vaddw, u16)
VADDW_IMPL(int32x4_t, int16x4_t, vaddw, s16)
VADDW_IMPL(uint64x2_t, uint32x2_t, vaddw, u32)
VADDW_IMPL(int64x2_t, int32x2_t, vaddw, s32)
#undef VADDW_IMPL
114 
/**
 * VADDL: Vector long add.
 *
 * VADDL_IMPL emits one inline overload of vaddl() that takes two operands of
 * type narrow_t and returns a value of type wide_t, obtained by forwarding
 * both operands to the intrinsic intrin##_##suffix.
 */
#define VADDL_IMPL(wide_t, narrow_t, intrin, suffix)         \
    inline wide_t vaddl(const narrow_t &a, const narrow_t &b) \
    {                                                        \
        return intrin##_##suffix(a, b);                      \
    }

VADDL_IMPL(uint16x8_t, uint8x8_t, vaddl, u8)
VADDL_IMPL(int16x8_t, int8x8_t, vaddl, s8)
VADDL_IMPL(uint32x4_t, uint16x4_t, vaddl, u16)
VADDL_IMPL(int32x4_t, int16x4_t, vaddl, s16)
VADDL_IMPL(uint64x2_t, uint32x2_t, vaddl, u32)
VADDL_IMPL(int64x2_t, int32x2_t, vaddl, s32)
#undef VADDL_IMPL
129 
#if defined(__aarch64__)
/**
 * VADDV: Across vector add (only compiled when __aarch64__ is defined).
 *
 * VADDV_IMPL emits one inline overload of vaddv() that takes a single vector
 * of type vec_t and returns a scalar_t, obtained by forwarding the vector to
 * the intrinsic intrin##_##suffix.
 */
#define VADDV_IMPL(scalar_t, vec_t, intrin, suffix) \
    inline scalar_t vaddv(const vec_t &a)           \
    {                                               \
        return intrin##_##suffix(a);                \
    }

// Overloads forwarding to the vaddv_* intrinsics
VADDV_IMPL(uint8_t, uint8x8_t, vaddv, u8)
VADDV_IMPL(int8_t, int8x8_t, vaddv, s8)
VADDV_IMPL(uint16_t, uint16x4_t, vaddv, u16)
VADDV_IMPL(int16_t, int16x4_t, vaddv, s16)
VADDV_IMPL(uint32_t, uint32x2_t, vaddv, u32)
VADDV_IMPL(int32_t, int32x2_t, vaddv, s32)
VADDV_IMPL(float, float32x2_t, vaddv, f32)

// Overloads forwarding to the vaddvq_* intrinsics
VADDV_IMPL(uint8_t, uint8x16_t, vaddvq, u8)
VADDV_IMPL(int8_t, int8x16_t, vaddvq, s8)
VADDV_IMPL(uint16_t, uint16x8_t, vaddvq, u16)
VADDV_IMPL(int16_t, int16x8_t, vaddvq, s16)
VADDV_IMPL(uint32_t, uint32x4_t, vaddvq, u32)
VADDV_IMPL(int32_t, int32x4_t, vaddvq, s32)
VADDV_IMPL(uint64_t, uint64x2_t, vaddvq, u64)
VADDV_IMPL(int64_t, int64x2_t, vaddvq, s64)
VADDV_IMPL(float, float32x4_t, vaddvq, f32)
#undef VADDV_IMPL
#endif // defined(__aarch64__)
157 
/**
 * VPADDL: Add long pairwise (overloads exist for both signed and unsigned
 * integer element types).
 *
 * VPADDL_IMPL emits one inline overload of vpaddl() that takes a single
 * vector of type vec_t and returns a value of type wide_t, obtained by
 * forwarding the vector to the intrinsic intrin##_##suffix.
 */
#define VPADDL_IMPL(wide_t, vec_t, intrin, suffix) \
    inline wide_t vpaddl(const vec_t &a)           \
    {                                              \
        return intrin##_##suffix(a);               \
    }

// Overloads forwarding to the vpaddl_* intrinsics
VPADDL_IMPL(uint16x4_t, uint8x8_t, vpaddl, u8)
VPADDL_IMPL(int16x4_t, int8x8_t, vpaddl, s8)
VPADDL_IMPL(uint32x2_t, uint16x4_t, vpaddl, u16)
VPADDL_IMPL(int32x2_t, int16x4_t, vpaddl, s16)
VPADDL_IMPL(uint64x1_t, uint32x2_t, vpaddl, u32)
VPADDL_IMPL(int64x1_t, int32x2_t, vpaddl, s32)

// Overloads forwarding to the vpaddlq_* intrinsics
VPADDL_IMPL(uint16x8_t, uint8x16_t, vpaddlq, u8)
VPADDL_IMPL(int16x8_t, int8x16_t, vpaddlq, s8)
VPADDL_IMPL(uint32x4_t, uint16x8_t, vpaddlq, u16)
VPADDL_IMPL(int32x4_t, int16x8_t, vpaddlq, s16)
VPADDL_IMPL(uint64x2_t, uint32x4_t, vpaddlq, u32)
VPADDL_IMPL(int64x2_t, int32x4_t, vpaddlq, s32)
#undef VPADDL_IMPL
179 
/**
 * VPADD: Add pairwise.
 *
 * VPADD_IMPL emits one inline overload of vpadd() that takes two vectors of
 * type vec_t and returns whatever the intrinsic intrin##_##suffix returns for
 * them. The first macro argument is accepted but never referenced by the
 * expansion.
 */
#define VPADD_IMPL(unused_t, vec_t, intrin, suffix)    \
    inline vec_t vpadd(const vec_t &a, const vec_t &b) \
    {                                                  \
        return intrin##_##suffix(a, b);                \
    }

// Overloads forwarding to the vpadd_* intrinsics
VPADD_IMPL(uint8x8_t, uint8x8_t, vpadd, u8)
VPADD_IMPL(int8x8_t, int8x8_t, vpadd, s8)
VPADD_IMPL(uint16x4_t, uint16x4_t, vpadd, u16)
VPADD_IMPL(int16x4_t, int16x4_t, vpadd, s16)
VPADD_IMPL(uint32x2_t, uint32x2_t, vpadd, u32)
VPADD_IMPL(int32x2_t, int32x2_t, vpadd, s32)
VPADD_IMPL(float32x2_t, float32x2_t, vpadd, f32)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
VPADD_IMPL(float16x4_t, float16x4_t, vpadd, f16)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

#undef VPADD_IMPL
199 } // namespace wrapper
200 } // namespace arm_compute
201 #endif /* ARM_COMPUTE_WRAPPER_ADD_H */
#define VADD_IMPL(stype, vtype, prefix, postfix)
Definition: add.h:33
uint8x8_t vadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:39
#define VPADDL_IMPL(ltype, vtype, prefix, postfix)
Definition: add.h:159
#define VQADD_IMPL(stype, vtype, prefix, postfix)
Definition: add.h:67
Copyright (c) 2017-2021 Arm Limited.
uint8x8_t vpadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:187
#define VADDL_IMPL(wtype, vtype, prefix, postfix)
Definition: add.h:116
uint8x8_t vqadd(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:73
uint16x8_t vaddl(const uint8x8_t &a, const uint8x8_t &b)
Definition: add.h:122
#define VPADD_IMPL(stype, vtype, prefix, postfix)
Definition: add.h:181
uint16x8_t vaddw(const uint16x8_t &a, const uint8x8_t &b)
Definition: add.h:107
#define VADDW_IMPL(wtype, vtype, prefix, postfix)
Definition: add.h:101
uint16x4_t vpaddl(const uint8x8_t &a)
Definition: add.h:165