This document is complementary to the main Arm C Language Extensions (ACLE) specification, which can be found on the ACLE project on GitHub.
For the latest release of this document, see the ACLE project on GitHub.
Please report defects in this specification to the issue tracker page on GitHub.
This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
Grant of Patent License. Subject to the terms and conditions of this license (both the Public License and this Patent License), each Licensor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Licensed Material, where such license applies only to those patent claims licensable by such Licensor that are necessarily infringed by their contribution(s) alone or by combination of their contribution(s) with the Licensed Material to which such contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Licensed Material or a contribution incorporated within the Licensed Material constitutes direct or contributory patent infringement, then any licenses granted to You under this license for that Licensed Material shall terminate as of the date such litigation is filed.
As identified more fully in the License section, this project is licensed under CC-BY-SA-4.0 along with an additional patent license. The language in the additional patent license is largely identical to that in Apache-2.0 (specifically, Section 3 of Apache-2.0 as reflected at https://www.apache.org/licenses/LICENSE-2.0) with two exceptions.
First, several changes were made related to the defined terms so as to reflect the fact that such defined terms need to align with the terminology in CC-BY-SA-4.0 rather than Apache-2.0 (e.g., changing “Work” to “Licensed Material”).
Second, the defensive termination clause was changed such that the scope of defensive termination applies to “any licenses granted to You” (rather than “any patent licenses granted to You”). This change is intended to help maintain a healthy ecosystem by providing additional protection to the community against patent litigation claims.
Contributions to this project are licensed under an inbound=outbound model such that any such contributions are licensed by the contributor under the same terms as those in the License section.
We do not require copyright assignment. The original contributor will retain the copyright.
The text of and illustrations in this document are licensed by Arm under a Creative Commons Attribution–Share Alike 4.0 International license (“CC-BY-SA-4.0”), with an additional clause on patents. The Arm trademarks featured here are registered trademarks or trademarks of Arm Limited (or its subsidiaries) in the US and/or elsewhere. All rights reserved. Please visit https://www.arm.com/company/policies/trademarks for more information about Arm’s trademarks.
Issue | Date | Change |
---|---|---|
A | 09 May 2014 | First release |
B | 24 March 2016 | Updated for ARMv8.1 |
C | 30 March 2019 | Version ACLE Q1 2019 |
D | 30 June 2019 | Version ACLE Q2 2019 |
E | 30 Sept 2019 | Version ACLE Q3 2019 |
F | 30 May 2020 | Version ACLE Q2 2020 |
G | 30 October 2020 | Version ACLE Q3 2020 |
H | 02 July 2021 | 2021Q2 |
I | 30 September 2021 | 2021Q3 |
J | 11 January 2022 | 2021Q4 |
sdot
, udot
and usdot
specification on AArch32.vaddq_s16
..rst
) to
Markdown (.md
). The tool pandoc
is now
used to render the PDF of the specs. The PDF is rendered using the
standard layout used in Arm specifications.The intrinsics in this section are guarded by the macro __ARM_NEON
.
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vadd_s8( |
a -> Vn.8B b -> Vm.8B |
ADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vaddq_s8( |
a -> Vn.16B b -> Vm.16B |
ADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vadd_s16( |
a -> Vn.4H b -> Vm.4H |
ADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vaddq_s16( |
a -> Vn.8H b -> Vm.8H |
ADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vadd_s32( |
a -> Vn.2S b -> Vm.2S |
ADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vaddq_s32( |
a -> Vn.4S b -> Vm.4S |
ADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vadd_s64( |
a -> Dn b -> Dm |
ADD Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vaddq_s64( |
a -> Vn.2D b -> Vm.2D |
ADD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vadd_u8( |
a -> Vn.8B b -> Vm.8B |
ADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vaddq_u8( |
a -> Vn.16B b -> Vm.16B |
ADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vadd_u16( |
a -> Vn.4H b -> Vm.4H |
ADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vaddq_u16( |
a -> Vn.8H b -> Vm.8H |
ADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vadd_u32( |
a -> Vn.2S b -> Vm.2S |
ADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vaddq_u32( |
a -> Vn.4S b -> Vm.4S |
ADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vadd_u64( |
a -> Dn b -> Dm |
ADD Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vaddq_u64( |
a -> Vn.2D b -> Vm.2D |
ADD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
float32x2_t vadd_f32( |
a -> Vn.2S b -> Vm.2S |
FADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vaddq_f32( |
a -> Vn.4S b -> Vm.4S |
FADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vadd_f64( |
a -> Dn b -> Dm |
FADD Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vaddq_f64( |
a -> Vn.2D b -> Vm.2D |
FADD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
int64_t vaddd_s64( |
a -> Dn b -> Dm |
ADD Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vaddd_u64( |
a -> Dn b -> Dm |
ADD Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vaddl_s8( |
a -> Vn.8B b -> Vm.8B |
SADDL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vaddl_s16( |
a -> Vn.4H b -> Vm.4H |
SADDL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vaddl_s32( |
a -> Vn.2S b -> Vm.2S |
SADDL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vaddl_u8( |
a -> Vn.8B b -> Vm.8B |
UADDL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vaddl_u16( |
a -> Vn.4H b -> Vm.4H |
UADDL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vaddl_u32( |
a -> Vn.2S b -> Vm.2S |
UADDL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vaddl_high_s8( |
a -> Vn.16B b -> Vm.16B |
SADDL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vaddl_high_s16( |
a -> Vn.8H b -> Vm.8H |
SADDL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vaddl_high_s32( |
a -> Vn.4S b -> Vm.4S |
SADDL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vaddl_high_u8( |
a -> Vn.16B b -> Vm.16B |
UADDL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vaddl_high_u16( |
a -> Vn.8H b -> Vm.8H |
UADDL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vaddl_high_u32( |
a -> Vn.4S b -> Vm.4S |
UADDL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
int16x8_t vaddw_s8( |
a -> Vn.8H b -> Vm.8B |
SADDW Vd.8H,Vn.8H,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vaddw_s16( |
a -> Vn.4S b -> Vm.4H |
SADDW Vd.4S,Vn.4S,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vaddw_s32( |
a -> Vn.2D b -> Vm.2S |
SADDW Vd.2D,Vn.2D,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vaddw_u8( |
a -> Vn.8H b -> Vm.8B |
UADDW Vd.8H,Vn.8H,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vaddw_u16( |
a -> Vn.4S b -> Vm.4H |
UADDW Vd.4S,Vn.4S,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vaddw_u32( |
a -> Vn.2D b -> Vm.2S |
UADDW Vd.2D,Vn.2D,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vaddw_high_s8( |
a -> Vn.8H b -> Vm.16B |
SADDW2 Vd.8H,Vn.8H,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vaddw_high_s16( |
a -> Vn.4S b -> Vm.8H |
SADDW2 Vd.4S,Vn.4S,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vaddw_high_s32( |
a -> Vn.2D b -> Vm.4S |
SADDW2 Vd.2D,Vn.2D,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vaddw_high_u8( |
a -> Vn.8H b -> Vm.16B |
UADDW2 Vd.8H,Vn.8H,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vaddw_high_u16( |
a -> Vn.4S b -> Vm.8H |
UADDW2 Vd.4S,Vn.4S,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vaddw_high_u32( |
a -> Vn.2D b -> Vm.4S |
UADDW2 Vd.2D,Vn.2D,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vhadd_s8( |
a -> Vn.8B b -> Vm.8B |
SHADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vhaddq_s8( |
a -> Vn.16B b -> Vm.16B |
SHADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vhadd_s16( |
a -> Vn.4H b -> Vm.4H |
SHADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vhaddq_s16( |
a -> Vn.8H b -> Vm.8H |
SHADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vhadd_s32( |
a -> Vn.2S b -> Vm.2S |
SHADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vhaddq_s32( |
a -> Vn.4S b -> Vm.4S |
SHADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vhadd_u8( |
a -> Vn.8B b -> Vm.8B |
UHADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vhaddq_u8( |
a -> Vn.16B b -> Vm.16B |
UHADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vhadd_u16( |
a -> Vn.4H b -> Vm.4H |
UHADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vhaddq_u16( |
a -> Vn.8H b -> Vm.8H |
UHADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vhadd_u32( |
a -> Vn.2S b -> Vm.2S |
UHADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vhaddq_u32( |
a -> Vn.4S b -> Vm.4S |
UHADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int8x8_t vrhadd_s8( |
a -> Vn.8B b -> Vm.8B |
SRHADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vrhaddq_s8( |
a -> Vn.16B b -> Vm.16B |
SRHADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vrhadd_s16( |
a -> Vn.4H b -> Vm.4H |
SRHADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vrhaddq_s16( |
a -> Vn.8H b -> Vm.8H |
SRHADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vrhadd_s32( |
a -> Vn.2S b -> Vm.2S |
SRHADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vrhaddq_s32( |
a -> Vn.4S b -> Vm.4S |
SRHADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vrhadd_u8( |
a -> Vn.8B b -> Vm.8B |
URHADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vrhaddq_u8( |
a -> Vn.16B b -> Vm.16B |
URHADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vrhadd_u16( |
a -> Vn.4H b -> Vm.4H |
URHADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vrhaddq_u16( |
a -> Vn.8H b -> Vm.8H |
URHADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vrhadd_u32( |
a -> Vn.2S b -> Vm.2S |
URHADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrhaddq_u32( |
a -> Vn.4S b -> Vm.4S |
URHADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int8x8_t vaddhn_s16( |
a -> Vn.8H b -> Vm.8H |
ADDHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vaddhn_s32( |
a -> Vn.4S b -> Vm.4S |
ADDHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vaddhn_s64( |
a -> Vn.2D b -> Vm.2D |
ADDHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vaddhn_u16( |
a -> Vn.8H b -> Vm.8H |
ADDHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vaddhn_u32( |
a -> Vn.4S b -> Vm.4S |
ADDHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vaddhn_u64( |
a -> Vn.2D b -> Vm.2D |
ADDHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vaddhn_high_s16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
ADDHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
int16x8_t vaddhn_high_s32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
ADDHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
int32x4_t vaddhn_high_s64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
ADDHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
uint8x16_t vaddhn_high_u16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
ADDHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
uint16x8_t vaddhn_high_u32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
ADDHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
uint32x4_t vaddhn_high_u64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
ADDHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
int8x8_t vraddhn_s16( |
a -> Vn.8H b -> Vm.8H |
RADDHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vraddhn_s32( |
a -> Vn.4S b -> Vm.4S |
RADDHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vraddhn_s64( |
a -> Vn.2D b -> Vm.2D |
RADDHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vraddhn_u16( |
a -> Vn.8H b -> Vm.8H |
RADDHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vraddhn_u32( |
a -> Vn.4S b -> Vm.4S |
RADDHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vraddhn_u64( |
a -> Vn.2D b -> Vm.2D |
RADDHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vraddhn_high_s16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
RADDHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
int16x8_t vraddhn_high_s32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
RADDHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
int32x4_t vraddhn_high_s64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
RADDHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
uint8x16_t vraddhn_high_u16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
RADDHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
uint16x8_t vraddhn_high_u32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
RADDHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
uint32x4_t vraddhn_high_u64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
RADDHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vqadd_s8( |
a -> Vn.8B b -> Vm.8B |
SQADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqaddq_s8( |
a -> Vn.16B b -> Vm.16B |
SQADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqadd_s16( |
a -> Vn.4H b -> Vm.4H |
SQADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqaddq_s16( |
a -> Vn.8H b -> Vm.8H |
SQADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqadd_s32( |
a -> Vn.2S b -> Vm.2S |
SQADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqaddq_s32( |
a -> Vn.4S b -> Vm.4S |
SQADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqadd_s64( |
a -> Dn b -> Dm |
SQADD Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vqaddq_s64( |
a -> Vn.2D b -> Vm.2D |
SQADD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vqadd_u8( |
a -> Vn.8B b -> Vm.8B |
UQADD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqaddq_u8( |
a -> Vn.16B b -> Vm.16B |
UQADD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqadd_u16( |
a -> Vn.4H b -> Vm.4H |
UQADD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqaddq_u16( |
a -> Vn.8H b -> Vm.8H |
UQADD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqadd_u32( |
a -> Vn.2S b -> Vm.2S |
UQADD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqaddq_u32( |
a -> Vn.4S b -> Vm.4S |
UQADD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqadd_u64( |
a -> Dn b -> Dm |
UQADD Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqaddq_u64( |
a -> Vn.2D b -> Vm.2D |
UQADD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int8_t vqaddb_s8( |
a -> Bn b -> Bm |
SQADD Bd,Bn,Bm |
Bd -> result |
A64 |
int16_t vqaddh_s16( |
a -> Hn b -> Hm |
SQADD Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqadds_s32( |
a -> Sn b -> Sm |
SQADD Sd,Sn,Sm |
Sd -> result |
A64 |
int64_t vqaddd_s64( |
a -> Dn b -> Dm |
SQADD Dd,Dn,Dm |
Dd -> result |
A64 |
uint8_t vqaddb_u8( |
a -> Bn b -> Bm |
UQADD Bd,Bn,Bm |
Bd -> result |
A64 |
uint16_t vqaddh_u16( |
a -> Hn b -> Hm |
UQADD Hd,Hn,Hm |
Hd -> result |
A64 |
uint32_t vqadds_u32( |
a -> Sn b -> Sm |
UQADD Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vqaddd_u64( |
a -> Dn b -> Dm |
UQADD Dd,Dn,Dm |
Dd -> result |
A64 |
int8x8_t vuqadd_s8( |
a -> Vd.8B b -> Vn.8B |
SUQADD Vd.8B,Vn.8B |
Vd.8B -> result |
A64 |
int8x16_t vuqaddq_s8( |
a -> Vd.16B b -> Vn.16B |
SUQADD Vd.16B,Vn.16B |
Vd.16B -> result |
A64 |
int16x4_t vuqadd_s16( |
a -> Vd.4H b -> Vn.4H |
SUQADD Vd.4H,Vn.4H |
Vd.4H -> result |
A64 |
int16x8_t vuqaddq_s16( |
a -> Vd.8H b -> Vn.8H |
SUQADD Vd.8H,Vn.8H |
Vd.8H -> result |
A64 |
int32x2_t vuqadd_s32( |
a -> Vd.2S b -> Vn.2S |
SUQADD Vd.2S,Vn.2S |
Vd.2S -> result |
A64 |
int32x4_t vuqaddq_s32( |
a -> Vd.4S b -> Vn.4S |
SUQADD Vd.4S,Vn.4S |
Vd.4S -> result |
A64 |
int64x1_t vuqadd_s64( |
a -> Dd b -> Dn |
SUQADD Dd,Dn |
Dd -> result |
A64 |
int64x2_t vuqaddq_s64( |
a -> Vd.2D b -> Vn.2D |
SUQADD Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
int8_t vuqaddb_s8( |
a -> Bd b -> Bn |
SUQADD Bd,Bn |
Bd -> result |
A64 |
int16_t vuqaddh_s16( |
a -> Hd b -> Hn |
SUQADD Hd,Hn |
Hd -> result |
A64 |
int32_t vuqadds_s32( |
a -> Sd b -> Sn |
SUQADD Sd,Sn |
Sd -> result |
A64 |
int64_t vuqaddd_s64( |
a -> Dd b -> Dn |
SUQADD Dd,Dn |
Dd -> result |
A64 |
uint8x8_t vsqadd_u8( |
a -> Vd.8B b -> Vn.8B |
USQADD Vd.8B,Vn.8B |
Vd.8B -> result |
A64 |
uint8x16_t vsqaddq_u8( |
a -> Vd.16B b -> Vn.16B |
USQADD Vd.16B,Vn.16B |
Vd.16B -> result |
A64 |
uint16x4_t vsqadd_u16( |
a -> Vd.4H b -> Vn.4H |
USQADD Vd.4H,Vn.4H |
Vd.4H -> result |
A64 |
uint16x8_t vsqaddq_u16( |
a -> Vd.8H b -> Vn.8H |
USQADD Vd.8H,Vn.8H |
Vd.8H -> result |
A64 |
uint32x2_t vsqadd_u32( |
a -> Vd.2S b -> Vn.2S |
USQADD Vd.2S,Vn.2S |
Vd.2S -> result |
A64 |
uint32x4_t vsqaddq_u32( |
a -> Vd.4S b -> Vn.4S |
USQADD Vd.4S,Vn.4S |
Vd.4S -> result |
A64 |
uint64x1_t vsqadd_u64( |
a -> Dd b -> Dn |
USQADD Dd,Dn |
Dd -> result |
A64 |
uint64x2_t vsqaddq_u64( |
a -> Vd.2D b -> Vn.2D |
USQADD Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint8_t vsqaddb_u8( |
a -> Bd b -> Bn |
USQADD Bd,Bn |
Bd -> result |
A64 |
uint16_t vsqaddh_u16( |
a -> Hd b -> Hn |
USQADD Hd,Hn |
Hd -> result |
A64 |
uint32_t vsqadds_u32( |
a -> Sd b -> Sn |
USQADD Sd,Sn |
Sd -> result |
A64 |
uint64_t vsqaddd_u64( |
a -> Dd b -> Dn |
USQADD Dd,Dn |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vmul_s8( |
a -> Vn.8B b -> Vm.8B |
MUL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vmulq_s8( |
a -> Vn.16B b -> Vm.16B |
MUL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vmul_s16( |
a -> Vn.4H b -> Vm.4H |
MUL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vmulq_s16( |
a -> Vn.8H b -> Vm.8H |
MUL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vmul_s32( |
a -> Vn.2S b -> Vm.2S |
MUL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vmulq_s32( |
a -> Vn.4S b -> Vm.4S |
MUL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vmul_u8( |
a -> Vn.8B b -> Vm.8B |
MUL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vmulq_u8( |
a -> Vn.16B b -> Vm.16B |
MUL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vmul_u16( |
a -> Vn.4H b -> Vm.4H |
MUL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vmulq_u16( |
a -> Vn.8H b -> Vm.8H |
MUL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vmul_u32( |
a -> Vn.2S b -> Vm.2S |
MUL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vmulq_u32( |
a -> Vn.4S b -> Vm.4S |
MUL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vmul_f32( |
a -> Vn.2S b -> Vm.2S |
FMUL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vmulq_f32( |
a -> Vn.4S b -> Vm.4S |
FMUL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vmul_f64( |
a -> Dn b -> Dm |
FMUL Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vmulq_f64( |
a -> Vn.2D b -> Vm.2D |
FMUL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x2_t vmull_high_u32( |
a -> Vn.4S b -> Vm.4S |
UMULL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vmulx_f32( |
a -> Vn.2S b -> Vm.2S |
FMULX Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A64 |
float32x4_t vmulxq_f32( |
a -> Vn.4S b -> Vm.4S |
FMULX Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x1_t vmulx_f64( |
a -> Dn b -> Dm |
FMULX Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vmulxq_f64( |
a -> Vn.2D b -> Vm.2D |
FMULX Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vmulxs_f32( |
a -> Sn b -> Sm |
FMULX Sd,Sn,Sm |
Sd -> result |
A64 |
float64_t vmulxd_f64( |
a -> Dn b -> Dm |
FMULX Dd,Dn,Dm |
Dd -> result |
A64 |
float32x2_t vmulx_lane_f32( |
a -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
FMULX Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vmulxq_lane_f32( |
a -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
FMULX Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vmulx_lane_f64( |
a -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMULX Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vmulxq_lane_f64( |
a -> Vn.2D v -> Vm.1D 0 <= lane <= 0 |
FMULX Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vmulxs_lane_f32( |
a -> Sn v -> Vm.2S 0 <= lane <= 1 |
FMULX Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vmulxd_lane_f64( |
a -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMULX Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float32x2_t vmulx_laneq_f32( |
a -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
FMULX Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vmulxq_laneq_f32( |
a -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
FMULX Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vmulx_laneq_f64( |
a -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMULX Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vmulxq_laneq_f64( |
a -> Vn.2D v -> Vm.2D 0 <= lane <= 1 |
FMULX Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vmulxs_laneq_f32( |
a -> Sn v -> Vm.4S 0 <= lane <= 3 |
FMULX Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vmulxd_laneq_f64( |
a -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMULX Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vmla_s8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
MLA Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vmlaq_s8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
MLA Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vmla_s16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
MLA Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vmlaq_s16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
MLA Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vmla_s32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
MLA Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vmlaq_s32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
MLA Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vmla_u8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
MLA Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vmlaq_u8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
MLA Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vmla_u16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
MLA Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vmlaq_u16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
MLA Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vmla_u32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
MLA Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vmlaq_u32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
MLA Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vmla_f32( |
N/A |
RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 1 |
N/A |
v7/A32/A64 |
float32x4_t vmlaq_f32( |
N/A |
RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 3 |
N/A |
v7/A32/A64 |
float64x1_t vmla_f64( |
N/A |
RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 |
N/A |
A64 |
float64x2_t vmlaq_f64( |
N/A |
RESULT[I] = a[i] + (b[i] * c[i]) for i = 0 to 1 |
N/A |
A64 |
int8x8_t vmls_s8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
MLS Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vmlsq_s8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
MLS Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vmls_s16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
MLS Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vmlsq_s16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
MLS Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vmls_s32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
MLS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vmlsq_s32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
MLS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vmls_u8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
MLS Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vmlsq_u8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
MLS Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vmls_u16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
MLS Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vmlsq_u16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
MLS Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vmls_u32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
MLS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vmlsq_u32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
MLS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vmls_f32( |
N/A |
RESULT[I] = a[i] - (b[i] * c[i]) for i = 0 to 1 |
N/A |
v7/A32/A64 |
float32x4_t vmlsq_f32( |
N/A |
RESULT[I] = a[i] - (b[i] * c[i]) for i = 0 to 3 |
N/A |
v7/A32/A64 |
float64x1_t vmls_f64( |
N/A |
RESULT[I] = a[i] - (b[i] * c[i]) for i = 0 |
N/A |
A64 |
float64x2_t vmlsq_f64( |
N/A |
RESULT[I] = a[i] - (b[i] * c[i]) for i = 0 to 1 |
N/A |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vmlal_s8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
SMLAL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vmlal_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
SMLAL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vmlal_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
SMLAL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vmlal_u8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
UMLAL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vmlal_u16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
UMLAL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vmlal_u32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
UMLAL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vmlal_high_s8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
SMLAL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vmlal_high_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
SMLAL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vmlal_high_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
SMLAL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vmlal_high_u8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
UMLAL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vmlal_high_u16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
UMLAL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vmlal_high_u32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
UMLAL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
int16x8_t vmlsl_s8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
SMLSL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vmlsl_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
SMLSL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vmlsl_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
SMLSL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vmlsl_u8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
UMLSL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vmlsl_u16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
UMLSL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vmlsl_u32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
UMLSL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vmlsl_high_s8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
SMLSL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vmlsl_high_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
SMLSL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vmlsl_high_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
SMLSL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vmlsl_high_u8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
UMLSL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vmlsl_high_u16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
UMLSL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vmlsl_high_u32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
UMLSL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vfma_f32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
FMLA Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vfmaq_f32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
FMLA Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vfma_f64( |
b -> Dn c -> Dm a -> Da |
FMADD Dd,Dn,Dm,Da |
Dd -> result |
A64 |
float64x2_t vfmaq_f64( |
a -> Vd.2D b -> Vn.2D c -> Vm.2D |
FMLA Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32x2_t vfma_lane_f32( |
a -> Vd.2S b -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
FMLA Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vfmaq_lane_f32( |
a -> Vd.4S b -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
FMLA Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vfma_lane_f64( |
a -> Dd b -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMLA Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vfmaq_lane_f64( |
a -> Vd.2D b -> Vn.2D v -> Vm.1D 0 <= lane <= 0 |
FMLA Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vfmas_lane_f32( |
a -> Sd b -> Sn v -> Vm.2S 0 <= lane <= 1 |
FMLA Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vfmad_lane_f64( |
a -> Dd b -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMLA Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float32x2_t vfma_laneq_f32( |
a -> Vd.2S b -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
FMLA Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vfmaq_laneq_f32( |
a -> Vd.4S b -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
FMLA Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vfma_laneq_f64( |
a -> Dd b -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMLA Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vfmaq_laneq_f64( |
a -> Vd.2D b -> Vn.2D v -> Vm.2D 0 <= lane <= 1 |
FMLA Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vfmas_laneq_f32( |
a -> Sd b -> Sn v -> Vm.4S 0 <= lane <= 3 |
FMLA Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vfmad_laneq_f64( |
a -> Dd b -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMLA Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float32x2_t vfms_f32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
FMLS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vfmsq_f32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
FMLS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vfms_f64( |
b -> Dn c -> Dm a -> Da |
FMSUB Dd,Dn,Dm,Da |
Dd -> result |
A64 |
float64x2_t vfmsq_f64( |
a -> Vd.2D b -> Vn.2D c -> Vm.2D |
FMLS Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32x2_t vfms_lane_f32( |
a -> Vd.2S b -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
FMLS Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vfmsq_lane_f32( |
a -> Vd.4S b -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
FMLS Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vfms_lane_f64( |
a -> Dd b -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMLS Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vfmsq_lane_f64( |
a -> Vd.2D b -> Vn.2D v -> Vm.1D 0 <= lane <= 0 |
FMLS Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vfmss_lane_f32( |
a -> Sd b -> Sn v -> Vm.2S 0 <= lane <= 1 |
FMLS Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vfmsd_lane_f64( |
a -> Dd b -> Dn v -> Vm.1D 0 <= lane <= 0 |
FMLS Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float32x2_t vfms_laneq_f32( |
a -> Vd.2S b -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
FMLS Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
float32x4_t vfmsq_laneq_f32( |
a -> Vd.4S b -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
FMLS Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
float64x1_t vfms_laneq_f64( |
a -> Dd b -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMLS Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
float64x2_t vfmsq_laneq_f64( |
a -> Vd.2D b -> Vn.2D v -> Vm.2D 0 <= lane <= 1 |
FMLS Vd.2D,Vn.2D,Vm.D[lane] |
Vd.2D -> result |
A64 |
float32_t vfmss_laneq_f32( |
a -> Sd b -> Sn v -> Vm.4S 0 <= lane <= 3 |
FMLS Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
float64_t vfmsd_laneq_f64( |
a -> Dd b -> Dn v -> Vm.2D 0 <= lane <= 1 |
FMLS Dd,Dn,Vm.D[lane] |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x4_t vqdmulh_s16( |
a -> Vn.4H b -> Vm.4H |
SQDMULH Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqdmulhq_s16( |
a -> Vn.8H b -> Vm.8H |
SQDMULH Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqdmulh_s32( |
a -> Vn.2S b -> Vm.2S |
SQDMULH Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqdmulhq_s32( |
a -> Vn.4S b -> Vm.4S |
SQDMULH Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int16_t vqdmulhh_s16( |
a -> Hn b -> Hm |
SQDMULH Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqdmulhs_s32( |
a -> Sn b -> Sm |
SQDMULH Sd,Sn,Sm |
Sd -> result |
A64 |
int16x4_t vqrdmulh_s16( |
a -> Vn.4H b -> Vm.4H |
SQRDMULH Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqrdmulhq_s16( |
a -> Vn.8H b -> Vm.8H |
SQRDMULH Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqrdmulh_s32( |
a -> Vn.2S b -> Vm.2S |
SQRDMULH Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqrdmulhq_s32( |
a -> Vn.4S b -> Vm.4S |
SQRDMULH Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int16_t vqrdmulhh_s16( |
a -> Hn b -> Hm |
SQRDMULH Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqrdmulhs_s32( |
a -> Sn b -> Sm |
SQRDMULH Sd,Sn,Sm |
Sd -> result |
A64 |
int32x4_t vqdmull_s16( |
a -> Vn.4H b -> Vm.4H |
SQDMULL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmull_s32( |
a -> Vn.2S b -> Vm.2S |
SQDMULL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmullh_s16( |
a -> Hn b -> Hm |
SQDMULL Sd,Hn,Hm |
Sd -> result |
A64 |
int64_t vqdmulls_s32( |
a -> Sn b -> Sm |
SQDMULL Dd,Sn,Sm |
Dd -> result |
A64 |
int32x4_t vqdmull_high_s16( |
a -> Vn.8H b -> Vm.8H |
SQDMULL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vqdmull_high_s32( |
a -> Vn.4S b -> Vm.4S |
SQDMULL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int32x4_t vqdmlal_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
SQDMLAL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlal_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
SQDMLAL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmlalh_s16( |
a -> Sd b -> Hn c -> Hm |
SQDMLAL Sd,Hn,Hm |
Sd -> result |
A64 |
int64_t vqdmlals_s32( |
a -> Dd b -> Sn c -> Sm |
SQDMLAL Dd,Sn,Sm |
Dd -> result |
A64 |
int32x4_t vqdmlal_high_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
SQDMLAL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vqdmlal_high_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
SQDMLAL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
int32x4_t vqdmlsl_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
SQDMLSL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlsl_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
SQDMLSL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmlslh_s16( |
a -> Sd b -> Hn c -> Hm |
SQDMLSL Sd,Hn,Hm |
Sd -> result |
A64 |
int64_t vqdmlsls_s32( |
a -> Dd b -> Sn c -> Sm |
SQDMLSL Dd,Sn,Sm |
Dd -> result |
A64 |
int32x4_t vqdmlsl_high_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
SQDMLSL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vqdmlsl_high_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
SQDMLSL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
int32x4_t vqdmlal_lane_s16( |
a -> Vd.4S b -> Vn.4H v -> Vm.4H 0 <= lane <= 3 |
SQDMLAL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlal_lane_s32( |
a -> Vd.2D b -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
SQDMLAL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmlalh_lane_s16( |
a -> Sd b -> Hn v -> Vm.4H 0 <= lane <= 3 |
SQDMLAL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmlals_lane_s32( |
a -> Dd b -> Sn v -> Vm.2S 0 <= lane <= 1 |
SQDMLAL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmlal_high_lane_s16( |
a -> Vd.4S b -> Vn.8H v -> Vm.4H 0 <= lane <= 3 |
SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlal_high_lane_s32( |
a -> Vd.2D b -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32x4_t vqdmlal_laneq_s16( |
a -> Vd.4S b -> Vn.4H v -> Vm.8H 0 <= lane <= 7 |
SQDMLAL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlal_laneq_s32( |
a -> Vd.2D b -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
SQDMLAL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32_t vqdmlalh_laneq_s16( |
a -> Sd b -> Hn v -> Vm.8H 0 <= lane <= 7 |
SQDMLAL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmlals_laneq_s32( |
a -> Dd b -> Sn v -> Vm.4S 0 <= lane <= 3 |
SQDMLAL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmlal_high_laneq_s16( |
a -> Vd.4S b -> Vn.8H v -> Vm.8H 0 <= lane <= 7 |
SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlal_high_laneq_s32( |
a -> Vd.2D b -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32x4_t vqdmlsl_lane_s16( |
a -> Vd.4S b -> Vn.4H v -> Vm.4H 0 <= lane <= 3 |
SQDMLSL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlsl_lane_s32( |
a -> Vd.2D b -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
SQDMLSL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmlslh_lane_s16( |
a -> Sd b -> Hn v -> Vm.4H 0 <= lane <= 3 |
SQDMLSL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmlsls_lane_s32( |
a -> Dd b -> Sn v -> Vm.2S 0 <= lane <= 1 |
SQDMLSL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmlsl_high_lane_s16( |
a -> Vd.4S b -> Vn.8H v -> Vm.4H 0 <= lane <= 3 |
SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlsl_high_lane_s32( |
a -> Vd.2D b -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32x4_t vqdmlsl_laneq_s16( |
a -> Vd.4S b -> Vn.4H v -> Vm.8H 0 <= lane <= 7 |
SQDMLSL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlsl_laneq_s32( |
a -> Vd.2D b -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
SQDMLSL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32_t vqdmlslh_laneq_s16( |
a -> Sd b -> Hn v -> Vm.8H 0 <= lane <= 7 |
SQDMLSL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmlsls_laneq_s32( |
a -> Dd b -> Sn v -> Vm.4S 0 <= lane <= 3 |
SQDMLSL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmlsl_high_laneq_s16( |
a -> Vd.4S b -> Vn.8H v -> Vm.8H 0 <= lane <= 7 |
SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlsl_high_laneq_s32( |
a -> Vd.2D b -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vmull_s8( |
a -> Vn.8B b -> Vm.8B |
SMULL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vmull_s16( |
a -> Vn.4H b -> Vm.4H |
SMULL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vmull_s32( |
a -> Vn.2S b -> Vm.2S |
SMULL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vmull_u8( |
a -> Vn.8B b -> Vm.8B |
UMULL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vmull_u16( |
a -> Vn.4H b -> Vm.4H |
UMULL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vmull_u32( |
a -> Vn.2S b -> Vm.2S |
UMULL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vmull_high_s8( |
a -> Vn.16B b -> Vm.16B |
SMULL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vmull_high_s16( |
a -> Vn.8H b -> Vm.8H |
SMULL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vmull_high_s32( |
a -> Vn.4S b -> Vm.4S |
SMULL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vmull_high_u8( |
a -> Vn.16B b -> Vm.16B |
UMULL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vmull_high_u16( |
a -> Vn.8H b -> Vm.8H |
UMULL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int32x4_t vqdmull_n_s16( |
a -> Vn.4H b -> Vm.H[0] |
SQDMULL Vd.4S,Vn.4H,Vm.H[0] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmull_n_s32( |
a -> Vn.2S b -> Vm.S[0] |
SQDMULL Vd.2D,Vn.2S,Vm.S[0] |
Vd.2D -> result |
v7/A32/A64 |
int32x4_t vqdmull_high_n_s16( |
a -> Vn.8H b -> Vm.H[0] |
SQDMULL2 Vd.4S,Vn.8H,Vm.H[0] |
Vd.4S -> result |
A64 |
int64x2_t vqdmull_high_n_s32( |
a -> Vn.4S b -> Vm.S[0] |
SQDMULL2 Vd.2D,Vn.4S,Vm.S[0] |
Vd.2D -> result |
A64 |
int32x4_t vqdmull_lane_s16( |
a -> Vn.4H v -> Vm.4H 0 <= lane <= 3 |
SQDMULL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmull_lane_s32( |
a -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
SQDMULL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
v7/A32/A64 |
int32_t vqdmullh_lane_s16( |
a -> Hn v -> Vm.4H 0 <= lane <= 3 |
SQDMULL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmulls_lane_s32( |
a -> Sn v -> Vm.2S 0 <= lane <= 1 |
SQDMULL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmull_high_lane_s16( |
a -> Vn.8H v -> Vm.4H 0 <= lane <= 3 |
SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmull_high_lane_s32( |
a -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32x4_t vqdmull_laneq_s16( |
a -> Vn.4H v -> Vm.8H 0 <= lane <= 7 |
SQDMULL Vd.4S,Vn.4H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmull_laneq_s32( |
a -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
SQDMULL Vd.2D,Vn.2S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int32_t vqdmullh_laneq_s16( |
a -> Hn v -> Vm.8H 0 <= lane <= 7 |
SQDMULL Sd,Hn,Vm.H[lane] |
Sd -> result |
A64 |
int64_t vqdmulls_laneq_s32( |
a -> Sn v -> Vm.4S 0 <= lane <= 3 |
SQDMULL Dd,Sn,Vm.S[lane] |
Dd -> result |
A64 |
int32x4_t vqdmull_high_laneq_s16( |
a -> Vn.8H v -> Vm.8H 0 <= lane <= 7 |
SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane] |
Vd.4S -> result |
A64 |
int64x2_t vqdmull_high_laneq_s32( |
a -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane] |
Vd.2D -> result |
A64 |
int16x4_t vqdmulh_n_s16( |
a -> Vn.4H b -> Vm.H[0] |
SQDMULH Vd.4H,Vn.4H,Vm.H[0] |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqdmulhq_n_s16( |
a -> Vn.8H b -> Vm.H[0] |
SQDMULH Vd.8H,Vn.8H,Vm.H[0] |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqdmulh_n_s32( |
a -> Vn.2S b -> Vm.S[0] |
SQDMULH Vd.2S,Vn.2S,Vm.S[0] |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqdmulhq_n_s32( |
a -> Vn.4S b -> Vm.S[0] |
SQDMULH Vd.4S,Vn.4S,Vm.S[0] |
Vd.4S -> result |
v7/A32/A64 |
int16x4_t vqdmulh_lane_s16( |
a -> Vn.4H v -> Vm.4H 0 <= lane <= 3 |
SQDMULH Vd.4H,Vn.4H,Vm.H[lane] |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqdmulhq_lane_s16( |
a -> Vn.8H v -> Vm.4H 0 <= lane <= 3 |
SQDMULH Vd.8H,Vn.8H,Vm.H[lane] |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqdmulh_lane_s32( |
a -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
SQDMULH Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqdmulhq_lane_s32( |
a -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
SQDMULH Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
v7/A32/A64 |
int16_t vqdmulhh_lane_s16( |
a -> Hn v -> Vm.4H 0 <= lane <= 3 |
SQDMULH Hd,Hn,Vm.H[lane] |
Hd -> result |
A64 |
int32_t vqdmulhs_lane_s32( |
a -> Sn v -> Vm.2S 0 <= lane <= 1 |
SQDMULH Sd,Sn,Vm.H[lane] |
Sd -> result |
A64 |
int16x4_t vqdmulh_laneq_s16( |
a -> Vn.4H v -> Vm.8H 0 <= lane <= 7 |
SQDMULH Vd.4H,Vn.4H,Vm.H[lane] |
Vd.4H -> result |
A64 |
int16x8_t vqdmulhq_laneq_s16( |
a -> Vn.8H v -> Vm.8H 0 <= lane <= 7 |
SQDMULH Vd.8H,Vn.8H,Vm.H[lane] |
Vd.8H -> result |
A64 |
int32x2_t vqdmulh_laneq_s32( |
a -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
SQDMULH Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
int32x4_t vqdmulhq_laneq_s32( |
a -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
SQDMULH Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
int16_t vqdmulhh_laneq_s16( |
a -> Hn v -> Vm.8H 0 <= lane <= 7 |
SQDMULH Hd,Hn,Vm.H[lane] |
Hd -> result |
A64 |
int32_t vqdmulhs_laneq_s32( |
a -> Sn v -> Vm.4S 0 <= lane <= 3 |
SQDMULH Sd,Sn,Vm.H[lane] |
Sd -> result |
A64 |
int16x4_t vqrdmulh_n_s16( |
a -> Vn.4H b -> Vm.H[0] |
SQRDMULH Vd.4H,Vn.4H,Vm.H[0] |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqrdmulhq_n_s16( |
a -> Vn.8H b -> Vm.H[0] |
SQRDMULH Vd.8H,Vn.8H,Vm.H[0] |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqrdmulh_n_s32( |
a -> Vn.2S b -> Vm.S[0] |
SQRDMULH Vd.2S,Vn.2S,Vm.S[0] |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqrdmulhq_n_s32( |
a -> Vn.4S b -> Vm.S[0] |
SQRDMULH Vd.4S,Vn.4S,Vm.S[0] |
Vd.4S -> result |
v7/A32/A64 |
int16x4_t vqrdmulh_lane_s16( |
a -> Vn.4H v -> Vm.4H 0 <= lane <= 3 |
SQRDMULH Vd.4H,Vn.4H,Vm.H[lane] |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqrdmulhq_lane_s16( |
a -> Vn.8H v -> Vm.4H 0 <= lane <= 3 |
SQRDMULH Vd.8H,Vn.8H,Vm.H[lane] |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqrdmulh_lane_s32( |
a -> Vn.2S v -> Vm.2S 0 <= lane <= 1 |
SQRDMULH Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqrdmulhq_lane_s32( |
a -> Vn.4S v -> Vm.2S 0 <= lane <= 1 |
SQRDMULH Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
v7/A32/A64 |
int16_t vqrdmulhh_lane_s16( |
a -> Hn v -> Vm.4H 0 <= lane <= 3 |
SQRDMULH Hd,Hn,Vm.H[lane] |
Hd -> result |
A64 |
int32_t vqrdmulhs_lane_s32( |
a -> Sn v -> Vm.2S 0 <= lane <= 1 |
SQRDMULH Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
int16x4_t vqrdmulh_laneq_s16( |
a -> Vn.4H v -> Vm.8H 0 <= lane <= 7 |
SQRDMULH Vd.4H,Vn.4H,Vm.H[lane] |
Vd.4H -> result |
A64 |
int16x8_t vqrdmulhq_laneq_s16( |
a -> Vn.8H v -> Vm.8H 0 <= lane <= 7 |
SQRDMULH Vd.8H,Vn.8H,Vm.H[lane] |
Vd.8H -> result |
A64 |
int32x2_t vqrdmulh_laneq_s32( |
a -> Vn.2S v -> Vm.4S 0 <= lane <= 3 |
SQRDMULH Vd.2S,Vn.2S,Vm.S[lane] |
Vd.2S -> result |
A64 |
int32x4_t vqrdmulhq_laneq_s32( |
a -> Vn.4S v -> Vm.4S 0 <= lane <= 3 |
SQRDMULH Vd.4S,Vn.4S,Vm.S[lane] |
Vd.4S -> result |
A64 |
int16_t vqrdmulhh_laneq_s16( |
a -> Hn v -> Vm.8H 0 <= lane <= 7 |
SQRDMULH Hd,Hn,Vm.H[lane] |
Hd -> result |
A64 |
int32_t vqrdmulhs_laneq_s32( |
a -> Sn v -> Vm.4S 0 <= lane <= 3 |
SQRDMULH Sd,Sn,Vm.S[lane] |
Sd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int32x4_t vqdmlal_n_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.H[0] |
SQDMLAL Vd.4S,Vn.4H,Vm.H[0] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlal_n_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.S[0] |
SQDMLAL Vd.2D,Vn.2S,Vm.S[0] |
Vd.2D -> result |
v7/A32/A64 |
int32x4_t vqdmlal_high_n_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.H[0] |
SQDMLAL2 Vd.4S,Vn.8H,Vm.H[0] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlal_high_n_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.S[0] |
SQDMLAL2 Vd.2D,Vn.4S,Vm.S[0] |
Vd.2D -> result |
A64 |
int32x4_t vqdmlsl_n_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.H[0] |
SQDMLSL Vd.4S,Vn.4H,Vm.H[0] |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vqdmlsl_n_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.S[0] |
SQDMLSL Vd.2D,Vn.2S,Vm.S[0] |
Vd.2D -> result |
v7/A32/A64 |
int32x4_t vqdmlsl_high_n_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.H[0] |
SQDMLSL2 Vd.4S,Vn.8H,Vm.H[0] |
Vd.4S -> result |
A64 |
int64x2_t vqdmlsl_high_n_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.S[0] |
SQDMLSL2 Vd.2D,Vn.4S,Vm.S[0] |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
poly8x8_t vmul_p8( |
a -> Vn.8B b -> Vm.8B |
PMUL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
poly8x16_t vmulq_p8( |
a -> Vn.16B b -> Vm.16B |
PMUL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
poly16x8_t vmull_p8( |
a -> Vn.8B b -> Vm.8B |
PMULL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
poly16x8_t vmull_high_p8( |
a -> Vn.16B b -> Vm.16B |
PMULL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vdiv_f32( |
a -> Vn.2S b -> Vm.2S |
FDIV Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A64 |
float32x4_t vdivq_f32( |
a -> Vn.4S b -> Vm.4S |
FDIV Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x1_t vdiv_f64( |
a -> Dn b -> Dm |
FDIV Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vdivq_f64( |
a -> Vn.2D b -> Vm.2D |
FDIV Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vsub_s8( |
a -> Vn.8B b -> Vm.8B |
SUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vsubq_s8( |
a -> Vn.16B b -> Vm.16B |
SUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vsub_s16( |
a -> Vn.4H b -> Vm.4H |
SUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vsubq_s16( |
a -> Vn.8H b -> Vm.8H |
SUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vsub_s32( |
a -> Vn.2S b -> Vm.2S |
SUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vsubq_s32( |
a -> Vn.4S b -> Vm.4S |
SUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vsub_s64( |
a -> Dn b -> Dm |
SUB Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vsubq_s64( |
a -> Vn.2D b -> Vm.2D |
SUB Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vsub_u8( |
a -> Vn.8B b -> Vm.8B |
SUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vsubq_u8( |
a -> Vn.16B b -> Vm.16B |
SUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vsub_u16( |
a -> Vn.4H b -> Vm.4H |
SUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vsubq_u16( |
a -> Vn.8H b -> Vm.8H |
SUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vsub_u32( |
a -> Vn.2S b -> Vm.2S |
SUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vsubq_u32( |
a -> Vn.4S b -> Vm.4S |
SUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vsub_u64( |
a -> Dn b -> Dm |
SUB Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vsubq_u64( |
a -> Vn.2D b -> Vm.2D |
SUB Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
float32x2_t vsub_f32( |
a -> Vn.2S b -> Vm.2S |
FSUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vsubq_f32( |
a -> Vn.4S b -> Vm.4S |
FSUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vsub_f64( |
a -> Dn b -> Dm |
FSUB Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vsubq_f64( |
a -> Vn.2D b -> Vm.2D |
FSUB Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
int64_t vsubd_s64( |
a -> Dn b -> Dm |
SUB Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vsubd_u64( |
a -> Dn b -> Dm |
SUB Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vsubl_s8( |
a -> Vn.8B b -> Vm.8B |
SSUBL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vsubl_s16( |
a -> Vn.4H b -> Vm.4H |
SSUBL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vsubl_s32( |
a -> Vn.2S b -> Vm.2S |
SSUBL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vsubl_u8( |
a -> Vn.8B b -> Vm.8B |
USUBL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vsubl_u16( |
a -> Vn.4H b -> Vm.4H |
USUBL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vsubl_u32( |
a -> Vn.2S b -> Vm.2S |
USUBL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vsubl_high_s8( |
a -> Vn.16B b -> Vm.16B |
SSUBL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vsubl_high_s16( |
a -> Vn.8H b -> Vm.8H |
SSUBL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vsubl_high_s32( |
a -> Vn.4S b -> Vm.4S |
SSUBL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vsubl_high_u8( |
a -> Vn.16B b -> Vm.16B |
USUBL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vsubl_high_u16( |
a -> Vn.8H b -> Vm.8H |
USUBL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vsubl_high_u32( |
a -> Vn.4S b -> Vm.4S |
USUBL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
int16x8_t vsubw_s8( |
a -> Vn.8H b -> Vm.8B |
SSUBW Vd.8H,Vn.8H,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vsubw_s16( |
a -> Vn.4S b -> Vm.4H |
SSUBW Vd.4S,Vn.4S,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vsubw_s32( |
a -> Vn.2D b -> Vm.2S |
SSUBW Vd.2D,Vn.2D,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vsubw_u8( |
a -> Vn.8H b -> Vm.8B |
USUBW Vd.8H,Vn.8H,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vsubw_u16( |
a -> Vn.4S b -> Vm.4H |
USUBW Vd.4S,Vn.4S,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vsubw_u32( |
a -> Vn.2D b -> Vm.2S |
USUBW Vd.2D,Vn.2D,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vsubw_high_s8( |
a -> Vn.8H b -> Vm.16B |
SSUBW2 Vd.8H,Vn.8H,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vsubw_high_s16( |
a -> Vn.4S b -> Vm.8H |
SSUBW2 Vd.4S,Vn.4S,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vsubw_high_s32( |
a -> Vn.2D b -> Vm.4S |
SSUBW2 Vd.2D,Vn.2D,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vsubw_high_u8( |
a -> Vn.8H b -> Vm.16B |
USUBW2 Vd.8H,Vn.8H,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vsubw_high_u16( |
a -> Vn.4S b -> Vm.8H |
USUBW2 Vd.4S,Vn.4S,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vsubw_high_u32( |
a -> Vn.2D b -> Vm.4S |
USUBW2 Vd.2D,Vn.2D,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vhsub_s8( |
a -> Vn.8B b -> Vm.8B |
SHSUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vhsubq_s8( |
a -> Vn.16B b -> Vm.16B |
SHSUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vhsub_s16( |
a -> Vn.4H b -> Vm.4H |
SHSUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vhsubq_s16( |
a -> Vn.8H b -> Vm.8H |
SHSUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vhsub_s32( |
a -> Vn.2S b -> Vm.2S |
SHSUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vhsubq_s32( |
a -> Vn.4S b -> Vm.4S |
SHSUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vhsub_u8( |
a -> Vn.8B b -> Vm.8B |
UHSUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vhsubq_u8( |
a -> Vn.16B b -> Vm.16B |
UHSUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vhsub_u16( |
a -> Vn.4H b -> Vm.4H |
UHSUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vhsubq_u16( |
a -> Vn.8H b -> Vm.8H |
UHSUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vhsub_u32( |
a -> Vn.2S b -> Vm.2S |
UHSUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vhsubq_u32( |
a -> Vn.4S b -> Vm.4S |
UHSUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int8x8_t vsubhn_s16( |
a -> Vn.8H b -> Vm.8H |
SUBHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vsubhn_s32( |
a -> Vn.4S b -> Vm.4S |
SUBHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vsubhn_s64( |
a -> Vn.2D b -> Vm.2D |
SUBHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vsubhn_u16( |
a -> Vn.8H b -> Vm.8H |
SUBHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vsubhn_u32( |
a -> Vn.4S b -> Vm.4S |
SUBHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vsubhn_u64( |
a -> Vn.2D b -> Vm.2D |
SUBHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vsubhn_high_s16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
SUBHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
int16x8_t vsubhn_high_s32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
SUBHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
int32x4_t vsubhn_high_s64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
SUBHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
uint8x16_t vsubhn_high_u16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
SUBHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
uint16x8_t vsubhn_high_u32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
SUBHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
uint32x4_t vsubhn_high_u64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
SUBHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
int8x8_t vrsubhn_s16( |
a -> Vn.8H b -> Vm.8H |
RSUBHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vrsubhn_s32( |
a -> Vn.4S b -> Vm.4S |
RSUBHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vrsubhn_s64( |
a -> Vn.2D b -> Vm.2D |
RSUBHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vrsubhn_u16( |
a -> Vn.8H b -> Vm.8H |
RSUBHN Vd.8B,Vn.8H,Vm.8H |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vrsubhn_u32( |
a -> Vn.4S b -> Vm.4S |
RSUBHN Vd.4H,Vn.4S,Vm.4S |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vrsubhn_u64( |
a -> Vn.2D b -> Vm.2D |
RSUBHN Vd.2S,Vn.2D,Vm.2D |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vrsubhn_high_s16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
RSUBHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
int16x8_t vrsubhn_high_s32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
RSUBHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
int32x4_t vrsubhn_high_s64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
RSUBHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
uint8x16_t vrsubhn_high_u16( |
r -> Vd.8B a -> Vn.8H b -> Vm.8H |
RSUBHN2 Vd.16B,Vn.8H,Vm.8H |
Vd.16B -> result |
A64 |
uint16x8_t vrsubhn_high_u32( |
r -> Vd.4H a -> Vn.4S b -> Vm.4S |
RSUBHN2 Vd.8H,Vn.4S,Vm.4S |
Vd.8H -> result |
A64 |
uint32x4_t vrsubhn_high_u64( |
r -> Vd.2S a -> Vn.2D b -> Vm.2D |
RSUBHN2 Vd.4S,Vn.2D,Vm.2D |
Vd.4S -> result |
A64 |
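
The halving and high-half-narrowing subtracts above are easiest to read in code. A minimal sketch, assuming `<arm_neon.h>`; the function names are illustrative only, not part of ACLE:

```c
#include <arm_neon.h>

/* Halving subtract: each lane computes (a - b) >> 1, so the intermediate
   difference cannot overflow the 8-bit element type. */
int8x8_t centred_diff(int8x8_t a, int8x8_t b) {
    return vhsub_s8(a, b);              /* SHSUB */
}

/* Narrowing subtract: keep only the high byte of each 16-bit difference,
   i.e. a subtraction with an arithmetic shift right by 8 folded in. */
int8x8_t diff_high_bytes(int16x8_t a, int16x8_t b) {
    return vsubhn_s16(a, b);            /* SUBHN */
}
```
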
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vqsub_s8( |
a -> Vn.8B b -> Vm.8B |
SQSUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqsubq_s8( |
a -> Vn.16B b -> Vm.16B |
SQSUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqsub_s16( |
a -> Vn.4H b -> Vm.4H |
SQSUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqsubq_s16( |
a -> Vn.8H b -> Vm.8H |
SQSUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqsub_s32( |
a -> Vn.2S b -> Vm.2S |
SQSUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqsubq_s32( |
a -> Vn.4S b -> Vm.4S |
SQSUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqsub_s64( |
a -> Dn b -> Dm |
SQSUB Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vqsubq_s64( |
a -> Vn.2D b -> Vm.2D |
SQSUB Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vqsub_u8( |
a -> Vn.8B b -> Vm.8B |
UQSUB Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqsubq_u8( |
a -> Vn.16B b -> Vm.16B |
UQSUB Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqsub_u16( |
a -> Vn.4H b -> Vm.4H |
UQSUB Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqsubq_u16( |
a -> Vn.8H b -> Vm.8H |
UQSUB Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqsub_u32( |
a -> Vn.2S b -> Vm.2S |
UQSUB Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqsubq_u32( |
a -> Vn.4S b -> Vm.4S |
UQSUB Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqsub_u64( |
a -> Dn b -> Dm |
UQSUB Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqsubq_u64( |
a -> Vn.2D b -> Vm.2D |
UQSUB Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int8_t vqsubb_s8( |
a -> Bn b -> Bm |
SQSUB Bd,Bn,Bm |
Bd -> result |
A64 |
int16_t vqsubh_s16( |
a -> Hn b -> Hm |
SQSUB Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqsubs_s32( |
a -> Sn b -> Sm |
SQSUB Sd,Sn,Sm |
Sd -> result |
A64 |
int64_t vqsubd_s64( |
a -> Dn b -> Dm |
SQSUB Dd,Dn,Dm |
Dd -> result |
A64 |
uint8_t vqsubb_u8( |
a -> Bn b -> Bm |
UQSUB Bd,Bn,Bm |
Bd -> result |
A64 |
uint16_t vqsubh_u16( |
a -> Hn b -> Hm |
UQSUB Hd,Hn,Hm |
Hd -> result |
A64 |
uint32_t vqsubs_u32( |
a -> Sn b -> Sm |
UQSUB Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vqsubd_u64( |
a -> Dn b -> Dm |
UQSUB Dd,Dn,Dm |
Dd -> result |
A64 |
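
The saturating subtracts clamp to the element type's range instead of wrapping, which is usually the behaviour wanted for pixel or audio data. A small sketch (assumes `<arm_neon.h>`; `brightness_down` is an illustrative name):

```c
#include <arm_neon.h>

/* 10 - 200 saturates to 0 for unsigned bytes rather than wrapping to 66. */
uint8x16_t brightness_down(uint8x16_t pixels, uint8_t amount) {
    return vqsubq_u8(pixels, vdupq_n_u8(amount));   /* UQSUB */
}
```
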
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vabd_s8( |
a -> Vn.8B b -> Vm.8B |
SABD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vabdq_s8( |
a -> Vn.16B b -> Vm.16B |
SABD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vabd_s16( |
a -> Vn.4H b -> Vm.4H |
SABD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vabdq_s16( |
a -> Vn.8H b -> Vm.8H |
SABD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vabd_s32( |
a -> Vn.2S b -> Vm.2S |
SABD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vabdq_s32( |
a -> Vn.4S b -> Vm.4S |
SABD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vabd_u8( |
a -> Vn.8B b -> Vm.8B |
UABD Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vabdq_u8( |
a -> Vn.16B b -> Vm.16B |
UABD Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vabd_u16( |
a -> Vn.4H b -> Vm.4H |
UABD Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vabdq_u16( |
a -> Vn.8H b -> Vm.8H |
UABD Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vabd_u32( |
a -> Vn.2S b -> Vm.2S |
UABD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vabdq_u32( |
a -> Vn.4S b -> Vm.4S |
UABD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vabd_f32( |
a -> Vn.2S b -> Vm.2S |
FABD Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vabdq_f32( |
a -> Vn.4S b -> Vm.4S |
FABD Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vabd_f64( |
a -> Dn b -> Dm |
FABD Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vabdq_f64( |
a -> Vn.2D b -> Vm.2D |
FABD Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vabds_f32( |
a -> Sn b -> Sm |
FABD Sd,Sn,Sm |
Sd -> result |
A64 |
float64_t vabdd_f64( |
a -> Dn b -> Dm |
FABD Dd,Dn,Dm |
Dd -> result |
A64 |
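
The absolute-difference forms compute |a - b| per lane in a single instruction; the unsigned integer variants cannot overflow because the result always fits the element type. A hedged example with illustrative names:

```c
#include <arm_neon.h>

uint8x16_t abs_diff_u8(uint8x16_t a, uint8x16_t b) {
    return vabdq_u8(a, b);                  /* UABD */
}

float32x4_t abs_err_f32(float32x4_t measured, float32x4_t expected) {
    return vabdq_f32(measured, expected);   /* FABD */
}
```
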
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vabdl_s8( |
a -> Vn.8B b -> Vm.8B |
SABDL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vabdl_s16( |
a -> Vn.4H b -> Vm.4H |
SABDL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vabdl_s32( |
a -> Vn.2S b -> Vm.2S |
SABDL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vabdl_u8( |
a -> Vn.8B b -> Vm.8B |
UABDL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vabdl_u16( |
a -> Vn.4H b -> Vm.4H |
UABDL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vabdl_u32( |
a -> Vn.2S b -> Vm.2S |
UABDL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vabdl_high_s8( |
a -> Vn.16B b -> Vm.16B |
SABDL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vabdl_high_s16( |
a -> Vn.8H b -> Vm.8H |
SABDL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vabdl_high_s32( |
a -> Vn.4S b -> Vm.4S |
SABDL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vabdl_high_u8( |
a -> Vn.16B b -> Vm.16B |
UABDL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vabdl_high_u16( |
a -> Vn.8H b -> Vm.8H |
UABDL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vabdl_high_u32( |
a -> Vn.4S b -> Vm.4S |
UABDL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vaba_s8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
SABA Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vabaq_s8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
SABA Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vaba_s16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
SABA Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vabaq_s16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
SABA Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vaba_s32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
SABA Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vabaq_s32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
SABA Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vaba_u8( |
a -> Vd.8B b -> Vn.8B c -> Vm.8B |
UABA Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vabaq_u8( |
a -> Vd.16B b -> Vn.16B c -> Vm.16B |
UABA Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vaba_u16( |
a -> Vd.4H b -> Vn.4H c -> Vm.4H |
UABA Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vabaq_u16( |
a -> Vd.8H b -> Vn.8H c -> Vm.8H |
UABA Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vaba_u32( |
a -> Vd.2S b -> Vn.2S c -> Vm.2S |
UABA Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vabaq_u32( |
a -> Vd.4S b -> Vn.4S c -> Vm.4S |
UABA Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vabal_s8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
SABAL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vabal_s16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
SABAL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vabal_s32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
SABAL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vabal_u8( |
a -> Vd.8H b -> Vn.8B c -> Vm.8B |
UABAL Vd.8H,Vn.8B,Vm.8B |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vabal_u16( |
a -> Vd.4S b -> Vn.4H c -> Vm.4H |
UABAL Vd.4S,Vn.4H,Vm.4H |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vabal_u32( |
a -> Vd.2D b -> Vn.2S c -> Vm.2S |
UABAL Vd.2D,Vn.2S,Vm.2S |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vabal_high_s8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
SABAL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
int32x4_t vabal_high_s16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
SABAL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
int64x2_t vabal_high_s32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
SABAL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
uint16x8_t vabal_high_u8( |
a -> Vd.8H b -> Vn.16B c -> Vm.16B |
UABAL2 Vd.8H,Vn.16B,Vm.16B |
Vd.8H -> result |
A64 |
uint32x4_t vabal_high_u16( |
a -> Vd.4S b -> Vn.8H c -> Vm.8H |
UABAL2 Vd.4S,Vn.8H,Vm.8H |
Vd.4S -> result |
A64 |
uint64x2_t vabal_high_u32( |
a -> Vd.2D b -> Vn.4S c -> Vm.4S |
UABAL2 Vd.2D,Vn.4S,Vm.4S |
Vd.2D -> result |
A64 |
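
The widening absolute-difference-accumulate forms are the core of a sum-of-absolute-differences kernel. A minimal sketch, assuming `n` is a multiple of 8 and small enough that the 16-bit accumulator lanes do not overflow (`sad_u8` is an illustrative name, not an ACLE function):

```c
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

uint32_t sad_u8(const uint8_t *a, const uint8_t *b, size_t n) {
    uint16x8_t acc = vdupq_n_u16(0);
    for (size_t i = 0; i < n; i += 8) {
        uint8x8_t va = vld1_u8(a + i);
        uint8x8_t vb = vld1_u8(b + i);
        acc = vabal_u8(acc, va, vb);    /* acc += |va - vb|, widened to 16 bits (UABAL) */
    }
    return vaddlvq_u16(acc);            /* widening across-vector sum (A64) */
}
```
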
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vabs_s8(int8x8_t a) |
a -> Vn.8B |
ABS Vd.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vabsq_s8(int8x16_t a) |
a -> Vn.16B |
ABS Vd.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vabs_s16(int16x4_t a) |
a -> Vn.4H |
ABS Vd.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vabsq_s16(int16x8_t a) |
a -> Vn.8H |
ABS Vd.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vabs_s32(int32x2_t a) |
a -> Vn.2S |
ABS Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vabsq_s32(int32x4_t a) |
a -> Vn.4S |
ABS Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vabs_f32(float32x2_t a) |
a -> Vn.2S |
FABS Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vabsq_f32(float32x4_t a) |
a -> Vn.4S |
FABS Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vabs_s64(int64x1_t a) |
a -> Dn |
ABS Dd,Dn |
Dd -> result |
A64 |
int64_t vabsd_s64(int64_t a) |
a -> Dn |
ABS Dd,Dn |
Dd -> result |
A64 |
int64x2_t vabsq_s64(int64x2_t a) |
a -> Vn.2D |
ABS Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float64x1_t vabs_f64(float64x1_t a) |
a -> Dn |
FABS Dd,Dn |
Dd -> result |
A64 |
float64x2_t vabsq_f64(float64x2_t a) |
a -> Vn.2D |
FABS Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vqabs_s8(int8x8_t a) |
a -> Vn.8B |
SQABS Vd.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqabsq_s8(int8x16_t a) |
a -> Vn.16B |
SQABS Vd.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqabs_s16(int16x4_t a) |
a -> Vn.4H |
SQABS Vd.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqabsq_s16(int16x8_t a) |
a -> Vn.8H |
SQABS Vd.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqabs_s32(int32x2_t a) |
a -> Vn.2S |
SQABS Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqabsq_s32(int32x4_t a) |
a -> Vn.4S |
SQABS Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqabs_s64(int64x1_t a) |
a -> Dn |
SQABS Dd,Dn |
Dd -> result |
A64 |
int64x2_t vqabsq_s64(int64x2_t a) |
a -> Vn.2D |
SQABS Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
int8_t vqabsb_s8(int8_t a) |
a -> Bn |
SQABS Bd,Bn |
Bd -> result |
A64 |
int16_t vqabsh_s16(int16_t a) |
a -> Hn |
SQABS Hd,Hn |
Hd -> result |
A64 |
int32_t vqabss_s32(int32_t a) |
a -> Sn |
SQABS Sd,Sn |
Sd -> result |
A64 |
int64_t vqabsd_s64(int64_t a) |
a -> Dn |
SQABS Dd,Dn |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vmax_s8( |
a -> Vn.8B b -> Vm.8B |
SMAX Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vmaxq_s8( |
a -> Vn.16B b -> Vm.16B |
SMAX Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vmax_s16( |
a -> Vn.4H b -> Vm.4H |
SMAX Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vmaxq_s16( |
a -> Vn.8H b -> Vm.8H |
SMAX Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vmax_s32( |
a -> Vn.2S b -> Vm.2S |
SMAX Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vmaxq_s32( |
a -> Vn.4S b -> Vm.4S |
SMAX Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vmax_u8( |
a -> Vn.8B b -> Vm.8B |
UMAX Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vmaxq_u8( |
a -> Vn.16B b -> Vm.16B |
UMAX Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vmax_u16( |
a -> Vn.4H b -> Vm.4H |
UMAX Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vmaxq_u16( |
a -> Vn.8H b -> Vm.8H |
UMAX Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vmax_u32( |
a -> Vn.2S b -> Vm.2S |
UMAX Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vmaxq_u32( |
a -> Vn.4S b -> Vm.4S |
UMAX Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vmax_f32( |
a -> Vn.2S b -> Vm.2S |
FMAX Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vmaxq_f32( |
a -> Vn.4S b -> Vm.4S |
FMAX Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vmax_f64( |
a -> Dn b -> Dm |
FMAX Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vmaxq_f64( |
a -> Vn.2D b -> Vm.2D |
FMAX Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vmin_s8( |
a -> Vn.8B b -> Vm.8B |
SMIN Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vminq_s8( |
a -> Vn.16B b -> Vm.16B |
SMIN Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vmin_s16( |
a -> Vn.4H b -> Vm.4H |
SMIN Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vminq_s16( |
a -> Vn.8H b -> Vm.8H |
SMIN Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vmin_s32( |
a -> Vn.2S b -> Vm.2S |
SMIN Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vminq_s32( |
a -> Vn.4S b -> Vm.4S |
SMIN Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vmin_u8( |
a -> Vn.8B b -> Vm.8B |
UMIN Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vminq_u8( |
a -> Vn.16B b -> Vm.16B |
UMIN Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vmin_u16( |
a -> Vn.4H b -> Vm.4H |
UMIN Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vminq_u16( |
a -> Vn.8H b -> Vm.8H |
UMIN Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vmin_u32( |
a -> Vn.2S b -> Vm.2S |
UMIN Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vminq_u32( |
a -> Vn.4S b -> Vm.4S |
UMIN Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vmin_f32( |
a -> Vn.2S b -> Vm.2S |
FMIN Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vminq_f32( |
a -> Vn.4S b -> Vm.4S |
FMIN Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vmin_f64( |
a -> Dn b -> Dm |
FMIN Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vminq_f64( |
a -> Vn.2D b -> Vm.2D |
FMIN Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32x2_t vmaxnm_f32( |
a -> Vn.2S b -> Vm.2S |
FMAXNM Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vmaxnmq_f32( |
a -> Vn.4S b -> Vm.4S |
FMAXNM Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vmaxnm_f64( |
a -> Dn b -> Dm |
FMAXNM Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vmaxnmq_f64( |
a -> Vn.2D b -> Vm.2D |
FMAXNM Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32x2_t vminnm_f32( |
a -> Vn.2S b -> Vm.2S |
FMINNM Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vminnmq_f32( |
a -> Vn.4S b -> Vm.4S |
FMINNM Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vminnm_f64( |
a -> Dn b -> Dm |
FMINNM Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vminnmq_f64( |
a -> Vn.2D b -> Vm.2D |
FMINNM Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
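
`vmaxnm`/`vminnm` follow the IEEE 754-2008 maxNum/minNum rules: when exactly one operand is a quiet NaN, the numeric operand is returned, whereas `vmax`/`vmin` propagate the NaN. A sketch of a NaN-tolerant clamp (illustrative name):

```c
#include <arm_neon.h>

float32x4_t clamp_f32(float32x4_t x, float32x4_t lo, float32x4_t hi) {
    return vminnmq_f32(vmaxnmq_f32(x, lo), hi);   /* FMAXNM then FMINNM */
}
```
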
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vrnd_f32(float32x2_t a) |
a -> Vn.2S |
FRINTZ Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTZ Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrnd_f64(float64x1_t a) |
a -> Dn |
FRINTZ Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTZ Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32x2_t vrndn_f32(float32x2_t a) |
a -> Vn.2S |
FRINTN Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndnq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTN Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrndn_f64(float64x1_t a) |
a -> Dn |
FRINTN Dd,Dn |
Dd -> result |
A32/A64 |
float64x2_t vrndnq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTN Vd.2D,Vn.2D |
Vd.2D -> result |
A32/A64 |
float32_t vrndns_f32(float32_t a) |
a -> Sn |
FRINTN Sd,Sn |
Sd -> result |
A32/A64 |
float32x2_t vrndm_f32(float32x2_t a) |
a -> Vn.2S |
FRINTM Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndmq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTM Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrndm_f64(float64x1_t a) |
a -> Dn |
FRINTM Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndmq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTM Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32x2_t vrndp_f32(float32x2_t a) |
a -> Vn.2S |
FRINTP Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndpq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTP Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrndp_f64(float64x1_t a) |
a -> Dn |
FRINTP Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndpq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTP Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32x2_t vrnda_f32(float32x2_t a) |
a -> Vn.2S |
FRINTA Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndaq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTA Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrnda_f64(float64x1_t a) |
a -> Dn |
FRINTA Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndaq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTA Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32x2_t vrndi_f32(float32x2_t a) |
a -> Vn.2S |
FRINTI Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndiq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTI Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrndi_f64(float64x1_t a) |
a -> Dn |
FRINTI Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndiq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTI Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32x2_t vrndx_f32(float32x2_t a) |
a -> Vn.2S |
FRINTX Vd.2S,Vn.2S |
Vd.2S -> result |
A32/A64 |
float32x4_t vrndxq_f32(float32x4_t a) |
a -> Vn.4S |
FRINTX Vd.4S,Vn.4S |
Vd.4S -> result |
A32/A64 |
float64x1_t vrndx_f64(float64x1_t a) |
a -> Dn |
FRINTX Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrndxq_f64(float64x2_t a) |
a -> Vn.2D |
FRINTX Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
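
Each `vrnd*` variant pins one rounding mode of the FRINT family; the following sketch (illustrative names, assuming `<arm_neon.h>`) summarises the mapping:

```c
#include <arm_neon.h>

/* vrnd  (FRINTZ): toward zero          vrndn (FRINTN): nearest, ties to even
   vrndm (FRINTM): toward minus inf     vrndp (FRINTP): toward plus inf
   vrnda (FRINTA): nearest, ties away   vrndi/vrndx (FRINTI/FRINTX): current FPCR mode */
float32x4_t floor4(float32x4_t x) { return vrndmq_f32(x); }
float32x4_t ceil4(float32x4_t x)  { return vrndpq_f32(x); }
float32x4_t trunc4(float32x4_t x) { return vrndq_f32(x);  }
```
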
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vrecpe_u32(uint32x2_t a) |
a -> Vn.2S |
URECPE Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrecpeq_u32(uint32x4_t a) |
a -> Vn.4S |
URECPE Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vrecpe_f32(float32x2_t a) |
a -> Vn.2S |
FRECPE Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vrecpeq_f32(float32x4_t a) |
a -> Vn.4S |
FRECPE Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vrecpe_f64(float64x1_t a) |
a -> Dn |
FRECPE Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrecpeq_f64(float64x2_t a) |
a -> Vn.2D |
FRECPE Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32_t vrecpes_f32(float32_t a) |
a -> Sn |
FRECPE Sd,Sn |
Sd -> result |
A64 |
float64_t vrecped_f64(float64_t a) |
a -> Dn |
FRECPE Dd,Dn |
Dd -> result |
A64 |
float32x2_t vrecps_f32( |
a -> Vn.2S b -> Vm.2S |
FRECPS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vrecpsq_f32( |
a -> Vn.4S b -> Vm.4S |
FRECPS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vrecps_f64( |
a -> Dn b -> Dm |
FRECPS Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vrecpsq_f64( |
a -> Vn.2D b -> Vm.2D |
FRECPS Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vrecpss_f32( |
a -> Sn b -> Sm |
FRECPS Sd,Sn,Sm |
Sd -> result |
A64 |
float64_t vrecpsd_f64( |
a -> Dn b -> Dm |
FRECPS Dd,Dn,Dm |
Dd -> result |
A64 |
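
`FRECPE` returns a low-precision reciprocal estimate and `FRECPS` computes `2 - a*b`, the Newton-Raphson correction factor, so the usual pattern is estimate-then-refine. A minimal, division-free sketch (the helper name is illustrative):

```c
#include <arm_neon.h>

float32x4_t approx_recip_f32(float32x4_t x) {
    float32x4_t r = vrecpeq_f32(x);          /* initial estimate            */
    r = vmulq_f32(r, vrecpsq_f32(x, r));     /* r *= (2 - x*r), first step  */
    r = vmulq_f32(r, vrecpsq_f32(x, r));     /* second Newton-Raphson step  */
    return r;
}
```
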
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vrsqrte_u32(uint32x2_t a) |
a -> Vn.2S |
URSQRTE Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrsqrteq_u32(uint32x4_t a) |
a -> Vn.4S |
URSQRTE Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
float32x2_t vrsqrte_f32(float32x2_t a) |
a -> Vn.2S |
FRSQRTE Vd.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vrsqrteq_f32(float32x4_t a) |
a -> Vn.4S |
FRSQRTE Vd.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vrsqrte_f64(float64x1_t a) |
a -> Dn |
FRSQRTE Dd,Dn |
Dd -> result |
A64 |
float64x2_t vrsqrteq_f64(float64x2_t a) |
a -> Vn.2D |
FRSQRTE Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
float32_t vrsqrtes_f32(float32_t a) |
a -> Sn |
FRSQRTE Sd,Sn |
Sd -> result |
A64 |
float64_t vrsqrted_f64(float64_t a) |
a -> Dn |
FRSQRTE Dd,Dn |
Dd -> result |
A64 |
float32x2_t vrsqrts_f32( |
a -> Vn.2S b -> Vm.2S |
FRSQRTS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x4_t vrsqrtsq_f32( |
a -> Vn.4S b -> Vm.4S |
FRSQRTS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
float64x1_t vrsqrts_f64( |
a -> Dn b -> Dm |
FRSQRTS Dd,Dn,Dm |
Dd -> result |
A64 |
float64x2_t vrsqrtsq_f64( |
a -> Vn.2D b -> Vm.2D |
FRSQRTS Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vrsqrtss_f32( |
a -> Sn b -> Sm |
FRSQRTS Sd,Sn,Sm |
Sd -> result |
A64 |
float64_t vrsqrtsd_f64( |
a -> Dn b -> Dm |
FRSQRTS Dd,Dn,Dm |
Dd -> result |
A64 |
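
The reciprocal square root estimate follows the same pattern, with `FRSQRTS` computing `(3 - a*b)/2`. A hedged sketch (illustrative name):

```c
#include <arm_neon.h>

float32x4_t approx_rsqrt_f32(float32x4_t x) {
    float32x4_t r = vrsqrteq_f32(x);
    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(x, r), r));  /* one Newton-Raphson step */
    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(x, r), r));  /* second step             */
    return r;
}
```
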
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32_t vrecpxs_f32(float32_t a) |
a -> Sn |
FRECPX Sd,Sn |
Sd -> result |
A64 |
float64_t vrecpxd_f64(float64_t a) |
a -> Dn |
FRECPX Dd,Dn |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vsqrt_f32(float32x2_t a) |
a -> Vn.2S |
FSQRT Vd.2S,Vn.2S |
Vd.2S -> result |
A64 |
float32x4_t vsqrtq_f32(float32x4_t a) |
a -> Vn.4S |
FSQRT Vd.4S,Vn.4S |
Vd.4S -> result |
A64 |
float64x1_t vsqrt_f64(float64x1_t a) |
a -> Dn |
FSQRT Dd,Dn |
Dd -> result |
A64 |
float64x2_t vsqrtq_f64(float64x2_t a) |
a -> Vn.2D |
FSQRT Vd.2D,Vn.2D |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vpadd_s8( |
a -> Vn.8B b -> Vm.8B |
ADDP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vpadd_s16( |
a -> Vn.4H b -> Vm.4H |
ADDP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vpadd_s32( |
a -> Vn.2S b -> Vm.2S |
ADDP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vpadd_u8( |
a -> Vn.8B b -> Vm.8B |
ADDP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vpadd_u16( |
a -> Vn.4H b -> Vm.4H |
ADDP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vpadd_u32( |
a -> Vn.2S b -> Vm.2S |
ADDP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x2_t vpadd_f32( |
a -> Vn.2S b -> Vm.2S |
FADDP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vpaddq_s8( |
a -> Vn.16B b -> Vm.16B |
ADDP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
int16x8_t vpaddq_s16( |
a -> Vn.8H b -> Vm.8H |
ADDP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
int32x4_t vpaddq_s32( |
a -> Vn.4S b -> Vm.4S |
ADDP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
int64x2_t vpaddq_s64( |
a -> Vn.2D b -> Vm.2D |
ADDP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint8x16_t vpaddq_u8( |
a -> Vn.16B b -> Vm.16B |
ADDP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
uint16x8_t vpaddq_u16( |
a -> Vn.8H b -> Vm.8H |
ADDP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
uint32x4_t vpaddq_u32( |
a -> Vn.4S b -> Vm.4S |
ADDP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
uint64x2_t vpaddq_u64( |
a -> Vn.2D b -> Vm.2D |
ADDP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32x4_t vpaddq_f32( |
a -> Vn.4S b -> Vm.4S |
FADDP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x2_t vpaddq_f64( |
a -> Vn.2D b -> Vm.2D |
FADDP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
int64_t vpaddd_s64(int64x2_t a) |
a -> Vn.2D |
ADDP Dd,Vn.2D |
Dd -> result |
A64 |
uint64_t vpaddd_u64(uint64x2_t a) |
a -> Vn.2D |
ADDP Dd,Vn.2D |
Dd -> result |
A64 |
float32_t vpadds_f32(float32x2_t a) |
a -> Vn.2S |
FADDP Sd,Vn.2S |
Sd -> result |
A64 |
float64_t vpaddd_f64(float64x2_t a) |
a -> Vn.2D |
FADDP Dd,Vn.2D |
Dd -> result |
A64 |
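
Pairwise addition sums adjacent lanes, which is convenient for interleaved data. For example, with the quad-register A64 form (illustrative name):

```c
#include <arm_neon.h>

/* Result is { a0+a1, a2+a3, b0+b1, b2+b3 }. */
float32x4_t pairwise_sums(float32x4_t a, float32x4_t b) {
    return vpaddq_f32(a, b);    /* FADDP (A64) */
}
```
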
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x4_t vpaddl_s8(int8x8_t a) |
a -> Vn.8B |
SADDLP Vd.4H,Vn.8B |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vpaddlq_s8(int8x16_t a) |
a -> Vn.16B |
SADDLP Vd.8H,Vn.16B |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vpaddl_s16(int16x4_t a) |
a -> Vn.4H |
SADDLP Vd.2S,Vn.4H |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vpaddlq_s16(int16x8_t a) |
a -> Vn.8H |
SADDLP Vd.4S,Vn.8H |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vpaddl_s32(int32x2_t a) |
a -> Vn.2S |
SADDLP Vd.1D,Vn.2S |
Vd.1D -> result |
v7/A32/A64 |
int64x2_t vpaddlq_s32(int32x4_t a) |
a -> Vn.4S |
SADDLP Vd.2D,Vn.4S |
Vd.2D -> result |
v7/A32/A64 |
uint16x4_t vpaddl_u8(uint8x8_t a) |
a -> Vn.8B |
UADDLP Vd.4H,Vn.8B |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vpaddlq_u8(uint8x16_t a) |
a -> Vn.16B |
UADDLP Vd.8H,Vn.16B |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vpaddl_u16(uint16x4_t a) |
a -> Vn.4H |
UADDLP Vd.2S,Vn.4H |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vpaddlq_u16(uint16x8_t a) |
a -> Vn.8H |
UADDLP Vd.4S,Vn.8H |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vpaddl_u32(uint32x2_t a) |
a -> Vn.2S |
UADDLP Vd.1D,Vn.2S |
Vd.1D -> result |
v7/A32/A64 |
uint64x2_t vpaddlq_u32(uint32x4_t a) |
a -> Vn.4S |
UADDLP Vd.2D,Vn.4S |
Vd.2D -> result |
v7/A32/A64 |
int16x4_t vpadal_s8( |
a -> Vd.4H b -> Vn.8B |
SADALP Vd.4H,Vn.8B |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vpadalq_s8( |
a -> Vd.8H b -> Vn.16B |
SADALP Vd.8H,Vn.16B |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vpadal_s16( |
a -> Vd.2S b -> Vn.4H |
SADALP Vd.2S,Vn.4H |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vpadalq_s16( |
a -> Vd.4S b -> Vn.8H |
SADALP Vd.4S,Vn.8H |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vpadal_s32( |
a -> Vd.1D b -> Vn.2S |
SADALP Vd.1D,Vn.2S |
Vd.1D -> result |
v7/A32/A64 |
int64x2_t vpadalq_s32( |
a -> Vd.2D b -> Vn.4S |
SADALP Vd.2D,Vn.4S |
Vd.2D -> result |
v7/A32/A64 |
uint16x4_t vpadal_u8( |
a -> Vd.4H b -> Vn.8B |
UADALP Vd.4H,Vn.8B |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vpadalq_u8( |
a -> Vd.8H b -> Vn.16B |
UADALP Vd.8H,Vn.16B |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vpadal_u16( |
a -> Vd.2S b -> Vn.4H |
UADALP Vd.2S,Vn.4H |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vpadalq_u16( |
a -> Vd.4S b -> Vn.8H |
UADALP Vd.4S,Vn.8H |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vpadal_u32( |
a -> Vd.1D b -> Vn.2S |
UADALP Vd.1D,Vn.2S |
Vd.1D -> result |
v7/A32/A64 |
uint64x2_t vpadalq_u32( |
a -> Vd.2D b -> Vn.4S |
UADALP Vd.2D,Vn.4S |
Vd.2D -> result |
v7/A32/A64 |
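
The widening pairwise add and accumulate forms let a running sum of narrow elements be kept in wider lanes without overflow. A sketch that sums bytes (assumes `n % 16 == 0` and `n` small enough that the 32-bit lanes do not overflow; `byte_sum` is an illustrative name):

```c
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

uint32_t byte_sum(const uint8_t *p, size_t n) {
    uint32x4_t acc = vdupq_n_u32(0);
    for (size_t i = 0; i < n; i += 16) {
        uint16x8_t s16 = vpaddlq_u8(vld1q_u8(p + i));  /* 16 x u8 -> 8 x u16 (UADDLP)       */
        acc = vpadalq_u16(acc, s16);                   /* accumulate into 4 x u32 (UADALP)  */
    }
    return vaddvq_u32(acc);                            /* across-vector sum (A64) */
}
```
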
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vpmax_s8( |
a -> Vn.8B b -> Vm.8B |
SMAXP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vpmax_s16( |
a -> Vn.4H b -> Vm.4H |
SMAXP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vpmax_s32( |
a -> Vn.2S b -> Vm.2S |
SMAXP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vpmax_u8( |
a -> Vn.8B b -> Vm.8B |
UMAXP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vpmax_u16( |
a -> Vn.4H b -> Vm.4H |
UMAXP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vpmax_u32( |
a -> Vn.2S b -> Vm.2S |
UMAXP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x2_t vpmax_f32( |
a -> Vn.2S b -> Vm.2S |
FMAXP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vpmaxq_s8( |
a -> Vn.16B b -> Vm.16B |
SMAXP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
int16x8_t vpmaxq_s16( |
a -> Vn.8H b -> Vm.8H |
SMAXP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
int32x4_t vpmaxq_s32( |
a -> Vn.4S b -> Vm.4S |
SMAXP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
uint8x16_t vpmaxq_u8( |
a -> Vn.16B b -> Vm.16B |
UMAXP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
uint16x8_t vpmaxq_u16( |
a -> Vn.8H b -> Vm.8H |
UMAXP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
uint32x4_t vpmaxq_u32( |
a -> Vn.4S b -> Vm.4S |
UMAXP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float32x4_t vpmaxq_f32( |
a -> Vn.4S b -> Vm.4S |
FMAXP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x2_t vpmaxq_f64( |
a -> Vn.2D b -> Vm.2D |
FMAXP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vpmaxs_f32(float32x2_t a) |
a -> Vn.2S |
FMAXP Sd,Vn.2S |
Sd -> result |
A64 |
float64_t vpmaxqd_f64(float64x2_t a) |
a -> Vn.2D |
FMAXP Dd,Vn.2D |
Dd -> result |
A64 |
float32_t vpmaxnms_f32(float32x2_t a) |
a -> Vn.2S |
FMAXNMP Sd,Vn.2S |
Sd -> result |
A64 |
float64_t vpmaxnmqd_f64(float64x2_t a) |
a -> Vn.2D |
FMAXNMP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vpmin_s8( |
a -> Vn.8B b -> Vm.8B |
SMINP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vpmin_s16( |
a -> Vn.4H b -> Vm.4H |
SMINP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vpmin_s32( |
a -> Vn.2S b -> Vm.2S |
SMINP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vpmin_u8( |
a -> Vn.8B b -> Vm.8B |
UMINP Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vpmin_u16( |
a -> Vn.4H b -> Vm.4H |
UMINP Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vpmin_u32( |
a -> Vn.2S b -> Vm.2S |
UMINP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
float32x2_t vpmin_f32( |
a -> Vn.2S b -> Vm.2S |
FMINP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vpminq_s8( |
a -> Vn.16B b -> Vm.16B |
SMINP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
int16x8_t vpminq_s16( |
a -> Vn.8H b -> Vm.8H |
SMINP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
int32x4_t vpminq_s32( |
a -> Vn.4S b -> Vm.4S |
SMINP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
uint8x16_t vpminq_u8( |
a -> Vn.16B b -> Vm.16B |
UMINP Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
A64 |
uint16x8_t vpminq_u16( |
a -> Vn.8H b -> Vm.8H |
UMINP Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
A64 |
uint32x4_t vpminq_u32( |
a -> Vn.4S b -> Vm.4S |
UMINP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float32x4_t vpminq_f32( |
a -> Vn.4S b -> Vm.4S |
FMINP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x2_t vpminq_f64( |
a -> Vn.2D b -> Vm.2D |
FMINP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float32_t vpmins_f32(float32x2_t a) |
a -> Vn.2S |
FMINP Sd,Vn.2S |
Sd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vpmaxnm_f32( |
a -> Vn.2S b -> Vm.2S |
FMAXNMP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A64 |
float32x4_t vpmaxnmq_f32( |
a -> Vn.4S b -> Vm.4S |
FMAXNMP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x2_t vpmaxnmq_f64( |
a -> Vn.2D b -> Vm.2D |
FMAXNMP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32x2_t vpminnm_f32( |
a -> Vn.2S b -> Vm.2S |
FMINNMP Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
A64 |
float32x4_t vpminnmq_f32( |
a -> Vn.4S b -> Vm.4S |
FMINNMP Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
A64 |
float64x2_t vpminnmq_f64( |
a -> Vn.2D b -> Vm.2D |
FMINNMP Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
float64_t vpminqd_f64(float64x2_t a) |
a -> Vn.2D |
FMINP Dd,Vn.2D |
Dd -> result |
A64 |
float32_t vpminnms_f32(float32x2_t a) |
a -> Vn.2S |
FMINNMP Sd,Vn.2S |
Sd -> result |
A64 |
float64_t vpminnmqd_f64(float64x2_t a) |
a -> Vn.2D |
FMINNMP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8_t vaddv_s8(int8x8_t a) |
a -> Vn.8B |
ADDV Bd,Vn.8B |
Bd -> result |
A64 |
int8_t vaddvq_s8(int8x16_t a) |
a -> Vn.16B |
ADDV Bd,Vn.16B |
Bd -> result |
A64 |
int16_t vaddv_s16(int16x4_t a) |
a -> Vn.4H |
ADDV Hd,Vn.4H |
Hd -> result |
A64 |
int16_t vaddvq_s16(int16x8_t a) |
a -> Vn.8H |
ADDV Hd,Vn.8H |
Hd -> result |
A64 |
int32_t vaddv_s32(int32x2_t a) |
a -> Vn.2S a -> Vm.2S |
ADDP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
int32_t vaddvq_s32(int32x4_t a) |
a -> Vn.4S |
ADDV Sd,Vn.4S |
Sd -> result |
A64 |
int64_t vaddvq_s64(int64x2_t a) |
a -> Vn.2D |
ADDP Dd,Vn.2D |
Dd -> result |
A64 |
uint8_t vaddv_u8(uint8x8_t a) |
a -> Vn.8B |
ADDV Bd,Vn.8B |
Bd -> result |
A64 |
uint8_t vaddvq_u8(uint8x16_t a) |
a -> Vn.16B |
ADDV Bd,Vn.16B |
Bd -> result |
A64 |
uint16_t vaddv_u16(uint16x4_t a) |
a -> Vn.4H |
ADDV Hd,Vn.4H |
Hd -> result |
A64 |
uint16_t vaddvq_u16(uint16x8_t a) |
a -> Vn.8H |
ADDV Hd,Vn.8H |
Hd -> result |
A64 |
uint32_t vaddv_u32(uint32x2_t a) |
a -> Vn.2S a -> Vm.2S |
ADDP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
uint32_t vaddvq_u32(uint32x4_t a) |
a -> Vn.4S |
ADDV Sd,Vn.4S |
Sd -> result |
A64 |
uint64_t vaddvq_u64(uint64x2_t a) |
a -> Vn.2D |
ADDP Dd,Vn.2D |
Dd -> result |
A64 |
float32_t vaddv_f32(float32x2_t a) |
a -> Vn.2S |
FADDP Sd,Vn.2S |
Sd -> result |
A64 |
float32_t vaddvq_f32(float32x4_t a) |
a -> Vn.4S a -> Vm.4S |
FADDP Vt.4S,Vn.4S,Vm.4S FADDP Sd,Vt.2S |
Sd -> result |
A64 |
float64_t vaddvq_f64(float64x2_t a) |
a -> Vn.2D |
FADDP Dd,Vn.2D |
Dd -> result |
A64 |
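
The across-vector forms reduce a whole register to a scalar, for example (illustrative names):

```c
#include <arm_neon.h>

int32_t   sum_s32x4(int32x4_t v)   { return vaddvq_s32(v); }   /* ADDV            */
float32_t sum_f32x4(float32x4_t v) { return vaddvq_f32(v); }   /* FADDP sequence  */
```
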
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16_t vaddlv_s8(int8x8_t a) |
a -> Vn.8B |
SADDLV Hd,Vn.8B |
Hd -> result |
A64 |
int16_t vaddlvq_s8(int8x16_t a) |
a -> Vn.16B |
SADDLV Hd,Vn.16B |
Hd -> result |
A64 |
int32_t vaddlv_s16(int16x4_t a) |
a -> Vn.4H |
SADDLV Sd,Vn.4H |
Sd -> result |
A64 |
int32_t vaddlvq_s16(int16x8_t a) |
a -> Vn.8H |
SADDLV Sd,Vn.8H |
Sd -> result |
A64 |
int64_t vaddlv_s32(int32x2_t a) |
a -> Vn.2S |
SADDLP Vd.1D,Vn.2S |
Dd -> result |
A64 |
int64_t vaddlvq_s32(int32x4_t a) |
a -> Vn.4S |
SADDLV Dd,Vn.4S |
Dd -> result |
A64 |
uint16_t vaddlv_u8(uint8x8_t a) |
a -> Vn.8B |
UADDLV Hd,Vn.8B |
Hd -> result |
A64 |
uint16_t vaddlvq_u8(uint8x16_t a) |
a -> Vn.16B |
UADDLV Hd,Vn.16B |
Hd -> result |
A64 |
uint32_t vaddlv_u16(uint16x4_t a) |
a -> Vn.4H |
UADDLV Sd,Vn.4H |
Sd -> result |
A64 |
uint32_t vaddlvq_u16(uint16x8_t a) |
a -> Vn.8H |
UADDLV Sd,Vn.8H |
Sd -> result |
A64 |
uint64_t vaddlv_u32(uint32x2_t a) |
a -> Vn.2S |
UADDLP Vd.1D,Vn.2S |
Dd -> result |
A64 |
uint64_t vaddlvq_u32(uint32x4_t a) |
a -> Vn.4S |
UADDLV Dd,Vn.4S |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8_t vmaxv_s8(int8x8_t a) |
a -> Vn.8B |
SMAXV Bd,Vn.8B |
Bd -> result |
A64 |
int8_t vmaxvq_s8(int8x16_t a) |
a -> Vn.16B |
SMAXV Bd,Vn.16B |
Bd -> result |
A64 |
int16_t vmaxv_s16(int16x4_t a) |
a -> Vn.4H |
SMAXV Hd,Vn.4H |
Hd -> result |
A64 |
int16_t vmaxvq_s16(int16x8_t a) |
a -> Vn.8H |
SMAXV Hd,Vn.8H |
Hd -> result |
A64 |
int32_t vmaxv_s32(int32x2_t a) |
a -> Vn.2S a -> Vm.2S |
SMAXP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
int32_t vmaxvq_s32(int32x4_t a) |
a -> Vn.4S |
SMAXV Sd,Vn.4S |
Sd -> result |
A64 |
uint8_t vmaxv_u8(uint8x8_t a) |
a -> Vn.8B |
UMAXV Bd,Vn.8B |
Bd -> result |
A64 |
uint8_t vmaxvq_u8(uint8x16_t a) |
a -> Vn.16B |
UMAXV Bd,Vn.16B |
Bd -> result |
A64 |
uint16_t vmaxv_u16(uint16x4_t a) |
a -> Vn.4H |
UMAXV Hd,Vn.4H |
Hd -> result |
A64 |
uint16_t vmaxvq_u16(uint16x8_t a) |
a -> Vn.8H |
UMAXV Hd,Vn.8H |
Hd -> result |
A64 |
uint32_t vmaxv_u32(uint32x2_t a) |
a -> Vn.2S a -> Vm.2S |
UMAXP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
uint32_t vmaxvq_u32(uint32x4_t a) |
a -> Vn.4S |
UMAXV Sd,Vn.4S |
Sd -> result |
A64 |
float32_t vmaxv_f32(float32x2_t a) |
a -> Vn.2S |
FMAXP Sd,Vn.2S |
Sd -> result |
A64 |
float32_t vmaxvq_f32(float32x4_t a) |
a -> Vn.4S |
FMAXV Sd,Vn.4S |
Sd -> result |
A64 |
float64_t vmaxvq_f64(float64x2_t a) |
a -> Vn.2D |
FMAXP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8_t vminv_s8(int8x8_t a) |
a -> Vn.8B |
SMINV Bd,Vn.8B |
Bd -> result |
A64 |
int8_t vminvq_s8(int8x16_t a) |
a -> Vn.16B |
SMINV Bd,Vn.16B |
Bd -> result |
A64 |
int16_t vminv_s16(int16x4_t a) |
a -> Vn.4H |
SMINV Hd,Vn.4H |
Hd -> result |
A64 |
int16_t vminvq_s16(int16x8_t a) |
a -> Vn.8H |
SMINV Hd,Vn.8H |
Hd -> result |
A64 |
int32_t vminv_s32(int32x2_t a) |
a -> Vn.2S a -> Vm.2S |
SMINP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
int32_t vminvq_s32(int32x4_t a) |
a -> Vn.4S |
SMINV Sd,Vn.4S |
Sd -> result |
A64 |
uint8_t vminv_u8(uint8x8_t a) |
a -> Vn.8B |
UMINV Bd,Vn.8B |
Bd -> result |
A64 |
uint8_t vminvq_u8(uint8x16_t a) |
a -> Vn.16B |
UMINV Bd,Vn.16B |
Bd -> result |
A64 |
uint16_t vminv_u16(uint16x4_t a) |
a -> Vn.4H |
UMINV Hd,Vn.4H |
Hd -> result |
A64 |
uint16_t vminvq_u16(uint16x8_t a) |
a -> Vn.8H |
UMINV Hd,Vn.8H |
Hd -> result |
A64 |
uint32_t vminv_u32(uint32x2_t a) |
a -> Vn.2S a -> Vm.2S |
UMINP Vd.2S,Vn.2S,Vm.2S |
Vd.S[0] -> result |
A64 |
uint32_t vminvq_u32(uint32x4_t a) |
a -> Vn.4S |
UMINV Sd,Vn.4S |
Sd -> result |
A64 |
float32_t vminv_f32(float32x2_t a) |
a -> Vn.2S |
FMINP Sd,Vn.2S |
Sd -> result |
A64 |
float32_t vminvq_f32(float32x4_t a) |
a -> Vn.4S |
FMINV Sd,Vn.4S |
Sd -> result |
A64 |
float64_t vminvq_f64(float64x2_t a) |
a -> Vn.2D |
FMINP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32_t vmaxnmv_f32(float32x2_t a) |
a -> Vn.2S |
FMAXNMP Sd,Vn.2S |
Sd -> result |
A64 |
float32_t vmaxnmvq_f32(float32x4_t a) |
a -> Vn.4S |
FMAXNMV Sd,Vn.4S |
Sd -> result |
A64 |
float64_t vmaxnmvq_f64(float64x2_t a) |
a -> Vn.2D |
FMAXNMP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
float32_t vminnmv_f32(float32x2_t a) |
a -> Vn.2S |
FMINNMP Sd,Vn.2S |
Sd -> result |
A64 |
float32_t vminnmvq_f32(float32x4_t a) |
a -> Vn.4S |
FMINNMV Sd,Vn.4S |
Sd -> result |
A64 |
float64_t vminnmvq_f64(float64x2_t a) |
a -> Vn.2D |
FMINNMP Dd,Vn.2D |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vceq_s8( |
a -> Vn.8B b -> Vm.8B |
CMEQ Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vceqq_s8( |
a -> Vn.16B b -> Vm.16B |
CMEQ Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vceq_s16( |
a -> Vn.4H b -> Vm.4H |
CMEQ Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vceqq_s16( |
a -> Vn.8H b -> Vm.8H |
CMEQ Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vceq_s32( |
a -> Vn.2S b -> Vm.2S |
CMEQ Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vceqq_s32( |
a -> Vn.4S b -> Vm.4S |
CMEQ Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vceq_u8( |
a -> Vn.8B b -> Vm.8B |
CMEQ Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vceqq_u8( |
a -> Vn.16B b -> Vm.16B |
CMEQ Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vceq_u16( |
a -> Vn.4H b -> Vm.4H |
CMEQ Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vceqq_u16( |
a -> Vn.8H b -> Vm.8H |
CMEQ Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vceq_u32( |
a -> Vn.2S b -> Vm.2S |
CMEQ Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vceqq_u32( |
a -> Vn.4S b -> Vm.4S |
CMEQ Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint32x2_t vceq_f32( |
a -> Vn.2S b -> Vm.2S |
FCMEQ Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vceqq_f32( |
a -> Vn.4S b -> Vm.4S |
FCMEQ Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vceq_p8( |
a -> Vn.8B b -> Vm.8B |
CMEQ Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vceqq_p8( |
a -> Vn.16B b -> Vm.16B |
CMEQ Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint64x1_t vceq_s64( |
a -> Dn b -> Dm |
CMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vceqq_s64( |
a -> Vn.2D b -> Vm.2D |
CMEQ Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vceq_u64( |
a -> Dn b -> Dm |
CMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vceqq_u64( |
a -> Vn.2D b -> Vm.2D |
CMEQ Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vceq_p64( |
a -> Dn b -> Dm |
CMEQ Dd,Dn,Dm |
Dd -> result |
A32/A64 |
uint64x2_t vceqq_p64( |
a -> Vn.2D b -> Vm.2D |
CMEQ Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A32/A64 |
uint64x1_t vceq_f64( |
a -> Dn b -> Dm |
FCMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vceqq_f64( |
a -> Vn.2D b -> Vm.2D |
FCMEQ Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64_t vceqd_s64( |
a -> Dn b -> Dm |
CMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vceqd_u64( |
a -> Dn b -> Dm |
CMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
uint32_t vceqs_f32( |
a -> Sn b -> Sm |
FCMEQ Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vceqd_f64( |
a -> Dn b -> Dm |
FCMEQ Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vceqz_s8(int8x8_t a) |
a -> Vn.8B |
CMEQ Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vceqzq_s8(int8x16_t a) |
a -> Vn.16B |
CMEQ Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vceqz_s16(int16x4_t a) |
a -> Vn.4H |
CMEQ Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vceqzq_s16(int16x8_t a) |
a -> Vn.8H |
CMEQ Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vceqz_s32(int32x2_t a) |
a -> Vn.2S |
CMEQ Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vceqzq_s32(int32x4_t a) |
a -> Vn.4S |
CMEQ Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint8x8_t vceqz_u8(uint8x8_t a) |
a -> Vn.8B |
CMEQ Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vceqzq_u8(uint8x16_t a) |
a -> Vn.16B |
CMEQ Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vceqz_u16(uint16x4_t a) |
a -> Vn.4H |
CMEQ Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vceqzq_u16(uint16x8_t a) |
a -> Vn.8H |
CMEQ Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vceqz_u32(uint32x2_t a) |
a -> Vn.2S |
CMEQ Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vceqzq_u32(uint32x4_t a) |
a -> Vn.4S |
CMEQ Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint32x2_t vceqz_f32(float32x2_t a) |
a -> Vn.2S |
FCMEQ Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vceqzq_f32(float32x4_t a) |
a -> Vn.4S |
FCMEQ Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint8x8_t vceqz_p8(poly8x8_t a) |
a -> Vn.8B |
CMEQ Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vceqzq_p8(poly8x16_t a) |
a -> Vn.16B |
CMEQ Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint64x1_t vceqz_s64(int64x1_t a) |
a -> Dn |
CMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vceqzq_s64(int64x2_t a) |
a -> Vn.2D |
CMEQ Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64x1_t vceqz_u64(uint64x1_t a) |
a -> Dn |
CMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vceqzq_u64(uint64x2_t a) |
a -> Vn.2D |
CMEQ Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64x1_t vceqz_p64(poly64x1_t a) |
a -> Dn |
CMEQ Dd,Dn,#0 |
Dd -> result |
A32/A64 |
uint64x2_t vceqzq_p64(poly64x2_t a) |
a -> Vn.2D |
CMEQ Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A32/A64 |
uint64x1_t vceqz_f64(float64x1_t a) |
a -> Dn |
FCMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vceqzq_f64(float64x2_t a) |
a -> Vn.2D |
FCMEQ Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64_t vceqzd_s64(int64_t a) |
a -> Dn |
CMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
uint64_t vceqzd_u64(uint64_t a) |
a -> Dn |
CMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
uint32_t vceqzs_f32(float32_t a) |
a -> Sn |
FCMEQ Sd,Sn,#0 |
Sd -> result |
A64 |
uint64_t vceqzd_f64(float64_t a) |
a -> Dn |
FCMEQ Dd,Dn,#0 |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vcge_s8( |
a -> Vn.8B b -> Vm.8B |
CMGE Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcgeq_s8( |
a -> Vn.16B b -> Vm.16B |
CMGE Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcge_s16( |
a -> Vn.4H b -> Vm.4H |
CMGE Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcgeq_s16( |
a -> Vn.8H b -> Vm.8H |
CMGE Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcge_s32( |
a -> Vn.2S b -> Vm.2S |
CMGE Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgeq_s32( |
a -> Vn.4S b -> Vm.4S |
CMGE Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vcge_u8( |
a -> Vn.8B b -> Vm.8B |
CMHS Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcgeq_u8( |
a -> Vn.16B b -> Vm.16B |
CMHS Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcge_u16( |
a -> Vn.4H b -> Vm.4H |
CMHS Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcgeq_u16( |
a -> Vn.8H b -> Vm.8H |
CMHS Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcge_u32( |
a -> Vn.2S b -> Vm.2S |
CMHS Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgeq_u32( |
a -> Vn.4S b -> Vm.4S |
CMHS Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint32x2_t vcge_f32( |
a -> Vn.2S b -> Vm.2S |
FCMGE Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgeq_f32( |
a -> Vn.4S b -> Vm.4S |
FCMGE Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcge_s64( |
a -> Dn b -> Dm |
CMGE Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgeq_s64( |
a -> Vn.2D b -> Vm.2D |
CMGE Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcge_u64( |
a -> Dn b -> Dm |
CMHS Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgeq_u64( |
a -> Vn.2D b -> Vm.2D |
CMHS Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcge_f64( |
a -> Dn b -> Dm |
FCMGE Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgeq_f64( |
a -> Vn.2D b -> Vm.2D |
FCMGE Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64_t vcged_s64( |
a -> Dn b -> Dm |
CMGE Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vcged_u64( |
a -> Dn b -> Dm |
CMHS Dd,Dn,Dm |
Dd -> result |
A64 |
uint32_t vcges_f32( |
a -> Sn b -> Sm |
FCMGE Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vcged_f64( |
a -> Dn b -> Dm |
FCMGE Dd,Dn,Dm |
Dd -> result |
A64 |
uint8x8_t vcgez_s8(int8x8_t a) |
a -> Vn.8B |
CMGE Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vcgezq_s8(int8x16_t a) |
a -> Vn.16B |
CMGE Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vcgez_s16(int16x4_t a) |
a -> Vn.4H |
CMGE Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vcgezq_s16(int16x8_t a) |
a -> Vn.8H |
CMGE Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vcgez_s32(int32x2_t a) |
a -> Vn.2S |
CMGE Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcgezq_s32(int32x4_t a) |
a -> Vn.4S |
CMGE Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcgez_s64(int64x1_t a) |
a -> Dn |
CMGE Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcgezq_s64(int64x2_t a) |
a -> Vn.2D |
CMGE Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint32x2_t vcgez_f32(float32x2_t a) |
a -> Vn.2S |
FCMGE Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcgezq_f32(float32x4_t a) |
a -> Vn.4S |
FCMGE Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcgez_f64(float64x1_t a) |
a -> Dn |
FCMGE Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcgezq_f64(float64x2_t a) |
a -> Vn.2D |
FCMGE Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64_t vcgezd_s64(int64_t a) |
a -> Dn |
CMGE Dd,Dn,#0 |
Dd -> result |
A64 |
uint32_t vcgezs_f32(float32_t a) |
a -> Sn |
FCMGE Sd,Sn,#0 |
Sd -> result |
A64 |
uint64_t vcgezd_f64(float64_t a) |
a -> Dn |
FCMGE Dd,Dn,#0 |
Dd -> result |
A64 |
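
Comparison intrinsics return all-ones for true lanes and all-zeros for false lanes, so the result can be used directly as a bit mask. A sketch that zeroes the negative lanes of a vector (names are illustrative, not ACLE):

```c
#include <arm_neon.h>

float32x4_t zero_negative_lanes(float32x4_t x) {
    uint32x4_t keep = vcgezq_f32(x);                  /* x >= 0 ? ~0 : 0 (FCMGE #0) */
    return vreinterpretq_f32_u32(
        vandq_u32(vreinterpretq_u32_f32(x), keep));   /* keep only non-negative lanes */
}
```
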
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vcle_s8( |
a -> Vn.8B b -> Vm.8B |
CMGE Vd.8B,Vm.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcleq_s8( |
a -> Vn.16B b -> Vm.16B |
CMGE Vd.16B,Vm.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcle_s16( |
a -> Vn.4H b -> Vm.4H |
CMGE Vd.4H,Vm.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcleq_s16( |
a -> Vn.8H b -> Vm.8H |
CMGE Vd.8H,Vm.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcle_s32( |
a -> Vn.2S b -> Vm.2S |
CMGE Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcleq_s32( |
a -> Vn.4S b -> Vm.4S |
CMGE Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vcle_u8( |
a -> Vn.8B b -> Vm.8B |
CMHS Vd.8B,Vm.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcleq_u8( |
a -> Vn.16B b -> Vm.16B |
CMHS Vd.16B,Vm.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcle_u16( |
a -> Vn.4H b -> Vm.4H |
CMHS Vd.4H,Vm.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcleq_u16( |
a -> Vn.8H b -> Vm.8H |
CMHS Vd.8H,Vm.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcle_u32( |
a -> Vn.2S b -> Vm.2S |
CMHS Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcleq_u32( |
a -> Vn.4S b -> Vm.4S |
CMHS Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint32x2_t vcle_f32( |
a -> Vn.2S b -> Vm.2S |
FCMGE Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcleq_f32( |
a -> Vn.4S b -> Vm.4S |
FCMGE Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcle_s64( |
a -> Dn b -> Dm |
CMGE Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcleq_s64( |
a -> Vn.2D b -> Vm.2D |
CMGE Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcle_u64( |
a -> Dn b -> Dm |
CMHS Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcleq_u64( |
a -> Vn.2D b -> Vm.2D |
CMHS Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcle_f64( |
a -> Dn b -> Dm |
FCMGE Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcleq_f64( |
a -> Vn.2D b -> Vm.2D |
FCMGE Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64_t vcled_s64( |
a -> Dn b -> Dm |
CMGE Dd,Dm,Dn |
Dd -> result |
A64 |
uint64_t vcled_u64( |
a -> Dn b -> Dm |
CMHS Dd,Dm,Dn |
Dd -> result |
A64 |
uint32_t vcles_f32( |
a -> Sn b -> Sm |
FCMGE Sd,Sm,Sn |
Sd -> result |
A64 |
uint64_t vcled_f64( |
a -> Dn b -> Dm |
FCMGE Dd,Dm,Dn |
Dd -> result |
A64 |
uint8x8_t vclez_s8(int8x8_t a) |
a -> Vn.8B |
CMLE Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vclezq_s8(int8x16_t a) |
a -> Vn.16B |
CMLE Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vclez_s16(int16x4_t a) |
a -> Vn.4H |
CMLE Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vclezq_s16(int16x8_t a) |
a -> Vn.8H |
CMLE Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vclez_s32(int32x2_t a) |
a -> Vn.2S |
CMLE Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vclezq_s32(int32x4_t a) |
a -> Vn.4S |
CMLE Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vclez_s64(int64x1_t a) |
a -> Dn |
CMLE Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vclezq_s64(int64x2_t a) |
a -> Vn.2D |
CMLE Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint32x2_t vclez_f32(float32x2_t a) |
a -> Vn.2S |
FCMLE Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vclezq_f32(float32x4_t a) |
a -> Vn.4S |
FCMLE Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vclez_f64(float64x1_t a) |
a -> Dn |
FCMLE Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vclezq_f64(float64x2_t a) |
a -> Vn.2D |
FCMLE Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64_t vclezd_s64(int64_t a) |
a -> Dn |
CMLE Dd,Dn,#0 |
Dd -> result |
A64 |
uint32_t vclezs_f32(float32_t a) |
a -> Sn |
FCMLE Sd,Sn,#0 |
Sd -> result |
A64 |
uint64_t vclezd_f64(float64_t a) |
a -> Dn |
FCMLE Dd,Dn,#0 |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vcgt_s8( |
a -> Vn.8B b -> Vm.8B |
CMGT Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcgtq_s8( |
a -> Vn.16B b -> Vm.16B |
CMGT Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcgt_s16( |
a -> Vn.4H b -> Vm.4H |
CMGT Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcgtq_s16( |
a -> Vn.8H b -> Vm.8H |
CMGT Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcgt_s32( |
a -> Vn.2S b -> Vm.2S |
CMGT Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgtq_s32( |
a -> Vn.4S b -> Vm.4S |
CMGT Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vcgt_u8( |
a -> Vn.8B b -> Vm.8B |
CMHI Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcgtq_u8( |
a -> Vn.16B b -> Vm.16B |
CMHI Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vcgt_u16( |
a -> Vn.4H b -> Vm.4H |
CMHI Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcgtq_u16( |
a -> Vn.8H b -> Vm.8H |
CMHI Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vcgt_u32( |
a -> Vn.2S b -> Vm.2S |
CMHI Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgtq_u32( |
a -> Vn.4S b -> Vm.4S |
CMHI Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint32x2_t vcgt_f32( |
a -> Vn.2S b -> Vm.2S |
FCMGT Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcgtq_f32( |
a -> Vn.4S b -> Vm.4S |
FCMGT Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcgt_s64( |
a -> Dn b -> Dm |
CMGT Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgtq_s64( |
a -> Vn.2D b -> Vm.2D |
CMGT Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcgt_u64( |
a -> Dn b -> Dm |
CMHI Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgtq_u64( |
a -> Vn.2D b -> Vm.2D |
CMHI Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vcgt_f64( |
a -> Dn b -> Dm |
FCMGT Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcgtq_f64( |
a -> Vn.2D b -> Vm.2D |
FCMGT Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64_t vcgtd_s64( |
a -> Dn b -> Dm |
CMGT Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vcgtd_u64( |
a -> Dn b -> Dm |
CMHI Dd,Dn,Dm |
Dd -> result |
A64 |
uint32_t vcgts_f32( |
a -> Sn b -> Sm |
FCMGT Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vcgtd_f64( |
a -> Dn b -> Dm |
FCMGT Dd,Dn,Dm |
Dd -> result |
A64 |
uint8x8_t vcgtz_s8(int8x8_t a) |
a -> Vn.8B |
CMGT Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vcgtzq_s8(int8x16_t a) |
a -> Vn.16B |
CMGT Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vcgtz_s16(int16x4_t a) |
a -> Vn.4H |
CMGT Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vcgtzq_s16(int16x8_t a) |
a -> Vn.8H |
CMGT Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vcgtz_s32(int32x2_t a) |
a -> Vn.2S |
CMGT Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcgtzq_s32(int32x4_t a) |
a -> Vn.4S |
CMGT Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcgtz_s64(int64x1_t a) |
a -> Dn |
CMGT Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcgtzq_s64(int64x2_t a) |
a -> Vn.2D |
CMGT Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint32x2_t vcgtz_f32(float32x2_t a) |
a -> Vn.2S |
FCMGT Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcgtzq_f32(float32x4_t a) |
a -> Vn.4S |
FCMGT Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcgtz_f64(float64x1_t a) |
a -> Dn |
FCMGT Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcgtzq_f64(float64x2_t a) |
a -> Vn.2D |
FCMGT Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64_t vcgtzd_s64(int64_t a) |
a -> Dn |
CMGT Dd,Dn,#0 |
Dd -> result |
A64 |
uint32_t vcgtzs_f32(float32_t a) |
a -> Sn |
FCMGT Sd,Sn,#0 |
Sd -> result |
A64 |
uint64_t vcgtzd_f64(float64_t a) |
a -> Dn |
FCMGT Dd,Dn,#0 |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vclt_s8( |
a -> Vn.8B b -> Vm.8B |
CMGT Vd.8B,Vm.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcltq_s8( |
a -> Vn.16B b -> Vm.16B |
CMGT Vd.16B,Vm.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vclt_s16( |
a -> Vn.4H b -> Vm.4H |
CMGT Vd.4H,Vm.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcltq_s16( |
a -> Vn.8H b -> Vm.8H |
CMGT Vd.8H,Vm.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vclt_s32( |
a -> Vn.2S b -> Vm.2S |
CMGT Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcltq_s32( |
a -> Vn.4S b -> Vm.4S |
CMGT Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vclt_u8( |
a -> Vn.8B b -> Vm.8B |
CMHI Vd.8B,Vm.8B,Vn.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vcltq_u8( |
a -> Vn.16B b -> Vm.16B |
CMHI Vd.16B,Vm.16B,Vn.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vclt_u16( |
a -> Vn.4H b -> Vm.4H |
CMHI Vd.4H,Vm.4H,Vn.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vcltq_u16( |
a -> Vn.8H b -> Vm.8H |
CMHI Vd.8H,Vm.8H,Vn.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vclt_u32( |
a -> Vn.2S b -> Vm.2S |
CMHI Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcltq_u32( |
a -> Vn.4S b -> Vm.4S |
CMHI Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint32x2_t vclt_f32( |
a -> Vn.2S b -> Vm.2S |
FCMGT Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcltq_f32( |
a -> Vn.4S b -> Vm.4S |
FCMGT Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vclt_s64( |
a -> Dn b -> Dm |
CMGT Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcltq_s64( |
a -> Vn.2D b -> Vm.2D |
CMGT Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64x1_t vclt_u64( |
a -> Dn b -> Dm |
CMHI Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcltq_u64( |
a -> Vn.2D b -> Vm.2D |
CMHI Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64x1_t vclt_f64( |
a -> Dn b -> Dm |
FCMGT Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcltq_f64( |
a -> Vn.2D b -> Vm.2D |
FCMGT Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint64_t vcltd_s64( |
a -> Dn b -> Dm |
CMGT Dd,Dm,Dn |
Dd -> result |
A64 |
uint64_t vcltd_u64( |
a -> Dn b -> Dm |
CMHI Dd,Dm,Dn |
Dd -> result |
A64 |
uint32_t vclts_f32( |
a -> Sn b -> Sm |
FCMGT Sd,Sm,Sn |
Sd -> result |
A64 |
uint64_t vcltd_f64( |
a -> Dn b -> Dm |
FCMGT Dd,Dm,Dn |
Dd -> result |
A64 |
uint8x8_t vcltz_s8(int8x8_t a) |
a -> Vn.8B |
CMLT Vd.8B,Vn.8B,#0 |
Vd.8B -> result |
A64 |
uint8x16_t vcltzq_s8(int8x16_t a) |
a -> Vn.16B |
CMLT Vd.16B,Vn.16B,#0 |
Vd.16B -> result |
A64 |
uint16x4_t vcltz_s16(int16x4_t a) |
a -> Vn.4H |
CMLT Vd.4H,Vn.4H,#0 |
Vd.4H -> result |
A64 |
uint16x8_t vcltzq_s16(int16x8_t a) |
a -> Vn.8H |
CMLT Vd.8H,Vn.8H,#0 |
Vd.8H -> result |
A64 |
uint32x2_t vcltz_s32(int32x2_t a) |
a -> Vn.2S |
CMLT Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcltzq_s32(int32x4_t a) |
a -> Vn.4S |
CMLT Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcltz_s64(int64x1_t a) |
a -> Dn |
CMLT Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcltzq_s64(int64x2_t a) |
a -> Vn.2D |
CMLT Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint32x2_t vcltz_f32(float32x2_t a) |
a -> Vn.2S |
FCMLT Vd.2S,Vn.2S,#0 |
Vd.2S -> result |
A64 |
uint32x4_t vcltzq_f32(float32x4_t a) |
a -> Vn.4S |
FCMLT Vd.4S,Vn.4S,#0 |
Vd.4S -> result |
A64 |
uint64x1_t vcltz_f64(float64x1_t a) |
a -> Dn |
FCMLT Dd,Dn,#0 |
Dd -> result |
A64 |
uint64x2_t vcltzq_f64(float64x2_t a) |
a -> Vn.2D |
FCMLT Vd.2D,Vn.2D,#0 |
Vd.2D -> result |
A64 |
uint64_t vcltzd_s64(int64_t a) |
a -> Dn |
CMLT Dd,Dn,#0 |
Dd -> result |
A64 |
uint32_t vcltzs_f32(float32_t a) |
a -> Sn |
FCMLT Sd,Sn,#0 |
Sd -> result |
A64 |
uint64_t vcltzd_f64(float64_t a) |
a -> Dn |
FCMLT Dd,Dn,#0 |
Dd -> result |
A64 |
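Because every true lane of a less-than(-zero) comparison is all ones (-1 when reinterpreted as signed), the mask can be reduced to a lane count. A minimal sketch using A64-only intrinsics; the helper name is our own:

```c
#include <arm_neon.h>

/* Count how many of the four lanes are negative. Each true lane of the
   vcltzq_s32 mask is 0xFFFFFFFF, i.e. -1 as a signed value, so summing
   the negated mask yields the count. */
static inline int count_negative(int32x4_t v)
{
    uint32x4_t m = vcltzq_s32(v);                     /* CMLT Vd.4S,Vn.4S,#0 (A64) */
    int32x4_t ones = vnegq_s32(vreinterpretq_s32_u32(m));
    return vaddvq_s32(ones);                          /* horizontal add (A64) */
}
```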
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vcage_f32( |
a -> Vn.2S b -> Vm.2S |
FACGE Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcageq_f32( |
a -> Vn.4S b -> Vm.4S |
FACGE Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcage_f64( |
a -> Dn b -> Dm |
FACGE Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcageq_f64( |
a -> Vn.2D b -> Vm.2D |
FACGE Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint32_t vcages_f32( |
a -> Sn b -> Sm |
FACGE Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vcaged_f64( |
a -> Dn b -> Dm |
FACGE Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vcale_f32( |
a -> Vn.2S b -> Vm.2S |
FACGE Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcaleq_f32( |
a -> Vn.4S b -> Vm.4S |
FACGE Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcale_f64( |
a -> Dn b -> Dm |
FACGE Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcaleq_f64( |
a -> Vn.2D b -> Vm.2D |
FACGE Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint32_t vcales_f32( |
a -> Sn b -> Sm |
FACGE Sd,Sm,Sn |
Sd -> result |
A64 |
uint64_t vcaled_f64( |
a -> Dn b -> Dm |
FACGE Dd,Dm,Dn |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vcagt_f32( |
a -> Vn.2S b -> Vm.2S |
FACGT Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcagtq_f32( |
a -> Vn.4S b -> Vm.4S |
FACGT Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcagt_f64( |
a -> Dn b -> Dm |
FACGT Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vcagtq_f64( |
a -> Vn.2D b -> Vm.2D |
FACGT Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint32_t vcagts_f32( |
a -> Sn b -> Sm |
FACGT Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vcagtd_f64( |
a -> Dn b -> Dm |
FACGT Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint32x2_t vcalt_f32( |
a -> Vn.2S b -> Vm.2S |
FACGT Vd.2S,Vm.2S,Vn.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vcaltq_f32( |
a -> Vn.4S b -> Vm.4S |
FACGT Vd.4S,Vm.4S,Vn.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vcalt_f64( |
a -> Dn b -> Dm |
FACGT Dd,Dm,Dn |
Dd -> result |
A64 |
uint64x2_t vcaltq_f64( |
a -> Vn.2D b -> Vm.2D |
FACGT Vd.2D,Vm.2D,Vn.2D |
Vd.2D -> result |
A64 |
uint32_t vcalts_f32( |
a -> Sn b -> Sm |
FACGT Sd,Sm,Sn |
Sd -> result |
A64 |
uint64_t vcaltd_f64( |
a -> Dn b -> Dm |
FACGT Dd,Dm,Dn |
Dd -> result |
A64 |
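The absolute comparisons compare magnitudes, which is a common way to express a tolerance check without computing an explicit absolute value first. A minimal sketch; the tolerance value and helper name are illustrative:

```c
#include <arm_neon.h>

/* All-ones in every lane where |x| exceeds the tolerance. */
static inline uint32x4_t out_of_tolerance(float32x4_t x)
{
    float32x4_t tol = vdupq_n_f32(1.0e-6f);
    return vcagtq_f32(x, tol);   /* FACGT Vd.4S,Vn.4S,Vm.4S */
}
```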
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vtst_s8( |
a -> Vn.8B b -> Vm.8B |
CMTST Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vtstq_s8( |
a -> Vn.16B b -> Vm.16B |
CMTST Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vtst_s16( |
a -> Vn.4H b -> Vm.4H |
CMTST Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vtstq_s16( |
a -> Vn.8H b -> Vm.8H |
CMTST Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vtst_s32( |
a -> Vn.2S b -> Vm.2S |
CMTST Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vtstq_s32( |
a -> Vn.4S b -> Vm.4S |
CMTST Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vtst_u8( |
a -> Vn.8B b -> Vm.8B |
CMTST Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vtstq_u8( |
a -> Vn.16B b -> Vm.16B |
CMTST Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vtst_u16( |
a -> Vn.4H b -> Vm.4H |
CMTST Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vtstq_u16( |
a -> Vn.8H b -> Vm.8H |
CMTST Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vtst_u32( |
a -> Vn.2S b -> Vm.2S |
CMTST Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vtstq_u32( |
a -> Vn.4S b -> Vm.4S |
CMTST Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint8x8_t vtst_p8( |
a -> Vn.8B b -> Vm.8B |
CMTST Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vtstq_p8( |
a -> Vn.16B b -> Vm.16B |
CMTST Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint64x1_t vtst_s64( |
a -> Dn b -> Dm |
CMTST Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vtstq_s64( |
a -> Vn.2D b -> Vm.2D |
CMTST Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vtst_u64( |
a -> Dn b -> Dm |
CMTST Dd,Dn,Dm |
Dd -> result |
A64 |
uint64x2_t vtstq_u64( |
a -> Vn.2D b -> Vm.2D |
CMTST Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A64 |
uint64x1_t vtst_p64( |
a -> Dn b -> Dm |
CMTST Dd,Dn,Dm |
Dd -> result |
A32/A64 |
uint64x2_t vtstq_p64( |
a -> Vn.2D b -> Vm.2D |
CMTST Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
A32/A64 |
uint64_t vtstd_s64( |
a -> Dn b -> Dm |
CMTST Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vtstd_u64( |
a -> Dn b -> Dm |
CMTST Dd,Dn,Dm |
Dd -> result |
A64 |
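The bitwise test intrinsics set a lane to all ones when the AND of the two operands is non-zero, which makes them convenient for flag testing. A minimal sketch; the flag mask `0x03` is only an example:

```c
#include <arm_neon.h>

/* All-ones in each byte lane of flags where any of the tested bits
   (here bits 0 and 1) are set. */
static inline uint8x16_t lanes_with_flags(uint8x16_t flags)
{
    uint8x16_t mask = vdupq_n_u8(0x03);
    return vtstq_u8(flags, mask);   /* CMTST Vd.16B,Vn.16B,Vm.16B */
}
```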
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vshl_s8( |
a -> Vn.8B b -> Vm.8B |
SSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vshlq_s8( |
a -> Vn.16B b -> Vm.16B |
SSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vshl_s16( |
a -> Vn.4H b -> Vm.4H |
SSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vshlq_s16( |
a -> Vn.8H b -> Vm.8H |
SSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vshl_s32( |
a -> Vn.2S b -> Vm.2S |
SSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vshlq_s32( |
a -> Vn.4S b -> Vm.4S |
SSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vshl_s64( |
a -> Dn b -> Dm |
SSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vshlq_s64( |
a -> Vn.2D b -> Vm.2D |
SSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vshl_u8( |
a -> Vn.8B b -> Vm.8B |
USHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vshlq_u8( |
a -> Vn.16B b -> Vm.16B |
USHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vshl_u16( |
a -> Vn.4H b -> Vm.4H |
USHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vshlq_u16( |
a -> Vn.8H b -> Vm.8H |
USHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vshl_u32( |
a -> Vn.2S b -> Vm.2S |
USHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vshlq_u32( |
a -> Vn.4S b -> Vm.4S |
USHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vshl_u64( |
a -> Dn b -> Dm |
USHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vshlq_u64( |
a -> Vn.2D b -> Vm.2D |
USHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int64_t vshld_s64( |
a -> Dn b -> Dm |
SSHL Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vshld_u64( |
a -> Dn b -> Dm |
USHL Dd,Dn,Dm |
Dd -> result |
A64 |
int8x8_t vshl_n_s8( |
a -> Vn.8B 0 <= n <= 7 |
SHL Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vshlq_n_s8( |
a -> Vn.16B 0 <= n <= 7 |
SHL Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vshl_n_s16( |
a -> Vn.4H 0 <= n <= 15 |
SHL Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vshlq_n_s16( |
a -> Vn.8H 0 <= n <= 15 |
SHL Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vshl_n_s32( |
a -> Vn.2S 0 <= n <= 31 |
SHL Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vshlq_n_s32( |
a -> Vn.4S 0 <= n <= 31 |
SHL Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vshl_n_s64( |
a -> Dn 0 <= n <= 63 |
SHL Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vshlq_n_s64( |
a -> Vn.2D 0 <= n <= 63 |
SHL Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vshl_n_u8( |
a -> Vn.8B 0 <= n <= 7 |
SHL Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vshlq_n_u8( |
a -> Vn.16B 0 <= n <= 7 |
SHL Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vshl_n_u16( |
a -> Vn.4H 0 <= n <= 15 |
SHL Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vshlq_n_u16( |
a -> Vn.8H 0 <= n <= 15 |
SHL Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vshl_n_u32( |
a -> Vn.2S 0 <= n <= 31 |
SHL Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vshlq_n_u32( |
a -> Vn.4S 0 <= n <= 31 |
SHL Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vshl_n_u64( |
a -> Dn 0 <= n <= 63 |
SHL Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vshlq_n_u64( |
a -> Vn.2D 0 <= n <= 63 |
SHL Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int64_t vshld_n_s64( |
a -> Dn 0 <= n <= 63 |
SHL Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vshld_n_u64( |
a -> Dn 0 <= n <= 63 |
SHL Dd,Dn,#n |
Dd -> result |
A64 |
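The register form of the shift takes a signed, per-lane shift count (negative counts shift right), while the `_n` form takes a compile-time immediate. A minimal sketch showing both producing the same result for a fixed positive shift:

```c
#include <arm_neon.h>

/* Multiply each lane by 2^3, once with a register shift count and once
   with an immediate. The register form could instead carry a different,
   possibly negative, count in every lane. */
static inline int32x4_t scale_by_8(int32x4_t v)
{
    int32x4_t by_reg = vshlq_s32(v, vdupq_n_s32(3)); /* SSHL Vd.4S,Vn.4S,Vm.4S */
    int32x4_t by_imm = vshlq_n_s32(v, 3);            /* SHL  Vd.4S,Vn.4S,#3   */
    (void)by_reg;
    return by_imm;
}
```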
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vqshl_s8( |
a -> Vn.8B b -> Vm.8B |
SQSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqshlq_s8( |
a -> Vn.16B b -> Vm.16B |
SQSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqshl_s16( |
a -> Vn.4H b -> Vm.4H |
SQSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqshlq_s16( |
a -> Vn.8H b -> Vm.8H |
SQSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqshl_s32( |
a -> Vn.2S b -> Vm.2S |
SQSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqshlq_s32( |
a -> Vn.4S b -> Vm.4S |
SQSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqshl_s64( |
a -> Dn b -> Dm |
SQSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vqshlq_s64( |
a -> Vn.2D b -> Vm.2D |
SQSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vqshl_u8( |
a -> Vn.8B b -> Vm.8B |
UQSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqshlq_u8( |
a -> Vn.16B b -> Vm.16B |
UQSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqshl_u16( |
a -> Vn.4H b -> Vm.4H |
UQSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqshlq_u16( |
a -> Vn.8H b -> Vm.8H |
UQSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqshl_u32( |
a -> Vn.2S b -> Vm.2S |
UQSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqshlq_u32( |
a -> Vn.4S b -> Vm.4S |
UQSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqshl_u64( |
a -> Dn b -> Dm |
UQSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqshlq_u64( |
a -> Vn.2D b -> Vm.2D |
UQSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int8_t vqshlb_s8( |
a -> Bn b -> Bm |
SQSHL Bd,Bn,Bm |
Bd -> result |
A64 |
int16_t vqshlh_s16( |
a -> Hn b -> Hm |
SQSHL Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqshls_s32( |
a -> Sn b -> Sm |
SQSHL Sd,Sn,Sm |
Sd -> result |
A64 |
int64_t vqshld_s64( |
a -> Dn b -> Dm |
SQSHL Dd,Dn,Dm |
Dd -> result |
A64 |
uint8_t vqshlb_u8( |
a -> Bn b -> Bm |
UQSHL Bd,Bn,Bm |
Bd -> result |
A64 |
uint16_t vqshlh_u16( |
a -> Hn b -> Hm |
UQSHL Hd,Hn,Hm |
Hd -> result |
A64 |
uint32_t vqshls_u32( |
a -> Sn b -> Sm |
UQSHL Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vqshld_u64( |
a -> Dn b -> Dm |
UQSHL Dd,Dn,Dm |
Dd -> result |
A64 |
int8x8_t vqshl_n_s8( |
a -> Vn.8B 0 <= n <= 7 |
SQSHL Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqshlq_n_s8( |
a -> Vn.16B 0 <= n <= 7 |
SQSHL Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqshl_n_s16( |
a -> Vn.4H 0 <= n <= 15 |
SQSHL Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqshlq_n_s16( |
a -> Vn.8H 0 <= n <= 15 |
SQSHL Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqshl_n_s32( |
a -> Vn.2S 0 <= n <= 31 |
SQSHL Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqshlq_n_s32( |
a -> Vn.4S 0 <= n <= 31 |
SQSHL Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqshl_n_s64( |
a -> Dn 0 <= n <= 63 |
SQSHL Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vqshlq_n_s64( |
a -> Vn.2D 0 <= n <= 63 |
SQSHL Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vqshl_n_u8( |
a -> Vn.8B 0 <= n <= 7 |
UQSHL Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqshlq_n_u8( |
a -> Vn.16B 0 <= n <= 7 |
UQSHL Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqshl_n_u16( |
a -> Vn.4H 0 <= n <= 15 |
UQSHL Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqshlq_n_u16( |
a -> Vn.8H 0 <= n <= 15 |
UQSHL Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqshl_n_u32( |
a -> Vn.2S 0 <= n <= 31 |
UQSHL Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqshlq_n_u32( |
a -> Vn.4S 0 <= n <= 31 |
UQSHL Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqshl_n_u64( |
a -> Dn 0 <= n <= 63 |
UQSHL Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqshlq_n_u64( |
a -> Vn.2D 0 <= n <= 63 |
UQSHL Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int8_t vqshlb_n_s8( |
a -> Bn 0 <= n <= 7 |
SQSHL Bd,Bn,#n |
Bd -> result |
A64 |
int16_t vqshlh_n_s16( |
a -> Hn 0 <= n <= 15 |
SQSHL Hd,Hn,#n |
Hd -> result |
A64 |
int32_t vqshls_n_s32( |
a -> Sn 0 <= n <= 31 |
SQSHL Sd,Sn,#n |
Sd -> result |
A64 |
int64_t vqshld_n_s64( |
a -> Dn 0 <= n <= 63 |
SQSHL Dd,Dn,#n |
Dd -> result |
A64 |
uint8_t vqshlb_n_u8( |
a -> Bn 0 <= n <= 7 |
UQSHL Bd,Bn,#n |
Bd -> result |
A64 |
uint16_t vqshlh_n_u16( |
a -> Hn 0 <= n <= 15 |
UQSHL Hd,Hn,#n |
Hd -> result |
A64 |
uint32_t vqshls_n_u32( |
a -> Sn 0 <= n <= 31 |
UQSHL Sd,Sn,#n |
Sd -> result |
A64 |
uint64_t vqshld_n_u64( |
a -> Dn 0 <= n <= 63 |
UQSHL Dd,Dn,#n |
Dd -> result |
A64 |
uint8x8_t vqshlu_n_s8( |
a -> Vn.8B 0 <= n <= 7 |
SQSHLU Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqshluq_n_s8( |
a -> Vn.16B 0 <= n <= 7 |
SQSHLU Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqshlu_n_s16( |
a -> Vn.4H 0 <= n <= 15 |
SQSHLU Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqshluq_n_s16( |
a -> Vn.8H 0 <= n <= 15 |
SQSHLU Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqshlu_n_s32( |
a -> Vn.2S 0 <= n <= 31 |
SQSHLU Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqshluq_n_s32( |
a -> Vn.4S 0 <= n <= 31 |
SQSHLU Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqshlu_n_s64( |
a -> Dn 0 <= n <= 63 |
SQSHLU Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqshluq_n_s64( |
a -> Vn.2D 0 <= n <= 63 |
SQSHLU Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8_t vqshlub_n_s8( |
a -> Bn 0 <= n <= 7 |
SQSHLU Bd,Bn,#n |
Bd -> result |
A64 |
uint16_t vqshluh_n_s16( |
a -> Hn 0 <= n <= 15 |
SQSHLU Hd,Hn,#n |
Hd -> result |
A64 |
uint32_t vqshlus_n_s32( |
a -> Sn 0 <= n <= 31 |
SQSHLU Sd,Sn,#n |
Sd -> result |
A64 |
uint64_t vqshlud_n_s64( |
a -> Dn 0 <= n <= 63 |
SQSHLU Dd,Dn,#n |
Dd -> result |
A64 |
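The saturating shifts clamp to the representable range instead of wrapping; the `vqshlu*` forms shift signed input but saturate to the unsigned range, so negative lanes become zero. A minimal sketch under those semantics; the helper name is our own:

```c
#include <arm_neon.h>

/* Apply a gain of 2^2 with signed saturation, then clamp the result to
   the unsigned 16-bit range (negative lanes become 0). */
static inline uint16x8_t gain_to_unsigned(int16x8_t samples)
{
    int16x8_t boosted = vqshlq_n_s16(samples, 2);  /* SQSHL  Vd.8H,Vn.8H,#2 */
    return vqshluq_n_s16(boosted, 0);              /* SQSHLU Vd.8H,Vn.8H,#0 */
}
```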
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vrshl_s8( |
a -> Vn.8B b -> Vm.8B |
SRSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vrshlq_s8( |
a -> Vn.16B b -> Vm.16B |
SRSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vrshl_s16( |
a -> Vn.4H b -> Vm.4H |
SRSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vrshlq_s16( |
a -> Vn.8H b -> Vm.8H |
SRSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vrshl_s32( |
a -> Vn.2S b -> Vm.2S |
SRSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vrshlq_s32( |
a -> Vn.4S b -> Vm.4S |
SRSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vrshl_s64( |
a -> Dn b -> Dm |
SRSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vrshlq_s64( |
a -> Vn.2D b -> Vm.2D |
SRSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vrshl_u8( |
a -> Vn.8B b -> Vm.8B |
URSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vrshlq_u8( |
a -> Vn.16B b -> Vm.16B |
URSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vrshl_u16( |
a -> Vn.4H b -> Vm.4H |
URSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vrshlq_u16( |
a -> Vn.8H b -> Vm.8H |
URSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vrshl_u32( |
a -> Vn.2S b -> Vm.2S |
URSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrshlq_u32( |
a -> Vn.4S b -> Vm.4S |
URSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vrshl_u64( |
a -> Dn b -> Dm |
URSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vrshlq_u64( |
a -> Vn.2D b -> Vm.2D |
URSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int64_t vrshld_s64( |
a -> Dn b -> Dm |
SRSHL Dd,Dn,Dm |
Dd -> result |
A64 |
uint64_t vrshld_u64( |
a -> Dn b -> Dm |
URSHL Dd,Dn,Dm |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vqrshl_s8( |
a -> Vn.8B b -> Vm.8B |
SQRSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vqrshlq_s8( |
a -> Vn.16B b -> Vm.16B |
SQRSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vqrshl_s16( |
a -> Vn.4H b -> Vm.4H |
SQRSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vqrshlq_s16( |
a -> Vn.8H b -> Vm.8H |
SQRSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vqrshl_s32( |
a -> Vn.2S b -> Vm.2S |
SQRSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vqrshlq_s32( |
a -> Vn.4S b -> Vm.4S |
SQRSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vqrshl_s64( |
a -> Dn b -> Dm |
SQRSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
int64x2_t vqrshlq_s64( |
a -> Vn.2D b -> Vm.2D |
SQRSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vqrshl_u8( |
a -> Vn.8B b -> Vm.8B |
UQRSHL Vd.8B,Vn.8B,Vm.8B |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vqrshlq_u8( |
a -> Vn.16B b -> Vm.16B |
UQRSHL Vd.16B,Vn.16B,Vm.16B |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vqrshl_u16( |
a -> Vn.4H b -> Vm.4H |
UQRSHL Vd.4H,Vn.4H,Vm.4H |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vqrshlq_u16( |
a -> Vn.8H b -> Vm.8H |
UQRSHL Vd.8H,Vn.8H,Vm.8H |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vqrshl_u32( |
a -> Vn.2S b -> Vm.2S |
UQRSHL Vd.2S,Vn.2S,Vm.2S |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vqrshlq_u32( |
a -> Vn.4S b -> Vm.4S |
UQRSHL Vd.4S,Vn.4S,Vm.4S |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vqrshl_u64( |
a -> Dn b -> Dm |
UQRSHL Dd,Dn,Dm |
Dd -> result |
v7/A32/A64 |
uint64x2_t vqrshlq_u64( |
a -> Vn.2D b -> Vm.2D |
UQRSHL Vd.2D,Vn.2D,Vm.2D |
Vd.2D -> result |
v7/A32/A64 |
int8_t vqrshlb_s8( |
a -> Bn b -> Bm |
SQRSHL Bd,Bn,Bm |
Bd -> result |
A64 |
int16_t vqrshlh_s16( |
a -> Hn b -> Hm |
SQRSHL Hd,Hn,Hm |
Hd -> result |
A64 |
int32_t vqrshls_s32( |
a -> Sn b -> Sm |
SQRSHL Sd,Sn,Sm |
Sd -> result |
A64 |
int64_t vqrshld_s64( |
a -> Dn b -> Dm |
SQRSHL Dd,Dn,Dm |
Dd -> result |
A64 |
uint8_t vqrshlb_u8( |
a -> Bn b -> Bm |
UQRSHL Bd,Bn,Bm |
Bd -> result |
A64 |
uint16_t vqrshlh_u16( |
a -> Hn b -> Hm |
UQRSHL Hd,Hn,Hm |
Hd -> result |
A64 |
uint32_t vqrshls_u32( |
a -> Sn b -> Sm |
UQRSHL Sd,Sn,Sm |
Sd -> result |
A64 |
uint64_t vqrshld_u64( |
a -> Dn b -> Dm |
UQRSHL Dd,Dn,Dm |
Dd -> result |
A64 |
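As with the other register-count shifts, a negative per-lane count in the saturating rounding form gives a rounding right shift, which is a compact way to divide with round-to-nearest. A minimal sketch:

```c
#include <arm_neon.h>

/* Divide each lane by 2^4 with rounding and saturation by shifting by -4. */
static inline int32x4_t div_by_16_rounded(int32x4_t v)
{
    return vqrshlq_s32(v, vdupq_n_s32(-4));  /* SQRSHL Vd.4S,Vn.4S,Vm.4S */
}
```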
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int16x8_t vshll_n_s8( |
a -> Vn.8B 0 <= n <= 7 |
SSHLL Vd.8H,Vn.8B,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vshll_n_s16( |
a -> Vn.4H 0 <= n <= 15 |
SSHLL Vd.4S,Vn.4H,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vshll_n_s32( |
a -> Vn.2S 0 <= n <= 31 |
SSHLL Vd.2D,Vn.2S,#n |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vshll_n_u8( |
a -> Vn.8B 0 <= n <= 7 |
USHLL Vd.8H,Vn.8B,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vshll_n_u16( |
a -> Vn.4H 0 <= n <= 15 |
USHLL Vd.4S,Vn.4H,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vshll_n_u32( |
a -> Vn.2S 0 <= n <= 31 |
USHLL Vd.2D,Vn.2S,#n |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vshll_high_n_s8( |
a -> Vn.16B 0 <= n <= 7 |
SSHLL2 Vd.8H,Vn.16B,#n |
Vd.8H -> result |
A64 |
int32x4_t vshll_high_n_s16( |
a -> Vn.8H 0 <= n <= 15 |
SSHLL2 Vd.4S,Vn.8H,#n |
Vd.4S -> result |
A64 |
int64x2_t vshll_high_n_s32( |
a -> Vn.4S 0 <= n <= 31 |
SSHLL2 Vd.2D,Vn.4S,#n |
Vd.2D -> result |
A64 |
uint16x8_t vshll_high_n_u8( |
a -> Vn.16B 0 <= n <= 7 |
USHLL2 Vd.8H,Vn.16B,#n |
Vd.8H -> result |
A64 |
uint32x4_t vshll_high_n_u16( |
a -> Vn.8H 0 <= n <= 15 |
USHLL2 Vd.4S,Vn.8H,#n |
Vd.4S -> result |
A64 |
uint64x2_t vshll_high_n_u32( |
a -> Vn.4S 0 <= n <= 31 |
USHLL2 Vd.2D,Vn.4S,#n |
Vd.2D -> result |
A64 |
int16x8_t vshll_n_s8( |
a -> Vn.8B n == 8 |
SHLL Vd.8H,Vn.8B,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x4_t vshll_n_s16( |
a -> Vn.4H n == 16 |
SHLL Vd.4S,Vn.4H,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x2_t vshll_n_s32( |
a -> Vn.2S n == 32 |
SHLL Vd.2D,Vn.2S,#n |
Vd.2D -> result |
v7/A32/A64 |
uint16x8_t vshll_n_u8( |
a -> Vn.8B n == 8 |
SHLL Vd.8H,Vn.8B,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x4_t vshll_n_u16( |
a -> Vn.4H n == 16 |
SHLL Vd.4S,Vn.4H,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x2_t vshll_n_u32( |
a -> Vn.2S n == 32 |
SHLL Vd.2D,Vn.2S,#n |
Vd.2D -> result |
v7/A32/A64 |
int16x8_t vshll_high_n_s8( |
a -> Vn.16B n == 8 |
SHLL2 Vd.8H,Vn.16B,#n |
Vd.8H -> result |
A64 |
int32x4_t vshll_high_n_s16( |
a -> Vn.8H n == 16 |
SHLL2 Vd.4S,Vn.8H,#n |
Vd.4S -> result |
A64 |
int64x2_t vshll_high_n_s32( |
a -> Vn.4S n == 32 |
SHLL2 Vd.2D,Vn.4S,#n |
Vd.2D -> result |
A64 |
uint16x8_t vshll_high_n_u8( |
a -> Vn.16B n == 8 |
SHLL2 Vd.8H,Vn.16B,#n |
Vd.8H -> result |
A64 |
uint32x4_t vshll_high_n_u16( |
a -> Vn.8H n == 16 |
SHLL2 Vd.4S,Vn.8H,#n |
Vd.4S -> result |
A64 |
uint64x2_t vshll_high_n_u32( |
a -> Vn.4S n == 32 |
SHLL2 Vd.2D,Vn.4S,#n |
Vd.2D -> result |
A64 |
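The widening shifts double the element size as they shift, and the `_high` variants (A64) consume the upper half of a full-width vector without a separate extraction step. A minimal sketch; the helper name and the scale factor are illustrative:

```c
#include <arm_neon.h>

/* Widen 16 bytes to 16 halfwords, scaling by 2^4 on the way. */
static inline void widen_scaled(uint8x16_t bytes, uint16x8_t *lo, uint16x8_t *hi)
{
    *lo = vshll_n_u8(vget_low_u8(bytes), 4);  /* USHLL  Vd.8H,Vn.8B,#4  */
    *hi = vshll_high_n_u8(bytes, 4);          /* USHLL2 Vd.8H,Vn.16B,#4 */
}
```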
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vsli_n_s8( |
a -> Vd.8B b -> Vn.8B 0 <= n <= 7 |
SLI Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vsliq_n_s8( |
a -> Vd.16B b -> Vn.16B 0 <= n <= 7 |
SLI Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vsli_n_s16( |
a -> Vd.4H b -> Vn.4H 0 <= n <= 15 |
SLI Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vsliq_n_s16( |
a -> Vd.8H b -> Vn.8H 0 <= n <= 15 |
SLI Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vsli_n_s32( |
a -> Vd.2S b -> Vn.2S 0 <= n <= 31 |
SLI Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vsliq_n_s32( |
a -> Vd.4S b -> Vn.4S 0 <= n <= 31 |
SLI Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vsli_n_s64( |
a -> Dd b -> Dn 0 <= n <= 63 |
SLI Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vsliq_n_s64( |
a -> Vd.2D b -> Vn.2D 0 <= n <= 63 |
SLI Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vsli_n_u8( |
a -> Vd.8B b -> Vn.8B 0 <= n <= 7 |
SLI Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vsliq_n_u8( |
a -> Vd.16B b -> Vn.16B 0 <= n <= 7 |
SLI Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vsli_n_u16( |
a -> Vd.4H b -> Vn.4H 0 <= n <= 15 |
SLI Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vsliq_n_u16( |
a -> Vd.8H b -> Vn.8H 0 <= n <= 15 |
SLI Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vsli_n_u32( |
a -> Vd.2S b -> Vn.2S 0 <= n <= 31 |
SLI Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vsliq_n_u32( |
a -> Vd.4S b -> Vn.4S 0 <= n <= 31 |
SLI Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vsli_n_u64( |
a -> Dd b -> Dn 0 <= n <= 63 |
SLI Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vsliq_n_u64( |
a -> Vd.2D b -> Vn.2D 0 <= n <= 63 |
SLI Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
poly64x1_t vsli_n_p64( |
a -> Dd b -> Dn 0 <= n <= 63 |
SLI Dd,Dn,#n |
Dd -> result |
A32/A64 |
poly64x2_t vsliq_n_p64( |
a -> Vd.2D b -> Vn.2D 0 <= n <= 63 |
SLI Vd.2D,Vn.2D,#n |
Vd.2D -> result |
A32/A64 |
poly8x8_t vsli_n_p8( |
a -> Vd.8B b -> Vn.8B 0 <= n <= 7 |
SLI Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
poly8x16_t vsliq_n_p8( |
a -> Vd.16B b -> Vn.16B 0 <= n <= 7 |
SLI Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
poly16x4_t vsli_n_p16( |
a -> Vd.4H b -> Vn.4H 0 <= n <= 15 |
SLI Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
poly16x8_t vsliq_n_p16( |
a -> Vd.8H b -> Vn.8H 0 <= n <= 15 |
SLI Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int64_t vslid_n_s64( |
a -> Dd b -> Dn 0 <= n <= 63 |
SLI Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vslid_n_u64( |
a -> Dd b -> Dn 0 <= n <= 63 |
SLI Dd,Dn,#n |
Dd -> result |
A64 |
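Shift-left-insert shifts the second operand left by `#n` and inserts it into the first, leaving the low `n` bits of the first operand untouched, which makes it useful for packing bit fields. A minimal sketch:

```c
#include <arm_neon.h>

/* Place a 4-bit field above an existing 4-bit field in each byte lane:
   result = (high_nibbles << 4) with the low 4 bits of low_nibbles kept. */
static inline uint8x16_t pack_nibbles(uint8x16_t low_nibbles, uint8x16_t high_nibbles)
{
    return vsliq_n_u8(low_nibbles, high_nibbles, 4);  /* SLI Vd.16B,Vn.16B,#4 */
}
```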
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vshr_n_s8( |
a -> Vn.8B 1 <= n <= 8 |
SSHR Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vshrq_n_s8( |
a -> Vn.16B 1 <= n <= 8 |
SSHR Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vshr_n_s16( |
a -> Vn.4H 1 <= n <= 16 |
SSHR Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vshrq_n_s16( |
a -> Vn.8H 1 <= n <= 16 |
SSHR Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vshr_n_s32( |
a -> Vn.2S 1 <= n <= 32 |
SSHR Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vshrq_n_s32( |
a -> Vn.4S 1 <= n <= 32 |
SSHR Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vshr_n_s64( |
a -> Dn 1 <= n <= 64 |
SSHR Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vshrq_n_s64( |
a -> Vn.2D 1 <= n <= 64 |
SSHR Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vshr_n_u8( |
a -> Vn.8B 1 <= n <= 8 |
USHR Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vshrq_n_u8( |
a -> Vn.16B 1 <= n <= 8 |
USHR Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vshr_n_u16( |
a -> Vn.4H 1 <= n <= 16 |
USHR Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vshrq_n_u16( |
a -> Vn.8H 1 <= n <= 16 |
USHR Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vshr_n_u32( |
a -> Vn.2S 1 <= n <= 32 |
USHR Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vshrq_n_u32( |
a -> Vn.4S 1 <= n <= 32 |
USHR Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vshr_n_u64( |
a -> Dn 1 <= n <= 64 |
USHR Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vshrq_n_u64( |
a -> Vn.2D 1 <= n <= 64 |
USHR Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int64_t vshrd_n_s64( |
a -> Dn 1 <= n <= 64 |
SSHR Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vshrd_n_u64( |
a -> Dn 1 <= n <= 64 |
USHR Dd,Dn,#n |
Dd -> result |
A64 |
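The immediate right shifts are arithmetic for signed types (SSHR, sign bits shifted in) and logical for unsigned types (USHR, zeros shifted in); the shift amount must be a constant between 1 and the element width. A minimal sketch:

```c
#include <arm_neon.h>

/* Halve each lane, preserving the sign of negative values. */
static inline int16x8_t halve(int16x8_t v)
{
    return vshrq_n_s16(v, 1);   /* SSHR Vd.8H,Vn.8H,#1 */
}
```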
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vrshr_n_s8( |
a -> Vn.8B 1 <= n <= 8 |
SRSHR Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vrshrq_n_s8( |
a -> Vn.16B 1 <= n <= 8 |
SRSHR Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vrshr_n_s16( |
a -> Vn.4H 1 <= n <= 16 |
SRSHR Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vrshrq_n_s16( |
a -> Vn.8H 1 <= n <= 16 |
SRSHR Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vrshr_n_s32( |
a -> Vn.2S 1 <= n <= 32 |
SRSHR Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vrshrq_n_s32( |
a -> Vn.4S 1 <= n <= 32 |
SRSHR Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vrshr_n_s64( |
a -> Dn 1 <= n <= 64 |
SRSHR Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vrshrq_n_s64( |
a -> Vn.2D 1 <= n <= 64 |
SRSHR Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vrshr_n_u8( |
a -> Vn.8B 1 <= n <= 8 |
URSHR Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vrshrq_n_u8( |
a -> Vn.16B 1 <= n <= 8 |
URSHR Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vrshr_n_u16( |
a -> Vn.4H 1 <= n <= 16 |
URSHR Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vrshrq_n_u16( |
a -> Vn.8H 1 <= n <= 16 |
URSHR Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vrshr_n_u32( |
a -> Vn.2S 1 <= n <= 32 |
URSHR Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrshrq_n_u32( |
a -> Vn.4S 1 <= n <= 32 |
URSHR Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vrshr_n_u64( |
a -> Dn 1 <= n <= 64 |
URSHR Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vrshrq_n_u64( |
a -> Vn.2D 1 <= n <= 64 |
URSHR Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int64_t vrshrd_n_s64( |
a -> Dn 1 <= n <= 64 |
SRSHR Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vrshrd_n_u64( |
a -> Dn 1 <= n <= 64 |
URSHR Dd,Dn,#n |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vsra_n_s8( |
a -> Vd.8B b -> Vn.8B 1 <= n <= 8 |
SSRA Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vsraq_n_s8( |
a -> Vd.16B b -> Vn.16B 1 <= n <= 8 |
SSRA Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vsra_n_s16( |
a -> Vd.4H b -> Vn.4H 1 <= n <= 16 |
SSRA Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vsraq_n_s16( |
a -> Vd.8H b -> Vn.8H 1 <= n <= 16 |
SSRA Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vsra_n_s32( |
a -> Vd.2S b -> Vn.2S 1 <= n <= 32 |
SSRA Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vsraq_n_s32( |
a -> Vd.4S b -> Vn.4S 1 <= n <= 32 |
SSRA Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vsra_n_s64( |
a -> Dd b -> Dn 1 <= n <= 64 |
SSRA Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vsraq_n_s64( |
a -> Vd.2D b -> Vn.2D 1 <= n <= 64 |
SSRA Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vsra_n_u8( |
a -> Vd.8B b -> Vn.8B 1 <= n <= 8 |
USRA Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vsraq_n_u8( |
a -> Vd.16B b -> Vn.16B 1 <= n <= 8 |
USRA Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vsra_n_u16( |
a -> Vd.4H b -> Vn.4H 1 <= n <= 16 |
USRA Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vsraq_n_u16( |
a -> Vd.8H b -> Vn.8H 1 <= n <= 16 |
USRA Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vsra_n_u32( |
a -> Vd.2S b -> Vn.2S 1 <= n <= 32 |
USRA Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vsraq_n_u32( |
a -> Vd.4S b -> Vn.4S 1 <= n <= 32 |
USRA Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vsra_n_u64( |
a -> Dd b -> Dn 1 <= n <= 64 |
USRA Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vsraq_n_u64( |
a -> Vd.2D b -> Vn.2D 1 <= n <= 64 |
USRA Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int64_t vsrad_n_s64( |
a -> Dd b -> Dn 1 <= n <= 64 |
SSRA Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vsrad_n_u64( |
a -> Dd b -> Dn 1 <= n <= 64 |
USRA Dd,Dn,#n |
Dd -> result |
A64 |
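Shift-right-accumulate adds the shifted second operand into the first, so a scaled-down term can be accumulated in a single instruction. A minimal sketch; the shift amount is illustrative:

```c
#include <arm_neon.h>

/* acc += v >> 16, per lane, with zeros shifted in. */
static inline uint32x4_t add_high_bits(uint32x4_t acc, uint32x4_t v)
{
    return vsraq_n_u32(acc, v, 16);  /* USRA Vd.4S,Vn.4S,#16 */
}
```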
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vrsra_n_s8( |
a -> Vd.8B b -> Vn.8B 1 <= n <= 8 |
SRSRA Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
int8x16_t vrsraq_n_s8( |
a -> Vd.16B b -> Vn.16B 1 <= n <= 8 |
SRSRA Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
int16x4_t vrsra_n_s16( |
a -> Vd.4H b -> Vn.4H 1 <= n <= 16 |
SRSRA Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
int16x8_t vrsraq_n_s16( |
a -> Vd.8H b -> Vn.8H 1 <= n <= 16 |
SRSRA Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
int32x2_t vrsra_n_s32( |
a -> Vd.2S b -> Vn.2S 1 <= n <= 32 |
SRSRA Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
int32x4_t vrsraq_n_s32( |
a -> Vd.4S b -> Vn.4S 1 <= n <= 32 |
SRSRA Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
int64x1_t vrsra_n_s64( |
a -> Dd b -> Dn 1 <= n <= 64 |
SRSRA Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
int64x2_t vrsraq_n_s64( |
a -> Vd.2D b -> Vn.2D 1 <= n <= 64 |
SRSRA Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
uint8x8_t vrsra_n_u8( |
a -> Vd.8B b -> Vn.8B 1 <= n <= 8 |
URSRA Vd.8B,Vn.8B,#n |
Vd.8B -> result |
v7/A32/A64 |
uint8x16_t vrsraq_n_u8( |
a -> Vd.16B b -> Vn.16B 1 <= n <= 8 |
URSRA Vd.16B,Vn.16B,#n |
Vd.16B -> result |
v7/A32/A64 |
uint16x4_t vrsra_n_u16( |
a -> Vd.4H b -> Vn.4H 1 <= n <= 16 |
URSRA Vd.4H,Vn.4H,#n |
Vd.4H -> result |
v7/A32/A64 |
uint16x8_t vrsraq_n_u16( |
a -> Vd.8H b -> Vn.8H 1 <= n <= 16 |
URSRA Vd.8H,Vn.8H,#n |
Vd.8H -> result |
v7/A32/A64 |
uint32x2_t vrsra_n_u32( |
a -> Vd.2S b -> Vn.2S 1 <= n <= 32 |
URSRA Vd.2S,Vn.2S,#n |
Vd.2S -> result |
v7/A32/A64 |
uint32x4_t vrsraq_n_u32( |
a -> Vd.4S b -> Vn.4S 1 <= n <= 32 |
URSRA Vd.4S,Vn.4S,#n |
Vd.4S -> result |
v7/A32/A64 |
uint64x1_t vrsra_n_u64( |
a -> Dd b -> Dn 1 <= n <= 64 |
URSRA Dd,Dn,#n |
Dd -> result |
v7/A32/A64 |
uint64x2_t vrsraq_n_u64( |
a -> Vd.2D b -> Vn.2D 1 <= n <= 64 |
URSRA Vd.2D,Vn.2D,#n |
Vd.2D -> result |
v7/A32/A64 |
int64_t vrsrad_n_s64( |
a -> Dd b -> Dn 1 <= n <= 64 |
SRSRA Dd,Dn,#n |
Dd -> result |
A64 |
uint64_t vrsrad_n_u64( |
a -> Dd b -> Dn 1 <= n <= 64 |
URSRA Dd,Dn,#n |
Dd -> result |
A64 |
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
int8x8_t vshrn_n_s16( |
a -> Vn.8H 1 <= n <= 8 |
SHRN Vd.8B,Vn.8H,#n |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vshrn_n_s32( |
a -> Vn.4S 1 <= n <= 16 |
SHRN Vd.4H,Vn.4S,#n |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vshrn_n_s64( |
a -> Vn.2D 1 <= n <= 32 |
SHRN Vd.2S,Vn.2D,#n |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vshrn_n_u16( |
a -> Vn.8H 1 <= n <= 8 |
SHRN Vd.8B,Vn.8H,#n |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vshrn_n_u32( |
a -> Vn.4S 1 <= n <= 16 |
SHRN Vd.4H,Vn.4S,#n |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vshrn_n_u64( |
a -> Vn.2D 1 <= n <= 32 |
SHRN Vd.2S,Vn.2D,#n |
Vd.2S -> result |
v7/A32/A64 |
int8x16_t vshrn_high_n_s16( |
r -> Vd.8B a -> Vn.8H 1 <= n <= 8 |
SHRN2 Vd.16B,Vn.8H,#n |
Vd.16B -> result |
A64 |
int16x8_t vshrn_high_n_s32( |
r -> Vd.4H a -> Vn.4S 1 <= n <= 16 |
SHRN2 Vd.8H,Vn.4S,#n |
Vd.8H -> result |
A64 |
int32x4_t vshrn_high_n_s64( |
r -> Vd.2S a -> Vn.2D 1 <= n <= 32 |
SHRN2 Vd.4S,Vn.2D,#n |
Vd.4S -> result |
A64 |
uint8x16_t vshrn_high_n_u16( |
r -> Vd.8B a -> Vn.8H 1 <= n <= 8 |
SHRN2 Vd.16B,Vn.8H,#n |
Vd.16B -> result |
A64 |
uint16x8_t vshrn_high_n_u32( |
r -> Vd.4H a -> Vn.4S 1 <= n <= 16 |
SHRN2 Vd.8H,Vn.4S,#n |
Vd.8H -> result |
A64 |
uint32x4_t vshrn_high_n_u64( |
r -> Vd.2S a -> Vn.2D 1 <= n <= 32 |
SHRN2 Vd.4S,Vn.2D,#n |
Vd.4S -> result |
A64 |
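The narrowing shifts halve the element size after shifting, and the `_high` variants (A64) write the upper half of the narrowed result so two source vectors can be packed without a separate combine step. A minimal sketch; the helper name is our own:

```c
#include <arm_neon.h>

/* Narrow two uint16x8_t vectors to one uint8x16_t, keeping only the
   top byte of each halfword lane. */
static inline uint8x16_t narrow_high_bytes(uint16x8_t lo, uint16x8_t hi)
{
    uint8x8_t low = vshrn_n_u16(lo, 8);       /* SHRN  Vd.8B,Vn.8H,#8  */
    return vshrn_high_n_u16(low, hi, 8);      /* SHRN2 Vd.16B,Vn.8H,#8 */
}
```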
Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures |
---|---|---|---|---|
uint8x8_t vqshrun_n_s16( |
a -> Vn.8H 1 <= n <= 8 |
SQSHRUN Vd.8B,Vn.8H,#n |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vqshrun_n_s32( |
a -> Vn.4S 1 <= n <= 16 |
SQSHRUN Vd.4H,Vn.4S,#n |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vqshrun_n_s64( |
a -> Vn.2D 1 <= n <= 32 |
SQSHRUN Vd.2S,Vn.2D,#n |
Vd.2S -> result |
v7/A32/A64 |
uint8_t vqshrunh_n_s16( |
a -> Hn 1 <= n <= 8 |
SQSHRUN Bd,Hn,#n |
Bd -> result |
A64 |
uint16_t vqshruns_n_s32( |
a -> Sn 1 <= n <= 16 |
SQSHRUN Hd,Sn,#n |
Hd -> result |
A64 |
uint32_t vqshrund_n_s64( |
a -> Dn 1 <= n <= 32 |
SQSHRUN Sd,Dn,#n |
Sd -> result |
A64 |
uint8x16_t vqshrun_high_n_s16( |
r -> Vd.8B a -> Vn.8H 1 <= n <= 8 |
SQSHRUN2 Vd.16B,Vn.8H,#n |
Vd.16B -> result |
A64 |
uint16x8_t vqshrun_high_n_s32( |
r -> Vd.4H a -> Vn.4S 1 <= n <= 16 |
SQSHRUN2 Vd.8H,Vn.4S,#n |
Vd.8H -> result |
A64 |
uint32x4_t vqshrun_high_n_s64( |
r -> Vd.2S a -> Vn.2D 1 <= n <= 32 |
SQSHRUN2 Vd.4S,Vn.2D,#n |
Vd.4S -> result |
A64 |
int8x8_t vqshrn_n_s16( |
a -> Vn.8H 1 <= n <= 8 |
SQSHRN Vd.8B,Vn.8H,#n |
Vd.8B -> result |
v7/A32/A64 |
int16x4_t vqshrn_n_s32( |
a -> Vn.4S 1 <= n <= 16 |
SQSHRN Vd.4H,Vn.4S,#n |
Vd.4H -> result |
v7/A32/A64 |
int32x2_t vqshrn_n_s64( |
a -> Vn.2D 1 <= n <= 32 |
SQSHRN Vd.2S,Vn.2D,#n |
Vd.2S -> result |
v7/A32/A64 |
uint8x8_t vqshrn_n_u16( |
a -> Vn.8H 1 <= n <= 8 |
UQSHRN Vd.8B,Vn.8H,#n |
Vd.8B -> result |
v7/A32/A64 |
uint16x4_t vqshrn_n_u32( |
a -> Vn.4S 1 <= n <= 16 |
UQSHRN Vd.4H,Vn.4S,#n |
Vd.4H -> result |
v7/A32/A64 |
uint32x2_t vqshrn_n_u64( |
a -> Vn.2D 1 <= n <= 32 |
UQSHRN Vd.2S,Vn.2D,#n |
Vd.2S -> result |
v7/A32/A64 |
int8_t vqshrnh_n_s16( |
a -> Hn 1 <= n <= 8 |
SQSHRN Bd,Hn,#n |
Bd -> result |
A64 |
int16_t vqshrns_n_s32( |
a -> Sn 1 <= n <= 16 |
SQSHRN Hd,Sn,#n |
Hd -> result |
A64 |
int32_t vqshrnd_n_s64( |
a -> Dn 1 <= n <= 32 |
SQSHRN Sd,Dn,#n |
Sd -> result |
A64 |
uint8_t vqshrnh_n_u16( |
a -> Hn 1 <= n <= 8 |
UQSHRN Bd,Hn,#n |
Bd -> result |
A64 |
uint16_t vqshrns_n_u32( |
a -> Sn 1 <= n <= 16 |
UQSHRN Hd,Sn,#n |
Hd -> result |
A64 |
uint32_t vqshrnd_n_u64( |
a -> Dn |