author    Paul Walker <paul.walker@arm.com>    2023-01-18 16:33:42 +0000
committer Tom Stellard <tstellar@redhat.com>   2023-02-07 21:37:09 -0800
commit    434575c026c81319b393f64047025b54e69e24c2 (patch)
tree      a4677876c658dea2cd7c51e832240124f25afbe2
parent    aa84ee5cc17e19bfd4f77a55428eca01cf9385d7 (diff)
download  llvmorg-16.0.0-rc2.tar.gz

[SVE] Fix incorrect lowering of predicate permute builtins. (tag: llvmorg-16.0.0-rc2)
When lowering predicate permute builtins we incorrectly assume only the
typically "active" bits for the specified element type play a role, with
all other bits zeroed. This is not the case, because all bits are
significant; the element type merely specifies how they are grouped:

  b8  - permute using a block size of 1 bit
  b16 - permute using a block size of 2 bits
  b32 - permute using a block size of 4 bits
  b64 - permute using a block size of 8 bits

The affected builtins are svrev, svtrn1, svtrn2, svuzp1, svuzp2, svzip1
and svzip2.

This patch adds new intrinsics to support these operations and changes
the builtin lowering code to emit them. The b8 case remains unchanged
because for that operation the existing intrinsics work as required.
Their support for the other predicate types has been kept, if only as a
way to test the correctness of the matching ISD nodes that code
generation relies on.

Differential Revision: https://reviews.llvm.org/D142065

(cherry picked from commit 26b79ca3fafc525225090646d42837368b3763c3)
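To make the failure mode concrete, here is a minimal compile-only C sketch (not part of the patch; assumes an SVE-enabled toolchain) of why every predicate bit is significant:

```c
#include <arm_sve.h>

// Minimal sketch, assuming an SVE target. svptrue_b8() sets every
// predicate bit, including the "odd" bits that a .H-granule predicate
// normally leaves clear. svrev_b16 must reverse 2-bit blocks verbatim,
// so the result stays all-ones. The old lowering round-tripped through
// an <vscale x 8 x i1> predicate and silently zeroed those bits.
int rev_b16_preserves_all_bits(void) {
  svbool_t all = svptrue_b8();
  svbool_t rev = svrev_b16(all);
  // rev XOR all is all-false iff no predicate bit was lost.
  return !svptest_any(all, sveor_z(all, rev, all));
}
```

With the fixed lowering this returns 1; the old lowering could return 0 because the odd predicate bits were dropped.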
-rw-r--r--  clang/include/clang/Basic/arm_sve.td                        |  41
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c    |  36
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c   |  42
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c   |  42
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c   |  42
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c   |  42
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c   |  42
-rw-r--r--  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c   |  42
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td                   |  31
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td              |  14
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td                  |  52
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll     | 349
12 files changed, 474 insertions(+), 301 deletions(-)
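For orientation before the diff, a hedged C sketch of the fixed builtin surface (names taken from the arm_sve.td changes below; not itself part of the patch): each _bN suffix permutes the full svbool_t in blocks of N/8 predicate bits, never zeroing the bits in between.

```c
#include <arm_sve.h>

// Sketch of the per-suffix block sizes the builtins operate on.
svbool_t rev_pairs(svbool_t p)              { return svrev_b16(p);     } // 2-bit blocks
svbool_t trn_quads(svbool_t a, svbool_t b)  { return svtrn1_b32(a, b); } // 4-bit blocks
svbool_t zip_octets(svbool_t a, svbool_t b) { return svzip1_b64(a, b); } // 8-bit blocks
```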
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index e910036117b7..e547bbd34b5e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1249,16 +1249,37 @@ def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve
def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2">;
}
-def SVREV_B : SInst<"svrev_{d}", "PP", "PcPsPiPl", MergeNone, "aarch64_sve_rev">;
-def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">;
-def SVTRN1_B : SInst<"svtrn1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn1">;
-def SVTRN2_B : SInst<"svtrn2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn2">;
-def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi">;
-def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo">;
-def SVUZP1_B : SInst<"svuzp1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp1">;
-def SVUZP2_B : SInst<"svuzp2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp2">;
-def SVZIP1_B : SInst<"svzip1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip1">;
-def SVZIP2_B : SInst<"svzip2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip2">;
+def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev">;
+def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone]>;
+def SVREV_B32 : SInst<"svrev_b32", "PP", "Pc", MergeNone, "aarch64_sve_rev_b32", [IsOverloadNone]>;
+def SVREV_B64 : SInst<"svrev_b64", "PP", "Pc", MergeNone, "aarch64_sve_rev_b64", [IsOverloadNone]>;
+def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">;
+def SVTRN1_B8 : SInst<"svtrn1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn1">;
+def SVTRN1_B16 : SInst<"svtrn1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b16", [IsOverloadNone]>;
+def SVTRN1_B32 : SInst<"svtrn1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b32", [IsOverloadNone]>;
+def SVTRN1_B64 : SInst<"svtrn1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b64", [IsOverloadNone]>;
+def SVTRN2_B8 : SInst<"svtrn2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn2">;
+def SVTRN2_B16 : SInst<"svtrn2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b16", [IsOverloadNone]>;
+def SVTRN2_B32 : SInst<"svtrn2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b32", [IsOverloadNone]>;
+def SVTRN2_B64 : SInst<"svtrn2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b64", [IsOverloadNone]>;
+def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi">;
+def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo">;
+def SVUZP1_B8 : SInst<"svuzp1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1">;
+def SVUZP1_B16 : SInst<"svuzp1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b16", [IsOverloadNone]>;
+def SVUZP1_B32 : SInst<"svuzp1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b32", [IsOverloadNone]>;
+def SVUZP1_B64 : SInst<"svuzp1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b64", [IsOverloadNone]>;
+def SVUZP2_B8 : SInst<"svuzp2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2">;
+def SVUZP2_B16 : SInst<"svuzp2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b16", [IsOverloadNone]>;
+def SVUZP2_B32 : SInst<"svuzp2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b32", [IsOverloadNone]>;
+def SVUZP2_B64 : SInst<"svuzp2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b64", [IsOverloadNone]>;
+def SVZIP1_B8 : SInst<"svzip1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip1">;
+def SVZIP1_B16 : SInst<"svzip1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b16", [IsOverloadNone]>;
+def SVZIP1_B32 : SInst<"svzip1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b32", [IsOverloadNone]>;
+def SVZIP1_B64 : SInst<"svzip1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b64", [IsOverloadNone]>;
+def SVZIP2_B8 : SInst<"svzip2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip2">;
+def SVZIP2_B16 : SInst<"svzip2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b16", [IsOverloadNone]>;
+def SVZIP2_B32 : SInst<"svzip2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b32", [IsOverloadNone]>;
+def SVZIP2_B64 : SInst<"svzip2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b64", [IsOverloadNone]>;
////////////////////////////////////////////////////////////////////////////////
// Predicate creation
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
index ac728eb3ab0c..dbd6149d7899 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
@@ -196,17 +196,13 @@ svbool_t test_svrev_b8(svbool_t op)
// CHECK-LABEL: @test_svrev_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_b16u10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> [[TMP0]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svrev_b16(svbool_t op)
{
@@ -215,17 +211,13 @@ svbool_t test_svrev_b16(svbool_t op)
// CHECK-LABEL: @test_svrev_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_b32u10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> [[TMP0]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svrev_b32(svbool_t op)
{
@@ -234,17 +226,13 @@ svbool_t test_svrev_b32(svbool_t op)
// CHECK-LABEL: @test_svrev_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svrev_b64u10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> [[TMP0]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svrev_b64(svbool_t op)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
index 15998789bc8e..98d37f23a0af 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
@@ -196,19 +196,13 @@ svbool_t test_svtrn1_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn1_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn1_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn1_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn1_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn1_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn1_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn1_b64(svbool_t op1, svbool_t op2)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
index eeb90bef96bb..b228a138bb08 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
@@ -196,19 +196,13 @@ svbool_t test_svtrn2_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn2_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn2_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn2_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn2_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svtrn2_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svtrn2_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svtrn2_b64(svbool_t op1, svbool_t op2)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
index de3fcf8881de..3672e566cb18 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
@@ -196,19 +196,13 @@ svbool_t test_svuzp1_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp1_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp1_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp1_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp1_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp1_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp1_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp1_b64(svbool_t op1, svbool_t op2)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
index 46afb0f8a588..d160f8889805 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
@@ -196,19 +196,13 @@ svbool_t test_svuzp2_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp2_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp2_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp2_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp2_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svuzp2_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svuzp2_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svuzp2_b64(svbool_t op1, svbool_t op2)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
index c355cd0447d7..5a66aee828d8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
@@ -196,19 +196,13 @@ svbool_t test_svzip1_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip1_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip1_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip1_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip1_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip1_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip1_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip1_b64(svbool_t op1, svbool_t op2)
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
index f9be817daf17..bdd89e7a2e8d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
@@ -196,19 +196,13 @@ svbool_t test_svzip2_b8(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip2_b16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip2_b16u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2)
{
@@ -217,19 +211,13 @@ svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip2_b32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip2_b32u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2)
{
@@ -238,19 +226,13 @@ svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2)
// CHECK-LABEL: @test_svzip2_b64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_svzip2_b64u10__SVBool_tu10__SVBool_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[OP2:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
-// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svzip2_b64(svbool_t op1, svbool_t op2)
{
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b1f85563195f..92a198befbe4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1391,6 +1391,16 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+class AdvSIMD_SVE_2SVBoolArg_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
+ [llvm_nxv16i1_ty],
+ [IntrNoMem]>;
+
+class AdvSIMD_SVE_3SVBoolArg_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
+ [llvm_nxv16i1_ty, llvm_nxv16i1_ty],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_Reduce_Intrinsic
: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1836,22 +1846,43 @@ def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_lastb : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_rev : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_rev_b16 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
+def int_aarch64_sve_rev_b32 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
+def int_aarch64_sve_rev_b64 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
def int_aarch64_sve_splice : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_tbl : AdvSIMD_SVE_TBL_Intrinsic;
def int_aarch64_sve_trn1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_trn1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_trn2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_trn2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_trn1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_trn2q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_uzp1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzp1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_uzp2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzp2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_uzp1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uzp2q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_zip1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_zip2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_zip1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_zip2q : AdvSIMD_2VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6a42c4ff31dc..12dc30a2818b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -839,7 +839,7 @@ let Predicates = [HasSVEorSME] in {
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", AArch64revh_mt>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", AArch64revw_mt>;
- defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
+ defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse, int_aarch64_sve_rev_b16, int_aarch64_sve_rev_b32, int_aarch64_sve_rev_b64>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
@@ -1672,12 +1672,12 @@ let Predicates = [HasSVEorSME] in {
defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>;
defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>;
- defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
- defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
- defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
- defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>;
- defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
- defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
+ defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1, int_aarch64_sve_zip1_b16, int_aarch64_sve_zip1_b32, int_aarch64_sve_zip1_b64>;
+ defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2, int_aarch64_sve_zip2_b16, int_aarch64_sve_zip2_b32, int_aarch64_sve_zip2_b64>;
+ defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1, int_aarch64_sve_uzp1_b16, int_aarch64_sve_uzp1_b32, int_aarch64_sve_uzp1_b64>;
+ defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2, int_aarch64_sve_uzp2_b16, int_aarch64_sve_uzp2_b32, int_aarch64_sve_uzp2_b64>;
+ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1, int_aarch64_sve_trn1_b16, int_aarch64_sve_trn1_b32, int_aarch64_sve_trn1_b64>;
+ defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2, int_aarch64_sve_trn2_b16, int_aarch64_sve_trn2_b32, int_aarch64_sve_trn2_b64>;
// Extract lo/hi halves of legal predicate types.
def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 0))),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index cef8d41218e8..1f0626191c0d 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1448,11 +1448,12 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
-class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
+class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty,
+ SDPatternOperator op>
: I<(outs pprty:$Pd), (ins pprty:$Pn),
asm, "\t$Pd, $Pn",
"",
- []>, Sched<[]> {
+ [(set nxv16i1:$Pd, (op nxv16i1:$Pn))]>, Sched<[]> {
bits<4> Pd;
bits<4> Pn;
let Inst{31-24} = 0b00000101;
@@ -1463,16 +1464,18 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
let Inst{3-0} = Pd;
}
-multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> {
- def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>;
- def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>;
- def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>;
- def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>;
+multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator ir_op,
+ SDPatternOperator op_b16,
+ SDPatternOperator op_b32,
+ SDPatternOperator op_b64> {
+ def _B : sve_int_perm_reverse_p<0b00, asm, PPR8, ir_op>;
+ def _H : sve_int_perm_reverse_p<0b01, asm, PPR16, op_b16>;
+ def _S : sve_int_perm_reverse_p<0b10, asm, PPR32, op_b32>;
+ def _D : sve_int_perm_reverse_p<0b11, asm, PPR64, op_b64>;
- def : SVE_1_Op_Pat<nxv16i1, op, nxv16i1, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Pat<nxv8i1, op, nxv8i1, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Pat<nxv4i1, op, nxv4i1, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Pat<nxv2i1, op, nxv2i1, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv8i1, ir_op, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i1, ir_op, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2i1, ir_op, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
@@ -6327,10 +6330,11 @@ multiclass sve_mem_p_spill<string asm> {
//===----------------------------------------------------------------------===//
class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
- PPRRegOp pprty>
+ PPRRegOp pprty, SDPatternOperator op>
: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
asm, "\t$Pd, $Pn, $Pm",
- "", []>, Sched<[]> {
+ "",
+ [(set nxv16i1:$Pd, (op nxv16i1:$Pn, nxv16i1:$Pm))]>, Sched<[]> {
bits<4> Pd;
bits<4> Pm;
bits<4> Pn;
@@ -6347,16 +6351,18 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
}
multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>;
- def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>;
- def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>;
- def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>;
-
- def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
+ SDPatternOperator ir_op,
+ SDPatternOperator op_b16,
+ SDPatternOperator op_b32,
+ SDPatternOperator op_b64> {
+ def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8, ir_op>;
+ def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16, op_b16>;
+ def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32, op_b32>;
+ def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64, op_b64>;
+
+ def : SVE_2_Op_Pat<nxv8i1, ir_op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, ir_op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, ir_op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_punpk<bit opc, string asm>
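
The two-operand permute formats get the same treatment: the class-level pattern matches the supplied operator on nxv16i1, the multiclass threads ir_op plus the three block-sized operators through to the per-size instructions, and the _B pattern is again provided by the class itself. A fixed-width sketch of the b16 behaviour for trn1, interleaving the even-numbered 2-bit blocks of the two operands; illustrative only:

    #include <stdint.h>

    /* Illustrative scalar model: trn1.b16 pairs each even 2-bit block
     * of a with the same-numbered block of b. Fixed 16-bit width. */
    static uint16_t trn1_b16_model(uint16_t a, uint16_t b) {
      uint16_t r = 0;
      for (int i = 0; i < 8; i += 2) {
        r |= ((a >> (2 * i)) & 0x3u) << (2 * i);       /* even block from a */
        r |= ((b >> (2 * i)) & 0x3u) << (2 * (i + 1)); /* same block from b */
      }
      return r;
    }
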
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index 8dc0ab649b4d..411f92fbb152 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -942,8 +942,8 @@ define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
; REV
;
-define <vscale x 16 x i1> @rev_b8( <vscale x 16 x i1> %a) {
-; CHECK-LABEL: rev_b8:
+define <vscale x 16 x i1> @rev_nxv16i1(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: rev_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.b, p0.b
; CHECK-NEXT: ret
@@ -951,8 +951,8 @@ define <vscale x 16 x i1> @rev_b8( <vscale x 16 x i1> %a) {
ret <vscale x 16 x i1> %res
}
-define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
-; CHECK-LABEL: rev_b16:
+define <vscale x 8 x i1> @rev_nxv8i1(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: rev_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.h, p0.h
; CHECK-NEXT: ret
@@ -960,8 +960,8 @@ define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
ret <vscale x 8 x i1> %res
}
-define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: rev_b32:
+define <vscale x 4 x i1> @rev_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: rev_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.s, p0.s
; CHECK-NEXT: ret
@@ -969,8 +969,8 @@ define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
ret <vscale x 4 x i1> %res
}
-define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
-; CHECK-LABEL: rev_b64:
+define <vscale x 2 x i1> @rev_nxv2i1(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: rev_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.d, p0.d
; CHECK-NEXT: ret
@@ -978,7 +978,34 @@ define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
ret <vscale x 2 x i1> %res
}
-define <vscale x 16 x i8> @rev_i8( <vscale x 16 x i8> %a) {
+define <vscale x 16 x i1> @rev_b16(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: rev_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 16 x i1> @rev_b32(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: rev_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.s, p0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 16 x i1> @rev_b64(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: rev_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.d, p0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.b, z0.b
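
The renamed rev_nxvNi1 tests keep covering the element-typed aarch64_sve_rev intrinsic, while the added rev_b16/b32/b64 tests operate on full <vscale x 16 x i1> values and check that the new intrinsics still select rev on the .h/.s/.d predicate forms. At the source level these correspond to the affected ACLE builtins; a minimal sketch, assuming the usual arm_sve.h declarations:

    #include <arm_sve.h>

    /* Sketch: svrev_b16 permutes the whole svbool_t in 2-bit blocks
     * and, with this patch, lowers to llvm.aarch64.sve.rev.b16 rather
     * than round-tripping through a narrower predicate type. */
    svbool_t reverse_halfword_blocks(svbool_t p) {
      return svrev_b16(p);   /* selects: rev p0.h, p0.h */
    }
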
@@ -1354,8 +1381,8 @@ define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; TRN1
;
-define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: trn1_b8:
+define <vscale x 16 x i1> @trn1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -1364,8 +1391,8 @@ define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: trn1_b16:
+define <vscale x 8 x i1> @trn1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: trn1_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -1374,8 +1401,8 @@ define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: trn1_b32:
+define <vscale x 4 x i1> @trn1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: trn1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -1384,8 +1411,8 @@ define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: trn1_b64:
+define <vscale x 2 x i1> @trn1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: trn1_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -1394,6 +1421,36 @@ define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn1_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @trn1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn1_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn1 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @trn1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn1_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn1 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK: // %bb.0:
@@ -1508,8 +1565,8 @@ define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
; TRN2
;
-define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: trn2_b8:
+define <vscale x 16 x i1> @trn2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -1518,8 +1575,8 @@ define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: trn2_b16:
+define <vscale x 8 x i1> @trn2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: trn2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -1528,8 +1585,8 @@ define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: trn2_b32:
+define <vscale x 4 x i1> @trn2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: trn2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -1538,8 +1595,8 @@ define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: trn2_b64:
+define <vscale x 2 x i1> @trn2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: trn2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: trn2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -1548,6 +1605,36 @@ define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @trn2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn2_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn2 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @trn2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn2_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn2 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @trn2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: trn2_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: trn2 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK: // %bb.0:
@@ -1662,8 +1749,8 @@ define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
; UZP1
;
-define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: uzp1_b8:
+define <vscale x 16 x i1> @uzp1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -1672,8 +1759,8 @@ define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: uzp1_b16:
+define <vscale x 8 x i1> @uzp1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uzp1_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -1682,8 +1769,8 @@ define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: uzp1_b32:
+define <vscale x 4 x i1> @uzp1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uzp1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -1692,8 +1779,8 @@ define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: uzp1_b64:
+define <vscale x 2 x i1> @uzp1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uzp1_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -1702,6 +1789,36 @@ define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @uzp1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp1_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @uzp1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp1_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @uzp1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp1_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK: // %bb.0:
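
uzp1 gains the same b16/b32/b64 coverage. For reference, a fixed-width sketch of the b16 semantics, concatenating the even 2-bit blocks of the first operand with those of the second; illustrative only:

    #include <stdint.h>

    /* Illustrative scalar model: uzp1.b16 takes the even 2-bit blocks
     * of a, then the even 2-bit blocks of b. Fixed 16-bit width. */
    static uint16_t uzp1_b16_model(uint16_t a, uint16_t b) {
      uint16_t r = 0;
      for (int i = 0; i < 4; ++i) {
        r |= ((a >> (4 * i)) & 0x3u) << (2 * i);        /* even blocks of a */
        r |= ((b >> (4 * i)) & 0x3u) << (2 * (i + 4));  /* then of b */
      }
      return r;
    }
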
@@ -1816,8 +1933,8 @@ define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
; UZP2
;
-define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: uzp2_b8:
+define <vscale x 16 x i1> @uzp2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -1826,8 +1943,8 @@ define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: uzp2_b16:
+define <vscale x 8 x i1> @uzp2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: uzp2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -1836,8 +1953,8 @@ define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: uzp2_b32:
+define <vscale x 4 x i1> @uzp2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: uzp2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -1846,8 +1963,8 @@ define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: uzp2_b64:
+define <vscale x 2 x i1> @uzp2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: uzp2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -1856,6 +1973,36 @@ define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @uzp2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp2_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @uzp2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp2_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @uzp2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: uzp2_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp2_i8:
; CHECK: // %bb.0:
@@ -1970,8 +2117,8 @@ define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x d
; ZIP1
;
-define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: zip1_b8:
+define <vscale x 16 x i1> @zip1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -1980,8 +2127,8 @@ define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: zip1_b16:
+define <vscale x 8 x i1> @zip1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: zip1_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -1990,8 +2137,8 @@ define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: zip1_b32:
+define <vscale x 4 x i1> @zip1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: zip1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -2000,8 +2147,8 @@ define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: zip1_b64:
+define <vscale x 2 x i1> @zip1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: zip1_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -2010,6 +2157,36 @@ define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @zip1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip1_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @zip1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip1_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @zip1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip1_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip1_i8:
; CHECK: // %bb.0:
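
The zip1 tests follow the same scheme. A fixed-width sketch of its b16 semantics, interleaving the low-half 2-bit blocks of the two operands; illustrative only:

    #include <stdint.h>

    /* Illustrative scalar model: zip1.b16 interleaves blocks 0..3 of
     * a and b into the result. Fixed 16-bit width. */
    static uint16_t zip1_b16_model(uint16_t a, uint16_t b) {
      uint16_t r = 0;
      for (int i = 0; i < 4; ++i) {                  /* low half only */
        r |= ((a >> (2 * i)) & 0x3u) << (4 * i);     /* a block i -> 2i */
        r |= ((b >> (2 * i)) & 0x3u) << (4 * i + 2); /* b block i -> 2i+1 */
      }
      return r;
    }
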
@@ -2124,8 +2301,8 @@ define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x d
; ZIP2
;
-define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
-; CHECK-LABEL: zip2_b8:
+define <vscale x 16 x i1> @zip2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
@@ -2134,8 +2311,8 @@ define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 16 x i1> %out
}
-define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
-; CHECK-LABEL: zip2_b16:
+define <vscale x 8 x i1> @zip2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: zip2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
@@ -2144,8 +2321,8 @@ define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
ret <vscale x 8 x i1> %out
}
-define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
-; CHECK-LABEL: zip2_b32:
+define <vscale x 4 x i1> @zip2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: zip2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
@@ -2154,8 +2331,8 @@ define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
ret <vscale x 4 x i1> %out
}
-define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
-; CHECK-LABEL: zip2_b64:
+define <vscale x 2 x i1> @zip2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: zip2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
@@ -2164,6 +2341,36 @@ define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
ret <vscale x 2 x i1> %out
}
+define <vscale x 16 x i1> @zip2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip2_b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @zip2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip2_b32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @zip2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: zip2_b64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip2 p0.d, p0.d, p1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> %a,
+ <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %out
+}
+
define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK: // %bb.0:
@@ -2366,6 +2573,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1>)
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -2416,6 +2627,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
@@ -2432,6 +2647,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
@@ -2448,6 +2667,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
@@ -2464,6 +2687,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
@@ -2480,6 +2707,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
@@ -2496,6 +2727,10 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)