summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kerry McLaughlin <kerry.mclaughlin@arm.com> 2021-01-13 11:21:44 +0000
committer Kerry McLaughlin <kerry.mclaughlin@arm.com> 2021-01-13 12:24:54 +0000
commit 2170e0ee60db638175a8c57230d46fbaafa06d4c (patch)
tree 4e9e6c1ed414fb0a68ce8cda38fb3818d300aaa5
parent c6e341c89957db31432baffb72ee015f37d8c48d (diff)
download llvm-2170e0ee60db638175a8c57230d46fbaafa06d4c.tar.gz
[SVE][CodeGen] CTLZ, CTTZ & CTPOP operations (predicates)
Canonicalise the following operations in getNode() for predicate types:
 - CTLZ(Pred) -> bitwise_NOT(Pred)
 - CTTZ(Pred) -> bitwise_NOT(Pred)
 - CTPOP(Pred) -> Pred

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D94428
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll | 141
2 files changed, 150 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c4f6e89006c1..e080408bbe42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4796,6 +4796,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VSCALE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
break;
+ case ISD::CTPOP:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return Operand;
+ break;
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, Operand, Operand.getValueType());
+ break;
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
if (Operand.getValueType().getScalarType() == MVT::i1)
diff --git a/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll b/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll
new file mode 100644
index 000000000000..73c555d98943
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+;
+; CTPOP
+;
+
+define <vscale x 16 x i1> @ctpop_nxv16i1(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: ctpop_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.ctpop.nxv16i1(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @ctpop_nxv8i1(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: ctpop_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.ctpop.nxv8i1(<vscale x 8 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @ctpop_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: ctpop_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.ctpop.nxv4i1(<vscale x 4 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @ctpop_nxv2i1(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: ctpop_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.ctpop.nxv2i1(<vscale x 2 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+; CTLZ
+
+define <vscale x 16 x i1> @ctlz_nxv16i1(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: ctlz_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.ctlz.nxv16i1(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @ctlz_nxv8i1(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: ctlz_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.ctlz.nxv8i1(<vscale x 8 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @ctlz_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: ctlz_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.ctlz.nxv4i1(<vscale x 4 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @ctlz_nxv2i1(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: ctlz_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.ctlz.nxv2i1(<vscale x 2 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+; CTTZ
+
+define <vscale x 16 x i1> @cttz_nxv16i1(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: cttz_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.cttz.nxv16i1(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @cttz_nxv8i1(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: cttz_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.h
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.cttz.nxv8i1(<vscale x 8 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @cttz_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: cttz_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.cttz.nxv4i1(<vscale x 4 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @cttz_nxv2i1(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: cttz_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: not p0.b, p1/z, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.cttz.nxv2i1(<vscale x 2 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+declare <vscale x 16 x i1> @llvm.ctpop.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.ctpop.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 4 x i1> @llvm.ctpop.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 2 x i1> @llvm.ctpop.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 16 x i1> @llvm.ctlz.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.ctlz.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 4 x i1> @llvm.ctlz.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 2 x i1> @llvm.ctlz.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 16 x i1> @llvm.cttz.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.cttz.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 4 x i1> @llvm.cttz.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 2 x i1> @llvm.cttz.nxv2i1(<vscale x 2 x i1>)