diff options
author | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-01-13 11:21:44 +0000 |
---|---|---|
committer | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-01-13 12:24:54 +0000 |
commit | 2170e0ee60db638175a8c57230d46fbaafa06d4c (patch) | |
tree | 4e9e6c1ed414fb0a68ce8cda38fb3818d300aaa5 | |
parent | c6e341c89957db31432baffb72ee015f37d8c48d (diff) | |
download | llvm-2170e0ee60db638175a8c57230d46fbaafa06d4c.tar.gz |
[SVE][CodeGen] CTLZ, CTTZ & CTPOP operations (predicates)
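Canonicalise the following operations in getNode() for predicate types
(i.e. operands whose scalar element type is i1):
- CTLZ(Pred) -> bitwise_NOT(Pred)
- CTTZ(Pred) -> bitwise_NOT(Pred)
- CTPOP(Pred) -> Pred
Each element is a single bit, so it has exactly one leading/trailing zero when it is 0 and none when it is 1 (the bitwise NOT of the element), and its population count is the element itself.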
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D94428
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll | 141 |
2 files changed, 150 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c4f6e89006c1..e080408bbe42 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4796,6 +4796,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::VSCALE: assert(VT == Operand.getValueType() && "Unexpected VT!"); break; + case ISD::CTPOP: + if (Operand.getValueType().getScalarType() == MVT::i1) + return Operand; + break; + case ISD::CTLZ: + case ISD::CTTZ: + if (Operand.getValueType().getScalarType() == MVT::i1) + return getNOT(DL, Operand, Operand.getValueType()); + break; case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: if (Operand.getValueType().getScalarType() == MVT::i1) diff --git a/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll b/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll new file mode 100644 index 000000000000..73c555d98943 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-bit-counting-pred.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + +; +; CTPOP +; + +define <vscale x 16 x i1> @ctpop_nxv16i1(<vscale x 16 x i1> %a) { +; CHECK-LABEL: ctpop_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %res = call <vscale x 16 x i1> @llvm.ctpop.nxv16i1(<vscale x 16 x i1> %a) + ret <vscale x 16 x i1> %res +} + +define <vscale x 8 x i1> @ctpop_nxv8i1(<vscale x 8 x i1> %a) { +; CHECK-LABEL: ctpop_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %res = call <vscale x 8 x i1> @llvm.ctpop.nxv8i1(<vscale x 8 x i1> %a) + ret <vscale x 8 x i1> %res +} + +define <vscale x 4 x i1> @ctpop_nxv4i1(<vscale x 4 x i1> %a) { +; CHECK-LABEL: ctpop_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %res = call <vscale x 4 x i1> @llvm.ctpop.nxv4i1(<vscale x 4 x i1> %a) + ret <vscale x 4 x i1> %res +} + +define <vscale x 2 x i1> @ctpop_nxv2i1(<vscale x 2 x i1> %a) { +; CHECK-LABEL: ctpop_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %res = call <vscale x 2 x i1> @llvm.ctpop.nxv2i1(<vscale x 2 x i1> %a) + ret <vscale x 2 x i1> %res +} + +; CTLZ + +define <vscale x 16 x i1> @ctlz_nxv16i1(<vscale x 16 x i1> %a) { +; CHECK-LABEL: ctlz_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 16 x i1> @llvm.ctlz.nxv16i1(<vscale x 16 x i1> %a) + ret <vscale x 16 x i1> %res +} + +define <vscale x 8 x i1> @ctlz_nxv8i1(<vscale x 8 x i1> %a) { +; CHECK-LABEL: ctlz_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x i1> @llvm.ctlz.nxv8i1(<vscale x 8 x i1> %a) + ret <vscale x 8 x i1> %res +} + +define <vscale x 4 x i1> @ctlz_nxv4i1(<vscale x 4 x i1> %a) { +; CHECK-LABEL: ctlz_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 4 x i1> @llvm.ctlz.nxv4i1(<vscale x 4 x i1> %a) + ret <vscale x 4 x i1> %res +} + +define <vscale x 2 x i1> @ctlz_nxv2i1(<vscale x 2 x i1> %a) { +; 
CHECK-LABEL: ctlz_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 2 x i1> @llvm.ctlz.nxv2i1(<vscale x 2 x i1> %a) + ret <vscale x 2 x i1> %res +} + +; CTTZ + +define <vscale x 16 x i1> @cttz_nxv16i1(<vscale x 16 x i1> %a) { +; CHECK-LABEL: cttz_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 16 x i1> @llvm.cttz.nxv16i1(<vscale x 16 x i1> %a) + ret <vscale x 16 x i1> %res +} + +define <vscale x 8 x i1> @cttz_nxv8i1(<vscale x 8 x i1> %a) { +; CHECK-LABEL: cttz_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 8 x i1> @llvm.cttz.nxv8i1(<vscale x 8 x i1> %a) + ret <vscale x 8 x i1> %res +} + +define <vscale x 4 x i1> @cttz_nxv4i1(<vscale x 4 x i1> %a) { +; CHECK-LABEL: cttz_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 4 x i1> @llvm.cttz.nxv4i1(<vscale x 4 x i1> %a) + ret <vscale x 4 x i1> %res +} + +define <vscale x 2 x i1> @cttz_nxv2i1(<vscale x 2 x i1> %a) { +; CHECK-LABEL: cttz_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ret + %res = call <vscale x 2 x i1> @llvm.cttz.nxv2i1(<vscale x 2 x i1> %a) + ret <vscale x 2 x i1> %res +} + +declare <vscale x 16 x i1> @llvm.ctpop.nxv16i1(<vscale x 16 x i1>) +declare <vscale x 8 x i1> @llvm.ctpop.nxv8i1(<vscale x 8 x i1>) +declare <vscale x 4 x i1> @llvm.ctpop.nxv4i1(<vscale x 4 x i1>) +declare <vscale x 2 x i1> @llvm.ctpop.nxv2i1(<vscale x 2 x i1>) + +declare <vscale x 16 x i1> @llvm.ctlz.nxv16i1(<vscale x 16 x i1>) +declare <vscale x 8 x i1> @llvm.ctlz.nxv8i1(<vscale x 8 x i1>) +declare <vscale x 4 x i1> @llvm.ctlz.nxv4i1(<vscale x 4 x i1>) +declare <vscale x 2 x i1> @llvm.ctlz.nxv2i1(<vscale x 2 x i1>) + +declare <vscale x 16 x 
i1> @llvm.cttz.nxv16i1(<vscale x 16 x i1>) +declare <vscale x 8 x i1> @llvm.cttz.nxv8i1(<vscale x 8 x i1>) +declare <vscale x 4 x i1> @llvm.cttz.nxv4i1(<vscale x 4 x i1>) +declare <vscale x 2 x i1> @llvm.cttz.nxv2i1(<vscale x 2 x i1>) |