diff options
author | Florian Hahn <flo@fhahn.com> | 2021-03-29 20:19:45 +0100 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2021-03-29 22:22:05 +0100 |
commit | 482283042f795ecc27838a3b2f76b5494991401c (patch) | |
tree | 29f28b52c4dd52a0f02f4610127145632659426c | |
parent | 047cbfe2bbf22a9da1bd27cafcee4eb1453965dc (diff) | |
download | llvm-482283042f795ecc27838a3b2f76b5494991401c.tar.gz |
[AArch64] Remove custom zext/sext legalization code.
Currently performExtendCombine assumes that the src-element bitwidth * 2
is a valid MVT. But this is not the case for i1 and it causes a crash on
the v64i1 test cases added in this patch.
It turns out that this code does not appear to be needed; the same patterns are
handled by other code and we end up with the same results, even without the
custom lowering. I also added further test cases in a50037aaa6d5df.
Let's just remove the unneeded code.
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D99437
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 73 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll | 64 |
2 files changed, 65 insertions, 72 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 000cbf856c62..59af923ee051 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13939,78 +13939,7 @@ static SDValue performExtendCombine(SDNode *N, return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); } - - // This is effectively a custom type legalization for AArch64. - // - // Type legalization will split an extend of a small, legal, type to a larger - // illegal type by first splitting the destination type, often creating - // illegal source types, which then get legalized in isel-confusing ways, - // leading to really terrible codegen. E.g., - // %result = v8i32 sext v8i8 %value - // becomes - // %losrc = extract_subreg %value, ... - // %hisrc = extract_subreg %value, ... - // %lo = v4i32 sext v4i8 %losrc - // %hi = v4i32 sext v4i8 %hisrc - // Things go rapidly downhill from there. - // - // For AArch64, the [sz]ext vector instructions can only go up one element - // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 - // take two instructions. - // - // This implies that the most efficient way to do the extend from v8i8 - // to two v4i32 values is to first extend the v8i8 to v8i16, then do - // the normal splitting to happen for the v8i16->v8i32. - - // This is pre-legalization to catch some cases where the default - // type legalization will create ill-tempered code. - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - // We're only interested in cleaning things up for non-legal vector types - // here. If both the source and destination are legal, things will just - // work naturally without any fiddling. 
- const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ResVT = N->getValueType(0); - if (!ResVT.isVector() || TLI.isTypeLegal(ResVT)) - return SDValue(); - // If the vector type isn't a simple VT, it's beyond the scope of what - // we're worried about here. Let legalization do its thing and hope for - // the best. - SDValue Src = N->getOperand(0); - EVT SrcVT = Src->getValueType(0); - if (!ResVT.isSimple() || !SrcVT.isSimple()) - return SDValue(); - - // If the source VT is a 64-bit fixed or scalable vector, we can play games - // and get the better results we want. - if (SrcVT.getSizeInBits().getKnownMinSize() != 64) - return SDValue(); - - unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); - ElementCount SrcEC = SrcVT.getVectorElementCount(); - SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC); - SDLoc DL(N); - Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src); - - // Now split the rest of the operation into two halves, each with a 64 - // bit source. - EVT LoVT, HiVT; - SDValue Lo, Hi; - LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext()); - - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), - LoVT.getVectorElementCount()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getConstant(0, DL, MVT::i64)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64)); - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi); - - // Now combine the parts back together so we still have a single result - // like the combiner expects. 
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); + return SDValue(); } static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index c0eefa145895..d9e882c8b4f7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind { %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r } + +; Extends of vectors of i1. + +define <32 x i8> @zext_v32i1(<32 x i1> %arg) { +; CHECK-LABEL: zext_v32i1: +; CHECK: and.16b v0, v0, v2 +; CHECK-NEXT: and.16b v1, v1, v2 +; CHECK-NEXT: ret + %res = zext <32 x i1> %arg to <32 x i8> + ret <32 x i8> %res +} + +define <32 x i8> @sext_v32i1(<32 x i1> %arg) { +; CHECK-LABEL: sext_v32i1: +; CHECK: shl.16b v0, v0, #7 +; CHECK-NEXT: shl.16b v1, v1, #7 +; CHECK-NEXT: sshr.16b v0, v0, #7 +; CHECK-NEXT: sshr.16b v1, v1, #7 +; CHECK-NEXT: ret +; + %res = sext <32 x i1> %arg to <32 x i8> + ret <32 x i8> %res +} + +define <64 x i8> @zext_v64i1(<64 x i1> %arg) { +; CHECK-LABEL: zext_v64i1: +; CHECK: and.16b v0, v0, [[V4:v.+]] +; CHECK-NEXT: and.16b v1, v1, [[V4]] +; CHECK-NEXT: and.16b v2, v2, [[V4]] +; CHECK-NEXT: and.16b v3, v3, [[V4]] +; CHECK-NEXT: ret +; + %res = zext <64 x i1> %arg to <64 x i8> + ret <64 x i8> %res +} + +define <64 x i8> @sext_v64i1(<64 x i1> %arg) { +; CHECK-LABEL: sext_v64i1: +; CHECK: shl.16b v0, v0, #7 +; CHECK-NEXT: shl.16b v3, v3, #7 +; CHECK-NEXT: shl.16b v2, v2, #7 +; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7 +; CHECK-NEXT: sshr.16b v0, v0, #7 +; CHECK-NEXT: sshr.16b v1, v3, #7 +; CHECK-NEXT: sshr.16b v2, v2, #7 +; CHECK-NEXT: sshr.16b v3, [[V4]], #7 +; CHECK-NEXT: ret +; + %res = sext <64 x i1> %arg to <64 x i8> + ret <64 x i8> %res +} + +define <1 x i128> @sext_v1x64(<1 x i64> %arg) { +; CHECK-LABEL: sext_v1x64: +; CHECK-NEXT: .cfi_startproc +; 
CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: asr x1, x8, #63 +; CHECK-NEXT: mov.d v0[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +; + %res = sext <1 x i64> %arg to <1 x i128> + ret <1 x i128> %res +} |