diff options
author | Matt Devereau <matthew.devereau@arm.com> | 2021-12-09 15:32:35 +0000 |
---|---|---|
committer | Matt Devereau <matthew.devereau@arm.com> | 2021-12-14 15:58:28 +0000 |
commit | fb47725d1417f48898b7628c58e54c6a02754bf0 (patch) | |
tree | 3ca90d7dc549b5d91a3512a4cd56e06196975234 | |
parent | c13524856bb304e6b4f80da7f5c5ecdc021920ee (diff) | |
download | llvm-fb47725d1417f48898b7628c58e54c6a02754bf0.tar.gz |
[AArch64][SVE] Instcombine SDIV to ASRD
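Instcombine the SVE SDIV intrinsic to ASRD when its third operand (the divisor) is a constant splat of a power of 2. When the divisor is a constant splat of a negated power of 2, the ASRD result is additionally negated with a predicated NEG.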
Differential Revision: https://reviews.llvm.org/D115448
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 36 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll | 75 |
2 files changed, 111 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e8722af88579..d69997c20159 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1028,6 +1028,40 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC, return None; } +static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC, + IntrinsicInst &II) { + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + Type *Int32Ty = Builder.getInt32Ty(); + Value *Pred = II.getOperand(0); + Value *Vec = II.getOperand(1); + Value *DivVec = II.getOperand(2); + + Value *SplatValue = getSplatValue(DivVec); + ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue); + if (!SplatConstantInt) + return None; + APInt Divisor = SplatConstantInt->getValue(); + + if (Divisor.isPowerOf2()) { + Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2()); + auto ASRD = Builder.CreateIntrinsic( + Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2}); + return IC.replaceInstUsesWith(II, ASRD); + } + if (Divisor.isNegatedPowerOf2()) { + Divisor.negate(); + Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2()); + auto ASRD = Builder.CreateIntrinsic( + Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2}); + auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg, + {ASRD->getType()}, {ASRD, Pred, ASRD}); + return IC.replaceInstUsesWith(II, NEG); + } + + return None; +} + Optional<Instruction *> AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -1088,6 +1122,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, return instCombineSVELD1(IC, II, DL); case Intrinsic::aarch64_sve_st1: return instCombineSVEST1(IC, II, DL); + case Intrinsic::aarch64_sve_sdiv: + return instCombineSVESDIV(IC, II); } return None; diff --git 
a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll new file mode 100644 index 000000000000..54541dae9115 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +; + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) + ret <vscale x 4 x i32> %out +} + +define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i32_neg( +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> 
%pg) #0 { +; CHECK-LABEL: @sdiv_i64( +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +; + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) + ret <vscale x 2 x i64> %out +} + +define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i64_neg( +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) + ret <vscale x 2 x i64> %out +} + +define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i32_not_base2( +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] +; + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> 
%a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) + ret <vscale x 4 x i32> %out +} + +define <vscale x 4 x i32> @sdiv_i32_not_base2_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i32_not_base2_neg( +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] +; + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) + ret <vscale x 4 x i32> %out +} + +define <vscale x 4 x i32> @sdiv_i32_not_zero(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: @sdiv_i32_not_zero( +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] +; + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) + ret <vscale x 4 x i32> %out +} + + +declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +attributes #0 = 
{ "target-features"="+sve" } |