summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Devereau <matthew.devereau@arm.com>2021-12-09 15:32:35 +0000
committerMatt Devereau <matthew.devereau@arm.com>2021-12-14 15:58:28 +0000
commitfb47725d1417f48898b7628c58e54c6a02754bf0 (patch)
tree3ca90d7dc549b5d91a3512a4cd56e06196975234
parentc13524856bb304e6b4f80da7f5c5ecdc021920ee (diff)
downloadllvm-fb47725d1417f48898b7628c58e54c6a02754bf0.tar.gz
[AArch64][SVE] Instcombine SDIV to ASRD
Instcombine SDIV to ASRD when the third operand of SDIV is a power of 2. Differential Revision: https://reviews.llvm.org/D115448
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp36
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll75
2 files changed, 111 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e8722af88579..d69997c20159 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1028,6 +1028,40 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
return None;
}
+/// Try to replace a predicated SVE signed divide whose divisor operand is a
+/// splat of a constant power of two (or a negated power of two) with an ASRD
+/// intrinsic call, followed by a predicated NEG for negative divisors.
+///
+/// \param IC  The InstCombiner used to replace uses of \p II.
+/// \param II  The aarch64_sve_sdiv intrinsic call: operand 0 is the governing
+///            predicate, operand 1 the dividend vector and operand 2 the
+///            divisor vector.
+/// \returns the result of IC.replaceInstUsesWith() when the call was folded,
+///          or None when the divisor is not a suitable constant splat.
+static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
+                                                  IntrinsicInst &II) {
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Type *Int32Ty = Builder.getInt32Ty();
+  Value *Pred = II.getOperand(0);
+  Value *Vec = II.getOperand(1);
+  Value *DivVec = II.getOperand(2);
+
+  // Only a divisor that is a splat of a single constant integer qualifies.
+  Value *SplatValue = getSplatValue(DivVec);
+  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
+  if (!SplatConstantInt)
+    return None;
+  APInt Divisor = SplatConstantInt->getValue();
+
+  // Dividing by one is the identity; the dividend can be used directly.
+  // (Going through ASRD here would need a shift amount of 0, which is outside
+  // the range of ASRD's shift immediate.)
+  if (Divisor.isOne())
+    return IC.replaceInstUsesWith(II, Vec);
+
+  // -1 would likewise need ASRD #0. The minimum signed value has a single bit
+  // set, so the unsigned APInt::isPowerOf2() test would accept it, but it is
+  // really -2^(N-1) and its magnitude is not representable; shifting by N-1
+  // gives the wrong sign when the dividend is also the minimum signed value.
+  // Leave both cases untouched.
+  if (Divisor.isAllOnes() || Divisor.isMinSignedValue())
+    return None;
+
+  // From here on the magnitude is representable, so negating is safe.
+  const bool IsNegativeDivisor = Divisor.isNegative();
+  if (IsNegativeDivisor)
+    Divisor.negate();
+  if (!Divisor.isPowerOf2())
+    return None;
+
+  // x / 2^k  ->  asrd(pred, x, k); k >= 1 because |Divisor| >= 2 here.
+  Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
+  auto ASRD = Builder.CreateIntrinsic(
+      Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
+  if (!IsNegativeDivisor)
+    return IC.replaceInstUsesWith(II, ASRD);
+
+  // x / -(2^k)  ->  neg(asrd(pred, x, k)). The ASRD result is also passed as
+  // NEG's first operand so that both operands of NEG agree under Pred.
+  auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
+                                     {ASRD->getType()}, {ASRD, Pred, ASRD});
+  return IC.replaceInstUsesWith(II, NEG);
+}
+
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -1088,6 +1122,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVELD1(IC, II, DL);
case Intrinsic::aarch64_sve_st1:
return instCombineSVEST1(IC, II, DL);
+ case Intrinsic::aarch64_sve_sdiv:
+ return instCombineSVESDIV(IC, II);
}
return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll
new file mode 100644
index 000000000000..54541dae9115
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Divisor is a splat of 8388608 (2^23) on i32 elements; the checks expect the
+; sdiv call to be replaced by a single asrd call with shift amount 23.
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %out
+}
+
+; Divisor is a splat of -8388608 (-(2^23)) on i32 elements; the checks expect
+; an asrd call with shift amount 23 followed by a neg call that uses the same
+; predicate and takes the asrd result as both of its vector operands.
+define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %out
+}
+
+; Same as the i32 positive case but on i64 elements: the divisor is a splat of
+; 8388608 (2^23) and the checks expect a single asrd call with shift amount 23.
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %out
+}
+
+; Same as the i32 negative case but on i64 elements: the divisor is a splat of
+; -8388608 (-(2^23)) and the checks expect an asrd call with shift amount 23
+; followed by a neg call on its result.
+define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i64_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %out
+}
+
+; Negative test: the divisor is a splat of 8388607 (2^23 - 1), which is not a
+; power of two, so the checks expect the sdiv call to be left unchanged.
+define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_base2(
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %out
+}
+
+; Negative test: the divisor is a splat of -8388607, which is not a negated
+; power of two, so the checks expect the sdiv call to be left unchanged.
+define <vscale x 4 x i32> @sdiv_i32_not_base2_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_base2_neg(
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %out
+}
+
+; Negative test: despite the function name, the divisor here is a splat of
+; zero. The checks expect the sdiv call to remain, with the zero splat printed
+; as zeroinitializer.
+define <vscale x 4 x i32> @sdiv_i32_not_zero(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_zero(
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %out
+}
+
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }