summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2022-01-10 08:39:12 +0000
committerFlorian Hahn <flo@fhahn.com>2022-01-10 08:49:25 +0000
commitad1b8772cf6b16c1162bb8ff425679f5ff046ae9 (patch)
tree07f05d641cf54ec7a3ff54a832d4e3ba17994996
parent38916195c9ce281a44a3378da02c1abdd199db2d (diff)
downloadllvm-ad1b8772cf6b16c1162bb8ff425679f5ff046ae9.tar.gz
[SCEVExpander] Only create multiplication if needed.
9345ab3a4550 updated generateOverflowCheck to skip creating checks that always evaluate to false. This in turn means that we only need to compute |Step| * Trip count if the result of the multiplication is actually used. Sink the multiplication into ComputeEndCheck, so it is only created when there's an actual check.
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp31
-rw-r--r--llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll14
-rw-r--r--llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll7
3 files changed, 22 insertions, 30 deletions
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index d60462329d42..70d284f6776f 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2494,21 +2494,6 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
// Compute |Step| * Backedge
- Value *MulV, *OfMul;
- if (Step->isOne()) {
- // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
- // needed, there is never an overflow, so to avoid artificially inflating
- // the cost of the check, directly emit the optimized IR.
- MulV = TruncTripCount;
- OfMul = ConstantInt::getFalse(MulV->getContext());
- } else {
- auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
- Intrinsic::umul_with_overflow, Ty);
- CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
- MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
- OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
- }
-
// Compute:
// 1. Start + |Step| * Backedge < Start
// 2. Start - |Step| * Backedge > Start
@@ -2521,6 +2506,22 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
return ConstantInt::getFalse(Loc->getContext());
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+ // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
+ // needed, there is never an overflow, so to avoid artificially inflating
+ // the cost of the check, directly emit the optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul =
+ Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
+
Value *Add = nullptr, *Sub = nullptr;
bool NeedPosCheck = !SE.isKnownNegative(Step);
bool NeedNegCheck = !SE.isKnownPositive(Step);
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
index 9f3ebcc9a9af..34f404522825 100644
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -14,9 +14,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
@@ -88,10 +85,10 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit6:
+; CHECK: for.end.loopexit3:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
@@ -153,9 +150,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
@@ -227,10 +221,10 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit5:
+; CHECK: for.end.loopexit2:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
index 7ce134e66cf8..f2d466563019 100644
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -31,9 +31,6 @@ define void @f1(i16* noalias %a,
; LV-NEXT: [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
-; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
@@ -78,10 +75,10 @@ define void @f1(i16* noalias %a,
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit6:
+; LV: for.end.loopexit3:
; LV-NEXT: br label [[FOR_END]]
; LV: for.end:
; LV-NEXT: ret void