summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Grosser <grosser@fim.uni-passau.de>2013-06-06 02:39:42 +0000
committerTobias Grosser <grosser@fim.uni-passau.de>2013-06-06 02:39:42 +0000
commit5dbed5dc02b80cb03c5a46f6a85d0c3927f5b784 (patch)
tree75c3efc5dfaead49da059ce4881344136b676d2c
parent3aeb1b7a6c79dab1e310b6befc6020b6f5035f00 (diff)
downloadllvm-5dbed5dc02b80cb03c5a46f6a85d0c3927f5b784.tar.gz
LoopGenerators: Construct loops such that they are already loop rotated
BeforeBB | v GuardBB / \ __ PreHeaderBB \ / \ / | latch HeaderBB | \ / \ / < \ / \ / ExitBB This does not only remove the need for an explicit loop rotate pass, but it also gives us the possibility to skip the construction of the guard condition in case the loop is known to be executed at least once. We do not yet exploit this, but by implementing this analysis in the isl code generator we should be able to remove more guards than the generic loop rotate pass can. Another point is that loop rotation can introduce additional PHI nodes, which may hide that a loop can be executed in parallel. This change avoids this complication and will make it easier to move the openmp code generation into a separate pass. Merged from https://llvm.org/svn/llvm-project/polly/trunk@181986 llvm-svn: 183373
-rw-r--r--polly/include/polly/CodeGen/LoopGenerators.h3
-rw-r--r--polly/lib/CodeGen/CodeGeneration.cpp6
-rw-r--r--polly/lib/CodeGen/IslCodeGeneration.cpp13
-rw-r--r--polly/lib/CodeGen/LoopGenerators.cpp103
-rw-r--r--polly/test/Cloog/CodeGen/MemAccess/codegen_simple_md.ll12
-rw-r--r--polly/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll2
-rw-r--r--polly/test/Isl/single_loop_param_less_equal.ll23
-rw-r--r--polly/test/Isl/single_loop_param_less_than.ll25
8 files changed, 111 insertions, 76 deletions
diff --git a/polly/include/polly/CodeGen/LoopGenerators.h b/polly/include/polly/CodeGen/LoopGenerators.h
index 3a7e3b48a87c..3f63c842d3c2 100644
--- a/polly/include/polly/CodeGen/LoopGenerators.h
+++ b/polly/include/polly/CodeGen/LoopGenerators.h
@@ -36,10 +36,11 @@ using namespace llvm;
/// @param Builder The builder used to create the loop.
/// @param P A pointer to the pass that uses this function. It is used
/// to update analysis information.
+/// @param ExitBlock The block the loop will exit to.
/// @param Predicate The predicate used to generate the upper loop bound.
/// @return Value* The newly created induction variable for this loop.
Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
- IRBuilder<> &Builder, Pass *P, BasicBlock *&AfterBlock,
+ IRBuilder<> &Builder, Pass *P, BasicBlock *&ExitBlock,
ICmpInst::Predicate Predicate);
class OMPGenerator {
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index 051d0287278f..06f0fa7854f7 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -468,14 +468,14 @@ void ClastStmtCodeGen::codegen(const clast_block *b) {
void ClastStmtCodeGen::codegenForSequential(const clast_for *f) {
Value *LowerBound, *UpperBound, *IV, *Stride;
- BasicBlock *AfterBB;
+ BasicBlock *ExitBlock;
Type *IntPtrTy = getIntPtrTy();
LowerBound = ExpGen.codegen(f->LB, IntPtrTy);
UpperBound = ExpGen.codegen(f->UB, IntPtrTy);
Stride = Builder.getInt(APInt_from_MPZ(f->stride));
- IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB,
+ IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, ExitBlock,
CmpInst::ICMP_SLE);
// Add loop iv to symbols.
@@ -486,7 +486,7 @@ void ClastStmtCodeGen::codegenForSequential(const clast_for *f) {
// Loop is finished, so remove its iv from the live symbols.
ClastVars.erase(f->iterator);
- Builder.SetInsertPoint(AfterBB->begin());
+ Builder.SetInsertPoint(ExitBlock->begin());
}
// Helper class to determine all scalar parameters used in the basic blocks of a
diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp
index 78e4e2dc7bd7..136a87321f41 100644
--- a/polly/lib/CodeGen/IslCodeGeneration.cpp
+++ b/polly/lib/CodeGen/IslCodeGeneration.cpp
@@ -780,7 +780,7 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
isl_id *IteratorID;
Value *ValueLB, *ValueUB, *ValueInc;
Type *MaxType;
- BasicBlock *AfterBlock;
+ BasicBlock *ExitBlock;
Value *IV;
CmpInst::Predicate Predicate;
@@ -814,21 +814,14 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
if (MaxType != ValueInc->getType())
ValueInc = Builder.CreateSExt(ValueInc, MaxType);
- // TODO: In case we can proof a loop is executed at least once, we can
- // generate the condition iv != UB + stride (consider possible
- // overflow). This condition will allow LLVM to prove the loop is
- // executed at least once, which will enable a lot of loop invariant
- // code motion.
-
- IV =
- createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock, Predicate);
+ IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, ExitBlock, Predicate);
IDToValue[IteratorID] = IV;
create(Body);
IDToValue.erase(IteratorID);
- Builder.SetInsertPoint(AfterBlock->begin());
+ Builder.SetInsertPoint(ExitBlock->begin());
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
diff --git a/polly/lib/CodeGen/LoopGenerators.cpp b/polly/lib/CodeGen/LoopGenerators.cpp
index b2e91b547277..881a0ac24aaf 100644
--- a/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/polly/lib/CodeGen/LoopGenerators.cpp
@@ -22,53 +22,84 @@
using namespace llvm;
using namespace polly;
+// We generate a loop of the following structure
+//
+// BeforeBB
+// |
+// v
+// GuardBB
+// / \
+// __ PreHeaderBB \
+// / \ / |
+// latch HeaderBB |
+// \ / \ /
+// < \ /
+// \ /
+// ExitBB
+//
+// GuardBB checks if the loop is executed at least once. If this is the case
+// we branch to PreHeaderBB and subsequently to the HeaderBB, which contains the
+// loop iv 'polly.indvar', the incremented loop iv 'polly.indvar_next' as well
+// as the condition to check if we execute another iteration of the loop. After
+// the loop has finished, we branch to ExitBB.
+//
+// TODO: We currently always create the GuardBB. If we can prove the loop is
+// always executed at least once, we can get rid of this branch.
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
- IRBuilder<> &Builder, Pass *P, BasicBlock *&AfterBlock,
+ IRBuilder<> &Builder, Pass *P, BasicBlock *&ExitBB,
ICmpInst::Predicate Predicate) {
+
DominatorTree &DT = P->getAnalysis<DominatorTree>();
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
- BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
- BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F);
- BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder.GetInsertPoint()++, P);
- AfterBB->setName("polly.loop_after");
-
- PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB);
- DT.addNewBlock(HeaderBB, PreheaderBB);
-
- Builder.SetInsertPoint(HeaderBB);
-
- // Use the type of upper and lower bound.
- assert(LB->getType() == UB->getType() &&
- "Different types for upper and lower bound.");
-
+ assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
assert(LoopIVType && "UB is not integer?");
- // IV
- PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.loopiv");
- IV->addIncoming(LB, PreheaderBB);
-
- Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
- Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.next_loopiv");
-
- // Exit condition.
- Value *CMP;
- CMP = Builder.CreateICmp(Predicate, IV, UB);
-
- Builder.CreateCondBr(CMP, BodyBB, AfterBB);
- DT.addNewBlock(BodyBB, HeaderBB);
-
- Builder.SetInsertPoint(BodyBB);
+ BasicBlock *BeforeBB = Builder.GetInsertBlock();
+ BasicBlock *GuardBB = BasicBlock::Create(Context, "polly.loop_if", F);
+ BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
+ BasicBlock *PreHeaderBB =
+ BasicBlock::Create(Context, "polly.loop_preheader", F);
+
+ // ExitBB
+ ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P);
+ ExitBB->setName("polly.loop_exit");
+
+ // BeforeBB
+ BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
+
+ // GuardBB
+ DT.addNewBlock(GuardBB, BeforeBB);
+ Builder.SetInsertPoint(GuardBB);
+ Value *LoopGuard;
+ LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
+ LoopGuard->setName("polly.loop_guard");
+ Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
+
+ // PreHeaderBB
+ DT.addNewBlock(PreHeaderBB, GuardBB);
+ Builder.SetInsertPoint(PreHeaderBB);
Builder.CreateBr(HeaderBB);
- IV->addIncoming(IncrementedIV, BodyBB);
- DT.changeImmediateDominator(AfterBB, HeaderBB);
-
- Builder.SetInsertPoint(BodyBB->begin());
- AfterBlock = AfterBB;
+ // HeaderBB
+ DT.addNewBlock(HeaderBB, PreHeaderBB);
+ Builder.SetInsertPoint(HeaderBB);
+ PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
+ IV->addIncoming(LB, PreHeaderBB);
+ Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
+ Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
+ Value *LoopCondition;
+ UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
+ LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
+ LoopCondition->setName("polly.loop_cond");
+ Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
+ IV->addIncoming(IncrementedIV, HeaderBB);
+ DT.changeImmediateDominator(ExitBB, GuardBB);
+
+ // The loop body should be added here.
+ Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
return IV;
}
diff --git a/polly/test/Cloog/CodeGen/MemAccess/codegen_simple_md.ll b/polly/test/Cloog/CodeGen/MemAccess/codegen_simple_md.ll
index eb58575b8fb2..132f70f608b7 100644
--- a/polly/test/Cloog/CodeGen/MemAccess/codegen_simple_md.ll
+++ b/polly/test/Cloog/CodeGen/MemAccess/codegen_simple_md.ll
@@ -59,16 +59,16 @@ for.end6: ; preds = %for.cond
; WITHCONST: %p_mul_coeff = mul i64 16, [[REG1]]
; WITHCONST: %p_sum_coeff = add i64 5, %p_mul_coeff
; WITHCONST: [[REG2:%[0-9]+]] = sext i32 %{{[0-9]+}} to i64
-; WITHCONST: %p_mul_coeff6 = mul i64 2, [[REG2]]
-; WITHCONST: %p_sum_coeff7 = add i64 %p_sum_coeff, %p_mul_coeff6
-; WITHCONST: %p_newarrayidx_ = getelementptr [1040 x i32]* @A, i64 0, i64 %p_sum_coeff7
+; WITHCONST: %p_mul_coeff8 = mul i64 2, [[REG2]]
+; WITHCONST: %p_sum_coeff9 = add i64 %p_sum_coeff, %p_mul_coeff8
+; WITHCONST: %p_newarrayidx_ = getelementptr [1040 x i32]* @A, i64 0, i64 %p_sum_coeff9
; WITHCONST: store i32 100, i32* %p_newarrayidx_
; WITHOUTCONST: [[REG1:%[0-9]+]] = sext i32 %{{[0-9]+}} to i64
; WITHOUTCONST: %p_mul_coeff = mul i64 16, [[REG1]]
; WITHOUTCONST: %p_sum_coeff = add i64 0, %p_mul_coeff
; WITHOUTCONST: [[REG2:%[0-9]+]] = sext i32 %{{[0-9]+}} to i64
-; WITHOUTCONST: %p_mul_coeff6 = mul i64 2, [[REG2]]
-; WITHOUTCONST: %p_sum_coeff7 = add i64 %p_sum_coeff, %p_mul_coeff6
-; WITHOUTCONST: %p_newarrayidx_ = getelementptr [1040 x i32]* @A, i64 0, i64 %p_sum_coeff7
+; WITHOUTCONST: %p_mul_coeff8 = mul i64 2, [[REG2]]
+; WITHOUTCONST: %p_sum_coeff9 = add i64 %p_sum_coeff, %p_mul_coeff8
+; WITHOUTCONST: %p_newarrayidx_ = getelementptr [1040 x i32]* @A, i64 0, i64 %p_sum_coeff9
; WITHOUTCONST: store i32 100, i32* %p_newarrayidx_
diff --git a/polly/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll b/polly/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
index 9753b0e92ecb..1f4d68a81c72 100644
--- a/polly/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
+++ b/polly/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
@@ -80,7 +80,7 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK: %omp.userContext = alloca { i32 }
; CHECK: getelementptr inbounds { i32 }* %omp.userContext, i32 0, i32 0
-; CHECK: store i32 %polly.loopiv, i32* %1
+; CHECK: store i32 %polly.indvar, i32* %0
; CHECK: %omp_data = bitcast { i32 }* %omp.userContext to i8*
; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.omp_subfn, i8* %omp_data, i32 0, i32 0, i32 10, i32 1)
; CHECK: call void @loop_openmp.omp_subfn(i8* %omp_data)
diff --git a/polly/test/Isl/single_loop_param_less_equal.ll b/polly/test/Isl/single_loop_param_less_equal.ll
index 45442b868ff1..06d32085b637 100644
--- a/polly/test/Isl/single_loop_param_less_equal.ll
+++ b/polly/test/Isl/single_loop_param_less_equal.ll
@@ -34,21 +34,26 @@ ret:
; CHECK: Stmt_loop_body(c1)
; CODEGEN: polly.start:
-; CODEGEN: br label %polly.loop_header
+; CODEGEN: br label %polly.loop_if
-; CODEGEN: polly.loop_after:
+; CODEGEN: polly.loop_exit:
; CODEGEN: br label %polly.merge_new_and_old
-; CODEGEN: polly.loop_header:
-; CODEGEN: %polly.loopiv = phi i64 [ 0, %polly.start ], [ %polly.next_loopiv, %polly.stmt.loop.body ]
-; CODEGEN: %polly.next_loopiv = add nsw i64 %polly.loopiv, 1
-; CODEGEN: %0 = icmp sle i64 %polly.loopiv, %n
-; CODEGEN: br i1 %0, label %polly.loop_body, label %polly.loop_after
+; CODEGEN: polly.loop_if:
+; CODEGEN: %polly.loop_guard = icmp sle i64 0, %n
+; CODEGEN: br i1 %polly.loop_guard, label %polly.loop_preheader, label %polly.loop_exit
-; CODEGEN: polly.loop_body:
+; CODEGEN: polly.loop_header:
+; CODEGEN: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.loop.body ]
; CODEGEN: br label %polly.stmt.loop.body
; CODEGEN: polly.stmt.loop.body:
-; CODEGEN: %p_scevgep.moved.to.loop.body = getelementptr [1024 x i32]* @A, i64 0, i64 %polly.loopiv
+; CODEGEN: %p_scevgep.moved.to.loop.body = getelementptr [1024 x i32]* @A, i64 0, i64 %polly.indvar
; CODEGEN: store i32 1, i32* %p_scevgep.moved.to.loop.body
+; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1
+; CODEGEN: %polly.adjust_ub = sub i64 %n, 1
+; CODEGEN: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub
+; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
+
+; CODEGEN: polly.loop_preheader:
; CODEGEN: br label %polly.loop_header
diff --git a/polly/test/Isl/single_loop_param_less_than.ll b/polly/test/Isl/single_loop_param_less_than.ll
index 25a44382963c..14970812ce7d 100644
--- a/polly/test/Isl/single_loop_param_less_than.ll
+++ b/polly/test/Isl/single_loop_param_less_than.ll
@@ -33,21 +33,26 @@ ret:
; CHECK: Stmt_loop_body(c1)
; CODEGEN: polly.start:
-; CODEGEN: br label %polly.loop_header
+; CODEGEN: br label %polly.loop_if
-; CODEGEN: polly.loop_after:
+; CODEGEN: polly.loop_exit:
; CODEGEN: br label %polly.merge_new_and_old
-; CODEGEN: polly.loop_header:
-; CODEGEN: %polly.loopiv = phi i64 [ 0, %polly.start ], [ %polly.next_loopiv, %polly.stmt.loop.body ]
-; CODEGEN: %polly.next_loopiv = add nsw i64 %polly.loopiv, 1
-; CODEGEN: %0 = icmp slt i64 %polly.loopiv, %n
-; CODEGEN: br i1 %0, label %polly.loop_body, label %polly.loop_after
+; CODEGEN: polly.loop_if:
+; CODEGEN: %polly.loop_guard = icmp slt i64 0, %n
+; CODEGEN: br i1 %polly.loop_guard, label %polly.loop_preheader, label %polly.loop_exit
-; CODEGEN: polly.loop_body:
+; CODEGEN: polly.loop_header:
+; CODEGEN: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.loop.body ]
; CODEGEN: br label %polly.stmt.loop.body
; CODEGEN: polly.stmt.loop.body:
-; CODEGEN: %p_scevgep.moved.to.loop.body = getelementptr [1024 x i32]* @A, i64 0, i64 %polly.loopiv
+; CODEGEN: %p_scevgep.moved.to.loop.body = getelementptr [1024 x i32]* @A, i64 0, i64 %polly.indvar
; CODEGEN: store i32 1, i32* %p_scevgep.moved.to.loop.body
-; CODEGEN: br label %polly.loop_header
+; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1
+; CODEGEN: %polly.adjust_ub = sub i64 %n, 1
+; CODEGEN: %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.adjust_ub
+; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
+
+; CODEGEN: polly.loop_preheader:
+; CODEGENL br label %polly.loop_header