diff options
-rw-r--r-- | lib/CodeGen/MachineLICM.cpp | 32 | ||||
-rw-r--r-- | test/CodeGen/X86/2012-03-26-PostRALICMBug.ll | 59 |
2 files changed, 87 insertions, 4 deletions
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 428a9d9bde52..dfc622d0eda8 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -478,6 +478,10 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. @@ -514,25 +518,46 @@ void MachineLICM::HoistRegionPostRA() { } } + // Gather the registers read / clobbered by the terminator. + BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + TermRegs.set(*AS); + } + } + // Now evaluate whether the potential candidates qualify. // 1. Check if the candidate defined register is defined by another // instruction in the loop. // 2. If the candidate is a load from stack slot (always true for now), // check if the slot is stored anywhere in the loop. + // 3. Make sure candidate def should not clobber + // registers read by the terminator. Similarly its def should not be + // clobbered by the terminator. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { if (Candidates[i].FI != INT_MIN && StoredFIs.count(Candidates[i].FI)) continue; - if (!PhysRegClobbers.test(Candidates[i].Def)) { + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; MachineInstr *MI = Candidates[i].MI; for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - if (PhysRegDefs.test(MO.getReg()) || - PhysRegClobbers.test(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; @@ -571,7 +596,6 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll new file mode 100644 index 000000000000..101eccabbd49 --- /dev/null +++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \ +; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc} + +; rdar://11095580 + +%struct.ref_s = type { %union.color_sample, i16, i16 } +%union.color_sample = type { i64 } + +@table = external global [3891 x i64] + +declare i32 @foo() + +define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp { +entry: + %call = tail call i32 @foo() + %tmp = ashr i32 %call, 31 + %0 = and i32 %tmp, 1396 + %index9 = add i32 %0, 2397 + indirectbr i8* undef, [label %return, label %if.end] + +if.end: ; preds = %entry + %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2 + %tmp6 = load i16* %size5, align 2 + %tobool1 = icmp eq i16 %tmp6, 0 + %1 = select i1 %tobool1, i32 1396, i32 -1910 + %index10 = add i32 %index9, %1 + indirectbr i8* undef, [label %return, label %while.body.lr.ph] + +while.body.lr.ph: ; preds = %if.end + %refs = bitcast %struct.ref_s* %op to %struct.ref_s** + %tmp9 = load %struct.ref_s** %refs, align 8 + %tmp4 = zext i16 %tmp6 to i64 + %index13 = add i32 %index10, 1658 + %2 = sext i32 %index13 to i64 + %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2 + %blockaddress14 = load i64* %3, align 8 + %4 = inttoptr i64 %blockaddress14 to i8* + indirectbr i8* %4, [label %while.body] + +while.body: ; preds = %while.body, %while.body.lr.ph + %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ] + %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ] + %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1 + store i16 32, i16* %type_attrs, align 2 + %indvar.next = add i64 %indvar, 1 + %exitcond5 = icmp eq i64 %indvar.next, %tmp4 + %tmp7 = select i1 %exitcond5, i32 1648, i32 0 + %index15 = add i32 %index7, %tmp7 + %tmp8 = select i1 %exitcond5, i64 13, i64 0 + %5 = sext i32 %index15 to i64 + %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5 + %blockaddress16 = load i64* %6, align 8 + %7 = inttoptr i64 %blockaddress16 to i8* + indirectbr i8* %7, [label %return, label %while.body] + +return: ; preds = %while.body, %if.end, %entry + %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ] + ret i32 %retval.0 +} |