summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-10-20 00:24:14 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-10-20 00:24:14 +0000
commit35c4e071bef11d3e6d193a1fa337788a99bbce3d (patch)
tree988f10503d9f87a2a801abb0230725e15fc52123
parentfc1c1ec435bfcd2add086f05be09195eb842d721 (diff)
downloadllvm-35c4e071bef11d3e6d193a1fa337788a99bbce3d.tar.gz
Teach the load analysis driving core instcombine logic and other bits of
logic to look through pointer casts, making them trivially stronger in the face of loads and stores with intervening pointer casts. I've included a few test cases that demonstrate the kind of folding instcombine can do without pointer casts and then variations which obfuscate the logic through bitcasts. Without this patch, the variations all fail to optimize fully. This is more important now than it has been in the past as I've started moving the load canonicialization to more closely follow the value type requirements rather than the pointer type requirements and thus this needs to be prepared for more pointer casts. When I made the same change to stores several test cases regressed without logic along these lines so I wanted to systematically improve matters first. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220178 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Analysis/Loads.cpp64
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp3
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp11
-rw-r--r--test/Transforms/InstCombine/select.ll109
4 files changed, 164 insertions, 23 deletions
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 4838d856ae34..65a30f2816fb 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -86,6 +86,9 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
}
}
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
@@ -96,8 +99,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL->getTypeStoreSize(AddrTy->getElementType());
if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
@@ -111,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// the load entirely).
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ // We can at least always strip pointer casts even though we can't use the
+ // base here.
+ V = V->stripPointerCasts();
+
while (BBI != E) {
--BBI;
@@ -120,13 +125,25 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
!isa<DbgInfoIntrinsic>(BBI))
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (AreEquivalentAddressValues(LI->getOperand(0), V))
- return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (AreEquivalentAddressValues(SI->getOperand(1), V))
- return true;
- }
+ Value *AccessedPtr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ AccessedPtr = LI->getPointerOperand();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ AccessedPtr = SI->getPointerOperand();
+ else
+ continue;
+
+ // Handle trivial cases even w/o DataLayout or other work.
+ if (AccessedPtr == V)
+ return true;
+
+ if (!DL)
+ continue;
+
+ auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
+ if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+ LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ return true;
}
return false;
}
@@ -157,12 +174,12 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+
// If we're using alias analysis to disambiguate get the size of *Ptr.
- uint64_t AccessSize = 0;
- if (AA) {
- Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTypeStoreSize(AccessTy);
- }
+ uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
+
+ Value *StrippedPtr = Ptr->stripPointerCasts();
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
@@ -183,17 +200,21 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (AreEquivalentAddressValues(
+ LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+ CastInst::isBitCastable(LI->getType(), AccessTy)) {
if (AATags)
LI->getAAMetadata(*AATags);
return LI;
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+ CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
if (AATags)
SI->getAAMetadata(*AATags);
return SI->getOperand(0);
@@ -202,15 +223,15 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
// for reg2mem'd code.
- if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
- (isa<AllocaInst>(SI->getOperand(1)) ||
- isa<GlobalVariable>(SI->getOperand(1))))
+ if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+ (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)))
continue;
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
if (AA &&
- (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
// Otherwise the store that may or may not alias the pointer, bail out.
@@ -223,7 +244,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
// May modify the pointer, bail out.
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 8e13dde08542..32ac62e74c05 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -420,7 +420,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
- return ReplaceInstUsesWith(LI, AvailableVal);
+ return ReplaceInstUsesWith(
+ LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 924d4e146776..fbb5c201347e 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -901,6 +901,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ if (AvailableVal->getType() != LI->getType())
+ AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal,
+ LI->getType(), "", LI);
LI->replaceAllUsesWith(AvailableVal);
LI->eraseFromParent();
return true;
@@ -1031,7 +1034,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
- PN->addIncoming(I->second, I->first);
+ // If we have an available predecessor but it requires casting, insert the
+ // cast in the predecessor and use the cast.
+ Value *PredV = I->second;
+ if (PredV->getType() != LI->getType())
+ PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "", P);
+
+ PN->addIncoming(PredV, I->first);
}
//cerr << "PRE: " << *LI << *PN << "\n";
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 05f65efc0333..6cf9f0f6b4d6 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1276,3 +1276,112 @@ define i32 @test77(i1 %flag, i32* %x) {
%v = load i32* %p
ret i32 %v
}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK: %[[V1:.*]] = load i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+ store i32 0, i32* %x
+ store i32 0, i32* %y
+ ; Block forwarding by storing to %z which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test79(
+; CHECK: %[[V1:.*]] = load float* %x
+; CHECK-NEXT: %[[V2:.*]] = load float* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT: ret float %[[S]]
+entry:
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ store i32 0, i32* %x1
+ store i32 0, i32* %y
+ ; Block forwarding by storing to %z which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float* %p
+ ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold throug
+; load->load folding and load->store folding.
+; CHECK-LABEL: @test80(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: ret i32 %[[V]]
+entry:
+ %x = alloca i32
+ %y = alloca i32
+ call void @scribble_on_memory(i32* %x)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load i32* %x
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test81(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT: ret float %[[C]]
+entry:
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_memory(i32* %x1)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load i32* %x1
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float* %p
+ ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test82(
+; CHECK: %[[X:.*]] = alloca float
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK: %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT: store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT: ret i32 %[[C]]
+entry:
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_memory(i32* %x1)
+ call void @scribble_on_memory(i32* %y)
+ %tmp = load float* %x
+ store float %tmp, float* %y1
+ %p = select i1 %flag, i32* %x1, i32* %y
+ %v = load i32* %p
+ ret i32 %v
+}