summary refs log tree commit diff
diff options
context:
space:
mode:
authorEli Friedman <efriedma@quicinc.com>2022-05-31 16:51:03 -0700
committerTom Stellard <tstellar@redhat.com>2022-06-07 18:24:40 -0700
commitb75bf750fdc287821fca697d103053f62a4b1d77 (patch)
tree3f7e4b5fece219e6416574649acc1b7805cf314c
parent2e857fe6e390ab992fadd59c7d9b1192047c935a (diff)
downloadllvm-b75bf750fdc287821fca697d103053f62a4b1d77.tar.gz
[LoopIdiom] Fix bailout for aliasing in memcpy transform.
Commit dd5991cc modified the aliasing checks here to allow transforming a memcpy where the source and destination point into the same object. However, the change accidentally made the code skip the alias check for other operations in the loop. Instead of completely skipping the alias check, just skip the check for whether the memcpy aliases itself.

Differential Revision: https://reviews.llvm.org/D126486

(cherry picked from commit abdf0da8009f272f6c3d6398cf63f9f0a8257637)
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp27
-rw-r--r--llvm/test/Transforms/LoopIdiom/basic.ll43
2 files changed, 53 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 318c4c06f0f7..aa9b1a5010da 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1420,26 +1420,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
- if (IsMemCpy)
- IgnoredInsts.erase(TheStore);
MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
+ if (IsMemCpy && !Verifier.IsSameObject)
+ IgnoredInsts.erase(TheStore);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSizeSCEV, *AA, IgnoredInsts)) {
- if (!IsMemCpy) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
- TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- // At this point loop may access load only for memcpy in same underlying
- // object. If that's not the case bail out.
- if (!Verifier.IsSameObject)
- return Changed;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
+ return Changed;
}
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 0bf4ad4f6495..e30d20a22299 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -1530,6 +1530,49 @@ for.body: ; preds = %entry, %for.body
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
+; Do not form memmove when there's an aliasing operation, even
+; if the memcpy source and destination are in the same object.
+define void @do_not_form_memmove8(i64* %p) {
+; CHECK-LABEL: @do_not_form_memmove8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 1000
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: loop:
+; CHECK-NEXT: [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[X5:%.*]] = zext i32 [[X4]] to i64
+; CHECK-NEXT: [[X7:%.*]] = getelementptr inbounds i64, i64* [[P2]], i64 [[X5]]
+; CHECK-NEXT: [[X8:%.*]] = bitcast i64* [[X7]] to i8*
+; CHECK-NEXT: store i64 1, i64* [[X7]], align 4
+; CHECK-NEXT: [[X11:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[X5]]
+; CHECK-NEXT: [[X12:%.*]] = bitcast i64* [[X11]] to i8*
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[X12]], i8* [[X8]], i64 8, i1 false)
+; CHECK-NEXT: [[X13]] = add i32 [[X4]], 1
+; CHECK-NEXT: [[X14:%.*]] = icmp eq i32 [[X13]], 44
+; CHECK-NEXT: br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
+;
+entry:
+ %p2 = getelementptr inbounds i64, i64* %p, i64 1000
+ br label %loop
+
+exit:
+ ret void
+
+loop:
+ %x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
+ %x5 = zext i32 %x4 to i64
+ %x7 = getelementptr inbounds i64, i64* %p2, i64 %x5
+ %x8 = bitcast i64* %x7 to i8*
+ store i64 1, i64* %x7, align 4
+ %x11 = getelementptr inbounds i64, i64* %p, i64 %x5
+ %x12 = bitcast i64* %x11 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x12, i8* %x8, i64 8, i1 false)
+ %x13 = add i32 %x4, 1
+ %x14 = icmp eq i32 %x13, 44
+ br i1 %x14, label %exit, label %loop
+}
+
;; Memcpy formation is still preferred over memmove.
define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
; CHECK-LABEL: @prefer_memcpy_over_memmove(