summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stephen Long <steplong@quicinc.com> 2022-03-30 16:51:37 -0700
committer Tom Stellard <tstellar@redhat.com> 2022-06-08 11:34:36 -0700
commit d350783a0520d09eb61e2eca1cd61c9cdac00908 (patch)
tree 2723dd3f568d7f4cb264798f951da943d738e48a
parent 198626ad43fd1e5425fcd57c764057b1979431c6 (diff)
download llvm-d350783a0520d09eb61e2eca1cd61c9cdac00908.tar.gz
[LoopIdiom] Merge TBAA of adjacent stores when creating memset
Factor in the TBAA of adjacent stores, instead of just the head store, when merging stores into a memset. We were seeing GVN remove a load whose TBAA matched the 2nd store, because GVN determined that it didn't match the TBAA of the memset. The memset had the TBAA of only the first store.

For example, loading the field pi_ of shared_count after the memset that creates an array of shared_ptr:

  template<class T>
  class shared_ptr {
    T *p;
    shared_count refcount;
  };

  class shared_count {
    sp_counted_base *pi_;
  };

Differential Revision: https://reviews.llvm.org/D122205

(cherry picked from commit e02f4976acbf086904bf3903348603cc0f04e6ad)
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 2
-rw-r--r-- llvm/test/Transforms/LoopIdiom/memset-tbaa.ll 47
2 files changed, 49 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index aa9b1a5010da..2635d0a213ff 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1173,6 +1173,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
AAMDNodes AATags = TheStore->getAAMetadata();
+ for (Instruction *Store : Stores)
+ AATags = AATags.merge(Store->getAAMetadata());
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
AATags = AATags.extendTo(CI->getZExtValue());
else
diff --git a/llvm/test/Transforms/LoopIdiom/memset-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memset-tbaa.ll
index 096056fa3be3..bc031413dc63 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-tbaa.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-tbaa.ll
@@ -91,6 +91,45 @@ for.body:
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
+%struct.A = type { i32*, %struct.B }
+%struct.B = type { i32* }
+
+define dso_local void @adjacent_store_memset(%struct.A* nocapture %a, i64 %len) {
+; CHECK-LABEL: @adjacent_store_memset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A1:%.*]] = bitcast %struct.A* [[A:%.*]] to i8*
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 %len, i64 1)
+; CHECK-NEXT: [[LEN:%.*]] = shl nuw i64 [[UMAX]], 4
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A1]], i8 0, i64 [[LEN]], i1 false), !tbaa [[TBAA9:![0-9]+]]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_09:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, %entry ]
+; CHECK-NEXT: %p = getelementptr inbounds %struct.A, %struct.A* [[A]], i64 [[I_09]], i32 0
+; CHECK-NEXT: %p2 = getelementptr inbounds %struct.A, %struct.A* [[A]], i64 [[I_09]], i32 1, i32 0
+; CHECK-NEXT: [[INC]] = add i64 [[I_09]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ult i64 [[INC]], %len
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.09 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %p = getelementptr inbounds %struct.A, %struct.A* %a, i64 %i.09, i32 0
+ store i32* null, i32** %p, align 8, !tbaa !18
+ %p2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 %i.09, i32 1, i32 0
+ store i32* null, i32** %p2, align 8, !tbaa !21
+ %inc = add i64 %i.09, 1
+ %cmp = icmp ult i64 %inc, %len
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+
; CHECK: [[TBAA0]] = !{[[TBAA1:.+]], [[TBAA1]], i64 0}
; CHECK: [[TBAA1]] = !{!"double", [[TBAA2:.+]], i64 0}
; CHECK: [[TBAA2]] = !{!"omnipotent char", [[TBAA3:.+]], i64 0}
@@ -99,6 +138,8 @@ for.body:
; CHECK: [[TBAA5]] = !{[[TBAA7:.+]], i64 32, !"_ZTS1A", [[TBAA6]], i64 0, i64 8, [[TBAA6]], i64 8, i64 8, [[TBAA6]], i64 16, i64 8, [[TBAA6]], i64 24, i64 8}
; CHECK: [[TBAA7]] = !{[[TBAA3]], i64 0, !"omnipotent char"}
; CHECK: [[TBAA6]] = !{[[TBAA7]], i64 8, !"double"}
+; CHECK: [[TBAA9]] = !{[[TBAA10:.+]], [[TBAA10]], i64 0}
+; CHECK: [[TBAA10]] = !{!"any pointer", [[TBAA2]], i64 0}
!5 = !{!6, !6, i64 0}
!6 = !{!"double", !7, i64 0}
@@ -109,3 +150,9 @@ for.body:
!17 = !{!15, i64 8, !"double"}
!9 = !{!15, i64 32, !"_ZTS1A", !17, i64 0, i64 8, !17, i64 8, i64 8, !17, i64 16, i64 8, !17, i64 24, i64 8}
!10 = !{!9, !17, i64 0, i64 1}
+
+!18 = !{!19, !20, i64 0}
+!19 = !{!"A", !20, i64 0, !22, i64 8}
+!20 = !{!"any pointer", !7, i64 0}
+!21 = !{!22, !20, i64 0}
+!22 = !{!"B", !20, i64 0}