diff options
author | Jameson Nash <vtjnash@gmail.com> | 2021-07-22 19:42:23 -0400 |
---|---|---|
committer | Jameson Nash <vtjnash@gmail.com> | 2021-09-27 14:06:13 -0400 |
commit | e27a6db5298f6ba3c1dbc8bab25c769cfa761b2a (patch) | |
tree | 53aa42202570673525c99371ddad015b2eb932c5 | |
parent | b2a2c38349a18b89b04d342632d5ea02f86dfdd6 (diff) | |
download | llvm-e27a6db5298f6ba3c1dbc8bab25c769cfa761b2a.tar.gz |
Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location
We see that it might otherwise do:
%10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
%11 = bitcast <2 x {}***> %10 to <2 x i64*>
...
%27 = extractelement <2 x i64*> %11, i32 0
%28 = bitcast i64* %27 to <2 x i64>*
store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2
Which is an out-of-bounds store (the extractelement got offset 10
instead of offset 4 as intended). With the fix, we correctly generate
extractelement for i32 1 and generate correct code.
Differential Revision: https://reviews.llvm.org/D106613
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 25 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll | 3 |
2 files changed, 20 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 113d19100465..695224a7e1e2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6035,8 +6035,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The pointer operand uses an in-tree scalar so we add the new BitCast // to ExternalUses list to make sure that an extract will be generated // in the future. - if (getTreeEntry(PO)) - ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0); + if (TreeEntry *Entry = getTreeEntry(PO)) { + // Find which lane we need to extract. + unsigned FoundLane = Entry->findLaneForValue(PO); + ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane); + } NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign()); } else { @@ -6077,8 +6080,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The pointer operand uses an in-tree scalar, so add the new BitCast to // ExternalUses to make sure that an extract will be generated in the // future. - if (getTreeEntry(ScalarPtr)) - ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0)); + if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) { + // Find which lane we need to extract. + unsigned FoundLane = Entry->findLaneForValue(ScalarPtr); + ExternalUses.push_back( + ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane)); + } Value *V = propagateMetadata(ST, E->Scalars); @@ -6181,8 +6188,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // The scalar argument uses an in-tree scalar so we add the new vectorized // call to ExternalUses list to make sure that an extract will be // generated in the future. - if (ScalarArg && getTreeEntry(ScalarArg)) - ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0)); + if (ScalarArg) { + if (TreeEntry *Entry = getTreeEntry(ScalarArg)) { + // Find which lane we need to extract. + unsigned FoundLane = Entry->findLaneForValue(ScalarArg); + ExternalUses.push_back( + ExternalUser(ScalarArg, cast<User>(V), FoundLane)); + } + } propagateIRFlags(V, E->Scalars, VL0); ShuffleBuilder.addInversedMask(E->ReorderIndices); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index 2348b5ae56fb..cbf6d8ff4fa9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -108,10 +108,9 @@ define void @externally_used_ptrs() { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* ; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8 |