summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@outlook.com>2023-05-17 05:41:34 -0700
committerAlexey Bataev <a.bataev@outlook.com>2023-05-17 06:32:22 -0700
commit9a7248f56164b44b07df421384c12541a91e6d84 (patch)
tree2311c183de06cd0f09e43f396d20d0252a79a505
parentaf3c7241df9e274d9944f41e8c174b7580c656dd (diff)
downloadllvm-9a7248f56164b44b07df421384c12541a91e6d84.tar.gz
[SLP] Fix crash for scalarized vectors.
When the resulting vector type is scalarized, the node must not be inserted into InVectors either; doing so causes compiler crashes.
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp4
-rw-r--r--llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll37
2 files changed, 38 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1fceae5f2610..fd4fee09c9e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7028,10 +7028,8 @@ public:
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
// If the resulting type is scalarized, do not adjust the cost.
unsigned VecNumParts = TTI.getNumberOfParts(VecTy);
- if (VecNumParts == VecTy->getNumElements()) {
- InVectors.assign(1, E);
+ if (VecNumParts == VecTy->getNumElements())
return nullptr;
- }
DenseMap<Value *, int> ExtractVectorsTys;
for (auto [I, V] : enumerate(VL)) {
// Ignore non-extractelement scalars.
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll
new file mode 100644
index 000000000000..e1c3d9affd18
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=slp-vectorizer -S -mtriple=nvptx64-unknown-unknown < %s | FileCheck %s
+
+define <2 x float> @baz() {
+; CHECK-LABEL: define <2 x float> @baz() {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x float> zeroinitializer, i64 0
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp uno float [[EXTRACTELEMENT]], 0.000000e+00
+; CHECK-NEXT: [[FCMP1:%.*]] = fcmp uno float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[FCMP]], [[FCMP1]]
+; CHECK-NEXT: [[FCMP2:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR3:%.*]] = or i1 [[FCMP2]], [[OR]]
+; CHECK-NEXT: [[FCMP4:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR5:%.*]] = or i1 [[FCMP4]], [[OR3]]
+; CHECK-NEXT: br i1 [[OR5]], label [[BB6:%.*]], label [[BB7:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: ret <2 x float> zeroinitializer
+; CHECK: bb7:
+; CHECK-NEXT: ret <2 x float> zeroinitializer
+;
+bb:
+ %extractelement = extractelement <2 x float> zeroinitializer, i64 0
+ %fcmp = fcmp uno float %extractelement, 0.000000e+00
+ %fcmp1 = fcmp uno float 0.000000e+00, 0.000000e+00
+ %or = or i1 %fcmp, %fcmp1
+ %fcmp2 = fcmp oeq float 0.000000e+00, 0.000000e+00
+ %or3 = or i1 %fcmp2, %or
+ %fcmp4 = fcmp oeq float 0.000000e+00, 0.000000e+00
+ %or5 = or i1 %fcmp4, %or3
+ br i1 %or5, label %bb6, label %bb7
+
+bb6: ; preds = %bb
+ ret <2 x float> zeroinitializer
+
+bb7: ; preds = %bb
+ ret <2 x float> zeroinitializer
+}