diff options
author | Alexey Bataev <a.bataev@outlook.com> | 2023-05-17 05:41:34 -0700 |
---|---|---|
committer | Alexey Bataev <a.bataev@outlook.com> | 2023-05-17 06:32:22 -0700 |
commit | 9a7248f56164b44b07df421384c12541a91e6d84 (patch) | |
tree | 2311c183de06cd0f09e43f396d20d0252a79a505 | |
parent | af3c7241df9e274d9944f41e8c174b7580c656dd (diff) | |
download | llvm-9a7248f56164b44b07df421384c12541a91e6d84.tar.gz |
[SLP] Fix crash for scalarized vectors.
The node must not be inserted into InVectors when the resulting vector type
is scalarized either; doing so causes compiler crashes.
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll | 37 |
2 files changed, 38 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1fceae5f2610..fd4fee09c9e0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7028,10 +7028,8 @@ public: auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); // If the resulting type is scalarized, do not adjust the cost. unsigned VecNumParts = TTI.getNumberOfParts(VecTy); - if (VecNumParts == VecTy->getNumElements()) { - InVectors.assign(1, E); + if (VecNumParts == VecTy->getNumElements()) return nullptr; - } DenseMap<Value *, int> ExtractVectorsTys; for (auto [I, V] : enumerate(VL)) { // Ignore non-extractelement scalars. diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll new file mode 100644 index 000000000000..e1c3d9affd18 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=slp-vectorizer -S -mtriple=nvptx64-unknown-unknown < %s | FileCheck %s + +define <2 x float> @baz() { +; CHECK-LABEL: define <2 x float> @baz() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x float> zeroinitializer, i64 0 +; CHECK-NEXT: [[FCMP:%.*]] = fcmp uno float [[EXTRACTELEMENT]], 0.000000e+00 +; CHECK-NEXT: [[FCMP1:%.*]] = fcmp uno float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[FCMP]], [[FCMP1]] +; CHECK-NEXT: [[FCMP2:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR3:%.*]] = or i1 [[FCMP2]], [[OR]] +; CHECK-NEXT: [[FCMP4:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR5:%.*]] = or i1 [[FCMP4]], [[OR3]] +; CHECK-NEXT: br i1 [[OR5]], label [[BB6:%.*]], label [[BB7:%.*]] +; CHECK: bb6: +; CHECK-NEXT: ret <2 x float> zeroinitializer 
+; CHECK: bb7: +; CHECK-NEXT: ret <2 x float> zeroinitializer +; +bb: + %extractelement = extractelement <2 x float> zeroinitializer, i64 0 + %fcmp = fcmp uno float %extractelement, 0.000000e+00 + %fcmp1 = fcmp uno float 0.000000e+00, 0.000000e+00 + %or = or i1 %fcmp, %fcmp1 + %fcmp2 = fcmp oeq float 0.000000e+00, 0.000000e+00 + %or3 = or i1 %fcmp2, %or + %fcmp4 = fcmp oeq float 0.000000e+00, 0.000000e+00 + %or5 = or i1 %fcmp4, %or3 + br i1 %or5, label %bb6, label %bb7 + +bb6: ; preds = %bb + ret <2 x float> zeroinitializer + +bb7: ; preds = %bb + ret <2 x float> zeroinitializer +} |