[InstCombine] canonicalizeSaturatedAdd(): last fold is only valid for strict comparison (PR48390)

We could create uadd.sat under incorrect circumstances if a select with -1 as the false value was canonicalized by swapping the T/F values. Unlike the other transforms in the same function, it is not invariant to equality. Some alive proofs: https://alive2.llvm.org/ce/z/emmKKL Based on original patch by David Green! Fixes https://bugs.llvm.org/show_bug.cgi?id=48390 Differential Revision: https://reviews.llvm.org/D92717 (cherry picked from commit e6f2a79d7aa01f8dd7f0194f97a50b480e8ede71)
author: Roman Lebedev <lebedev.ri@gmail.com> 2020-12-09 18:04:08 +0300
committer: Tom Stellard <tstellar@redhat.com> 2020-12-14 17:08:57 -0500
commit: 8511a8df838f8b2766e56c22af4992d9862835fc (patch)
tree: 5d941683ce0aa7be1399925d5d488b630ed788a6
parent: 934376da585156885eca2f38f021367a4981d981 (diff)
download: llvm-8511a8df838f8b2766e56c22af4992d9862835fc.tar.gz
2 files changed, 27 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fa695c39cd1e..1e43014e7d32 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -782,25 +782,24 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
   // There are 8 commuted variants.
-  // Canonicalize -1 (saturated result) to true value of the select. Just
-  // swapping the compare operands is legal, because the selected value is the
-  // same in case of equality, so we can interchange u< and u<=.
+  // Canonicalize -1 (saturated result) to true value of the select.
   if (match(FVal, m_AllOnes())) {
     std::swap(TVal, FVal);
-    std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getInversePredicate(Pred);
   }
   if (!match(TVal, m_AllOnes()))
     return nullptr;
 
-  // Canonicalize predicate to 'ULT'.
-  if (Pred == ICmpInst::ICMP_UGT) {
-    Pred = ICmpInst::ICMP_ULT;
+  // Canonicalize predicate to less-than or less-or-equal-than.
+  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
     std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getSwappedPredicate(Pred);
   }
-  if (Pred != ICmpInst::ICMP_ULT)
+  if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE)
     return nullptr;
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  // Strictness of the comparison is irrelevant.
   Value *Y;
   if (match(Cmp0, m_Not(m_Value(X))) &&
       match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
@@ -809,6 +808,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
     return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
   }
   // The 'not' op may be included in the sum but not the compare.
+  // Strictness of the comparison is irrelevant.
   X = Cmp0;
   Y = Cmp1;
   if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
@@ -819,7 +819,9 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
         Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
   }
   // The overflow may be detected via the add wrapping round.
-  if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
+  // This is only valid for strict comparison!
+  if (Pred == ICmpInst::ICMP_ULT &&
+      match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&
       match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {
     // ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)
     // ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 3edafa17880b..ca45eb5bf429 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1804,8 +1804,10 @@ define i32 @uadd_sat_via_add(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_via_add_nonstrict(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_via_add_nonstrict(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1
+; CHECK-NEXT:    ret i32 [[R]]
 ;
   %a = add i32 %x, %y
   %c = icmp ule i32 %a, %y
@@ -1826,8 +1828,10 @@ define i32 @uadd_sat_via_add_swapped_select(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_via_add_swapped_select_strict(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_via_add_swapped_select_strict(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
+; CHECK-NEXT:    ret i32 [[R]]
 ;
   %a = add i32 %x, %y
   %c = icmp ugt i32 %a, %y
@@ -1848,8 +1852,10 @@ define i32 @uadd_sat_via_add_swapped_cmp(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_via_add_swapped_cmp_nonstrict(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_nonstrict(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1
+; CHECK-NEXT:    ret i32 [[R]]
 ;
   %a = add i32 %x, %y
   %c = icmp uge i32 %y, %a
@@ -1870,8 +1876,10 @@ define i32 @uadd_sat_via_add_swapped_cmp_nonstric(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_via_add_swapped_cmp_select_nonstrict(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_select_nonstrict(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
+; CHECK-NEXT:    ret i32 [[R]]
 ;
   %a = add i32 %x, %y
   %c = icmp ult i32 %y, %a
author	Roman Lebedev <lebedev.ri@gmail.com>	2020-12-09 18:04:08 +0300
committer	Tom Stellard <tstellar@redhat.com>	2020-12-14 17:08:57 -0500
commit	8511a8df838f8b2766e56c22af4992d9862835fc (patch)
tree	5d941683ce0aa7be1399925d5d488b630ed788a6
parent	934376da585156885eca2f38f021367a4981d981 (diff)
download	llvm-8511a8df838f8b2766e56c22af4992d9862835fc.tar.gz