summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@sifive.com>, 2022-01-09 17:28:04 -0800
committer: Craig Topper <craig.topper@sifive.com>, 2022-01-09 17:48:05 -0800
commit: a500f7f48fdb64def09cb3b7487759b0972f2347 (patch)
tree: 438e4b28ba1df5863dff424d184eb8ec9c0dc105
parent: 22430ede7e497d67c87fe900cc685690c5c3ec0c (diff)
download: llvm-a500f7f48fdb64def09cb3b7487759b0972f2347.tar.gz
[SelectionDAG] Add FP_TO_UINT_SAT/FP_TO_SINT_SAT to computeKnownBits/computeNumSignBits.
These nodes saturate their result to the range of their saturating VT. We can use this information to know that the bits past the saturating VT's width are all zeros (FP_TO_UINT_SAT) or all sign bits (FP_TO_SINT_SAT).

I think we might only have test coverage for the unsigned case. I'll verify and add tests.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D116870
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10
-rw-r--r-- llvm/test/CodeGen/AArch64/fcvt_combine.ll | 1
-rw-r--r-- llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll | 12
-rw-r--r-- llvm/test/CodeGen/RISCV/double-convert.ll | 30
-rw-r--r-- llvm/test/CodeGen/RISCV/float-convert.ll | 28
-rw-r--r-- llvm/test/CodeGen/RISCV/half-convert.ll | 56
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll | 119
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll | 1
8 files changed, 87 insertions, 170 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2afef122ab5c..2d1dbcb1fbe3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3584,6 +3584,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smin(Known, Known2);
break;
}
+ case ISD::FP_TO_UINT_SAT: {
+ // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() == 1) {
// The boolean result conforms to getBooleanContents.
@@ -3860,6 +3866,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
}
+ case ISD::FP_TO_SINT_SAT:
+ // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
case ISD::SIGN_EXTEND:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index eecf92a26a5c..24713c444024 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -631,7 +631,6 @@ define <4 x i32> @test_extrasat(<4 x float> %f) {
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: ret
%mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%vcvt.i = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %mul.i)
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 26ee0694ecea..e1148a51751f 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -22,8 +22,7 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: csinc w0, w8, wzr, lo
; CHECK-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
ret i1 %x
@@ -172,8 +171,7 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: csinc w0, w8, wzr, lo
; CHECK-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
ret i1 %x
@@ -323,16 +321,14 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvtzu w8, s0
; CHECK-CVT-NEXT: cmp w8, #1
-; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT: and w0, w8, #0x1
+; CHECK-CVT-NEXT: csinc w0, w8, wzr, lo
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_i1_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: cmp w8, #1
-; CHECK-FP16-NEXT: csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT: and w0, w8, #0x1
+; CHECK-FP16-NEXT: csinc w0, w8, wzr, lo
; CHECK-FP16-NEXT: ret
%x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
ret i1 %x
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 8cbab1e3a639..59e626df21fd 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -1602,7 +1602,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV32IFD-NEXT: bnez a0, .LBB26_2
; RV32IFD-NEXT: # %bb.1: # %start
; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB26_3
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB26_2:
; RV32IFD-NEXT: lui a0, %hi(.LCPI26_0)
; RV32IFD-NEXT: fld ft1, %lo(.LCPI26_0)(a0)
@@ -1611,9 +1612,6 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV32IFD-NEXT: fmax.d ft0, ft0, ft1
; RV32IFD-NEXT: fmin.d ft0, ft0, ft2
; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT: .LBB26_3: # %start
-; RV32IFD-NEXT: slli a0, a0, 16
-; RV32IFD-NEXT: srai a0, a0, 16
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -1624,7 +1622,7 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV64IFD-NEXT: bnez a0, .LBB26_2
; RV64IFD-NEXT: # %bb.1: # %start
; RV64IFD-NEXT: li a0, 0
-; RV64IFD-NEXT: j .LBB26_3
+; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB26_2:
; RV64IFD-NEXT: lui a0, %hi(.LCPI26_0)
; RV64IFD-NEXT: fld ft1, %lo(.LCPI26_0)(a0)
@@ -1633,9 +1631,6 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV64IFD-NEXT: fmax.d ft0, ft0, ft1
; RV64IFD-NEXT: fmin.d ft0, ft0, ft2
; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT: .LBB26_3: # %start
-; RV64IFD-NEXT: slli a0, a0, 48
-; RV64IFD-NEXT: srai a0, a0, 48
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1798,9 +1793,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
; RV32IFD-NEXT: fmax.d ft0, ft0, ft2
; RV32IFD-NEXT: fmin.d ft0, ft0, ft1
; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
-; RV32IFD-NEXT: lui a1, 16
-; RV32IFD-NEXT: addi a1, a1, -1
-; RV32IFD-NEXT: and a0, a0, a1
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -1813,9 +1805,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
; RV64IFD-NEXT: fmax.d ft1, ft1, ft2
; RV64IFD-NEXT: fmin.d ft0, ft1, ft0
; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
-; RV64IFD-NEXT: lui a1, 16
-; RV64IFD-NEXT: addiw a1, a1, -1
-; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -1956,7 +1945,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV32IFD-NEXT: bnez a0, .LBB30_2
; RV32IFD-NEXT: # %bb.1: # %start
; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB30_3
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
; RV32IFD-NEXT: .LBB30_2:
; RV32IFD-NEXT: lui a0, %hi(.LCPI30_0)
; RV32IFD-NEXT: fld ft1, %lo(.LCPI30_0)(a0)
@@ -1965,9 +1955,6 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV32IFD-NEXT: fmax.d ft0, ft0, ft1
; RV32IFD-NEXT: fmin.d ft0, ft0, ft2
; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT: .LBB30_3: # %start
-; RV32IFD-NEXT: slli a0, a0, 24
-; RV32IFD-NEXT: srai a0, a0, 24
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -1978,7 +1965,7 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV64IFD-NEXT: bnez a0, .LBB30_2
; RV64IFD-NEXT: # %bb.1: # %start
; RV64IFD-NEXT: li a0, 0
-; RV64IFD-NEXT: j .LBB30_3
+; RV64IFD-NEXT: ret
; RV64IFD-NEXT: .LBB30_2:
; RV64IFD-NEXT: lui a0, %hi(.LCPI30_0)
; RV64IFD-NEXT: fld ft1, %lo(.LCPI30_0)(a0)
@@ -1987,9 +1974,6 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV64IFD-NEXT: fmax.d ft0, ft0, ft1
; RV64IFD-NEXT: fmin.d ft0, ft0, ft2
; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT: .LBB30_3: # %start
-; RV64IFD-NEXT: slli a0, a0, 56
-; RV64IFD-NEXT: srai a0, a0, 56
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -2154,7 +2138,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
; RV32IFD-NEXT: fmax.d ft0, ft0, ft2
; RV32IFD-NEXT: fmin.d ft0, ft0, ft1
; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
-; RV32IFD-NEXT: andi a0, a0, 255
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -2167,7 +2150,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
; RV64IFD-NEXT: fmax.d ft1, ft1, ft2
; RV64IFD-NEXT: fmin.d ft0, ft1, ft0
; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
-; RV64IFD-NEXT: andi a0, a0, 255
; RV64IFD-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index c820b553757c..ecc715775ad9 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -1385,7 +1385,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV32IF-NEXT: bnez a0, .LBB24_2
; RV32IF-NEXT: # %bb.1: # %start
; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB24_3
+; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB24_2:
; RV32IF-NEXT: lui a0, %hi(.LCPI24_0)
; RV32IF-NEXT: flw ft1, %lo(.LCPI24_0)(a0)
@@ -1394,9 +1394,6 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV32IF-NEXT: fmax.s ft0, ft0, ft1
; RV32IF-NEXT: fmin.s ft0, ft0, ft2
; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT: .LBB24_3: # %start
-; RV32IF-NEXT: slli a0, a0, 16
-; RV32IF-NEXT: srai a0, a0, 16
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_w_s_sat_i16:
@@ -1406,7 +1403,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV64IF-NEXT: bnez a0, .LBB24_2
; RV64IF-NEXT: # %bb.1: # %start
; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: j .LBB24_3
+; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB24_2:
; RV64IF-NEXT: lui a0, %hi(.LCPI24_0)
; RV64IF-NEXT: flw ft1, %lo(.LCPI24_0)(a0)
@@ -1415,9 +1412,6 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV64IF-NEXT: fmax.s ft0, ft0, ft1
; RV64IF-NEXT: fmin.s ft0, ft0, ft2
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT: .LBB24_3: # %start
-; RV64IF-NEXT: slli a0, a0, 48
-; RV64IF-NEXT: srai a0, a0, 48
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1561,9 +1555,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(float %a) nounwind {
; RV32IF-NEXT: fmax.s ft1, ft1, ft2
; RV32IF-NEXT: fmin.s ft0, ft1, ft0
; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IF-NEXT: lui a1, 16
-; RV32IF-NEXT: addi a1, a1, -1
-; RV32IF-NEXT: and a0, a0, a1
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_wu_s_sat_i16:
@@ -1575,9 +1566,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(float %a) nounwind {
; RV64IF-NEXT: fmax.s ft1, ft1, ft2
; RV64IF-NEXT: fmin.s ft0, ft1, ft0
; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IF-NEXT: lui a1, 16
-; RV64IF-NEXT: addiw a1, a1, -1
-; RV64IF-NEXT: and a0, a0, a1
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -1701,7 +1689,7 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV32IF-NEXT: bnez a0, .LBB28_2
; RV32IF-NEXT: # %bb.1: # %start
; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB28_3
+; RV32IF-NEXT: ret
; RV32IF-NEXT: .LBB28_2:
; RV32IF-NEXT: lui a0, %hi(.LCPI28_0)
; RV32IF-NEXT: flw ft1, %lo(.LCPI28_0)(a0)
@@ -1710,9 +1698,6 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV32IF-NEXT: fmax.s ft0, ft0, ft1
; RV32IF-NEXT: fmin.s ft0, ft0, ft2
; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT: .LBB28_3: # %start
-; RV32IF-NEXT: slli a0, a0, 24
-; RV32IF-NEXT: srai a0, a0, 24
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_w_s_sat_i8:
@@ -1722,7 +1707,7 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV64IF-NEXT: bnez a0, .LBB28_2
; RV64IF-NEXT: # %bb.1: # %start
; RV64IF-NEXT: li a0, 0
-; RV64IF-NEXT: j .LBB28_3
+; RV64IF-NEXT: ret
; RV64IF-NEXT: .LBB28_2:
; RV64IF-NEXT: lui a0, %hi(.LCPI28_0)
; RV64IF-NEXT: flw ft1, %lo(.LCPI28_0)(a0)
@@ -1731,9 +1716,6 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
; RV64IF-NEXT: fmax.s ft0, ft0, ft1
; RV64IF-NEXT: fmin.s ft0, ft0, ft2
; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT: .LBB28_3: # %start
-; RV64IF-NEXT: slli a0, a0, 56
-; RV64IF-NEXT: srai a0, a0, 56
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -1875,7 +1857,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(float %a) nounwind {
; RV32IF-NEXT: fmax.s ft1, ft1, ft2
; RV32IF-NEXT: fmin.s ft0, ft1, ft0
; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IF-NEXT: andi a0, a0, 255
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_wu_s_sat_i8:
@@ -1887,7 +1868,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(float %a) nounwind {
; RV64IF-NEXT: fmax.s ft1, ft1, ft2
; RV64IF-NEXT: fmin.s ft0, ft1, ft0
; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IF-NEXT: andi a0, a0, 255
; RV64IF-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index cd7fa86f9a2f..6dcd7c79d483 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2464,7 +2464,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IZFH-NEXT: bnez a0, .LBB32_2
; RV32IZFH-NEXT: # %bb.1: # %start
; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB32_3
+; RV32IZFH-NEXT: ret
; RV32IZFH-NEXT: .LBB32_2:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_0)
; RV32IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
@@ -2473,9 +2473,6 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1
; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2
; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT: .LBB32_3: # %start
-; RV32IZFH-NEXT: slli a0, a0, 16
-; RV32IZFH-NEXT: srai a0, a0, 16
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2485,7 +2482,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IZFH-NEXT: bnez a0, .LBB32_2
; RV64IZFH-NEXT: # %bb.1: # %start
; RV64IZFH-NEXT: li a0, 0
-; RV64IZFH-NEXT: j .LBB32_3
+; RV64IZFH-NEXT: ret
; RV64IZFH-NEXT: .LBB32_2:
; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_0)
; RV64IZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
@@ -2494,9 +2491,6 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1
; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2
; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT: .LBB32_3: # %start
-; RV64IZFH-NEXT: slli a0, a0, 48
-; RV64IZFH-NEXT: srai a0, a0, 48
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2506,7 +2500,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IDZFH-NEXT: bnez a0, .LBB32_2
; RV32IDZFH-NEXT: # %bb.1: # %start
; RV32IDZFH-NEXT: li a0, 0
-; RV32IDZFH-NEXT: j .LBB32_3
+; RV32IDZFH-NEXT: ret
; RV32IDZFH-NEXT: .LBB32_2:
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_0)
; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
@@ -2515,9 +2509,6 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1
; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2
; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT: .LBB32_3: # %start
-; RV32IDZFH-NEXT: slli a0, a0, 16
-; RV32IDZFH-NEXT: srai a0, a0, 16
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2527,7 +2518,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IDZFH-NEXT: bnez a0, .LBB32_2
; RV64IDZFH-NEXT: # %bb.1: # %start
; RV64IDZFH-NEXT: li a0, 0
-; RV64IDZFH-NEXT: j .LBB32_3
+; RV64IDZFH-NEXT: ret
; RV64IDZFH-NEXT: .LBB32_2:
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_0)
; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI32_0)(a0)
@@ -2536,9 +2527,6 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1
; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2
; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT: .LBB32_3: # %start
-; RV64IDZFH-NEXT: slli a0, a0, 48
-; RV64IDZFH-NEXT: srai a0, a0, 48
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -2705,9 +2693,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV32IZFH-NEXT: fmax.s ft1, ft1, ft2
; RV32IZFH-NEXT: fmin.s ft0, ft1, ft0
; RV32IZFH-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IZFH-NEXT: lui a1, 16
-; RV32IZFH-NEXT: addi a1, a1, -1
-; RV32IZFH-NEXT: and a0, a0, a1
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2719,9 +2704,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV64IZFH-NEXT: fmax.s ft1, ft1, ft2
; RV64IZFH-NEXT: fmin.s ft0, ft1, ft0
; RV64IZFH-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IZFH-NEXT: lui a1, 16
-; RV64IZFH-NEXT: addiw a1, a1, -1
-; RV64IZFH-NEXT: and a0, a0, a1
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2733,9 +2715,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV32IDZFH-NEXT: fmax.s ft1, ft1, ft2
; RV32IDZFH-NEXT: fmin.s ft0, ft1, ft0
; RV32IDZFH-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IDZFH-NEXT: lui a1, 16
-; RV32IDZFH-NEXT: addi a1, a1, -1
-; RV32IDZFH-NEXT: and a0, a0, a1
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2747,9 +2726,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV64IDZFH-NEXT: fmax.s ft1, ft1, ft2
; RV64IDZFH-NEXT: fmin.s ft0, ft1, ft0
; RV64IDZFH-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IDZFH-NEXT: lui a1, 16
-; RV64IDZFH-NEXT: addiw a1, a1, -1
-; RV64IDZFH-NEXT: and a0, a0, a1
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -2896,7 +2872,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32IZFH-NEXT: bnez a0, .LBB36_2
; RV32IZFH-NEXT: # %bb.1: # %start
; RV32IZFH-NEXT: li a0, 0
-; RV32IZFH-NEXT: j .LBB36_3
+; RV32IZFH-NEXT: ret
; RV32IZFH-NEXT: .LBB36_2:
; RV32IZFH-NEXT: lui a0, %hi(.LCPI36_0)
; RV32IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
@@ -2905,9 +2881,6 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32IZFH-NEXT: fmax.s ft0, ft0, ft1
; RV32IZFH-NEXT: fmin.s ft0, ft0, ft2
; RV32IZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT: .LBB36_3: # %start
-; RV32IZFH-NEXT: slli a0, a0, 24
-; RV32IZFH-NEXT: srai a0, a0, 24
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2917,7 +2890,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64IZFH-NEXT: bnez a0, .LBB36_2
; RV64IZFH-NEXT: # %bb.1: # %start
; RV64IZFH-NEXT: li a0, 0
-; RV64IZFH-NEXT: j .LBB36_3
+; RV64IZFH-NEXT: ret
; RV64IZFH-NEXT: .LBB36_2:
; RV64IZFH-NEXT: lui a0, %hi(.LCPI36_0)
; RV64IZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
@@ -2926,9 +2899,6 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64IZFH-NEXT: fmax.s ft0, ft0, ft1
; RV64IZFH-NEXT: fmin.s ft0, ft0, ft2
; RV64IZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT: .LBB36_3: # %start
-; RV64IZFH-NEXT: slli a0, a0, 56
-; RV64IZFH-NEXT: srai a0, a0, 56
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2938,7 +2908,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32IDZFH-NEXT: bnez a0, .LBB36_2
; RV32IDZFH-NEXT: # %bb.1: # %start
; RV32IDZFH-NEXT: li a0, 0
-; RV32IDZFH-NEXT: j .LBB36_3
+; RV32IDZFH-NEXT: ret
; RV32IDZFH-NEXT: .LBB36_2:
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI36_0)
; RV32IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
@@ -2947,9 +2917,6 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32IDZFH-NEXT: fmax.s ft0, ft0, ft1
; RV32IDZFH-NEXT: fmin.s ft0, ft0, ft2
; RV32IDZFH-NEXT: fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT: .LBB36_3: # %start
-; RV32IDZFH-NEXT: slli a0, a0, 24
-; RV32IDZFH-NEXT: srai a0, a0, 24
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2959,7 +2926,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64IDZFH-NEXT: bnez a0, .LBB36_2
; RV64IDZFH-NEXT: # %bb.1: # %start
; RV64IDZFH-NEXT: li a0, 0
-; RV64IDZFH-NEXT: j .LBB36_3
+; RV64IDZFH-NEXT: ret
; RV64IDZFH-NEXT: .LBB36_2:
; RV64IDZFH-NEXT: lui a0, %hi(.LCPI36_0)
; RV64IDZFH-NEXT: flw ft1, %lo(.LCPI36_0)(a0)
@@ -2968,9 +2935,6 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64IDZFH-NEXT: fmax.s ft0, ft0, ft1
; RV64IDZFH-NEXT: fmin.s ft0, ft0, ft2
; RV64IDZFH-NEXT: fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT: .LBB36_3: # %start
-; RV64IDZFH-NEXT: slli a0, a0, 56
-; RV64IDZFH-NEXT: srai a0, a0, 56
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -3136,7 +3100,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV32IZFH-NEXT: fmax.s ft1, ft1, ft2
; RV32IZFH-NEXT: fmin.s ft0, ft1, ft0
; RV32IZFH-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IZFH-NEXT: andi a0, a0, 255
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3148,7 +3111,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV64IZFH-NEXT: fmax.s ft1, ft1, ft2
; RV64IZFH-NEXT: fmin.s ft0, ft1, ft0
; RV64IZFH-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IZFH-NEXT: andi a0, a0, 255
; RV64IZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3160,7 +3122,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV32IDZFH-NEXT: fmax.s ft1, ft1, ft2
; RV32IDZFH-NEXT: fmin.s ft0, ft1, ft0
; RV32IDZFH-NEXT: fcvt.wu.s a0, ft0, rtz
-; RV32IDZFH-NEXT: andi a0, a0, 255
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3172,7 +3133,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV64IDZFH-NEXT: fmax.s ft1, ft1, ft2
; RV64IDZFH-NEXT: fmin.s ft0, ft1, ft0
; RV64IDZFH-NEXT: fcvt.lu.s a0, ft0, rtz
-; RV64IDZFH-NEXT: andi a0, a0, 255
; RV64IDZFH-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index 646acb723dc4..322f15ed37a7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -3796,82 +3796,73 @@ define arm_aapcs_vfpcc <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
; CHECK-LABEL: test_unsigned_v8f16_v8i19:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: vldr s4, .LCPI46_0
-; CHECK-NEXT: vcvtb.f32.f16 s8, s1
-; CHECK-NEXT: vcvtt.f32.f16 s12, s1
-; CHECK-NEXT: vcvtt.f32.f16 s1, s3
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vldr s6, .LCPI46_1
-; CHECK-NEXT: vmaxnm.f32 s1, s1, s4
-; CHECK-NEXT: vcvtb.f32.f16 s10, s2
-; CHECK-NEXT: vcvtb.f32.f16 s14, s0
-; CHECK-NEXT: vminnm.f32 s1, s1, s6
+; CHECK-NEXT: vcvtb.f32.f16 s8, s0
+; CHECK-NEXT: vcvtb.f32.f16 s12, s2
+; CHECK-NEXT: vcvtb.f32.f16 s10, s1
+; CHECK-NEXT: vcvtt.f32.f16 s14, s1
+; CHECK-NEXT: vcvtb.f32.f16 s1, s3
; CHECK-NEXT: vcvtt.f32.f16 s0, s0
-; CHECK-NEXT: vcvt.u32.f32 s1, s1
; CHECK-NEXT: vcvtt.f32.f16 s2, s2
-; CHECK-NEXT: vcvtb.f32.f16 s3, s3
-; CHECK-NEXT: vmaxnm.f32 s8, s8, s4
-; CHECK-NEXT: vmaxnm.f32 s10, s10, s4
-; CHECK-NEXT: vmaxnm.f32 s12, s12, s4
-; CHECK-NEXT: vmaxnm.f32 s14, s14, s4
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-NEXT: vmaxnm.f32 s4, s3, s4
-; CHECK-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NEXT: vldr s4, .LCPI46_0
+; CHECK-NEXT: vcvtt.f32.f16 s3, s3
+; CHECK-NEXT: vmaxnm.f32 s8, s8, s6
+; CHECK-NEXT: vmaxnm.f32 s10, s10, s6
+; CHECK-NEXT: vmaxnm.f32 s0, s0, s6
+; CHECK-NEXT: vmaxnm.f32 s12, s12, s6
+; CHECK-NEXT: vmaxnm.f32 s14, s14, s6
+; CHECK-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-NEXT: vmaxnm.f32 s1, s1, s6
+; CHECK-NEXT: vmaxnm.f32 s6, s3, s6
+; CHECK-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-NEXT: vminnm.f32 s10, s10, s4
+; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vminnm.f32 s12, s12, s4
+; CHECK-NEXT: vminnm.f32 s14, s14, s4
+; CHECK-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-NEXT: vminnm.f32 s1, s1, s4
+; CHECK-NEXT: vminnm.f32 s4, s6, s4
+; CHECK-NEXT: vcvt.u32.f32 s1, s1
; CHECK-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: vmov r1, s1
-; CHECK-NEXT: vminnm.f32 s14, s14, s6
; CHECK-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-NEXT: vminnm.f32 s10, s10, s6
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: vminnm.f32 s12, s12, s6
; CHECK-NEXT: vcvt.u32.f32 s14, s14
-; CHECK-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-NEXT: vcvt.u32.f32 s10, s10
; CHECK-NEXT: vcvt.u32.f32 s12, s12
+; CHECK-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEXT: vcvt.u32.f32 s10, s10
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-NEXT: vmov r12, s2
-; CHECK-NEXT: vmov lr, s0
-; CHECK-NEXT: lsrs r2, r1, #11
-; CHECK-NEXT: strb r2, [r0, #18]
-; CHECK-NEXT: vmov r2, s4
-; CHECK-NEXT: bfc r12, #19, #13
-; CHECK-NEXT: bfc lr, #19, #13
-; CHECK-NEXT: bfc r2, #19, #13
-; CHECK-NEXT: lsrs r3, r2, #14
-; CHECK-NEXT: orr.w r1, r3, r1, lsl #5
-; CHECK-NEXT: lsr.w r3, r12, #1
-; CHECK-NEXT: orr.w r2, r3, r2, lsl #18
-; CHECK-NEXT: vmov r3, s14
-; CHECK-NEXT: strh r1, [r0, #16]
-; CHECK-NEXT: vmov r1, s10
-; CHECK-NEXT: str r2, [r0, #12]
-; CHECK-NEXT: bfc r3, #19, #13
-; CHECK-NEXT: orr.w r3, r3, lr, lsl #19
-; CHECK-NEXT: str r3, [r0]
-; CHECK-NEXT: vmov r3, s12
-; CHECK-NEXT: bfc r1, #19, #13
-; CHECK-NEXT: bfc r3, #19, #13
-; CHECK-NEXT: lsrs r2, r3, #7
-; CHECK-NEXT: orr.w r1, r2, r1, lsl #12
-; CHECK-NEXT: orr.w r1, r1, r12, lsl #31
-; CHECK-NEXT: str r1, [r0, #8]
-; CHECK-NEXT: vmov r1, s8
-; CHECK-NEXT: lsr.w r2, lr, #13
-; CHECK-NEXT: bfc r1, #19, #13
-; CHECK-NEXT: orr.w r1, r2, r1, lsl #6
-; CHECK-NEXT: orr.w r1, r1, r3, lsl #25
-; CHECK-NEXT: str r1, [r0, #4]
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: vmov r4, s12
+; CHECK-NEXT: vmov r5, s10
+; CHECK-NEXT: lsrs r2, r1, #14
+; CHECK-NEXT: orr.w r12, r2, r3, lsl #5
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: strh.w r12, [r0, #16]
+; CHECK-NEXT: lsrs r2, r3, #1
+; CHECK-NEXT: orr.w lr, r2, r1, lsl #18
+; CHECK-NEXT: vmov r2, s14
+; CHECK-NEXT: lsrs r1, r2, #7
+; CHECK-NEXT: orr.w r1, r1, r4, lsl #12
+; CHECK-NEXT: orr.w r1, r1, r3, lsl #31
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: lsrs r4, r3, #13
+; CHECK-NEXT: orr.w r4, r4, r5, lsl #6
+; CHECK-NEXT: orr.w r2, r4, r2, lsl #25
+; CHECK-NEXT: vmov r4, s8
+; CHECK-NEXT: orr.w r3, r4, r3, lsl #19
+; CHECK-NEXT: strd r3, r2, [r0]
+; CHECK-NEXT: strd r1, lr, [r0, #8]
+; CHECK-NEXT: lsr.w r1, r12, #16
+; CHECK-NEXT: strb r1, [r0, #18]
+; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI46_0:
-; CHECK-NEXT: .long 0x00000000 @ float 0
-; CHECK-NEXT: .LCPI46_1:
; CHECK-NEXT: .long 0x48ffffe0 @ float 524287
+; CHECK-NEXT: .LCPI46_1:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f)
ret <8 x i19> %x
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
index 2237d1b93628..083a1bc0e3db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
@@ -1114,7 +1114,6 @@ define arm_aapcs_vfpcc <4 x i32> @vcvt_sat_u32_7_24(<4 x float> %0) {
; CHECK-NEXT: vmov.i32 q1, #0xffffff
; CHECK-NEXT: vcvt.u32.f32 q0, q0, #23
; CHECK-NEXT: vmin.u32 q0, q0, q1
-; CHECK-NEXT: vbic.i32 q0, #0xff000000
; CHECK-NEXT: bx lr
%2 = fmul fast <4 x float> %0, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000>
%3 = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %2)