author     Craig Topper <craig.topper@sifive.com>    2022-01-09 23:23:45 -0800
committer  Craig Topper <craig.topper@sifive.com>    2022-01-09 23:37:10 -0800
commit     b645bcd98a11c7857cdee51202c64d15b9a4f90d (patch)
tree       a2a67a8455faa2d75cf37335afbf5e959a4d123c
parent     3523876873b25bbb8225fed45e4db79e7d250a4f (diff)
download   llvm-b645bcd98a11c7857cdee51202c64d15b9a4f90d.tar.gz
[RISCV] Generalize (srl (and X, 0xffff), C) -> (srli (slli X, (XLen-16)), (XLen-16) + C) optimization.
This can be generalized to (srl (and X, C2), C) -> (srli (slli X, (XLen-C3)), (XLen-C3) + C), where C2 is a mask with C3 trailing ones. This avoids materializing the constant C2. It is beneficial even when C2 could be selected by ANDI, because the SLLI can become C.SLLI while C.ANDI cannot cover all the immediates ANDI can. This also enables CSE in some cases of i8 sdiv-by-constant codegen.
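The rewrite relies on a simple bit identity: masking with C3 trailing ones and then shifting right by C gives the same result as shifting left by XLen-C3 and then logically shifting right by (XLen-C3)+C, provided C < C3. A minimal standalone sketch of that identity follows (plain C++ outside LLVM, XLen fixed at 64 for illustration, helper names purely illustrative):

// Standalone sketch (not LLVM code): checks the bit identity behind the
// transform, assuming XLen = 64, logical (unsigned) shifts, and C < C3 as
// the new ShAmt < TrailingOnes check requires. Helper names are illustrative.
#include <cassert>
#include <cstdint>

// Original form: (srl (and X, C2), C), with C2 a mask of C3 trailing ones.
uint64_t maskThenShift(uint64_t X, unsigned C3, unsigned C) {
  uint64_t C2 = (1ULL << C3) - 1; // C3 trailing ones (C3 < 64 here)
  return (X & C2) >> C;
}

// Rewritten form: (srli (slli X, XLen - C3), (XLen - C3) + C).
uint64_t shiftPair(uint64_t X, unsigned C3, unsigned C) {
  const unsigned XLen = 64;
  return (X << (XLen - C3)) >> ((XLen - C3) + C);
}

int main() {
  const uint64_t Vals[] = {0, 0x123456789abcdef0ULL, ~0ULL};
  for (uint64_t X : Vals)
    for (unsigned C3 = 1; C3 < 64; ++C3)  // width of the trailing-ones mask
      for (unsigned C = 0; C < C3; ++C)   // original srl shift amount
        assert(maskThenShift(X, C3, C) == shiftPair(X, C3, C));
  return 0;
}

The DAG selection change below additionally bails out when the mask has exactly 32 trailing ones, since that case is already covered by an srliw tablegen pattern.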
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp          | 61
-rw-r--r--  llvm/test/CodeGen/RISCV/alu8.ll                      |  8
-rw-r--r--  llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll     | 32
-rw-r--r--  llvm/test/CodeGen/RISCV/div-by-constant.ll           | 44
-rw-r--r--  llvm/test/CodeGen/RISCV/div.ll                       | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll               | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll               | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zbb.ll                   |  5
-rw-r--r--  llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll  | 42
-rw-r--r--  llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll  | 88
10 files changed, 142 insertions, 178 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5afae3265f6c..a49f685f8fa4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -542,35 +542,38 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SRL: {
- // Optimize (srl (and X, 0xffff), C) ->
- // (srli (slli X, (XLen-16), (XLen-16) + C)
- // Taking into account that the 0xffff may have had lower bits unset by
- // SimplifyDemandedBits. This avoids materializing the 0xffff immediate.
- // This pattern occurs when type legalizing i16 right shifts.
- // FIXME: This could be extended to other AND masks.
+ // Optimize (srl (and X, C2), C) ->
+ // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
+ // Where C2 is a mask with C3 trailing ones.
+ // Taking into account that the C2 may have had lower bits unset by
+ // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+ // This pattern occurs when type legalizing right shifts for types with
+ // less than XLen bits.
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
- if (N1C) {
- uint64_t ShAmt = N1C->getZExtValue();
- SDValue N0 = Node->getOperand(0);
- if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
- uint64_t Mask = N0.getConstantOperandVal(1);
- Mask |= maskTrailingOnes<uint64_t>(ShAmt);
- if (Mask == 0xffff) {
- unsigned LShAmt = Subtarget->getXLen() - 16;
- SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
- CurDAG->getTargetConstant(LShAmt, DL, VT));
- SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
- ReplaceNode(Node, SRLI);
- return;
- }
- }
- }
-
- break;
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+ Mask |= maskTrailingOnes<uint64_t>(ShAmt);
+ if (!isMask_64(Mask))
+ break;
+ unsigned TrailingOnes = countTrailingOnes(Mask);
+ // 32 trailing ones should use srliw via tablegen pattern.
+ if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
+ break;
+ unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(LShAmt, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+ ReplaceNode(Node, SRLI);
+ return;
}
case ISD::SRA: {
// Optimize (sra (sext_inreg X, i16), C) ->
@@ -587,7 +590,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
break;
- uint64_t ShAmt = N1C->getZExtValue();
+ unsigned ShAmt = N1C->getZExtValue();
unsigned ExtSize =
cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
// ExtSize of 32 should use sraiw via tablegen pattern.
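To make the new selection path concrete with the first test change that follows: for (lshr i8 %a, 6) on RV64, the AND mask has been reduced to 0xC0 (192) by SimplifyDemandedBits, as the old andi a0, a0, 192 shows. OR-ing in the 6 trailing ones implied by the shift gives 0xFF, a mask with C3 = 8 trailing ones, so LShAmt = 64 - 8 = 56 and the combined right shift is 56 + 6 = 62 — exactly the slli a0, a0, 56 / srli a0, a0, 62 pair now emitted in alu8.ll.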
diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll
index 8611e752028d..dafa328450d9 100644
--- a/llvm/test/CodeGen/RISCV/alu8.ll
+++ b/llvm/test/CodeGen/RISCV/alu8.ll
@@ -135,14 +135,14 @@ define i8 @slli(i8 %a) nounwind {
define i8 @srli(i8 %a) nounwind {
; RV32I-LABEL: srli:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 192
-; RV32I-NEXT: srli a0, a0, 6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 30
; RV32I-NEXT: ret
;
; RV64I-LABEL: srli:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 192
-; RV64I-NEXT: srli a0, a0, 6
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 62
; RV64I-NEXT: ret
%1 = lshr i8 %a, 6
ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index e7be4070fe02..74c2357fe700 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -212,10 +212,8 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: addi a1, a1, -256
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: slli a0, a0, 19
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: li a0, 16
@@ -247,12 +245,10 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: slli a1, a0, 8
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: addiw a1, a1, -256
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: slliw a1, a0, 8
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 51
+; RV64I-NEXT: srli a0, a0, 59
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 16
@@ -605,10 +601,8 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: lui a1, 2
-; RV32I-NEXT: addi a1, a1, -256
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: slli a0, a0, 19
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_cttz_i16_zero_undef:
@@ -632,12 +626,10 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: slli a1, a0, 8
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lui a1, 2
-; RV64I-NEXT: addiw a1, a1, -256
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: slliw a1, a0, 8
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 51
+; RV64I-NEXT: srli a0, a0, 59
; RV64I-NEXT: ret
%tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
ret i16 %tmp
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 2f13b18d0ac3..f20ff9b6d4f9 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -163,8 +163,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: srli a1, a1, 8
; RV32IM-NEXT: sub a0, a0, a1
-; RV32IM-NEXT: andi a0, a0, 254
-; RV32IM-NEXT: srli a0, a0, 1
+; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a0, a0, 25
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: srli a0, a0, 2
; RV32IM-NEXT: ret
@@ -176,8 +176,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV32IMZB-NEXT: sh2add a1, a2, a1
; RV32IMZB-NEXT: srli a1, a1, 8
; RV32IMZB-NEXT: sub a0, a0, a1
-; RV32IMZB-NEXT: andi a0, a0, 254
-; RV32IMZB-NEXT: srli a0, a0, 1
+; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a0, a0, 25
; RV32IMZB-NEXT: add a0, a0, a1
; RV32IMZB-NEXT: srli a0, a0, 2
; RV32IMZB-NEXT: ret
@@ -189,8 +189,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 8
; RV64IM-NEXT: subw a0, a0, a1
-; RV64IM-NEXT: andi a0, a0, 254
-; RV64IM-NEXT: srli a0, a0, 1
+; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a0, a0, 57
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: srli a0, a0, 2
; RV64IM-NEXT: ret
@@ -202,8 +202,8 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV64IMZB-NEXT: sh2add a1, a2, a1
; RV64IMZB-NEXT: srli a1, a1, 8
; RV64IMZB-NEXT: subw a0, a0, a1
-; RV64IMZB-NEXT: andi a0, a0, 254
-; RV64IMZB-NEXT: srli a0, a0, 1
+; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a0, a0, 57
; RV64IMZB-NEXT: add a0, a0, a1
; RV64IMZB-NEXT: srli a0, a0, 2
; RV64IMZB-NEXT: ret
@@ -618,8 +618,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
ret i8 %1
}
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_add_srai:
; RV32IM: # %bb.0:
@@ -629,9 +627,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: srli a1, a1, 8
; RV32IM-NEXT: add a0, a1, a0
-; RV32IM-NEXT: andi a1, a0, 128
-; RV32IM-NEXT: srli a1, a1, 7
; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a1, a0, 31
; RV32IM-NEXT: srai a0, a0, 26
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
@@ -643,9 +640,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: mul a1, a1, a2
; RV32IMZB-NEXT: srli a1, a1, 8
; RV32IMZB-NEXT: add a0, a1, a0
-; RV32IMZB-NEXT: andi a1, a0, 128
-; RV32IMZB-NEXT: srli a1, a1, 7
; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a1, a0, 31
; RV32IMZB-NEXT: srai a0, a0, 26
; RV32IMZB-NEXT: add a0, a0, a1
; RV32IMZB-NEXT: ret
@@ -658,9 +654,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 8
; RV64IM-NEXT: addw a0, a1, a0
-; RV64IM-NEXT: andi a1, a0, 128
-; RV64IM-NEXT: srli a1, a1, 7
; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a1, a0, 63
; RV64IM-NEXT: srai a0, a0, 58
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
@@ -672,9 +667,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: mul a1, a1, a2
; RV64IMZB-NEXT: srli a1, a1, 8
; RV64IMZB-NEXT: addw a0, a1, a0
-; RV64IMZB-NEXT: andi a1, a0, 128
-; RV64IMZB-NEXT: srli a1, a1, 7
; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a1, a0, 63
; RV64IMZB-NEXT: srai a0, a0, 58
; RV64IMZB-NEXT: add a0, a0, a1
; RV64IMZB-NEXT: ret
@@ -682,8 +676,6 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
ret i8 %1
}
-; FIXME: Can shorten the code after the mul by using slli+srai/srli like the
-; i16 version without Zbb.
define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_sub_srai:
; RV32IM: # %bb.0:
@@ -693,9 +685,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: srli a1, a1, 8
; RV32IM-NEXT: sub a0, a1, a0
-; RV32IM-NEXT: andi a1, a0, 128
-; RV32IM-NEXT: srli a1, a1, 7
; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a1, a0, 31
; RV32IM-NEXT: srai a0, a0, 26
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
@@ -707,9 +698,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: mul a1, a1, a2
; RV32IMZB-NEXT: srli a1, a1, 8
; RV32IMZB-NEXT: sub a0, a1, a0
-; RV32IMZB-NEXT: andi a1, a0, 128
-; RV32IMZB-NEXT: srli a1, a1, 7
; RV32IMZB-NEXT: slli a0, a0, 24
+; RV32IMZB-NEXT: srli a1, a0, 31
; RV32IMZB-NEXT: srai a0, a0, 26
; RV32IMZB-NEXT: add a0, a0, a1
; RV32IMZB-NEXT: ret
@@ -722,9 +712,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 8
; RV64IM-NEXT: subw a0, a1, a0
-; RV64IM-NEXT: andi a1, a0, 128
-; RV64IM-NEXT: srli a1, a1, 7
; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a1, a0, 63
; RV64IM-NEXT: srai a0, a0, 58
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
@@ -736,9 +725,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: mul a1, a1, a2
; RV64IMZB-NEXT: srli a1, a1, 8
; RV64IMZB-NEXT: subw a0, a1, a0
-; RV64IMZB-NEXT: andi a1, a0, 128
-; RV64IMZB-NEXT: srli a1, a1, 7
; RV64IMZB-NEXT: slli a0, a0, 56
+; RV64IMZB-NEXT: srli a1, a0, 63
; RV64IMZB-NEXT: srai a0, a0, 58
; RV64IMZB-NEXT: add a0, a0, a1
; RV64IMZB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index af6855e94ff6..3d4db5bbeb69 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -363,26 +363,26 @@ define i8 @udiv8_constant(i8 %a) nounwind {
define i8 @udiv8_pow2(i8 %a) nounwind {
; RV32I-LABEL: udiv8_pow2:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 248
-; RV32I-NEXT: srli a0, a0, 3
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: ret
;
; RV32IM-LABEL: udiv8_pow2:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: andi a0, a0, 248
-; RV32IM-NEXT: srli a0, a0, 3
+; RV32IM-NEXT: slli a0, a0, 24
+; RV32IM-NEXT: srli a0, a0, 27
; RV32IM-NEXT: ret
;
; RV64I-LABEL: udiv8_pow2:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 248
-; RV64I-NEXT: srli a0, a0, 3
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 59
; RV64I-NEXT: ret
;
; RV64IM-LABEL: udiv8_pow2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: andi a0, a0, 248
-; RV64IM-NEXT: srli a0, a0, 3
+; RV64IM-NEXT: slli a0, a0, 56
+; RV64IM-NEXT: srli a0, a0, 59
; RV64IM-NEXT: ret
%1 = udiv i8 %a, 8
ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index a4fda68ba021..f9cd53bdf965 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -473,20 +473,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
define i8 @srli_i8(i8 %a) nounwind {
; RV32I-LABEL: srli_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 192
-; RV32I-NEXT: srli a0, a0, 6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srli a0, a0, 30
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: srli_i8:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: andi a0, a0, 192
-; RV32ZBB-NEXT: srli a0, a0, 6
+; RV32ZBB-NEXT: slli a0, a0, 24
+; RV32ZBB-NEXT: srli a0, a0, 30
; RV32ZBB-NEXT: ret
;
; RV32ZBP-LABEL: srli_i8:
; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: andi a0, a0, 192
-; RV32ZBP-NEXT: srli a0, a0, 6
+; RV32ZBP-NEXT: slli a0, a0, 24
+; RV32ZBP-NEXT: srli a0, a0, 30
; RV32ZBP-NEXT: ret
%1 = lshr i8 %a, 6
ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index 79a91979f2bd..97093ea0a052 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -544,20 +544,20 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
define i8 @srli_i8(i8 %a) nounwind {
; RV64I-LABEL: srli_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 192
-; RV64I-NEXT: srli a0, a0, 6
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srli a0, a0, 62
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: srli_i8:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: andi a0, a0, 192
-; RV64ZBB-NEXT: srli a0, a0, 6
+; RV64ZBB-NEXT: slli a0, a0, 56
+; RV64ZBB-NEXT: srli a0, a0, 62
; RV64ZBB-NEXT: ret
;
; RV64ZBP-LABEL: srli_i8:
; RV64ZBP: # %bb.0:
-; RV64ZBP-NEXT: andi a0, a0, 192
-; RV64ZBP-NEXT: srli a0, a0, 6
+; RV64ZBP-NEXT: slli a0, a0, 56
+; RV64ZBP-NEXT: srli a0, a0, 62
; RV64ZBP-NEXT: ret
%1 = lshr i8 %a, 6
ret i8 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 0d892e650814..952d4c794275 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -264,9 +264,8 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: lui a1, 524272
-; RV64I-NEXT: and a1, a0, a1
-; RV64I-NEXT: srli a1, a1, 16
+; RV64I-NEXT: slli a1, a0, 33
+; RV64I-NEXT: srli a1, a1, 49
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: not a0, a0
; RV64I-NEXT: srli a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 24a7c78d2666..810fee3464a4 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -380,14 +380,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
;
; RV64-LABEL: test_srem_vec:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi sp, sp, -48
+; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
; RV64-NEXT: lb a0, 12(a0)
; RV64-NEXT: lwu a1, 8(s0)
@@ -407,7 +406,6 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64-NEXT: slli a1, a1, 31
; RV64-NEXT: srai s2, a1, 31
; RV64-NEXT: li a1, 7
-; RV64-NEXT: li s5, 7
; RV64-NEXT: call __moddi3@plt
; RV64-NEXT: mv s3, a0
; RV64-NEXT: li a1, -5
@@ -432,9 +430,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64-NEXT: neg a0, a0
; RV64-NEXT: neg a2, a2
; RV64-NEXT: neg a3, a1
-; RV64-NEXT: slli a4, s5, 32
-; RV64-NEXT: and a3, a3, a4
-; RV64-NEXT: srli a3, a3, 32
+; RV64-NEXT: slli a3, a3, 29
+; RV64-NEXT: srli a3, a3, 61
; RV64-NEXT: sb a3, 12(s0)
; RV64-NEXT: slliw a1, a1, 2
; RV64-NEXT: srli a3, s4, 31
@@ -446,14 +443,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64-NEXT: slli a1, a2, 33
; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: sd a0, 0(s0)
-; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 48
; RV64-NEXT: ret
;
; RV32M-LABEL: test_srem_vec:
@@ -592,10 +588,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64M-NEXT: neg a1, a1
; RV64M-NEXT: neg a4, a2
; RV64M-NEXT: neg a3, a3
-; RV64M-NEXT: li a5, 7
-; RV64M-NEXT: slli a5, a5, 32
-; RV64M-NEXT: and a4, a4, a5
-; RV64M-NEXT: srli a4, a4, 32
+; RV64M-NEXT: slli a4, a4, 29
+; RV64M-NEXT: srli a4, a4, 61
; RV64M-NEXT: sb a4, 12(a0)
; RV64M-NEXT: slliw a2, a2, 2
; RV64M-NEXT: srli a4, a6, 31
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 8c3870ee4070..a7c2cdf12248 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -94,12 +94,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV32-NEXT: addi a1, a1, -585
; RV32-NEXT: call __mulsi3@plt
; RV32-NEXT: slli a1, a0, 26
-; RV32-NEXT: lui a2, 32768
-; RV32-NEXT: addi a3, a2, -2
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: srli a0, a0, 6
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: addi a1, a2, -1
+; RV32-NEXT: lui a1, 32768
+; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: lui a1, 2341
; RV32-NEXT: addi a1, a1, -1755
@@ -116,12 +115,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64-NEXT: addiw a1, a1, -585
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: slli a1, a0, 26
-; RV64-NEXT: lui a2, 32768
-; RV64-NEXT: addiw a3, a2, -2
-; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: slli a0, a0, 37
+; RV64-NEXT: srli a0, a0, 38
; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: addiw a1, a2, -1
+; RV64-NEXT: lui a1, 32768
+; RV64-NEXT: addiw a1, a1, -1
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: lui a1, 2341
; RV64-NEXT: addiw a1, a1, -1755
@@ -136,12 +134,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV32M-NEXT: addi a1, a1, -585
; RV32M-NEXT: mul a0, a0, a1
; RV32M-NEXT: slli a1, a0, 26
-; RV32M-NEXT: lui a2, 32768
-; RV32M-NEXT: addi a3, a2, -2
-; RV32M-NEXT: and a0, a0, a3
-; RV32M-NEXT: srli a0, a0, 1
+; RV32M-NEXT: slli a0, a0, 5
+; RV32M-NEXT: srli a0, a0, 6
; RV32M-NEXT: or a0, a0, a1
-; RV32M-NEXT: addi a1, a2, -1
+; RV32M-NEXT: lui a1, 32768
+; RV32M-NEXT: addi a1, a1, -1
; RV32M-NEXT: and a0, a0, a1
; RV32M-NEXT: lui a1, 2341
; RV32M-NEXT: addi a1, a1, -1755
@@ -154,12 +151,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64M-NEXT: addiw a1, a1, -585
; RV64M-NEXT: mul a0, a0, a1
; RV64M-NEXT: slli a1, a0, 26
-; RV64M-NEXT: lui a2, 32768
-; RV64M-NEXT: addiw a3, a2, -2
-; RV64M-NEXT: and a0, a0, a3
-; RV64M-NEXT: srli a0, a0, 1
+; RV64M-NEXT: slli a0, a0, 37
+; RV64M-NEXT: srli a0, a0, 38
; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: addiw a1, a2, -1
+; RV64M-NEXT: lui a1, 32768
+; RV64M-NEXT: addiw a1, a1, -1
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 2341
; RV64M-NEXT: addiw a1, a1, -1755
@@ -172,12 +168,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV32MV-NEXT: addi a1, a1, -585
; RV32MV-NEXT: mul a0, a0, a1
; RV32MV-NEXT: slli a1, a0, 26
-; RV32MV-NEXT: lui a2, 32768
-; RV32MV-NEXT: addi a3, a2, -2
-; RV32MV-NEXT: and a0, a0, a3
-; RV32MV-NEXT: srli a0, a0, 1
+; RV32MV-NEXT: slli a0, a0, 5
+; RV32MV-NEXT: srli a0, a0, 6
; RV32MV-NEXT: or a0, a0, a1
-; RV32MV-NEXT: addi a1, a2, -1
+; RV32MV-NEXT: lui a1, 32768
+; RV32MV-NEXT: addi a1, a1, -1
; RV32MV-NEXT: and a0, a0, a1
; RV32MV-NEXT: lui a1, 2341
; RV32MV-NEXT: addi a1, a1, -1755
@@ -190,12 +185,11 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64MV-NEXT: addiw a1, a1, -585
; RV64MV-NEXT: mul a0, a0, a1
; RV64MV-NEXT: slli a1, a0, 26
-; RV64MV-NEXT: lui a2, 32768
-; RV64MV-NEXT: addiw a3, a2, -2
-; RV64MV-NEXT: and a0, a0, a3
-; RV64MV-NEXT: srli a0, a0, 1
+; RV64MV-NEXT: slli a0, a0, 37
+; RV64MV-NEXT: srli a0, a0, 38
; RV64MV-NEXT: or a0, a0, a1
-; RV64MV-NEXT: addiw a1, a2, -1
+; RV64MV-NEXT: lui a1, 32768
+; RV64MV-NEXT: addiw a1, a1, -1
; RV64MV-NEXT: and a0, a0, a1
; RV64MV-NEXT: lui a1, 2341
; RV64MV-NEXT: addiw a1, a1, -1755
@@ -358,8 +352,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV32-NEXT: li a1, 683
; RV32-NEXT: call __mulsi3@plt
; RV32-NEXT: slli a1, a0, 10
-; RV32-NEXT: andi a0, a0, 2046
-; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: slli a0, a0, 21
+; RV32-NEXT: srli a0, a0, 22
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: andi a0, a0, 2047
; RV32-NEXT: li a1, 341
@@ -418,8 +412,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64-NEXT: li a1, 683
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: slli a1, a0, 10
-; RV64-NEXT: andi a0, a0, 2046
-; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: slli a0, a0, 53
+; RV64-NEXT: srli a0, a0, 54
; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: li a1, 341
@@ -447,10 +441,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64-NEXT: slli a1, s1, 22
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: sw a0, 0(s0)
-; RV64-NEXT: li a1, -1
-; RV64-NEXT: srli a1, a1, 31
-; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: slli a0, a0, 31
+; RV64-NEXT: srli a0, a0, 63
; RV64-NEXT: sb a0, 4(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -472,8 +464,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV32M-NEXT: li a4, 683
; RV32M-NEXT: mul a2, a2, a4
; RV32M-NEXT: slli a4, a2, 10
-; RV32M-NEXT: andi a2, a2, 2046
-; RV32M-NEXT: srli a2, a2, 1
+; RV32M-NEXT: slli a2, a2, 21
+; RV32M-NEXT: srli a2, a2, 22
; RV32M-NEXT: or a2, a2, a4
; RV32M-NEXT: andi a2, a2, 2047
; RV32M-NEXT: li a4, 341
@@ -517,8 +509,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64M-NEXT: li a4, 683
; RV64M-NEXT: mul a1, a1, a4
; RV64M-NEXT: slli a4, a1, 10
-; RV64M-NEXT: andi a1, a1, 2046
-; RV64M-NEXT: srli a1, a1, 1
+; RV64M-NEXT: slli a1, a1, 53
+; RV64M-NEXT: srli a1, a1, 54
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: andi a1, a1, 2047
; RV64M-NEXT: li a4, 341
@@ -544,10 +536,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64M-NEXT: slli a2, a3, 22
; RV64M-NEXT: sub a1, a1, a2
; RV64M-NEXT: sw a1, 0(a0)
-; RV64M-NEXT: li a2, -1
-; RV64M-NEXT: srli a2, a2, 31
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: srli a1, a1, 32
+; RV64M-NEXT: slli a1, a1, 31
+; RV64M-NEXT: srli a1, a1, 63
; RV64M-NEXT: sb a1, 4(a0)
; RV64M-NEXT: ret
;
@@ -676,10 +666,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64MV-NEXT: slli a2, a2, 22
; RV64MV-NEXT: or a1, a1, a2
; RV64MV-NEXT: sw a1, 0(a0)
-; RV64MV-NEXT: li a2, -1
-; RV64MV-NEXT: srli a2, a2, 31
-; RV64MV-NEXT: and a1, a1, a2
-; RV64MV-NEXT: srli a1, a1, 32
+; RV64MV-NEXT: slli a1, a1, 31
+; RV64MV-NEXT: srli a1, a1, 63
; RV64MV-NEXT: sb a1, 4(a0)
; RV64MV-NEXT: addi sp, sp, 16
; RV64MV-NEXT: ret