summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2018-05-18 23:24:43 +0000
committerTom Stellard <tstellar@redhat.com>2018-05-18 23:24:43 +0000
commita4be93f6ecfd4ad762ed799a59362b5e1e683707 (patch)
treea4564bc57b6cb6ccb00b1a5bc825f1f4df0e68e5
parent58d8ab305be7a5ff165e8c1b34f324c5e7ab7dfd (diff)
downloadllvm-a4be93f6ecfd4ad762ed799a59362b5e1e683707.tar.gz
Merging r330037:
------------------------------------------------------------------------ r330037 | sdardis | 2018-04-13 09:09:07 -0700 (Fri, 13 Apr 2018) | 21 lines [mips] Materialize constants for multiplication Previously, the MIPS backend would alwyas break down constant multiplications into a series of shifts, adds, and subs. This patch changes that so the cost of doing so is estimated. The cost is estimated against worst case constant materialization and retrieving the results from the HI/LO registers. For cases where the value type of the multiplication is not legal, the cost of legalization is estimated and is accounted for before performing the optimization of breaking down the constant This resolves PR36884. Thanks to npl for reporting the issue! Reviewers: abeserminji, smaksimovic Differential Revision: https://reviews.llvm.org/D45316 ------------------------------------------------------------------------ llvm-svn: 332782
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp79
-rw-r--r--llvm/test/CodeGen/Mips/const-mult.ll287
2 files changed, 140 insertions, 226 deletions
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index f7d7e2af85e4..eee5b23117f6 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+ SelectionDAG &DAG,
+ const MipsSubtarget &Subtarget) {
+ // Estimate the number of operations the below transform will turn a
+ // constant multiply into. The number is approximately how many powers
+ // of two summed together that the constant can be broken down into.
+
+ SmallVector<APInt, 16> WorkStack(1, C);
+ unsigned Steps = 0;
+ unsigned BitWidth = C.getBitWidth();
+
+ while (!WorkStack.empty()) {
+ APInt Val = WorkStack.pop_back_val();
+
+ if (Val == 0 || Val == 1)
+ continue;
+
+ if (Val.isPowerOf2()) {
+ ++Steps;
+ continue;
+ }
+
+ APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+ : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+ if ((Val - Floor).ule(Ceil - Val)) {
+ WorkStack.push_back(Floor);
+ WorkStack.push_back(Val - Floor);
+ ++Steps;
+ continue;
+ }
+
+ WorkStack.push_back(Ceil);
+ WorkStack.push_back(Ceil - Val);
+ ++Steps;
+
+ // If we have taken more than 12[1] / 8[2] steps to attempt the
+ // optimization for a native sized value, it is more than likely that this
+ // optimization will make things worse.
+ //
+ // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+ // multiplication requires at least 4 cycles, but another cycle (or two)
+ // to retrieve the result from the HI/LO registers.
+ //
+ // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+ // materialized in 2 instructions, multiplication requires at least 4
+ // cycles, but another cycle (or two) to retrieve the result from the
+ // HI/LO registers.
+
+ if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
+ return false;
+
+ if (Steps > 8 && Subtarget.isABI_O32())
+ return false;
+ }
+
+ // If the value being multiplied is not supported natively, we have to pay
+ // an additional legalization cost, conservatively assume an increase in the
+ // cost of 3 instructions per step. This values for this heuristic were
+ // determined experimentally.
+ unsigned RegisterSize = DAG.getTargetLoweringInfo()
+ .getRegisterType(*DAG.getContext(), VT)
+ .getSizeInBits();
+ Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+ if (Steps > 27)
+ return false;
+
+ return true;
+}
+
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Return 0.
@@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering::DAGCombinerInfo &DCI,
- const MipsSETargetLowering *TL) {
+ const MipsSETargetLowering *TL,
+ const MipsSubtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (!VT.isVector())
+ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
+ C->getAPIntValue(), VT, DAG, Subtarget))
return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
DAG);
@@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
case ISD::MUL:
- return performMULCombine(N, DAG, DCI, this);
+ return performMULCombine(N, DAG, DCI, this, Subtarget);
case ISD::SHL:
Val = performSHLCombine(N, DAG, DCI, Subtarget);
break;
diff --git a/llvm/test/CodeGen/Mips/const-mult.ll b/llvm/test/CodeGen/Mips/const-mult.ll
index 0db52cd67b20..dc4f2f9c862b 100644
--- a/llvm/test/CodeGen/Mips/const-mult.ll
+++ b/llvm/test/CodeGen/Mips/const-mult.ll
@@ -257,77 +257,29 @@ define i32 @mul42949673_32(i32 %a) {
define i64 @mul42949673_64(i64 %a) {
; MIPS32-LABEL: mul42949673_64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $sp, $sp, -8
-; MIPS32-NEXT: .cfi_def_cfa_offset 8
-; MIPS32-NEXT: sw $16, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: .cfi_offset 16, -4
-; MIPS32-NEXT: sll $1, $4, 3
-; MIPS32-NEXT: addu $2, $1, $4
-; MIPS32-NEXT: sltu $1, $2, $1
-; MIPS32-NEXT: srl $3, $4, 29
-; MIPS32-NEXT: sll $6, $5, 3
-; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: lui $1, 655
+; MIPS32-NEXT: ori $1, $1, 23593
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 3
; MIPS32-NEXT: addu $3, $3, $5
-; MIPS32-NEXT: addu $1, $3, $1
-; MIPS32-NEXT: srl $3, $4, 27
-; MIPS32-NEXT: sll $6, $5, 5
-; MIPS32-NEXT: or $3, $6, $3
-; MIPS32-NEXT: addu $1, $3, $1
-; MIPS32-NEXT: sll $3, $4, 5
-; MIPS32-NEXT: addu $2, $3, $2
-; MIPS32-NEXT: sll $6, $4, 10
-; MIPS32-NEXT: subu $7, $6, $2
-; MIPS32-NEXT: sltu $3, $2, $3
-; MIPS32-NEXT: sll $8, $4, 13
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: addu $3, $8, $7
-; MIPS32-NEXT: sll $7, $4, 15
-; MIPS32-NEXT: addu $9, $7, $3
-; MIPS32-NEXT: srl $10, $4, 22
-; MIPS32-NEXT: sll $11, $5, 10
-; MIPS32-NEXT: or $10, $11, $10
-; MIPS32-NEXT: sll $11, $4, 20
-; MIPS32-NEXT: subu $1, $10, $1
-; MIPS32-NEXT: subu $10, $11, $9
-; MIPS32-NEXT: sltu $2, $6, $2
-; MIPS32-NEXT: srl $6, $4, 17
-; MIPS32-NEXT: sll $12, $5, 15
-; MIPS32-NEXT: srl $13, $4, 12
-; MIPS32-NEXT: sll $14, $5, 20
-; MIPS32-NEXT: srl $15, $4, 9
-; MIPS32-NEXT: sll $24, $5, 23
-; MIPS32-NEXT: sll $25, $4, 23
-; MIPS32-NEXT: srl $gp, $4, 7
-; MIPS32-NEXT: sll $16, $5, 25
-; MIPS32-NEXT: or $gp, $16, $gp
-; MIPS32-NEXT: sll $16, $4, 25
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: addu $10, $25, $10
-; MIPS32-NEXT: or $2, $24, $15
-; MIPS32-NEXT: or $13, $14, $13
-; MIPS32-NEXT: or $6, $12, $6
-; MIPS32-NEXT: srl $4, $4, 19
-; MIPS32-NEXT: sll $5, $5, 13
-; MIPS32-NEXT: or $4, $5, $4
-; MIPS32-NEXT: addu $1, $4, $1
-; MIPS32-NEXT: sltu $3, $3, $8
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: addu $1, $6, $1
-; MIPS32-NEXT: sltu $3, $9, $7
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: subu $1, $13, $1
-; MIPS32-NEXT: sltu $3, $11, $9
-; MIPS32-NEXT: subu $1, $1, $3
-; MIPS32-NEXT: addu $1, $2, $1
-; MIPS32-NEXT: addu $2, $16, $10
-; MIPS32-NEXT: sltu $3, $10, $25
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: sltu $3, $2, $16
-; MIPS32-NEXT: addu $1, $gp, $1
-; MIPS32-NEXT: addu $3, $1, $3
-; MIPS32-NEXT: lw $16, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: sll $4, $5, 5
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 10
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 13
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 15
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 20
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 25
+; MIPS32-NEXT: sll $5, $5, 23
+; MIPS32-NEXT: addu $3, $5, $3
+; MIPS32-NEXT: addu $3, $4, $3
; MIPS32-NEXT: jr $ra
-; MIPS32-NEXT: addiu $sp, $sp, 8
+; MIPS32-NEXT: addu $3, $1, $3
;
; MIPS64-LABEL: mul42949673_64:
; MIPS64: # %bb.0: # %entry
@@ -412,115 +364,34 @@ entry:
define i64 @mul22224078_64(i64 %a) {
; MIPS32-LABEL: mul22224078_64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: addiu $sp, $sp, -32
-; MIPS32-NEXT: .cfi_def_cfa_offset 32
-; MIPS32-NEXT: sw $22, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $21, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $20, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $19, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $18, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $17, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: sw $16, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT: .cfi_offset 22, -4
-; MIPS32-NEXT: .cfi_offset 21, -8
-; MIPS32-NEXT: .cfi_offset 20, -12
-; MIPS32-NEXT: .cfi_offset 19, -16
-; MIPS32-NEXT: .cfi_offset 18, -20
-; MIPS32-NEXT: .cfi_offset 17, -24
-; MIPS32-NEXT: .cfi_offset 16, -28
-; MIPS32-NEXT: srl $1, $4, 31
-; MIPS32-NEXT: sll $2, $5, 1
-; MIPS32-NEXT: or $1, $2, $1
-; MIPS32-NEXT: srl $2, $4, 28
-; MIPS32-NEXT: sll $3, $5, 4
-; MIPS32-NEXT: or $2, $3, $2
-; MIPS32-NEXT: subu $1, $2, $1
-; MIPS32-NEXT: sll $2, $4, 1
-; MIPS32-NEXT: sll $3, $4, 4
-; MIPS32-NEXT: sltu $6, $3, $2
-; MIPS32-NEXT: subu $1, $1, $6
-; MIPS32-NEXT: subu $2, $3, $2
-; MIPS32-NEXT: sll $3, $4, 6
-; MIPS32-NEXT: subu $6, $3, $2
-; MIPS32-NEXT: srl $7, $4, 26
-; MIPS32-NEXT: sll $8, $5, 6
-; MIPS32-NEXT: or $7, $8, $7
-; MIPS32-NEXT: sll $8, $4, 8
-; MIPS32-NEXT: subu $1, $7, $1
-; MIPS32-NEXT: subu $7, $8, $6
-; MIPS32-NEXT: sll $9, $4, 10
-; MIPS32-NEXT: subu $10, $9, $7
-; MIPS32-NEXT: sltu $2, $3, $2
-; MIPS32-NEXT: sll $3, $4, 13
-; MIPS32-NEXT: srl $11, $4, 24
-; MIPS32-NEXT: sll $12, $5, 8
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: or $2, $12, $11
-; MIPS32-NEXT: subu $11, $3, $10
-; MIPS32-NEXT: srl $12, $4, 22
-; MIPS32-NEXT: sll $13, $5, 10
-; MIPS32-NEXT: sll $14, $4, 16
-; MIPS32-NEXT: subu $15, $14, $11
-; MIPS32-NEXT: srl $24, $4, 14
-; MIPS32-NEXT: srl $25, $4, 12
-; MIPS32-NEXT: srl $gp, $4, 10
-; MIPS32-NEXT: srl $16, $4, 8
-; MIPS32-NEXT: subu $1, $2, $1
-; MIPS32-NEXT: or $2, $13, $12
-; MIPS32-NEXT: sltu $6, $8, $6
-; MIPS32-NEXT: srl $8, $4, 19
-; MIPS32-NEXT: sll $12, $5, 13
-; MIPS32-NEXT: srl $13, $4, 16
-; MIPS32-NEXT: sll $17, $5, 16
-; MIPS32-NEXT: sll $18, $5, 18
-; MIPS32-NEXT: sll $19, $5, 20
-; MIPS32-NEXT: sll $20, $5, 22
-; MIPS32-NEXT: sll $5, $5, 24
-; MIPS32-NEXT: sll $21, $4, 18
-; MIPS32-NEXT: subu $22, $21, $15
-; MIPS32-NEXT: or $5, $5, $16
-; MIPS32-NEXT: or $gp, $20, $gp
-; MIPS32-NEXT: or $25, $19, $25
-; MIPS32-NEXT: or $24, $18, $24
-; MIPS32-NEXT: or $13, $17, $13
-; MIPS32-NEXT: or $8, $12, $8
-; MIPS32-NEXT: sll $12, $4, 24
-; MIPS32-NEXT: sll $16, $4, 22
-; MIPS32-NEXT: sll $4, $4, 20
-; MIPS32-NEXT: subu $1, $1, $6
-; MIPS32-NEXT: subu $1, $2, $1
-; MIPS32-NEXT: sltu $2, $9, $7
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: subu $1, $8, $1
-; MIPS32-NEXT: sltu $2, $3, $10
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: subu $1, $13, $1
-; MIPS32-NEXT: sltu $2, $14, $11
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: subu $1, $24, $1
-; MIPS32-NEXT: addu $3, $4, $22
-; MIPS32-NEXT: sltu $2, $21, $15
-; MIPS32-NEXT: subu $1, $1, $2
-; MIPS32-NEXT: addu $6, $16, $3
-; MIPS32-NEXT: addu $1, $25, $1
-; MIPS32-NEXT: addu $2, $12, $6
-; MIPS32-NEXT: sltu $7, $2, $12
-; MIPS32-NEXT: sltu $6, $6, $16
-; MIPS32-NEXT: sltu $3, $3, $4
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: addu $1, $gp, $1
-; MIPS32-NEXT: addu $1, $1, $6
-; MIPS32-NEXT: addu $1, $5, $1
-; MIPS32-NEXT: addu $3, $1, $7
-; MIPS32-NEXT: lw $16, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $17, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $18, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $19, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $20, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $21, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: lw $22, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT: lui $1, 339
+; MIPS32-NEXT: ori $1, $1, 7374
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 1
+; MIPS32-NEXT: sll $4, $5, 4
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 6
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 8
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 10
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 13
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 16
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 24
+; MIPS32-NEXT: sll $6, $5, 22
+; MIPS32-NEXT: sll $7, $5, 20
+; MIPS32-NEXT: sll $5, $5, 18
+; MIPS32-NEXT: subu $3, $5, $3
+; MIPS32-NEXT: addu $3, $7, $3
+; MIPS32-NEXT: addu $3, $6, $3
+; MIPS32-NEXT: addu $3, $4, $3
; MIPS32-NEXT: jr $ra
-; MIPS32-NEXT: addiu $sp, $sp, 32
+; MIPS32-NEXT: addu $3, $1, $3
;
; MIPS64-LABEL: mul22224078_64:
; MIPS64: # %bb.0: # %entry
@@ -592,53 +463,23 @@ entry:
define i64 @mul22245375_64(i64 %a) {
; MIPS32-LABEL: mul22245375_64:
; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: sll $1, $4, 12
-; MIPS32-NEXT: addu $2, $1, $4
-; MIPS32-NEXT: sltu $1, $2, $1
-; MIPS32-NEXT: srl $3, $4, 20
-; MIPS32-NEXT: sll $6, $5, 12
-; MIPS32-NEXT: or $3, $6, $3
+; MIPS32-NEXT: lui $1, 339
+; MIPS32-NEXT: ori $1, $1, 28671
+; MIPS32-NEXT: multu $4, $1
+; MIPS32-NEXT: mflo $2
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: sll $3, $5, 12
; MIPS32-NEXT: addu $3, $3, $5
-; MIPS32-NEXT: addu $1, $3, $1
-; MIPS32-NEXT: srl $3, $4, 17
-; MIPS32-NEXT: sll $6, $5, 15
-; MIPS32-NEXT: or $3, $6, $3
-; MIPS32-NEXT: addu $1, $3, $1
-; MIPS32-NEXT: sll $3, $4, 15
-; MIPS32-NEXT: addu $2, $3, $2
-; MIPS32-NEXT: sltu $3, $2, $3
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: srl $3, $4, 14
-; MIPS32-NEXT: sll $6, $5, 18
-; MIPS32-NEXT: or $3, $6, $3
-; MIPS32-NEXT: srl $6, $4, 12
-; MIPS32-NEXT: sll $7, $5, 20
-; MIPS32-NEXT: subu $1, $3, $1
-; MIPS32-NEXT: or $3, $7, $6
-; MIPS32-NEXT: sll $6, $4, 18
-; MIPS32-NEXT: srl $7, $4, 10
-; MIPS32-NEXT: sll $8, $5, 22
-; MIPS32-NEXT: srl $9, $4, 8
-; MIPS32-NEXT: sll $5, $5, 24
-; MIPS32-NEXT: or $5, $5, $9
-; MIPS32-NEXT: or $7, $8, $7
-; MIPS32-NEXT: sltu $8, $6, $2
-; MIPS32-NEXT: sll $9, $4, 24
-; MIPS32-NEXT: sll $10, $4, 22
-; MIPS32-NEXT: sll $4, $4, 20
-; MIPS32-NEXT: subu $1, $1, $8
-; MIPS32-NEXT: addu $1, $3, $1
-; MIPS32-NEXT: subu $2, $6, $2
-; MIPS32-NEXT: addu $2, $4, $2
-; MIPS32-NEXT: sltu $3, $2, $4
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: addu $1, $7, $1
-; MIPS32-NEXT: addu $2, $10, $2
-; MIPS32-NEXT: sltu $3, $2, $10
-; MIPS32-NEXT: addu $1, $1, $3
-; MIPS32-NEXT: addu $1, $5, $1
-; MIPS32-NEXT: addu $2, $9, $2
-; MIPS32-NEXT: sltu $3, $2, $9
+; MIPS32-NEXT: sll $4, $5, 15
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 18
+; MIPS32-NEXT: subu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 20
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 22
+; MIPS32-NEXT: addu $3, $4, $3
+; MIPS32-NEXT: sll $4, $5, 24
+; MIPS32-NEXT: addu $3, $4, $3
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: addu $3, $1, $3
;