summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen.Lippmeier@anu.edu.au <unknown>2009-01-20 05:21:13 +0000
committerBen.Lippmeier@anu.edu.au <unknown>2009-01-20 05:21:13 +0000
commit0df5099f0c2088e2ccbb5b8974a7eae4d77eaa1c (patch)
treeb580fa732ad5f534a283725a5b6998eb07f718c2
parentc6e9a86f03efb4fdef5ed10fcb93b64439fdec60 (diff)
downloadhaskell-0df5099f0c2088e2ccbb5b8974a7eae4d77eaa1c.tar.gz
SPARC NCG: Add support for hardware divide
-rw-r--r--compiler/nativeGen/MachCodeGen.hs123
-rw-r--r--compiler/nativeGen/MachInstrs.hs16
-rw-r--r--compiler/nativeGen/PprMach.hs16
-rw-r--r--compiler/nativeGen/RegAllocInfo.hs6
4 files changed, 148 insertions, 13 deletions
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index 0c9aec67cd..93f31fb429 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -1519,14 +1519,13 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_Sub W32 -> trivialCode W32 (SUB False False) x y
MO_S_MulMayOflo rep -> imulMayOflo rep x y
-{-
- -- ToDo: teach about V8+ SPARC div instructions
- MO_S_Quot W32 -> idiv FSLIT(".div") x y
- MO_S_Rem W32 -> idiv FSLIT(".rem") x y
- MO_U_Quot W32 -> idiv FSLIT(".udiv") x y
- MO_U_Rem W32 -> idiv FSLIT(".urem") x y
--}
+ MO_S_Quot W32 -> idiv True False x y
+ MO_U_Quot W32 -> idiv False False x y
+
+ MO_S_Rem W32 -> irem True x y
+ MO_U_Rem W32 -> irem False x y
+
MO_F_Eq w -> condFltReg EQQ x y
MO_F_Ne w -> condFltReg NE x y
@@ -1559,9 +1558,115 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
-}
other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
where
- --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+ -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+ -- | Generate an integer division instruction.
+ idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned division with a 32 bit numerator,
+ -- we can just clear the Y register.
+ idiv False cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- For _signed_ division with a 32 bit numerator,
+ -- we have to sign extend the numerator into the Y register.
+ idiv True cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
+ , SRA tmp (RIImm (ImmInt 16)) tmp
+
+ , WRY tmp g0
+ , SDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- | Do an integer remainder.
+ --
+ -- NOTE: The SPARC v8 architecture manual says that integer division
+ -- instructions _may_ generate a remainder, depending on the implementation.
+ -- If so it is _recommended_ that the remainder is placed in the Y register.
+ --
+ -- The UltraSparc 2007 manual says Y is _undefined_ after division.
+ --
+ -- The SPARC T2 doesn't store the remainder, not sure about the others.
+ -- It's probably best not to worry about it, and just generate our own
+ -- remainders.
+ --
+ irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned operands:
+ -- Division is between a 64 bit numerator and a 32 bit denominator,
+ -- so we still have to clear the Y register.
+ irem False x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV False a_reg (RIReg b_reg) tmp_reg
+ , UMUL False tmp_reg (RIReg b_reg) tmp_reg
+ , SUB False False a_reg (RIReg tmp_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- For signed operands:
+ -- Make sure to sign extend into the Y register, or the remainder
+ -- will have the wrong sign when the numerator is negative.
+ --
+ -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
+ -- not the full 32. Not sure why this is, something to do with overflow?
+ -- If anyone cares enough about the speed of signed remainder they
+ -- can work it out themselves (then tell me). -- BL 2009/01/20
+
+ irem True x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp1_reg <- getNewRegNat II32
+ tmp2_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , WRY tmp1_reg g0
+
+ , SDIV False a_reg (RIReg b_reg) tmp2_reg
+ , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
+ , SUB False False a_reg (RIReg tmp2_reg) dst]
+
+ return (Any II32 code)
+
- --------------------
imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b = do
(a_reg, a_code) <- getSomeReg a
diff --git a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs
index 7b319afc27..e16dbf3b05 100644
--- a/compiler/nativeGen/MachInstrs.hs
+++ b/compiler/nativeGen/MachInstrs.hs
@@ -558,9 +558,23 @@ is_G_instr instr
-- Int Arithmetic.
| ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
| SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
+
| UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
| SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
- | RDY Reg -- move contents of Y register to reg
+
+
+ -- The SPARC divide instructions perform 64bit by 32bit division
+ -- The Y register is xored into the first operand.
+
+ -- On _some implementations_ the Y register is overwritten by
+ -- the remainder, so we have to make sure it is 0 each time.
+
+ -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2
+ | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst
+ | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst
+
+ | RDY Reg -- move contents of Y register to reg
+ | WRY Reg Reg -- Y <- src1 `xor` src2
-- Simple bit-twiddling.
| AND Bool Reg RI Reg -- cc?, src1, src2, dst
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
index eb880fca6a..199fd36498 100644
--- a/compiler/nativeGen/PprMach.hs
+++ b/compiler/nativeGen/PprMach.hs
@@ -2002,9 +2002,19 @@ pprInstr (SLL reg1 ri reg2) = pprRegRIReg (sLit "sll") False reg1 ri reg2
pprInstr (SRL reg1 ri reg2) = pprRegRIReg (sLit "srl") False reg1 ri reg2
pprInstr (SRA reg1 ri reg2) = pprRegRIReg (sLit "sra") False reg1 ri reg2
-pprInstr (RDY rd) = ptext (sLit "\trd\t%y,") <> pprReg rd
-pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2
-pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2
+pprInstr (RDY rd) = ptext (sLit "\trd\t%y,") <> pprReg rd
+pprInstr (WRY reg1 reg2)
+ = ptext (sLit "\twr\t")
+ <> pprReg reg1
+ <> char ','
+ <> pprReg reg2
+ <> char ','
+ <> ptext (sLit "%y")
+
+pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2
+pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2
+pprInstr (SDIV b reg1 ri reg2) = pprRegRIReg (sLit "sdiv") b reg1 ri reg2
+pprInstr (UDIV b reg1 ri reg2) = pprRegRIReg (sLit "udiv") b reg1 ri reg2
pprInstr (SETHI imm reg)
= hcat [
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
index 1b8bdb6c9d..deb5f34e21 100644
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ b/compiler/nativeGen/RegAllocInfo.hs
@@ -295,7 +295,10 @@ regUsage instr = case instr of
SUB x cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
UMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
SMUL cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UDIV cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SDIV cc r1 ar r2 -> usage (r1 : regRI ar, [r2])
RDY rd -> usage ([], [rd])
+ WRY r1 r2 -> usage ([r1, r2], [])
AND b r1 ar r2 -> usage (r1 : regRI ar, [r2])
ANDN b r1 ar r2 -> usage (r1 : regRI ar, [r2])
OR b r1 ar r2 -> usage (r1 : regRI ar, [r2])
@@ -669,7 +672,10 @@ patchRegs instr env = case instr of
SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
+ UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
+ SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
RDY rd -> RDY (env rd)
+ WRY r1 r2 -> WRY (env r1) (env r2)
AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)