path: root/compiler
author    Ben.Lippmeier@anu.edu.au <unknown>    2009-02-15 05:51:58 +0000
committer Ben.Lippmeier@anu.edu.au <unknown>    2009-02-15 05:51:58 +0000
commit    b04a210e26ca57242fd052f2aa91011a80b76299 (patch)
tree      6f26993cc3ef37f4555087bd80da4195edcda4ed /compiler
parent    77ed23d51b968505b3ad8541c075657ae94f0ea3 (diff)
download  haskell-b04a210e26ca57242fd052f2aa91011a80b76299.tar.gz
NCG: Split up the native code generator into arch specific modules
- nativeGen/Instruction defines a type class for a generic instruction set. Each of the instruction sets we have (X86, PPC and SPARC) is an instance of it.
- The register allocators use this type class when they need info about a certain register or instruction, such as regUsage, mkSpillInstr, mkJumpInstr and patchRegs.
- nativeGen/Platform defines some data types enumerating the architectures and operating systems supported by the native code generator.
- DynFlags now keeps track of the current build platform, and the PositionIndependentCode module uses this to decide what to do instead of relying on #ifdefs.
- It's not totally retargetable yet. Some info about the build target is still hardwired, but I've tried to contain most of it to a single module, TargetReg.
- Moved the SPILL and RELOAD instructions into LiveInstr.
- Reg and RegClass now have their own modules, and are shared across all architectures.
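The new interface is easiest to see in miniature. Below is a minimal sketch of the pattern this patch introduces, written for illustration only: the Reg, RegUsage and FakeInstr types are hypothetical stand-ins, while the real Instruction class (with many more methods) appears in nativeGen/Instruction.hs further down in this diff.

module InstructionSketch where

-- A toy register type; GHC's real Reg lives in nativeGen/Reg.hs.
newtype Reg = Reg Int
  deriving (Eq, Show)

-- Registers read and written by an instruction, mirroring the RegUsage
-- type defined in nativeGen/Instruction.hs.
data RegUsage = RU [Reg] [Reg]
  deriving Show

-- The shared interface: the register allocators program against this.
class Instruction instr where
  regUsageOfInstr  :: instr -> RegUsage
  patchRegsOfInstr :: instr -> (Reg -> Reg) -> instr

-- A made-up two-instruction architecture, standing in for X86/PPC/SPARC.
data FakeInstr
  = Move Reg Reg        -- dst <- src
  | Add  Reg Reg Reg    -- dst <- a + b
  deriving Show

instance Instruction FakeInstr where
  regUsageOfInstr (Move dst src) = RU [src]  [dst]
  regUsageOfInstr (Add dst a b)  = RU [a, b] [dst]

  patchRegsOfInstr (Move dst src) f = Move (f dst) (f src)
  patchRegsOfInstr (Add dst a b)  f = Add (f dst) (f a) (f b)

-- Generic code, like the allocators, now works for any architecture:
allUsage :: Instruction instr => [instr] -> [RegUsage]
allUsage = map regUsageOfInstr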
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/ghc.cabal.in                                |   24
-rw-r--r--  compiler/main/DynFlags.hs                            |    3
-rw-r--r--  compiler/nativeGen/Alpha/CodeGen.hs                  |  789
-rw-r--r--  compiler/nativeGen/Alpha/Instr.hs                    |    8
-rw-r--r--  compiler/nativeGen/ArchReg.hs                        |   14
-rw-r--r--  compiler/nativeGen/AsmCodeGen.lhs                    |  139
-rw-r--r--  compiler/nativeGen/Instrs.hs                         |   97
-rw-r--r--  compiler/nativeGen/Instruction.hs                    |  159
-rw-r--r--  compiler/nativeGen/MachCodeGen.hs                    | 5199
-rw-r--r--  compiler/nativeGen/NCGMonad.hs                       |  119
-rw-r--r--  compiler/nativeGen/PIC.hs (renamed from compiler/nativeGen/PositionIndependentCode.hs) | 700
-rw-r--r--  compiler/nativeGen/PPC/CodeGen.hs                    | 1364
-rw-r--r--  compiler/nativeGen/PPC/Cond.hs                       |   62
-rw-r--r--  compiler/nativeGen/PPC/Instr.hs                      |  363
-rw-r--r--  compiler/nativeGen/PPC/Ppr.hs                        |  128
-rw-r--r--  compiler/nativeGen/PPC/RegInfo.hs                    |  315
-rw-r--r--  compiler/nativeGen/PPC/Regs.hs                       |  102
-rw-r--r--  compiler/nativeGen/Platform.hs                       |   92
-rw-r--r--  compiler/nativeGen/PprMach.hs                        |  183
-rw-r--r--  compiler/nativeGen/Reg.hs                            |  113
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Coalesce.hs        |   23
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Main.hs            |   56
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Spill.hs           |   46
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/SpillClean.hs      |   95
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/SpillCost.hs       |   48
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Stats.hs           |  149
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs (renamed from compiler/nativeGen/Regs.hs) | 172
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Base.hs           |    2
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs       |    6
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs  |   47
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Main.hs           |  117
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs   |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/StackMap.hs       |    2
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/State.hs          |   16
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Stats.hs          |   15
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs   |    4
-rw-r--r--  compiler/nativeGen/RegAlloc/Liveness.hs              |  268
-rw-r--r--  compiler/nativeGen/RegAllocInfo.hs                   |   94
-rw-r--r--  compiler/nativeGen/RegClass.hs                       |   31
-rw-r--r--  compiler/nativeGen/RegsBase.hs                       |  105
-rw-r--r--  compiler/nativeGen/SPARC/CodeGen.hs                  | 1545
-rw-r--r--  compiler/nativeGen/SPARC/Cond.hs                     |   53
-rw-r--r--  compiler/nativeGen/SPARC/Instr.hs                    |  370
-rw-r--r--  compiler/nativeGen/SPARC/Ppr.hs                      |  135
-rw-r--r--  compiler/nativeGen/SPARC/RegInfo.hs                  |  413
-rw-r--r--  compiler/nativeGen/SPARC/Regs.hs                     |  197
-rw-r--r--  compiler/nativeGen/Size.hs                           |  103
-rw-r--r--  compiler/nativeGen/TargetReg.hs                      |  101
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs                    | 2313
-rw-r--r--  compiler/nativeGen/X86/Cond.hs                       |   52
-rw-r--r--  compiler/nativeGen/X86/Instr.hs                      |  492
-rw-r--r--  compiler/nativeGen/X86/Ppr.hs                        |  165
-rw-r--r--  compiler/nativeGen/X86/RegInfo.hs                    |  531
-rw-r--r--  compiler/nativeGen/X86/Regs.hs                       |  117
55 files changed, 9751 insertions, 8113 deletions
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 2328eca637..b276943f60 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -454,31 +454,38 @@ Library
Exposed-Modules:
AsmCodeGen
- MachCodeGen
- Regs
- RegsBase
- Instrs
- RegAllocInfo
- PprMach
+ TargetReg
+ NCGMonad
+ Instruction
+ Size
+ Reg
+ RegClass
PprBase
+ PIC
+ Platform
Alpha.Regs
Alpha.RegInfo
Alpha.Instr
Alpha.Ppr
+ Alpha.CodeGen
X86.Regs
X86.RegInfo
X86.Instr
+ X86.Cond
X86.Ppr
+ X86.CodeGen
PPC.Regs
PPC.RegInfo
PPC.Instr
+ PPC.Cond
PPC.Ppr
+ PPC.CodeGen
SPARC.Regs
SPARC.RegInfo
SPARC.Instr
+ SPARC.Cond
SPARC.Ppr
- NCGMonad
- PositionIndependentCode
+ SPARC.CodeGen
RegAlloc.Liveness
RegAlloc.Graph.Main
RegAlloc.Graph.Stats
@@ -488,6 +495,7 @@ Library
RegAlloc.Graph.Spill
RegAlloc.Graph.SpillClean
RegAlloc.Graph.SpillCost
+ RegAlloc.Graph.TrivColorable
RegAlloc.Linear.Main
RegAlloc.Linear.JoinToTargets
RegAlloc.Linear.State
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 44bd124032..eb9a182997 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -64,6 +64,7 @@ module DynFlags (
#include "HsVersions.h"
+import Platform
import Module
import PackageConfig
import PrelNames ( mAIN, main_RDR_Unqual )
@@ -339,6 +340,7 @@ data DynFlags = DynFlags {
specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function
liberateCaseThreshold :: Maybe Int, -- ^ Threshold for LiberateCase
+ targetPlatform :: Platform, -- ^ The platform we're compiling for. Used by the NCG.
stolen_x86_regs :: Int,
cmdlineHcIncludes :: [String], -- ^ @\-\#includes@
importPaths :: [FilePath],
@@ -584,6 +586,7 @@ defaultDynFlags =
specConstrThreshold = Just 200,
specConstrCount = Just 3,
liberateCaseThreshold = Just 200,
+ targetPlatform = defaultTargetPlatform,
stolen_x86_regs = 4,
cmdlineHcIncludes = [],
importPaths = ["."],
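The Platform module itself is not shown in this excerpt. A plausible minimal shape, reconstructed only from the pieces this diff does witness (the platformArch and platformOS accessors used later in AsmCodeGen.lhs, and the defaultTargetPlatform value above), might be:

module Platform where

-- Hypothetical reconstruction: only platformArch, platformOS and
-- defaultTargetPlatform are witnessed by this diff; the constructor
-- names here are guesses.
data Platform
        = Platform
        { platformArch  :: Arch
        , platformOS    :: OS }
        deriving (Eq, Show)

data Arch
        = ArchAlpha | ArchX86 | ArchX86_64 | ArchPPC | ArchSPARC
        deriving (Eq, Show)

data OS
        = OSLinux | OSDarwin | OSSolaris | OSMinGW32 | OSUnknown
        deriving (Eq, Show)

-- The platform the compiler was built to target. In the real module this
-- is presumably chosen at build time; here it is just a fixed example.
defaultTargetPlatform :: Platform
defaultTargetPlatform = Platform ArchX86 OSLinux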
diff --git a/compiler/nativeGen/Alpha/CodeGen.hs b/compiler/nativeGen/Alpha/CodeGen.hs
new file mode 100644
index 0000000000..4ce774f14f
--- /dev/null
+++ b/compiler/nativeGen/Alpha/CodeGen.hs
@@ -0,0 +1,789 @@
+module Alpha.CodeGen ()
+
+where
+
+{-
+
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+ -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+ -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat wordSize
+ return (Fixed wordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+-- end of machine-"independent" bit; here we go on the rest...
+
+
+getRegister (StDouble d)
+ = getBlockIdNat `thenNat` \ lbl ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let code dst = mkSeqInstrs [
+ LDATA RoDataSegment lbl [
+ DATA TF [ImmLab (rational d)]
+ ],
+ LDA tmp (AddrImm (ImmCLbl lbl)),
+ LD TF dst (AddrReg tmp)]
+ in
+ return (Any FF64 code)
+
+getRegister (StPrim primop [x]) -- unary PrimOps
+ = case primop of
+ IntNegOp -> trivialUCode (NEG Q False) x
+
+ NotOp -> trivialUCode NOT x
+
+ FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
+ DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
+
+ OrdOp -> coerceIntCode IntRep x
+ ChrOp -> chrCode x
+
+ Float2IntOp -> coerceFP2Int x
+ Int2FloatOp -> coerceInt2FP pr x
+ Double2IntOp -> coerceFP2Int x
+ Int2DoubleOp -> coerceInt2FP pr x
+
+ Double2FloatOp -> coerceFltCode x
+ Float2DoubleOp -> coerceFltCode x
+
+ other_op -> getRegister (StCall fn CCallConv FF64 [x])
+ where
+ fn = case other_op of
+ FloatExpOp -> fsLit "exp"
+ FloatLogOp -> fsLit "log"
+ FloatSqrtOp -> fsLit "sqrt"
+ FloatSinOp -> fsLit "sin"
+ FloatCosOp -> fsLit "cos"
+ FloatTanOp -> fsLit "tan"
+ FloatAsinOp -> fsLit "asin"
+ FloatAcosOp -> fsLit "acos"
+ FloatAtanOp -> fsLit "atan"
+ FloatSinhOp -> fsLit "sinh"
+ FloatCoshOp -> fsLit "cosh"
+ FloatTanhOp -> fsLit "tanh"
+ DoubleExpOp -> fsLit "exp"
+ DoubleLogOp -> fsLit "log"
+ DoubleSqrtOp -> fsLit "sqrt"
+ DoubleSinOp -> fsLit "sin"
+ DoubleCosOp -> fsLit "cos"
+ DoubleTanOp -> fsLit "tan"
+ DoubleAsinOp -> fsLit "asin"
+ DoubleAcosOp -> fsLit "acos"
+ DoubleAtanOp -> fsLit "atan"
+ DoubleSinhOp -> fsLit "sinh"
+ DoubleCoshOp -> fsLit "cosh"
+ DoubleTanhOp -> fsLit "tanh"
+ where
+ pr = panic "MachCode.getRegister: no primrep needed for Alpha"
+
+getRegister (StPrim primop [x, y]) -- dyadic PrimOps
+ = case primop of
+ CharGtOp -> trivialCode (CMP LTT) y x
+ CharGeOp -> trivialCode (CMP LE) y x
+ CharEqOp -> trivialCode (CMP EQQ) x y
+ CharNeOp -> int_NE_code x y
+ CharLtOp -> trivialCode (CMP LTT) x y
+ CharLeOp -> trivialCode (CMP LE) x y
+
+ IntGtOp -> trivialCode (CMP LTT) y x
+ IntGeOp -> trivialCode (CMP LE) y x
+ IntEqOp -> trivialCode (CMP EQQ) x y
+ IntNeOp -> int_NE_code x y
+ IntLtOp -> trivialCode (CMP LTT) x y
+ IntLeOp -> trivialCode (CMP LE) x y
+
+ WordGtOp -> trivialCode (CMP ULT) y x
+ WordGeOp -> trivialCode (CMP ULE) x y
+ WordEqOp -> trivialCode (CMP EQQ) x y
+ WordNeOp -> int_NE_code x y
+ WordLtOp -> trivialCode (CMP ULT) x y
+ WordLeOp -> trivialCode (CMP ULE) x y
+
+ AddrGtOp -> trivialCode (CMP ULT) y x
+ AddrGeOp -> trivialCode (CMP ULE) y x
+ AddrEqOp -> trivialCode (CMP EQQ) x y
+ AddrNeOp -> int_NE_code x y
+ AddrLtOp -> trivialCode (CMP ULT) x y
+ AddrLeOp -> trivialCode (CMP ULE) x y
+
+ FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
+ DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
+ DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
+ DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
+ DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
+ DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
+
+ IntAddOp -> trivialCode (ADD Q False) x y
+ IntSubOp -> trivialCode (SUB Q False) x y
+ IntMulOp -> trivialCode (MUL Q False) x y
+ IntQuotOp -> trivialCode (DIV Q False) x y
+ IntRemOp -> trivialCode (REM Q False) x y
+
+ WordAddOp -> trivialCode (ADD Q False) x y
+ WordSubOp -> trivialCode (SUB Q False) x y
+ WordMulOp -> trivialCode (MUL Q False) x y
+ WordQuotOp -> trivialCode (DIV Q True) x y
+ WordRemOp -> trivialCode (REM Q True) x y
+
+ FloatAddOp -> trivialFCode W32 (FADD TF) x y
+ FloatSubOp -> trivialFCode W32 (FSUB TF) x y
+ FloatMulOp -> trivialFCode W32 (FMUL TF) x y
+ FloatDivOp -> trivialFCode W32 (FDIV TF) x y
+
+ DoubleAddOp -> trivialFCode W64 (FADD TF) x y
+ DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
+ DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
+ DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
+
+ AddrAddOp -> trivialCode (ADD Q False) x y
+ AddrSubOp -> trivialCode (SUB Q False) x y
+ AddrRemOp -> trivialCode (REM Q True) x y
+
+ AndOp -> trivialCode AND x y
+ OrOp -> trivialCode OR x y
+ XorOp -> trivialCode XOR x y
+ SllOp -> trivialCode SLL x y
+ SrlOp -> trivialCode SRL x y
+
+ ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
+ ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
+ ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
+
+ FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+ DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
+ where
+ {- ------------------------------------------------------------
+ Some bizarre special code for getting condition codes into
+ registers. Integer non-equality is a test for equality
+ followed by an XOR with 1. (Integer comparisons always set
+ the result register to 0 or 1.) Floating point comparisons of
+ any kind leave the result in a floating point register, so we
+ need to wrangle an integer register out of things.
+ -}
+ int_NE_code :: StixTree -> StixTree -> NatM Register
+
+ int_NE_code x y
+ = trivialCode (CMP EQQ) x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
+ in
+ return (Any IntRep code__2)
+
+ {- ------------------------------------------------------------
+ Comments for int_NE_code also apply to cmpF_code
+ -}
+ cmpF_code
+ :: (Reg -> Reg -> Reg -> Instr)
+ -> Cond
+ -> StixTree -> StixTree
+ -> NatM Register
+
+ cmpF_code instr cond x y
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ getBlockIdNat `thenNat` \ lbl ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ OR zeroh (RIImm (ImmInt 1)) dst,
+ BF cond result (ImmCLbl lbl),
+ OR zeroh (RIReg zeroh) dst,
+ NEWBLOCK lbl]
+ in
+ return (Any IntRep code__2)
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+ ------------------------------------------------------------
+
+getRegister (CmmLoad pk mem)
+ = getAmode mem `thenNat` \ amode ->
+ let
+ code = amodeCode amode
+ src = amodeAddr amode
+ size = primRepToSize pk
+ code__2 dst = code . mkSeqInstr (LD size dst src)
+ in
+ return (Any pk code__2)
+
+getRegister (StInt i)
+ | fits8Bits i
+ = let
+ code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
+ in
+ return (Any IntRep code)
+ | otherwise
+ = let
+ code dst = mkSeqInstr (LDI Q dst src)
+ in
+ return (Any IntRep code)
+ where
+ src = ImmInt (fromInteger i)
+
+getRegister leaf
+ | isJust imm
+ = let
+ code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
+ in
+ return (Any PtrRep code)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if alpha_TARGET_ARCH
+
+getAmode (StPrim IntSubOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (-(fromInteger i))
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (StPrim IntAddOp [x, StInt i])
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister x `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ off = ImmInt (fromInteger i)
+ in
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode leaf
+ | isJust imm
+ = return (Amode (AddrImm imm__2) id)
+ where
+ imm = maybeImm leaf
+ imm__2 = case imm of Just x -> x
+
+getAmode other
+ = getNewRegNat PtrRep `thenNat` \ tmp ->
+ getRegister other `thenNat` \ register ->
+ let
+ code = registerCode register tmp
+ reg = registerName register tmp
+ in
+ return (Amode (AddrReg reg) code)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+
+assignIntCode pk (CmmLoad dst _) src
+ = getNewRegNat IntRep `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignIntCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
+ else code
+ in
+ return code__2
+
+assignFltCode pk (CmmLoad dst _) src
+ = getNewRegNat pk `thenNat` \ tmp ->
+ getAmode dst `thenNat` \ amode ->
+ getRegister src `thenNat` \ register ->
+ let
+ code1 = amodeCode amode []
+ dst__2 = amodeAddr amode
+ code2 = registerCode register tmp []
+ src__2 = registerName register tmp
+ sz = primRepToSize pk
+ code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
+ in
+ return code__2
+
+assignFltCode pk dst src
+ = getRegister dst `thenNat` \ register1 ->
+ getRegister src `thenNat` \ register2 ->
+ let
+ dst__2 = registerName register1 zeroh
+ code = registerCode register2 dst__2
+ src__2 = registerName register2 dst__2
+ code__2 = if isFixed register2
+ then code . mkSeqInstr (FMOV src__2 dst__2)
+ else code
+ in
+ return code__2
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a non-local jump
+
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+genJump (CmmLabel lbl)
+ | isAsmTemp lbl = returnInstr (BR target)
+ | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = getRegister tree `thenNat` \ register ->
+ getNewRegNat PtrRep `thenNat` \ tmp ->
+ let
+ dst = registerName register pv
+ code = registerCode register pv
+ target = registerName register pv
+ in
+ if isFixed register then
+ returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
+ else
+ return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+
+genBranch = return . toOL . mkBranchInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+ALPHA: For comparisons with 0, we're laughing, because we can just do
+the desired conditional branch.
+
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+genCondJump id (StPrim op [x, StInt 0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ returnSeq code [BI (cmpOp op) value target]
+ where
+ cmpOp CharGtOp = GTT
+ cmpOp CharGeOp = GE
+ cmpOp CharEqOp = EQQ
+ cmpOp CharNeOp = NE
+ cmpOp CharLtOp = LTT
+ cmpOp CharLeOp = LE
+ cmpOp IntGtOp = GTT
+ cmpOp IntGeOp = GE
+ cmpOp IntEqOp = EQQ
+ cmpOp IntNeOp = NE
+ cmpOp IntLtOp = LTT
+ cmpOp IntLeOp = LE
+ cmpOp WordGtOp = NE
+ cmpOp WordGeOp = ALWAYS
+ cmpOp WordEqOp = EQQ
+ cmpOp WordNeOp = NE
+ cmpOp WordLtOp = NEVER
+ cmpOp WordLeOp = EQQ
+ cmpOp AddrGtOp = NE
+ cmpOp AddrGeOp = ALWAYS
+ cmpOp AddrEqOp = EQQ
+ cmpOp AddrNeOp = NE
+ cmpOp AddrLtOp = NEVER
+ cmpOp AddrLeOp = EQQ
+
+genCondJump lbl (StPrim op [x, StDouble 0.0])
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ value = registerName register tmp
+ pk = registerRep register
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF (cmpOp op) value target))
+ where
+ cmpOp FloatGtOp = GTT
+ cmpOp FloatGeOp = GE
+ cmpOp FloatEqOp = EQQ
+ cmpOp FloatNeOp = NE
+ cmpOp FloatLtOp = LTT
+ cmpOp FloatLeOp = LE
+ cmpOp DoubleGtOp = GTT
+ cmpOp DoubleGeOp = GE
+ cmpOp DoubleEqOp = EQQ
+ cmpOp DoubleNeOp = NE
+ cmpOp DoubleLtOp = LTT
+ cmpOp DoubleLeOp = LE
+
+genCondJump lbl (StPrim op [x, y])
+ | fltCmpOp op
+ = trivialFCode pr instr x y `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BF cond result target))
+ where
+ pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
+
+ fltCmpOp op = case op of
+ FloatGtOp -> True
+ FloatGeOp -> True
+ FloatEqOp -> True
+ FloatNeOp -> True
+ FloatLtOp -> True
+ FloatLeOp -> True
+ DoubleGtOp -> True
+ DoubleGeOp -> True
+ DoubleEqOp -> True
+ DoubleNeOp -> True
+ DoubleLtOp -> True
+ DoubleLeOp -> True
+ _ -> False
+ (instr, cond) = case op of
+ FloatGtOp -> (FCMP TF LE, EQQ)
+ FloatGeOp -> (FCMP TF LTT, EQQ)
+ FloatEqOp -> (FCMP TF EQQ, NE)
+ FloatNeOp -> (FCMP TF EQQ, EQQ)
+ FloatLtOp -> (FCMP TF LTT, NE)
+ FloatLeOp -> (FCMP TF LE, NE)
+ DoubleGtOp -> (FCMP TF LE, EQQ)
+ DoubleGeOp -> (FCMP TF LTT, EQQ)
+ DoubleEqOp -> (FCMP TF EQQ, NE)
+ DoubleNeOp -> (FCMP TF EQQ, EQQ)
+ DoubleLtOp -> (FCMP TF LTT, NE)
+ DoubleLeOp -> (FCMP TF LE, NE)
+
+genCondJump lbl (StPrim op [x, y])
+ = trivialCode instr x y `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ result = registerName register tmp
+ target = ImmCLbl lbl
+ in
+ return (code . mkSeqInstr (BI cond result target))
+ where
+ (instr, cond) = case op of
+ CharGtOp -> (CMP LE, EQQ)
+ CharGeOp -> (CMP LTT, EQQ)
+ CharEqOp -> (CMP EQQ, NE)
+ CharNeOp -> (CMP EQQ, EQQ)
+ CharLtOp -> (CMP LTT, NE)
+ CharLeOp -> (CMP LE, NE)
+ IntGtOp -> (CMP LE, EQQ)
+ IntGeOp -> (CMP LTT, EQQ)
+ IntEqOp -> (CMP EQQ, NE)
+ IntNeOp -> (CMP EQQ, EQQ)
+ IntLtOp -> (CMP LTT, NE)
+ IntLeOp -> (CMP LE, NE)
+ WordGtOp -> (CMP ULE, EQQ)
+ WordGeOp -> (CMP ULT, EQQ)
+ WordEqOp -> (CMP EQQ, NE)
+ WordNeOp -> (CMP EQQ, EQQ)
+ WordLtOp -> (CMP ULT, NE)
+ WordLeOp -> (CMP ULE, NE)
+ AddrGtOp -> (CMP ULE, EQQ)
+ AddrGeOp -> (CMP ULT, EQQ)
+ AddrEqOp -> (CMP EQQ, NE)
+ AddrNeOp -> (CMP EQQ, EQQ)
+ AddrLtOp -> (CMP ULT, NE)
+ AddrLeOp -> (CMP ULE, NE)
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ccallResultRegs =
+
+genCCall fn cconv result_regs args
+ = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
+ `thenNat` \ ((unused,_), argCode) ->
+ let
+ nRegs = length allArgRegs - length unused
+ code = asmSeqThen (map ($ []) argCode)
+ in
+ returnSeq code [
+ LDA pv (AddrImm (ImmLab (ptext fn))),
+ JSR ra (AddrReg pv) nRegs,
+ LDGP gp (AddrReg ra)]
+ where
+ ------------------------
+ {- Try to get a value into a specific register (or registers) for
+ a call. The first 6 arguments go into the appropriate
+ argument register (separate registers for integer and floating
+ point arguments, but used in lock-step), and the remaining
+ arguments are dumped to the stack, beginning at 0(sp). Our
+ first argument is a pair of the list of remaining argument
+ registers to be assigned for this call and the next stack
+ offset to use for overflowing arguments. This way,
+ @get_arg@ can be applied to all of a call's arguments using
+ @mapAccumLNat@.
+ -}
+ get_arg
+ :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
+ -> StixTree -- Current argument
+ -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
+
+ -- We have to use up all of our argument registers first...
+
+ get_arg ((iDst,fDst):dsts, offset) arg
+ = getRegister arg `thenNat` \ register ->
+ let
+ reg = if isFloatType pk then fDst else iDst
+ code = registerCode register reg
+ src = registerName register reg
+ pk = registerRep register
+ in
+ return (
+ if isFloatType pk then
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (FMOV src fDst)
+ else code)
+ else
+ ((dsts, offset), if isFixed register then
+ code . mkSeqInstr (OR src (RIReg src) iDst)
+ else code))
+
+ -- Once we have run out of argument registers, we move to the
+ -- stack...
+
+ get_arg ([], offset) arg
+ = getRegister arg `thenNat` \ register ->
+ getNewRegNat (registerRep register)
+ `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ pk = registerRep register
+ sz = primRepToSize pk
+ in
+ return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
+
+trivialCode instr x (StInt y)
+ | fits8Bits y
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src1 = registerName register tmp
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
+ in
+ return (Any IntRep code__2)
+
+trivialCode instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat IntRep `thenNat` \ tmp1 ->
+ getNewRegNat IntRep `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1 []
+ src1 = registerName register1 tmp1
+ code2 = registerCode register2 tmp2 []
+ src2 = registerName register2 tmp2
+ code__2 dst = asmSeqThen [code1, code2] .
+ mkSeqInstr (instr src1 (RIReg src2) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialUCode instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
+ in
+ return (Any IntRep code__2)
+
+------------
+trivialFCode _ instr x y
+ = getRegister x `thenNat` \ register1 ->
+ getRegister y `thenNat` \ register2 ->
+ getNewRegNat FF64 `thenNat` \ tmp1 ->
+ getNewRegNat FF64 `thenNat` \ tmp2 ->
+ let
+ code1 = registerCode register1 tmp1
+ src1 = registerName register1 tmp1
+
+ code2 = registerCode register2 tmp2
+ src2 = registerName register2 tmp2
+
+ code__2 dst = asmSeqThen [code1 [], code2 []] .
+ mkSeqInstr (instr src1 src2 dst)
+ in
+ return (Any FF64 code__2)
+
+trivialUFCode _ instr x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+ code__2 dst = code . mkSeqInstr (instr src dst)
+ in
+ return (Any FF64 code__2)
+
+#if alpha_TARGET_ARCH
+
+coerceInt2FP _ x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat IntRep `thenNat` \ reg ->
+ let
+ code = registerCode register reg
+ src = registerName register reg
+
+ code__2 dst = code . mkSeqInstrs [
+ ST Q src (spRel 0),
+ LD TF dst (spRel 0),
+ CVTxy Q TF dst dst]
+ in
+ return (Any FF64 code__2)
+
+-------------
+coerceFP2Int x
+ = getRegister x `thenNat` \ register ->
+ getNewRegNat FF64 `thenNat` \ tmp ->
+ let
+ code = registerCode register tmp
+ src = registerName register tmp
+
+ code__2 dst = code . mkSeqInstrs [
+ CVTxy TF Q src tmp,
+ ST TF tmp (spRel 0),
+ LD Q dst (spRel 0)]
+ in
+ return (Any IntRep code__2)
+
+#endif /* alpha_TARGET_ARCH */
+
+
+-}
+
+
+
+
+
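Much of the commented-out code above traffics in the Fixed/Any Register protocol described in the deleted MachCodeGen.hs at the end of this diff: a child expression either pins its result to a particular register (Fixed) or returns a function that will compute the result into whichever register the parent chooses (Any). A toy model of that protocol, using illustrative stand-in types:

module RegisterSketch where

newtype Reg = Reg Int
  deriving (Eq, Show)

-- Stand-in for the OrdList of instructions the real code uses.
type InstrBlock = [String]

data Register
  = Fixed Reg InstrBlock        -- result is pinned to this register
  | Any (Reg -> InstrBlock)     -- parent picks the destination register

-- Materialise a Register's value into a required destination register,
-- paying for a move only in the Fixed case where the registers differ.
intoReg :: Reg -> Register -> InstrBlock
intoReg dst (Any codefn) = codefn dst
intoReg dst (Fixed src code)
  | src == dst = code
  | otherwise  = code ++ ["mov " ++ show src ++ ", " ++ show dst]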
diff --git a/compiler/nativeGen/Alpha/Instr.hs b/compiler/nativeGen/Alpha/Instr.hs
index e2d66d3c02..990ea8bc1a 100644
--- a/compiler/nativeGen/Alpha/Instr.hs
+++ b/compiler/nativeGen/Alpha/Instr.hs
@@ -10,13 +10,14 @@
#include "nativeGen/NCG.h"
module Alpha.Instr (
- Cond(..),
- Instr(..),
- RI(..)
+-- Cond(..),
+-- Instr(..),
+-- RI(..)
)
where
+{-
import BlockId
import Regs
import Cmm
@@ -138,3 +139,4 @@ data Instr
| FUNEND CLabel
+-}
diff --git a/compiler/nativeGen/ArchReg.hs b/compiler/nativeGen/ArchReg.hs
new file mode 100644
index 0000000000..7170228e4c
--- /dev/null
+++ b/compiler/nativeGen/ArchReg.hs
@@ -0,0 +1,14 @@
+
+
+module ArchReg (
+
+)
+
+where
+
+
+class ArchReg reg format where
+ classOfReg :: reg -> RegClass
+ mkVReg :: format -> VirtReg reg
+
+
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index ce411ed6fe..8613a8ed1f 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -19,21 +19,56 @@ module AsmCodeGen ( nativeCodeGen ) where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import Instrs
-import Regs
-import MachCodeGen
-import PprMach
-import RegAllocInfo
-import NCGMonad
-import PositionIndependentCode
-import RegAlloc.Liveness
-import qualified RegAlloc.Linear.Main as Linear
+#if alpha_TARGET_ARCH
+import Alpha.CodeGen
+import Alpha.Regs
+import Alpha.RegInfo
+import Alpha.Instr
+import Alpha.Ppr
+
+#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import X86.CodeGen
+import X86.Regs
+import X86.RegInfo
+import X86.Instr
+import X86.Ppr
+
+#elif sparc_TARGET_ARCH
+import SPARC.CodeGen
+import SPARC.Regs
+import SPARC.RegInfo
+import SPARC.Instr
+import SPARC.Ppr
+
+#elif powerpc_TARGET_ARCH
+import PPC.CodeGen
+import PPC.Regs
+import PPC.RegInfo
+import PPC.Instr
+import PPC.Ppr
+
+#else
+#error "AsmCodeGen: unknown architecture"
+
+#endif
+
+import RegAlloc.Liveness
+import qualified RegAlloc.Linear.Main as Linear
import qualified GraphColor as Color
import qualified RegAlloc.Graph.Main as Color
import qualified RegAlloc.Graph.Stats as Color
import qualified RegAlloc.Graph.Coalesce as Color
+import qualified RegAlloc.Graph.TrivColorable as Color
+
+import qualified TargetReg as Target
+
+import Platform
+import Instruction
+import PIC
+import Reg
+import NCGMonad
import Cmm
import CmmOpt ( cmmMiniInline, cmmMachOpFold )
@@ -160,7 +195,7 @@ nativeCodeGen dflags h us cmms
dumpIfSet_dyn dflags
Opt_D_dump_asm_conflicts "Register conflict graph"
- $ Color.dotGraph Color.regDotColor trivColorable
+ $ Color.dotGraph Target.targetRegDotColor (Color.trivColorable Target.targetRegClass)
$ graphGlobal)
@@ -172,7 +207,7 @@ nativeCodeGen dflags h us cmms
-- write out the imports
Pretty.printDoc Pretty.LeftMode h
- $ makeImportsDoc (concat imports)
+ $ makeImportsDoc dflags (concat imports)
return ()
@@ -225,13 +260,13 @@ cmmNativeGens dflags h us (cmm : cmms) impAcc profAcc count
cmmNativeGen
:: DynFlags
-> UniqSupply
- -> RawCmmTop -- ^ the cmm to generate code for
- -> Int -- ^ sequence number of this top thing
+ -> RawCmmTop -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
-> IO ( UniqSupply
- , [NatCmmTop] -- native code
- , [CLabel] -- things imported by this cmm
- , Maybe [Color.RegAllocStats] -- stats for the coloring register allocator
- , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
+ , [NatCmmTop Instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats Instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
cmmNativeGen dflags us cmm count
= do
@@ -375,8 +410,8 @@ x86fp_kludge top@(CmmProc info lbl params (ListGraph code)) =
-- | Build a doc for all the imports.
--
-makeImportsDoc :: [CLabel] -> Pretty.Doc
-makeImportsDoc imports
+makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
+makeImportsDoc dflags imports
= dyld_stubs imports
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
@@ -410,13 +445,16 @@ makeImportsDoc imports
{- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
map head $ group $ sort imps-}
+ arch = platformArch $ targetPlatform dflags
+ os = platformOS $ targetPlatform dflags
+
-- (Hack) sometimes two Labels pretty-print the same, but have
-- different uniques; so we compare their text versions...
dyld_stubs imps
- | needImportedSymbols
+ | needImportedSymbols arch os
= Pretty.vcat $
- (pprGotDeclaration :) $
- map (pprImportedSymbol . fst . head) $
+ (pprGotDeclaration arch os :) $
+ map ( pprImportedSymbol arch os . fst . head) $
groupBy (\(_,a) (_,b) -> a == b) $
sortBy (\(_,a) (_,b) -> compare a b) $
map doPpr $
@@ -437,7 +475,11 @@ makeImportsDoc imports
-- such that as many of the local jumps as possible turn into
-- fallthroughs.
-sequenceTop :: NatCmmTop -> NatCmmTop
+sequenceTop
+ :: Instruction instr
+ => NatCmmTop instr
+ -> NatCmmTop instr
+
sequenceTop top@(CmmData _ _) = top
sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
CmmProc info lbl params (ListGraph $ makeFarBranches $ sequenceBlocks blocks)
@@ -452,21 +494,36 @@ sequenceTop (CmmProc info lbl params (ListGraph blocks)) =
-- FYI, the classic layout for basic blocks uses postorder DFS; this
-- algorithm is implemented in cmm/ZipCfg.hs (NR 6 Sep 2007).
-sequenceBlocks :: [NatBasicBlock] -> [NatBasicBlock]
+sequenceBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
+
sequenceBlocks [] = []
sequenceBlocks (entry:blocks) =
seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
-- the first block is the entry point ==> it must remain at the start.
-sccBlocks :: [NatBasicBlock] -> [SCC (NatBasicBlock,Unique,[Unique])]
+
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC ( NatBasicBlock instr
+ , Unique
+ , [Unique])]
+
sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
-getOutEdges :: [Instr] -> [Unique]
-getOutEdges instrs = case jumpDests (last instrs) [] of
- [one] -> [getUnique one]
- _many -> []
- -- we're only interested in the last instruction of
- -- the block, and only if it has a single destination.
+-- we're only interested in the last instruction of
+-- the block, and only if it has a single destination.
+getOutEdges
+ :: Instruction instr
+ => [instr] -> [Unique]
+
+getOutEdges instrs
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [getUnique one]
+ _many -> []
mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
@@ -494,7 +551,10 @@ reorder id accum (b@(block,id',out) : rest)
-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
-makeFarBranches :: [NatBasicBlock] -> [NatBasicBlock]
+makeFarBranches
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
#if powerpc_TARGET_ARCH
makeFarBranches blocks
@@ -530,7 +590,11 @@ makeFarBranches = id
-- -----------------------------------------------------------------------------
-- Shortcut branches
-shortcutBranches :: DynFlags -> [NatCmmTop] -> [NatCmmTop]
+shortcutBranches
+ :: DynFlags
+ -> [NatCmmTop Instr]
+ -> [NatCmmTop Instr]
+
shortcutBranches dflags tops
| optLevel dflags < 1 = tops -- only with -O or higher
| otherwise = map (apply_mapping mapping) tops'
@@ -589,12 +653,17 @@ apply_mapping ufm (CmmProc info lbl params (ListGraph blocks))
-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction. Is that bad?
-genMachCode :: DynFlags -> RawCmmTop -> UniqSM ([NatCmmTop], [CLabel])
+genMachCode
+ :: DynFlags
+ -> RawCmmTop
+ -> UniqSM
+ ( [NatCmmTop Instr]
+ , [CLabel])
genMachCode dflags cmm_top
= do { initial_us <- getUs
; let initial_st = mkNatM_State initial_us 0 dflags
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen dflags cmm_top)
final_delta = natm_delta final_st
final_imports = natm_imports final_st
; if final_delta == 0
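getOutEdges above shows the generalisation at work: block sequencing no longer pattern-matches on a concrete, architecture-specific Instr, it asks the Instruction class for jump destinations. The same idea in a self-contained form (BlockId and the class here are simplified stand-ins):

module SequenceSketch where

newtype BlockId = BlockId Int
  deriving (Eq, Show)

class Instruction instr where
  jumpDestsOfInstr :: instr -> [BlockId]

-- Mirrors getOutEdges in the diff: only the last instruction of a block
-- matters, and only when it has a single destination, so the block can
-- be laid out to fall through to its successor.
outEdges :: Instruction instr => [instr] -> [BlockId]
outEdges [] = []
outEdges instrs
  = case jumpDestsOfInstr (last instrs) of
      [one] -> [one]
      _many -> []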
diff --git a/compiler/nativeGen/Instrs.hs b/compiler/nativeGen/Instrs.hs
deleted file mode 100644
index 3f38a361d5..0000000000
--- a/compiler/nativeGen/Instrs.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-
-module Instrs (
- NatCmm,
- NatCmmTop,
- NatBasicBlock,
- condUnsigned,
- condToSigned,
- condToUnsigned,
-
-#if alpha_TARGET_ARCH
- module Alpha.Instr
-#elif powerpc_TARGET_ARCH
- module PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
- module X86.Instr
-#elif sparc_TARGET_ARCH
- module SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-)
-
-where
-
-#include "HsVersions.h"
-
-import BlockId
-import Regs
-import Cmm
-import CLabel ( CLabel, pprCLabel )
-import Panic ( panic )
-import Outputable
-import FastString
-import Constants ( wORD_SIZE )
-
-import GHC.Exts
-
-#if alpha_TARGET_ARCH
-import Alpha.Instr
-#elif powerpc_TARGET_ARCH
-import PPC.Instr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Instr
-#elif sparc_TARGET_ARCH
-import SPARC.Instr
-#else
-#error "Instrs: not defined for this architecture"
-#endif
-
-
--- Our flavours of the Cmm types
--- Type synonyms for Cmm populated with native code
-
-type NatCmm = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
-type NatCmmTop = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
-type NatBasicBlock = GenBasicBlock Instr
-
-
--- Condition utils
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
-
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE = GEU
-condToUnsigned LE = LEU
-condToUnsigned x = x
-
-
-
-
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
new file mode 100644
index 0000000000..22c37a5b12
--- /dev/null
+++ b/compiler/nativeGen/Instruction.hs
@@ -0,0 +1,159 @@
+
+module Instruction (
+ RegUsage(..),
+ noUsage,
+ NatCmm,
+ NatCmmTop,
+ NatBasicBlock,
+ Instruction(..)
+)
+
+where
+
+import Reg
+
+import BlockId
+import Cmm
+
+-- | Holds a list of source and destination registers used by a
+-- particular instruction.
+--
+-- Machine registers that are pre-allocated to stgRegs are filtered
+-- out, because they are uninteresting from a register allocation
+-- standpoint. (We wouldn't want them to end up on the free list!)
+--
+-- As far as we are concerned, the fixed registers simply don't exist
+-- (for allocation purposes, anyway).
+--
+data RegUsage
+ = RU [Reg] [Reg]
+
+-- | No regs read or written to.
+noUsage :: RegUsage
+noUsage = RU [] []
+
+
+-- Our flavours of the Cmm types
+-- Type synonyms for Cmm populated with native code
+type NatCmm instr
+ = GenCmm
+ CmmStatic
+ [CmmStatic]
+ (ListGraph instr)
+
+type NatCmmTop instr
+ = GenCmmTop
+ CmmStatic
+ [CmmStatic]
+ (ListGraph instr)
+
+
+type NatBasicBlock instr
+ = GenBasicBlock instr
+
+
+
+
+-- | Common things that we can do with instructions, on all architectures.
+-- These are used by the shared parts of the native code generator,
+-- specifically the register allocators.
+--
+class Instruction instr where
+
+ -- | Get the registers that are being used by this instruction.
+ -- regUsage doesn't need to do any trickery for jumps and such.
+ -- Just state precisely the regs read and written by that insn.
+ -- The consequences of control flow transfers, as far as register
+ -- allocation goes, are taken care of by the register allocator.
+ --
+ regUsageOfInstr
+ :: instr
+ -> RegUsage
+
+
+ -- | Apply a given mapping to all the register references in this
+ -- instruction.
+ patchRegsOfInstr
+ :: instr
+ -> (Reg -> Reg)
+ -> instr
+
+
+ -- | Checks whether this instruction is a jump/branch instruction.
+ -- One that can change the flow of control in a way that the
+ -- register allocator needs to worry about.
+ isJumpishInstr
+ :: instr -> Bool
+
+
+ -- | Give the possible destinations of this jump instruction.
+ -- Must be defined for all jumpish instructions.
+ jumpDestsOfInstr
+ :: instr -> [BlockId]
+
+
+ -- | Change the destination of this jump instruction.
+ -- Used in the linear allocator when adding fixup blocks for join
+ -- points.
+ patchJumpInstr
+ :: instr
+ -> (BlockId -> BlockId)
+ -> instr
+
+
+ -- | An instruction to spill a register into a spill slot.
+ mkSpillInstr
+ :: Reg -- ^ the reg to spill
+ -> Int -- ^ the current stack delta
+ -> Int -- ^ spill slot to use
+ -> instr
+
+
+ -- | An instruction to reload a register from a spill slot.
+ mkLoadInstr
+ :: Reg -- ^ the reg to reload.
+ -> Int -- ^ the current stack delta
+ -> Int -- ^ the spill slot to use
+ -> instr
+
+ -- | See if this instruction is telling us the current C stack delta
+ takeDeltaInstr
+ :: instr
+ -> Maybe Int
+
+ -- | Check whether this instruction is some meta thing inserted into
+ -- the instruction stream for other purposes.
+ --
+ -- Not something that has to be treated as a real machine instruction
+ -- and have its registers allocated.
+ --
+ -- e.g. comments, delta, ldata, etc.
+ isMetaInstr
+ :: instr
+ -> Bool
+
+
+
+ -- | Copy the value in a register to another one.
+ -- Must work for all register classes.
+ mkRegRegMoveInstr
+ :: Reg -- ^ source register
+ -> Reg -- ^ destination register
+ -> instr
+
+ -- | Take the source and destination from this reg -> reg move instruction
+ -- or Nothing if it's not one
+ takeRegRegMoveInstr
+ :: instr
+ -> Maybe (Reg, Reg)
+
+ -- | Make an unconditional jump instruction.
+ -- For architectures with branch delay slots, it's ok to put
+ -- a NOP after the jump. Don't fill the delay slot with an
+ -- instruction that references regs or you'll confuse the
+ -- linear allocator.
+ mkJumpInstr
+ :: BlockId
+ -> [instr]
+
+
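To make the class concrete, here is one way the shared register allocators can consume it, sketched under assumptions: after allocation, a reg-to-reg move whose source and destination landed in the same real register is a no-op and can be dropped. Only takeRegRegMoveInstr comes from the class above; the rest is illustrative.

module AllocSketch where

newtype Reg = Reg Int
  deriving (Eq, Show)

class Instruction instr where
  takeRegRegMoveInstr :: instr -> Maybe (Reg, Reg)

-- Drop reg->reg moves that became no-ops once virtual registers were
-- mapped onto real ones (the kind of cleanup a coalescing allocator does).
removeDeadMoves :: Instruction instr => [instr] -> [instr]
removeDeadMoves = filter (not . isDeadMove)
  where
    isDeadMove i
      = case takeRegRegMoveInstr i of
          Just (src, dst) -> src == dst
          Nothing         -> False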
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
deleted file mode 100644
index d94a906bbd..0000000000
--- a/compiler/nativeGen/MachCodeGen.hs
+++ /dev/null
@@ -1,5199 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
--- This is a big module, but, if you pay attention to
--- (a) the sectioning, (b) the type signatures, and
--- (c) the #if blah_TARGET_ARCH} things, the
--- structure should not be too overwhelming.
-
-module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
-
-#include "HsVersions.h"
-#include "nativeGen/NCG.h"
-#include "MachDeps.h"
-
--- NCG stuff:
-import Instrs
-import Regs
-import NCGMonad
-import PositionIndependentCode
-import RegAllocInfo ( mkBranchInstr, mkRegRegMoveInstr )
-import PprMach
-
--- Our intermediate code:
-import BlockId
-import PprCmm ( pprExpr )
-import Cmm
-import CLabel
-import ClosureInfo ( C_SRT(..) )
-
--- The rest:
-import BasicTypes
-import StaticFlags ( opt_PIC )
-import ForeignCall ( CCallConv(..) )
-import OrdList
-import Pretty
-import qualified Outputable as O
-import Outputable
-import FastString
-import FastBool ( isFastTrue )
-import Constants ( wORD_SIZE )
-
-import Debug.Trace ( trace )
-
-import Control.Monad ( mapAndUnzipM )
-import Data.Maybe ( fromJust )
-import Data.Bits
-import Data.Word
-import Data.Int
-
-
--- -----------------------------------------------------------------------------
--- Top-level of the instruction selector
-
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal (pre-order?) yields the insns in the correct
--- order.
-
-type InstrBlock = OrdList Instr
-
-cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
-cmmTopCodeGen (CmmProc info lab params (ListGraph blocks)) = do
- (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
- picBaseMb <- getPicBaseMaybeNat
- let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
- tops = proc : concat statics
- case picBaseMb of
- Just picBase -> initializePicBase picBase tops
- Nothing -> return tops
-
-cmmTopCodeGen (CmmData sec dat) = do
- return [CmmData sec dat] -- no translation, we just use CmmStatic
-
-basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
-basicBlockCodeGen (BasicBlock id stmts) = do
- instrs <- stmtsToInstrs stmts
- -- code generation may introduce new basic block boundaries, which
- -- are indicated by the NEWBLOCK instruction. We must split up the
- -- instruction stream into basic blocks again. Also, we extract
- -- LDATAs here too.
- let
- (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
- -- in
- return (BasicBlock id top : other_blocks, statics)
-
-stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
-stmtsToInstrs stmts
- = do instrss <- mapM stmtToInstrs stmts
- return (concatOL instrss)
-
-stmtToInstrs :: CmmStmt -> NatM InstrBlock
-stmtToInstrs stmt = case stmt of
- CmmNop -> return nilOL
- CmmComment s -> return (unitOL (COMMENT s))
-
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode size reg src
-#if WORD_SIZE_IN_BITS==32
- | isWord64 ty -> assignReg_I64Code reg src
-#endif
- | otherwise -> assignReg_IntCode size reg src
- where ty = cmmRegType reg
- size = cmmTypeSize ty
-
- CmmStore addr src
- | isFloatType ty -> assignMem_FltCode size addr src
-#if WORD_SIZE_IN_BITS==32
- | isWord64 ty -> assignMem_I64Code addr src
-#endif
- | otherwise -> assignMem_IntCode size addr src
- where ty = cmmExprType src
- size = cmmTypeSize ty
-
- CmmCall target result_regs args _ _
- -> genCCall target result_regs args
-
- CmmBranch id -> genBranch id
- CmmCondBranch arg id -> genCondJump id arg
- CmmSwitch arg ids -> genSwitch arg ids
- CmmJump arg params -> genJump arg
- CmmReturn params ->
- panic "stmtToInstrs: return statement should have been cps'd away"
-
--- -----------------------------------------------------------------------------
--- General things for putting together code sequences
-
--- Expand CmmRegOff. ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: CmmExpr -> CmmExpr
-mangleIndexTree (CmmRegOff reg off)
- = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
- where width = typeWidth (cmmRegType reg)
-
--- -----------------------------------------------------------------------------
--- Code gen for 64-bit arithmetic on 32-bit platforms
-
-{-
-Simple support for generating 64-bit code (ie, 64 bit values and 64
-bit assignments) on 32-bit platforms. Unlike the main code generator
-we merely shoot for generating working code as simply as possible, and
-pay little attention to code quality. Specifically, there is no
-attempt to deal cleverly with the fixed-vs-floating register
-distinction; all values are generated into (pairs of) floating
-registers, even if this would mean some redundant reg-reg moves as a
-result. Only one of the VRegUniques is returned, since it will be
-of the VRegUniqueLo form, and the upper-half VReg can be determined
-by applying getHiVRegFromLo to it.
--}
-
-data ChildCode64 -- a.k.a "Register64"
- = ChildCode64
- InstrBlock -- code
- Reg -- the lower 32-bit temporary which contains the
- -- result; use getHiVRegFromLo to find the other
- -- VRegUnique. Rules of this simplified insn
- -- selection game are therefore that the returned
- -- Reg may be modified
-
-#if WORD_SIZE_IN_BITS==32
-assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
-#endif
-
-#ifndef x86_64_TARGET_ARCH
-iselExpr64 :: CmmExpr -> NatM ChildCode64
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
- Amode addr addr_code <- getAmode addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Little-endian store
- mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
- mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
- -- in
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
- mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
- -- in
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(i386): invalid lvalue"
-
-------------
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
- code = toOL [
- MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
- ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- Amode addr addr_code <- getAmode addrTree
- (rlo,rhi) <- getNewRegPairNat II32
- let
- mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
- mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
- -- in
- return (
- ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
- )
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (mkVReg vu II32))
-
--- we handle addition, but rather badly
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
- r1hi = getHiVRegFromLo r1lo
- code = code1 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpReg r2lo) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpReg r2hi) (OpReg rhi) ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
- fn <- getAnyReg expr
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- code = fn r_dst_lo
- return (
- ChildCode64 (code `snocOL`
- MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
- r_dst_lo
- )
-
-iselExpr64 expr
- = pprPanic "iselExpr64(i386)" (ppr expr)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_I64Code addrTree valueTree = do
- Amode addr addr_code <- getAmode addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- (src, code) <- getSomeReg addrTree
- let
- rhi = getHiVRegFromLo rlo
- -- Big-endian store
- mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
- mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
- return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
- return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(sparc): invalid lvalue"
-
-
--- Load a 64 bit word
-iselExpr64 (CmmLoad addrTree ty)
- | isWord64 ty
- = do Amode amode addr_code <- getAmode addrTree
- let result
-
- | AddrRegReg r1 r2 <- amode
- = do rlo <- getNewRegNat II32
- tmp <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ ADD False False r1 (RIReg r2) tmp
- , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
- , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
- rlo
-
- | AddrRegImm r1 (ImmInt i) <- amode
- = do rlo <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
- , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
- rlo
-
- result
-
-
--- Add a literal to a 64 bit integer
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- return $ ChildCode64
- ( toOL
- [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
- , ADD True False r1_hi (RIReg g0) r_dst_hi ])
- r_dst_lo
-
-
--- Addition of II64
-iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- ChildCode64 code2 r2_lo <- iselExpr64 e2
- let r2_hi = getHiVRegFromLo r2_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- let code = code1
- `appOL` code2
- `appOL` toOL
- [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
- , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_lo = mkVReg uq II32
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
- return (
- ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
- )
-
--- Convert something into II64
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
- = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- -- compute expr and load it into r_dst_lo
- (a_reg, a_code) <- getSomeReg expr
-
- let code = a_code
- `appOL` toOL
- [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
- , mkRegRegMoveInstr a_reg r_dst_lo ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 expr
- = pprPanic "iselExpr64(sparc)" (ppr expr)
-
-#endif /* sparc_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if powerpc_TARGET_ARCH
-
-getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
-getI64Amodes addrTree = do
- Amode hi_addr addr_code <- getAmode addrTree
- case addrOffset hi_addr 4 of
- Just lo_addr -> return (hi_addr, lo_addr, addr_code)
- Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
- return (AddrRegImm hi_ptr (ImmInt 0),
- AddrRegImm hi_ptr (ImmInt 4),
- code)
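-
--- On 32-bit big-endian PowerPC the more significant word of a 64-bit
--- value sits at the lower address, so hi_addr is the base amode and
--- lo_addr lies 4 bytes beyond it; the store and load code below
--- relies on this layout.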
-
-assignMem_I64Code addrTree valueTree = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Big-endian store
- mov_hi = ST II32 rhi hi_addr
- mov_lo = ST II32 rlo lo_addr
- -- in
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = mkVReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MR r_dst_lo r_src_lo
- mov_hi = MR r_dst_hi r_src_hi
- -- in
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code lvalue valueTree
- = panic "assignReg_I64Code(powerpc): invalid lvalue"
-
-
--- Don't delete this -- it's very handy for debugging.
---iselExpr64 expr
--- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
--- = panic "iselExpr64(???)"
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LD II32 rhi hi_addr
- mov_lo = LD II32 rlo lo_addr
- return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (mkVReg vu II32))
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
-        half0 = fromIntegral (fromIntegral i :: Word16)
-        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
-        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
-        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
-
-        code = toOL [
-                LIS rlo (ImmInt half1),
-                OR rlo rlo (RIImm $ ImmInt half0),
-                LIS rhi (ImmInt half3),
-                OR rhi rhi (RIImm $ ImmInt half2)
-                ]
- -- in
- return (ChildCode64 code rlo)
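-
--- Worked example: for the literal 0x0123456789ABCDEF the halves are
--- half3 = 0x0123, half2 = 0x4567, half1 = 0x89AB, half0 = 0xCDEF.
--- LIS puts its operand into bits 16-31 of the target and clears bits
--- 0-15, so the OR with half0 (resp. half2) then fills in the low
--- halfword of rlo (resp. rhi).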
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ ADDC rlo r1lo r2lo,
- ADDE rhi r1hi r2hi ]
- -- in
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
- (expr_reg,expr_code) <- getSomeReg expr
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LI rhi (ImmInt 0)
- mov_lo = MR rlo expr_reg
- return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-iselExpr64 expr
- = pprPanic "iselExpr64(powerpc)" (ppr expr)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- The 'Register' type
-
--- 'Register's passed up the tree.  If the Cmm code forces the register
--- to live in a pre-decided machine register, it comes out as @Fixed@;
--- otherwise, it comes out as @Any@, and the parent can decide which
--- register to put it in.
-
-data Register
- = Fixed Size Reg InstrBlock
- | Any Size (Reg -> InstrBlock)
-
-swizzleRegisterRep :: Register -> Size -> Register
--- Change the width; it's a no-op
-swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
-swizzleRegisterRep (Any _ codefn) size = Any size codefn
-
-
--- -----------------------------------------------------------------------------
--- Utils based on getRegister, below
-
--- The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
--- -----------------------------------------------------------------------------
--- Grab the Reg for a CmmReg
-
-getRegisterReg :: CmmReg -> Reg
-
-getRegisterReg (CmmLocal (LocalReg u pk))
- = mkVReg u (cmmTypeSize pk)
-
-getRegisterReg (CmmGlobal mid)
- = case get_GlobalReg_reg_or_addr mid of
- Left (RealReg rrno) -> RealReg rrno
- _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
- -- By this stage, the only MagicIds remaining should be the
- -- ones which map to a real machine register on this
- -- platform. Hence ...
-
-
--- -----------------------------------------------------------------------------
--- Generate code to get a subtree into a Register
-
--- Don't delete this -- it's very handy for debugging.
---getRegister expr
--- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
--- = panic "getRegister(???)"
-
-getRegister :: CmmExpr -> NatM Register
-
-#if !x86_64_TARGET_ARCH
- -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
- -- register, it can only be used for rip-relative addressing.
-getRegister (CmmReg (CmmGlobal PicBaseReg))
- = do
- reg <- getPicBaseNat wordSize
- return (Fixed wordSize reg nilOL)
-#endif
-
-getRegister (CmmReg reg)
- = return (Fixed (cmmTypeSize (cmmRegType reg))
- (getRegisterReg reg) nilOL)
-
-getRegister tree@(CmmRegOff _ _)
- = getRegister (mangleIndexTree tree)
-
-
-#if WORD_SIZE_IN_BITS==32
-	-- for 32-bit architectures, support some 64 -> 32 bit conversions:
- -- TO_W_(x), TO_W_(x >> 32)
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-#endif
-
--- end of machine-"independent" bit; here we go on the rest...
-
-#if alpha_TARGET_ARCH
-
-getRegister (StDouble d)
- = getBlockIdNat `thenNat` \ lbl ->
- getNewRegNat PtrRep `thenNat` \ tmp ->
- let code dst = mkSeqInstrs [
- LDATA RoDataSegment lbl [
- DATA TF [ImmLab (rational d)]
- ],
- LDA tmp (AddrImm (ImmCLbl lbl)),
- LD TF dst (AddrReg tmp)]
- in
- return (Any FF64 code)
-
-getRegister (StPrim primop [x]) -- unary PrimOps
- = case primop of
- IntNegOp -> trivialUCode (NEG Q False) x
-
- NotOp -> trivialUCode NOT x
-
- FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
- DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x
-
- OrdOp -> coerceIntCode IntRep x
- ChrOp -> chrCode x
-
- Float2IntOp -> coerceFP2Int x
- Int2FloatOp -> coerceInt2FP pr x
- Double2IntOp -> coerceFP2Int x
- Int2DoubleOp -> coerceInt2FP pr x
-
- Double2FloatOp -> coerceFltCode x
- Float2DoubleOp -> coerceFltCode x
-
- other_op -> getRegister (StCall fn CCallConv FF64 [x])
- where
- fn = case other_op of
- FloatExpOp -> fsLit "exp"
- FloatLogOp -> fsLit "log"
- FloatSqrtOp -> fsLit "sqrt"
- FloatSinOp -> fsLit "sin"
- FloatCosOp -> fsLit "cos"
- FloatTanOp -> fsLit "tan"
- FloatAsinOp -> fsLit "asin"
- FloatAcosOp -> fsLit "acos"
- FloatAtanOp -> fsLit "atan"
- FloatSinhOp -> fsLit "sinh"
- FloatCoshOp -> fsLit "cosh"
- FloatTanhOp -> fsLit "tanh"
- DoubleExpOp -> fsLit "exp"
- DoubleLogOp -> fsLit "log"
- DoubleSqrtOp -> fsLit "sqrt"
- DoubleSinOp -> fsLit "sin"
- DoubleCosOp -> fsLit "cos"
- DoubleTanOp -> fsLit "tan"
- DoubleAsinOp -> fsLit "asin"
- DoubleAcosOp -> fsLit "acos"
- DoubleAtanOp -> fsLit "atan"
- DoubleSinhOp -> fsLit "sinh"
- DoubleCoshOp -> fsLit "cosh"
- DoubleTanhOp -> fsLit "tanh"
- where
- pr = panic "MachCode.getRegister: no primrep needed for Alpha"
-
-getRegister (StPrim primop [x, y]) -- dyadic PrimOps
- = case primop of
- CharGtOp -> trivialCode (CMP LTT) y x
- CharGeOp -> trivialCode (CMP LE) y x
- CharEqOp -> trivialCode (CMP EQQ) x y
- CharNeOp -> int_NE_code x y
- CharLtOp -> trivialCode (CMP LTT) x y
- CharLeOp -> trivialCode (CMP LE) x y
-
- IntGtOp -> trivialCode (CMP LTT) y x
- IntGeOp -> trivialCode (CMP LE) y x
- IntEqOp -> trivialCode (CMP EQQ) x y
- IntNeOp -> int_NE_code x y
- IntLtOp -> trivialCode (CMP LTT) x y
- IntLeOp -> trivialCode (CMP LE) x y
-
- WordGtOp -> trivialCode (CMP ULT) y x
-	WordGeOp  -> trivialCode (CMP ULE) y x
- WordEqOp -> trivialCode (CMP EQQ) x y
- WordNeOp -> int_NE_code x y
- WordLtOp -> trivialCode (CMP ULT) x y
- WordLeOp -> trivialCode (CMP ULE) x y
-
- AddrGtOp -> trivialCode (CMP ULT) y x
- AddrGeOp -> trivialCode (CMP ULE) y x
- AddrEqOp -> trivialCode (CMP EQQ) x y
- AddrNeOp -> int_NE_code x y
- AddrLtOp -> trivialCode (CMP ULT) x y
- AddrLeOp -> trivialCode (CMP ULE) x y
-
- FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
- FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
- FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
- FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
- FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
- FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
-
- DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
- DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
- DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
- DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
- DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
- DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
-
- IntAddOp -> trivialCode (ADD Q False) x y
- IntSubOp -> trivialCode (SUB Q False) x y
- IntMulOp -> trivialCode (MUL Q False) x y
- IntQuotOp -> trivialCode (DIV Q False) x y
- IntRemOp -> trivialCode (REM Q False) x y
-
- WordAddOp -> trivialCode (ADD Q False) x y
- WordSubOp -> trivialCode (SUB Q False) x y
- WordMulOp -> trivialCode (MUL Q False) x y
- WordQuotOp -> trivialCode (DIV Q True) x y
- WordRemOp -> trivialCode (REM Q True) x y
-
- FloatAddOp -> trivialFCode W32 (FADD TF) x y
- FloatSubOp -> trivialFCode W32 (FSUB TF) x y
- FloatMulOp -> trivialFCode W32 (FMUL TF) x y
- FloatDivOp -> trivialFCode W32 (FDIV TF) x y
-
- DoubleAddOp -> trivialFCode W64 (FADD TF) x y
- DoubleSubOp -> trivialFCode W64 (FSUB TF) x y
- DoubleMulOp -> trivialFCode W64 (FMUL TF) x y
- DoubleDivOp -> trivialFCode W64 (FDIV TF) x y
-
- AddrAddOp -> trivialCode (ADD Q False) x y
- AddrSubOp -> trivialCode (SUB Q False) x y
- AddrRemOp -> trivialCode (REM Q True) x y
-
- AndOp -> trivialCode AND x y
- OrOp -> trivialCode OR x y
- XorOp -> trivialCode XOR x y
- SllOp -> trivialCode SLL x y
- SrlOp -> trivialCode SRL x y
-
- ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
- ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
- ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
-
- FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
- DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y])
- where
- {- ------------------------------------------------------------
- Some bizarre special code for getting condition codes into
- registers. Integer non-equality is a test for equality
- followed by an XOR with 1. (Integer comparisons always set
- the result register to 0 or 1.) Floating point comparisons of
- any kind leave the result in a floating point register, so we
- need to wrangle an integer register out of things.
- -}
- int_NE_code :: StixTree -> StixTree -> NatM Register
-
- int_NE_code x y
- = trivialCode (CMP EQQ) x y `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
- in
- return (Any IntRep code__2)
-
- {- ------------------------------------------------------------
- Comments for int_NE_code also apply to cmpF_code
- -}
- cmpF_code
- :: (Reg -> Reg -> Reg -> Instr)
- -> Cond
- -> StixTree -> StixTree
- -> NatM Register
-
- cmpF_code instr cond x y
- = trivialFCode pr instr x y `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- getBlockIdNat `thenNat` \ lbl ->
- let
- code = registerCode register tmp
- result = registerName register tmp
-
- code__2 dst = code . mkSeqInstrs [
- OR zeroh (RIImm (ImmInt 1)) dst,
- BF cond result (ImmCLbl lbl),
- OR zeroh (RIReg zeroh) dst,
- NEWBLOCK lbl]
- in
- return (Any IntRep code__2)
- where
- pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
- ------------------------------------------------------------
-
-getRegister (CmmLoad mem pk)
-  = getAmode mem                    `thenNat` \ amode ->
-    let
-        code = amodeCode amode
-        src  = amodeAddr amode
-        size = primRepToSize pk
-        code__2 dst = code . mkSeqInstr (LD size dst src)
-    in
-    return (Any size code__2)
-
-getRegister (StInt i)
- | fits8Bits i
- = let
- code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
- in
- return (Any IntRep code)
- | otherwise
- = let
- code dst = mkSeqInstr (LDI Q dst src)
- in
- return (Any IntRep code)
- where
- src = ImmInt (fromInteger i)
-
-getRegister leaf
- | isJust imm
- = let
- code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
- in
- return (Any PtrRep code)
- where
- imm = maybeImm leaf
- imm__2 = case imm of Just x -> x
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat f W32)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)]
- `consOL` (addr_code `snocOL`
- GLD FF32 addr dst)
- -- in
- return (Any FF32 code)
-
-
-getRegister (CmmLit (CmmFloat d W64))
- | d == 0.0
- = let code dst = unitOL (GLDZ dst)
- in return (Any FF64 code)
-
- | d == 1.0
- = let code dst = unitOL (GLD1 dst)
- in return (Any FF64 code)
-
- | otherwise = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let code dst =
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)]
- `consOL` (addr_code `snocOL`
- GLD FF64 addr dst)
- -- in
- return (Any FF64 code)
-
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
-
-getRegister (CmmLit (CmmFloat 0.0 w)) = do
- let size = floatSize w
- code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
- -- I don't know why there are xorpd, xorps, and pxor instructions.
- -- They all appear to do the same thing --SDM
- return (Any size code)
-
-getRegister (CmmLit (CmmFloat f w)) = do
- lbl <- getNewLabelNat
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f w)],
- MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- ]
- -- in
- return (Any size code)
- where size = floatSize w
-
-#endif /* x86_64_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II32 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
- return (Any II64 code)
-
-getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II32) addr
- return (Any II64 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- = return $ Any II64 (\dst -> unitOL $
- LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-#endif
-
-#if x86_64_TARGET_ARCH
-getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- code dst = x_code dst `appOL` toOL [
- -- This is how gcc does it, so it can't be that bad:
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x80000000 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32),
- CmmStaticLit (CmmInt 0 W32)
- ],
- XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorps, so we need the 128-bit constant
- -- ToDo: rip-relative
- ]
- --
- return (Any FF32 code)
-
-getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
- x_code <- getAnyReg x
- lbl <- getNewLabelNat
- let
- -- This is how gcc does it, so it can't be that bad:
- code dst = x_code dst `appOL` toOL [
- LDATA ReadOnlyData16 [
- CmmAlign 16,
- CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x8000000000000000 W64),
- CmmStaticLit (CmmInt 0 W64)
- ],
- -- gcc puts an unpck here. Wonder if we need it.
- XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
- -- xorpd, so we need the 128-bit constant
- ]
- --
- return (Any FF64 code)
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
-#if i386_TARGET_ARCH
- MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
-#endif
-
- MO_S_Neg w -> triv_ucode NEGI (intSize w)
- MO_F_Neg w -> triv_ucode NEGI (floatSize w)
- MO_Not w -> triv_ucode NOT (intSize w)
-
- -- Nop conversions
- MO_UU_Conv W32 W8 -> toI8Reg W32 x
- MO_SS_Conv W32 W8 -> toI8Reg W32 x
- MO_UU_Conv W16 W8 -> toI8Reg W16 x
- MO_SS_Conv W16 W8 -> toI8Reg W16 x
- MO_UU_Conv W32 W16 -> toI16Reg W32 x
- MO_SS_Conv W32 W16 -> toI16Reg W32 x
-
-#if x86_64_TARGET_ARCH
- MO_UU_Conv W64 W32 -> conversionNop II64 x
- MO_SS_Conv W64 W32 -> conversionNop II64 x
- MO_UU_Conv W64 W16 -> toI16Reg W64 x
- MO_SS_Conv W64 W16 -> toI16Reg W64 x
- MO_UU_Conv W64 W8 -> toI8Reg W64 x
- MO_SS_Conv W64 W8 -> toI8Reg W64 x
-#endif
-
- MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
- MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-
- -- widenings
- MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
- MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
- MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
-
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
-
-#if x86_64_TARGET_ARCH
- MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
- MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
- MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
- MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
- MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
- MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
- -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
- -- However, we don't want the register allocator to throw it
- -- away as an unnecessary reg-to-reg move, so we keep it in
- -- the form of a movzl and print it as a movl later.
-#endif
-
-#if i386_TARGET_ARCH
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
- MO_FF_Conv W64 W32 -> conversionNop FF32 x
-#else
- MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
- MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-#endif
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- other -> pprPanic "getRegister" (pprMachOp mop)
- where
- triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
- triv_ucode instr size = trivialUCode size (instr size) x
-
- -- signed or unsigned extension.
- integerExtend :: Width -> Width
- -> (Size -> Operand -> Operand -> Instr)
- -> CmmExpr -> NatM Register
- integerExtend from to instr expr = do
- (reg,e_code) <- if from == W8 then getByteReg expr
- else getSomeReg expr
- let
- code dst =
- e_code `snocOL`
- instr (intSize from) (OpReg reg) (OpReg dst)
- return (Any (intSize to) code)
-
- toI8Reg :: Width -> CmmExpr -> NatM Register
- toI8Reg new_rep expr
- = do codefn <- getAnyReg expr
- return (Any (intSize new_rep) codefn)
- -- HACK: use getAnyReg to get a byte-addressable register.
- -- If the source was a Fixed register, this will add the
- -- mov instruction to put it into the desired destination.
- -- We're assuming that the destination won't be a fixed
- -- non-byte-addressable register; it won't be, because all
- -- fixed registers are word-sized.
-
- toI16Reg = toI8Reg -- for now
-
- conversionNop :: Size -> CmmExpr -> NatM Register
- conversionNop new_size expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_size)
-
-
-getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
- = case mop of
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ x y
- MO_Ne rep -> condIntReg NE x y
-
- MO_S_Gt rep -> condIntReg GTT x y
- MO_S_Ge rep -> condIntReg GE x y
- MO_S_Lt rep -> condIntReg LTT x y
- MO_S_Le rep -> condIntReg LE x y
-
- MO_U_Gt rep -> condIntReg GU x y
- MO_U_Ge rep -> condIntReg GEU x y
- MO_U_Lt rep -> condIntReg LU x y
- MO_U_Le rep -> condIntReg LEU x y
-
-#if i386_TARGET_ARCH
- MO_F_Add w -> trivialFCode w GADD x y
- MO_F_Sub w -> trivialFCode w GSUB x y
- MO_F_Quot w -> trivialFCode w GDIV x y
- MO_F_Mul w -> trivialFCode w GMUL x y
-#endif
-
-#if x86_64_TARGET_ARCH
- MO_F_Add w -> trivialFCode w ADD x y
- MO_F_Sub w -> trivialFCode w SUB x y
- MO_F_Quot w -> trivialFCode w FDIV x y
- MO_F_Mul w -> trivialFCode w MUL x y
-#endif
-
- MO_Add rep -> add_code rep x y
- MO_Sub rep -> sub_code rep x y
-
- MO_S_Quot rep -> div_code rep True True x y
- MO_S_Rem rep -> div_code rep True False x y
- MO_U_Quot rep -> div_code rep False True x y
- MO_U_Rem rep -> div_code rep False False x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_Mul rep -> triv_op rep IMUL
- MO_And rep -> triv_op rep AND
- MO_Or rep -> triv_op rep OR
- MO_Xor rep -> triv_op rep XOR
-
-        {- Shift ops on x86 have constraints on their source: the
-           shift amount either has to be Imm, CL or 1,
-           so trivialCode is not restrictive enough (sigh.)
-        -}
- MO_Shl rep -> shift_code rep SHL x y {-False-}
- MO_U_Shr rep -> shift_code rep SHR x y {-False-}
- MO_S_Shr rep -> shift_code rep SAR x y {-False-}
-
- other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
- where
- --------------------
- triv_op width instr = trivialCode width op (Just op) x y
- where op = instr (intSize width)
-
- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo rep a b = do
- (a_reg, a_code) <- getNonClobberedReg a
- b_code <- getAnyReg b
- let
- shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
-
- size = intSize rep
- code = a_code `appOL` b_code eax `appOL`
- toOL [
- IMUL2 size (OpReg a_reg), -- result in %edx:%eax
- SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
- -- sign extend lower part
- SUB size (OpReg edx) (OpReg eax)
- -- compare against upper
- -- eax==0 if high part == sign extended low part
- ]
- -- in
- return (Fixed size eax code)
-
- --------------------
- shift_code :: Width
- -> (Size -> Operand -> Operand -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
- {- Case1: shift length as immediate -}
- shift_code width instr x y@(CmmLit lit) = do
- x_code <- getAnyReg x
- let
- size = intSize width
- code dst
- = x_code dst `snocOL`
- instr size (OpImm (litToImm lit)) (OpReg dst)
- -- in
- return (Any size code)
-
- {- Case2: shift length is complex (non-immediate)
- * y must go in %ecx.
- * we cannot do y first *and* put its result in %ecx, because
- %ecx might be clobbered by x.
- * if we do y second, then x cannot be
- in a clobbered reg. Also, we cannot clobber x's reg
- with the instruction itself.
- * so we can either:
- - do y first, put its result in a fresh tmp, then copy it to %ecx later
- - do y second and put its result into %ecx. x gets placed in a fresh
-           tmp. This is likely to be better, because the reg alloc can
- eliminate this reg->reg move here (it won't eliminate the other one,
- because the move is into the fixed %ecx).
- -}
- shift_code width instr x y{-amount-} = do
- x_code <- getAnyReg x
- let size = intSize width
- tmp <- getNewRegNat size
- y_code <- getAnyReg y
- let
- code = x_code tmp `appOL`
- y_code ecx `snocOL`
- instr size (OpReg ecx) (OpReg tmp)
- -- in
- return (Fixed size tmp code)
-
- --------------------
- add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- add_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger y = add_int rep x y
- add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
- where size = intSize rep
-
- --------------------
- sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- sub_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger (-y) = add_int rep x (-y)
- sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
-
- -- our three-operand add instruction:
- add_int width x y = do
- (x_reg, x_code) <- getSomeReg x
- let
- size = intSize width
- imm = ImmInt (fromInteger y)
- code dst
- = x_code `snocOL`
- LEA size
- (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
- (OpReg dst)
- --
- return (Any size code)
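-
-    -- (LEA computes x_reg + imm into an arbitrary dst in one
-    -- instruction, without requiring dst to coincide with x_reg and
-    -- without touching the condition codes, which is why it is
-    -- preferred over ADD here.)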
-
- ----------------------
- div_code width signed quotient x y = do
- (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
- x_code <- getAnyReg x
- let
- size = intSize width
- widen | signed = CLTD size
- | otherwise = XOR size (OpReg edx) (OpReg edx)
-
- instr | signed = IDIV
- | otherwise = DIV
-
- code = y_code `appOL`
- x_code eax `appOL`
- toOL [widen, instr size y_op]
-
- result | quotient = eax
- | otherwise = edx
-
- -- in
- return (Fixed size result code)
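-
-    -- (This is the standard x86 division idiom: widen %eax into %edx
-    -- first (cltd/cdq when signed, xor %edx,%edx when unsigned);
-    -- idiv/div then leaves the quotient in %eax and the remainder in
-    -- %edx, which is why 'result' picks one of those two registers.)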
-
-
-getRegister (CmmLoad mem pk)
- | isFloatType pk
- = do
- Amode src mem_code <- getAmode mem
- let
- size = cmmTypeSize pk
- code dst = mem_code `snocOL`
- IF_ARCH_i386(GLD size src dst,
- MOV size (OpAddr src) (OpReg dst))
- return (Any size code)
-
-#if i386_TARGET_ARCH
-getRegister (CmmLoad mem pk)
- | not (isWord64 pk)
- = do
- code <- intLoadCode instr mem
- return (Any size code)
- where
- width = typeWidth pk
- size = intSize width
- instr = case width of
- W8 -> MOVZxL II8
- _other -> MOV size
- -- We always zero-extend 8-bit loads, if we
- -- can't think of anything better. This is because
- -- we can't guarantee access to an 8-bit variant of every register
- -- (esi and edi don't have 8-bit variants), so to make things
- -- simpler we do our 8-bit arithmetic with full 32-bit registers.
-#endif
-
-#if x86_64_TARGET_ARCH
--- Simpler memory load code on x86_64
-getRegister (CmmLoad mem pk)
- = do
- code <- intLoadCode (MOV size) mem
- return (Any size code)
- where size = intSize $ typeWidth pk
-#endif
-
-getRegister (CmmLit (CmmInt 0 width))
- = let
- size = intSize width
-
- -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
- adj_size = case size of II64 -> II32; _ -> size
- size1 = IF_ARCH_i386( size, adj_size )
- code dst
- = unitOL (XOR size1 (OpReg dst) (OpReg dst))
- in
- return (Any size code)
-
-#if x86_64_TARGET_ARCH
- -- optimisation for loading small literals on x86_64: take advantage
- -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
- -- instruction forms are shorter.
-getRegister (CmmLit lit)
- | isWord64 (cmmLitType lit), not (isBigLit lit)
- = let
- imm = litToImm lit
- code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
- in
- return (Any II64 code)
- where
- isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
- isBigLit _ = False
- -- note1: not the same as (not.is32BitLit), because that checks for
- -- signed literals that fit in 32 bits, but we want unsigned
- -- literals here.
- -- note2: all labels are small, because we're assuming the
- -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-
-getRegister (CmmLit lit)
- = let
- size = cmmTypeSize (cmmLitType lit)
- imm = litToImm lit
- code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
- in
- return (Any size code)
-
-getRegister other = pprPanic "getRegister(x86)" (ppr other)
-
-
-intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
- -> NatM (Reg -> InstrBlock)
-intLoadCode instr mem = do
- Amode src mem_code <- getAmode mem
- return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-
--- Compute an expression into *any* register, adding the appropriate
--- move instruction if necessary.
-getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-getAnyReg expr = do
- r <- getRegister expr
- anyReg r
-
-anyReg :: Register -> NatM (Reg -> InstrBlock)
-anyReg (Any _ code) = return code
-anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-
--- A bit like getSomeReg, but we want a reg that can be byte-addressed.
--- Fixed registers might not be byte-addressable, so we make sure we've
--- got a temporary, inserting an extra reg copy if necessary.
-getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
-#if x86_64_TARGET_ARCH
-getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
-#else
-getByteReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- | isVirtualReg reg -> return (reg,code)
- | otherwise -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- -- ToDo: could optimise slightly by checking for byte-addressable
- -- real registers, but that will happen very rarely if at all.
-#endif
-
--- Another variant: this time we want the result in a register that cannot
--- be modified by code to evaluate an arbitrary expression.
-getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getNonClobberedReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- -- only free regs can be clobbered
- | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- | otherwise ->
- return (reg, code)
-
-reg2reg :: Size -> Reg -> Reg -> Instr
-reg2reg size src dst
-#if i386_TARGET_ARCH
- | isFloatSize size = GMOV src dst
-#endif
- | otherwise = MOV size (OpReg src) (OpReg dst)
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- getRegister :: CmmExpr -> NatM Register
-
--- Load a literal float into a float register.
--- The actual literal is stored in a new data area, and we load it
--- at runtime.
-getRegister (CmmLit (CmmFloat f W32)) = do
-
- -- a label for the new data area
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
-
- let code dst = toOL [
- -- the data area
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f W32)],
-
- -- load the literal
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-
- return (Any FF32 code)
-
-getRegister (CmmLit (CmmFloat d W64)) = do
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
- let code dst = toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat d W64)],
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
- return (Any FF64 code)
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
- MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
-
- MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
- MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
-
- MO_FF_Conv W64 W32-> coerceDbl2Flt x
- MO_FF_Conv W32 W64-> coerceFlt2Dbl x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- -- Conversions which are a nop on sparc
- MO_UU_Conv from to
- | from == to -> conversionNop (intSize to) x
- MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
-
- MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
- MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
- MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
-
- -- sign extension
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
-
- other_op -> panic ("Unknown unary mach op: " ++ show mop)
- where
-
- -- | sign extend and widen
- integerExtend
- :: Width -- ^ width of source expression
- -> Width -- ^ width of result
- -> CmmExpr -- ^ source expression
- -> NatM Register
-
- integerExtend from to expr
- = do -- load the expr into some register
- (reg, e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- let bitCount
- = case (from, to) of
- (W8, W32) -> 24
- (W16, W32) -> 16
- (W8, W16) -> 24
- let code dst
- = e_code
-
- -- local shift word left to load the sign bit
- `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
-
- -- arithmetic shift right to sign extend
- `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
-
- return (Any (intSize to) code)
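-
-        -- For example, sign extending W8 to W32 uses bitCount = 24:
-        -- SLL moves the significant byte up into bits 24-31, and SRA
-        -- brings it back down while replicating bit 31 (the sign bit)
-        -- across the upper bits.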
-
-
- conversionNop new_rep expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_rep)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
- = case mop of
- MO_Eq rep -> condIntReg EQQ x y
- MO_Ne rep -> condIntReg NE x y
-
- MO_S_Gt rep -> condIntReg GTT x y
- MO_S_Ge rep -> condIntReg GE x y
- MO_S_Lt rep -> condIntReg LTT x y
- MO_S_Le rep -> condIntReg LE x y
-
-      MO_U_Gt W32 -> condIntReg GU  x y
-      MO_U_Ge W32 -> condIntReg GEU x y
-      MO_U_Lt W32 -> condIntReg LU  x y
-      MO_U_Le W32 -> condIntReg LEU x y
-
- MO_U_Gt W16 -> condIntReg GU x y
- MO_U_Ge W16 -> condIntReg GEU x y
- MO_U_Lt W16 -> condIntReg LU x y
- MO_U_Le W16 -> condIntReg LEU x y
-
- MO_Add W32 -> trivialCode W32 (ADD False False) x y
- MO_Sub W32 -> trivialCode W32 (SUB False False) x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_S_Quot W32 -> idiv True False x y
- MO_U_Quot W32 -> idiv False False x y
-
- MO_S_Rem W32 -> irem True x y
- MO_U_Rem W32 -> irem False x y
-
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
-
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_F_Add w -> trivialFCode w FADD x y
- MO_F_Sub w -> trivialFCode w FSUB x y
- MO_F_Mul w -> trivialFCode w FMUL x y
- MO_F_Quot w -> trivialFCode w FDIV x y
-
- MO_And rep -> trivialCode rep (AND False) x y
- MO_Or rep -> trivialCode rep (OR False) x y
- MO_Xor rep -> trivialCode rep (XOR False) x y
-
- MO_Mul rep -> trivialCode rep (SMUL False) x y
-
- MO_Shl rep -> trivialCode rep SLL x y
- MO_U_Shr rep -> trivialCode rep SRL x y
- MO_S_Shr rep -> trivialCode rep SRA x y
-
-{-
- MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
- [promote x, promote y])
- where promote x = CmmMachOp MO_F32_to_Dbl [x]
- MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
- [x, y])
--}
- other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
- where
- -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
-
-
- -- | Generate an integer division instruction.
- idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
-
- -- For unsigned division with a 32 bit numerator,
- -- we can just clear the Y register.
- idiv False cc x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
- -- For _signed_ division with a 32 bit numerator,
- -- we have to sign extend the numerator into the Y register.
- idiv True cc x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
- , SRA tmp (RIImm (ImmInt 16)) tmp
-
- , WRY tmp g0
- , SDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
- -- | Do an integer remainder.
- --
- -- NOTE: The SPARC v8 architecture manual says that integer division
- -- instructions _may_ generate a remainder, depending on the implementation.
- -- If so it is _recommended_ that the remainder is placed in the Y register.
- --
- -- The UltraSparc 2007 manual says Y is _undefined_ after division.
- --
- -- The SPARC T2 doesn't store the remainder, not sure about the others.
- -- It's probably best not to worry about it, and just generate our own
- -- remainders.
- --
- irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
-
- -- For unsigned operands:
- -- Division is between a 64 bit numerator and a 32 bit denominator,
- -- so we still have to clear the Y register.
- irem False x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV False a_reg (RIReg b_reg) tmp_reg
- , UMUL False tmp_reg (RIReg b_reg) tmp_reg
- , SUB False False a_reg (RIReg tmp_reg) dst]
-
- return (Any II32 code)
-
-
- -- For signed operands:
- -- Make sure to sign extend into the Y register, or the remainder
- -- will have the wrong sign when the numerator is negative.
- --
- -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
- -- not the full 32. Not sure why this is, something to do with overflow?
- -- If anyone cares enough about the speed of signed remainder they
- -- can work it out themselves (then tell me). -- BL 2009/01/20
-
- irem True x y = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp1_reg <- getNewRegNat II32
- tmp2_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , WRY tmp1_reg g0
-
- , SDIV False a_reg (RIReg b_reg) tmp2_reg
- , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
- , SUB False False a_reg (RIReg tmp2_reg) dst]
-
- return (Any II32 code)
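-
-        -- (Both irem cases compute the remainder by hand as
-        -- a - (a `quot` b) * b, using the quotient from the divide,
-        -- rather than trusting whatever the hardware leaves in Y.)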
-
-
- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo rep a b = do
- (a_reg, a_code) <- getSomeReg a
- (b_reg, b_code) <- getSomeReg b
- res_lo <- getNewRegNat II32
- res_hi <- getNewRegNat II32
- let
- shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
- code dst = a_code `appOL` b_code `appOL`
- toOL [
- SMUL False a_reg (RIReg b_reg) res_lo,
- RDY res_hi,
- SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
- SUB False False res_lo (RIReg res_hi) dst
- ]
- return (Any II32 code)
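-
-        -- (SMUL deposits the upper 32 bits of the 64-bit product in
-        -- the Y register; dst ends up zero exactly when that upper
-        -- word equals the sign extension of the lower word, i.e.
-        -- when the multiply did not overflow a signed 32-bit result.)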
-
-getRegister (CmmLoad mem pk) = do
- Amode src code <- getAmode mem
- let
- code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
- return (Any (cmmTypeSize pk) code__2)
-
-getRegister (CmmLit (CmmInt i _))
- | fits13Bits i
- = let
- src = ImmInt (fromInteger i)
- code dst = unitOL (OR False g0 (RIImm src) dst)
- in
- return (Any II32 code)
-
-getRegister (CmmLit lit)
- = let rep = cmmLitType lit
- imm = litToImm lit
- code dst = toOL [
- SETHI (HI imm) dst,
- OR False dst (RIImm (LO imm)) dst]
- in return (Any II32 code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-getRegister (CmmLoad mem pk)
- | not (isWord64 pk)
- = do
- Amode addr addr_code <- getAmode mem
- let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
- addr_code `snocOL` LD size dst addr
- return (Any size code)
- where size = cmmTypeSize pk
-
--- catch simple cases of zero- or sign-extended load
-getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
--- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-
-getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-
-getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode mem
- return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-
-getRegister (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
- MO_Not rep -> triv_ucode_int rep NOT
-
- MO_F_Neg w -> triv_ucode_float w FNEG
- MO_S_Neg w -> triv_ucode_int w NEG
-
- MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- MO_SS_Conv from to
- | from == to -> conversionNop (intSize to) x
-
- -- narrowing is a nop: we treat the high bits as undefined
- MO_SS_Conv W32 to -> conversionNop (intSize to) x
- MO_SS_Conv W16 W8 -> conversionNop II8 x
- MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
- MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
-
- MO_UU_Conv from to
- | from == to -> conversionNop (intSize to) x
- -- narrowing is a nop: we treat the high bits as undefined
- MO_UU_Conv W32 to -> conversionNop (intSize to) x
- MO_UU_Conv W16 W8 -> conversionNop II8 x
- MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
-      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
-
-      other -> pprPanic "getRegister(ppc) - unary CmmMachOp" (pprMachOp mop)
-
-    where
- triv_ucode_int width instr = trivialUCode (intSize width) instr x
- triv_ucode_float width instr = trivialUCode (floatSize width) instr x
-
- conversionNop new_size expr
- = do e_code <- getRegister expr
- return (swizzleRegisterRep e_code new_size)
-
-getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
- = case mop of
- MO_F_Eq w -> condFltReg EQQ x y
- MO_F_Ne w -> condFltReg NE x y
- MO_F_Gt w -> condFltReg GTT x y
- MO_F_Ge w -> condFltReg GE x y
- MO_F_Lt w -> condFltReg LTT x y
- MO_F_Le w -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
- MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
-
- MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_F_Add w -> triv_float w FADD
- MO_F_Sub w -> triv_float w FSUB
- MO_F_Mul w -> triv_float w FMUL
- MO_F_Quot w -> triv_float w FDIV
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
- MO_Add W32 ->
- case y of
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
- -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
- CmmLit lit
- -> do
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- code dst = srcCode `appOL` toOL [
- ADDIS dst src (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- return (Any II32 code)
- _ -> trivialCode W32 True ADD x y
-
- MO_Add rep -> trivialCode rep True ADD x y
- MO_Sub rep ->
-         case y of    -- subfi ('subtract from' with immediate) doesn't exist
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
- -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
- _ -> trivialCodeNoImm' (intSize rep) SUBF y x
-
- MO_Mul rep -> trivialCode rep True MULLW x y
-
- MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
-
- MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
- MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
-
- MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
- MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
- MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
-
- MO_And rep -> trivialCode rep False AND x y
- MO_Or rep -> trivialCode rep False OR x y
- MO_Xor rep -> trivialCode rep False XOR x y
-
- MO_Shl rep -> trivialCode rep False SLW x y
- MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
-        MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
-
-        other -> pprPanic "getRegister(ppc) - binary CmmMachOp" (pprMachOp mop)
-    where
- triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
- triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
-
-getRegister (CmmLit (CmmInt i rep))
- | Just imm <- makeImmediate rep True i
- = let
- code dst = unitOL (LI dst imm)
- in
- return (Any (intSize rep) code)
-
-getRegister (CmmLit (CmmFloat f frep)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
- let size = floatSize frep
- code dst =
- LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit (CmmFloat f frep)]
- `consOL` (addr_code `snocOL` LD size dst addr)
- return (Any size code)
-
-getRegister (CmmLit lit)
- = let rep = cmmLitType lit
- imm = litToImm lit
- code dst = toOL [
- LIS dst (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- in return (Any (cmmTypeSize rep) code)
-
-getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
-
-    -- extend?Expr: wrap an integer expression of type rep
-    -- in a conversion to II32
-extendSExpr W32 x = x
-extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
-extendUExpr W32 x = x
-extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- The 'Amode' type: Memory addressing modes passed up the tree.
-
-data Amode = Amode AddrMode InstrBlock
-
-{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to. So you can't put
-anything in between, lest it overwrite some of those registers. If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
- code
- LEA amode, tmp
- ... other computation ...
- ... (tmp) ...
--}
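-
--- As a hypothetical illustration of that rule (a sketch only, not
--- used in this module, and assuming the x86-style LEA instruction):
--- force the effective address into a fresh register up front, then
--- the other computation can run safely in between.
---
--- spillAmode :: Amode -> NatM (Reg, InstrBlock)
--- spillAmode (Amode addr code) = do
---     tmp <- getNewRegNat wordSize
---     -- materialise the effective address into tmp, so code run
---     -- afterwards cannot invalidate it
---     return (tmp, code `snocOL` LEA wordSize (OpAddr addr) (OpReg tmp))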
-
-getAmode :: CmmExpr -> NatM Amode
-getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-getAmode (StPrim IntSubOp [x, StInt i])
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister x `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- off = ImmInt (-(fromInteger i))
- in
- return (Amode (AddrRegImm reg off) code)
-
-getAmode (StPrim IntAddOp [x, StInt i])
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister x `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- off = ImmInt (fromInteger i)
- in
- return (Amode (AddrRegImm reg off) code)
-
-getAmode leaf
- | isJust imm
- = return (Amode (AddrImm imm__2) id)
- where
- imm = maybeImm leaf
- imm__2 = case imm of Just x -> x
-
-getAmode other
- = getNewRegNat PtrRep `thenNat` \ tmp ->
- getRegister other `thenNat` \ register ->
- let
- code = registerCode register tmp
- reg = registerName register tmp
- in
- return (Amode (AddrReg reg) code)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- = return $ Amode (ripRel (litToImm displacement)) nilOL
-
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- This is all just ridiculous, since it carefully undoes
--- what mangleIndexTree has just done.
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
- | is32BitLit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
- | is32BitLit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (fromInteger i)
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
--- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
--- recognised by the next rule.
-getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
- b@(CmmLit _)])
- = getAmode (CmmMachOp (MO_Add rep) [b,a])
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
- [y, CmmLit (CmmInt shift _)]])
- | shift == 0 || shift == 1 || shift == 2 || shift == 3
- = x86_complex_amode x y shift 0
-
-getAmode (CmmMachOp (MO_Add rep)
- [x, CmmMachOp (MO_Add _)
- [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
- CmmLit (CmmInt offset _)]])
-  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
-    && is32BitInteger offset
- = x86_complex_amode x y shift offset
-
-getAmode (CmmMachOp (MO_Add rep) [x,y])
- = x86_complex_amode x y 0 0
-
-getAmode (CmmLit lit) | is32BitLit lit
- = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-
-getAmode expr = do
- (reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-
-
-x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
-x86_complex_amode base index shift offset
- = do (x_reg, x_code) <- getNonClobberedReg base
-       -- x must be in a temp, because it has to stay live over y_code
-       -- we could compare x_reg and y_reg and do something better here...
-       (y_reg, y_code) <- getSomeReg index
-       let
-           code  = x_code `appOL` y_code
-           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
-       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
- code)
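-
--- For example, an address expression like x + y*8 + 12 reaches here
--- with shift = 3 and offset = 12, and becomes the single addressing
--- mode [x_reg + y_reg*8 + 12] rather than separate shift and add
--- instructions.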
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
- | fits13Bits (-i)
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (-(fromInteger i))
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
- | fits13Bits i
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (fromInteger i)
- return (Amode (AddrRegImm reg off) code)
-
-getAmode (CmmMachOp (MO_Add rep) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- let
- code = codeX `appOL` codeY
- return (Amode (AddrRegReg regX regY) code)
-
-getAmode (CmmLit lit)
- = do
- let imm__2 = litToImm lit
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
-
- let code = toOL [ SETHI (HI imm__2) tmp1
- , OR False tmp1 (RIImm (LO imm__2)) tmp2]
-
- return (Amode (AddrRegReg tmp2 g0) code)
-
-getAmode other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
-
-#endif /* sparc_TARGET_ARCH */
-
-#ifdef powerpc_TARGET_ARCH
-getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True (-i)
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True i
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
-getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
- = do
- tmp <- getNewRegNat II32
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- code = srcCode `snocOL` ADDIS tmp src (HA imm)
- return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode (CmmLit lit)
- = do
- tmp <- getNewRegNat II32
- let imm = litToImm lit
- code = unitOL (LIS tmp (HA imm))
- return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode (CmmMachOp (MO_Add W32) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-
-getAmode other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- getOperand: sometimes any operand will do.
-
--- getNonClobberedOperand: the value of the operand will remain valid across
--- the computation of an arbitrary expression, unless the expression
--- is computed directly into a register which the operand refers to
--- (see trivialCode where this function is used for an example).
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getNonClobberedOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getNonClobberedOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
- return (OpImm (litToImm lit), nilOL)
-getNonClobberedOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- (src',save_code) <-
- if (amodeCouldBeClobbered src)
- then do
- tmp <- getNewRegNat wordSize
- return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
- unitOL (LEA II32 (OpAddr src) (OpReg tmp)))
- else
- return (src, nilOL)
- return (OpAddr src', save_code `appOL` mem_code)
-getNonClobberedOperand e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-amodeCouldBeClobbered :: AddrMode -> Bool
-amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
-
-regClobbered (RealReg rr) = isFastTrue (freeReg rr)
-regClobbered _ = False
-
--- getOperand: the operand is not required to remain valid across the
--- computation of an arbitrary expression.
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-#if x86_64_TARGET_ARCH
-getOperand (CmmLit lit)
- | isSuitableFloatingPointLit lit = do
- lbl <- getNewLabelNat
- let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
- CmmStaticLit lit])
- return (OpAddr (ripRel (ImmCLbl lbl)), code)
-#endif
-getOperand (CmmLit lit)
- | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
- return (OpImm (litToImm lit), nilOL)
-getOperand (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getOperand e = do
- (reg, code) <- getSomeReg e
- return (OpReg reg, code)
-
-isOperand :: CmmExpr -> Bool
-isOperand (CmmLoad _ _) = True
-isOperand (CmmLit lit) = is32BitLit lit
- || isSuitableFloatingPointLit lit
-isOperand _ = False
-
--- if we want a floating-point literal as an operand, we can
--- use it directly from memory. However, if the literal is
--- zero, we're better off generating it into a register using
--- xor.
-isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
-isSuitableFloatingPointLit _ = False
-
-getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem (CmmLoad mem pk)
- | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
-getRegOrMem e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-#if x86_64_TARGET_ARCH
-is32BitLit (CmmInt i W64) = is32BitInteger i
- -- assume that labels are in the range 0-2^31-1: this assumes the
- -- small memory model (see gcc docs, -mcmodel=small).
-#endif
-is32BitLit x = True
-#endif
-
-is32BitInteger :: Integer -> Bool
-is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
- where i64 = fromIntegral i :: Int64
- -- a CmmInt is intended to be truncated to the appropriate
- -- number of bits, so here we truncate it to Int64. This is
- -- important because e.g. -1 as a CmmInt might be either
- -- -1 or 18446744073709551615.
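-
--- For example, is32BitInteger (-1) and
--- is32BitInteger 18446744073709551615 are both True, since both
--- truncate to the Int64 value -1; is32BitInteger 0x80000000 is
--- False, because 2147483648 exceeds the signed 32-bit maximum.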
-
--- -----------------------------------------------------------------------------
--- The 'CondCode' type: Condition codes passed up the tree.
-
-data CondCode = CondCode Bool Cond InstrBlock
-
--- Set up a condition code for a conditional branch.
-
-getCondCode :: CmmExpr -> NatM CondCode
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-getCondCode = panic "MachCode.getCondCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
--- yes, they really do seem to want exactly the same!
-
-getCondCode (CmmMachOp mop [x, y])
- =
- case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq rep -> condIntCode EQQ x y
- MO_Ne rep -> condIntCode NE x y
-
- MO_S_Gt rep -> condIntCode GTT x y
- MO_S_Ge rep -> condIntCode GE x y
- MO_S_Lt rep -> condIntCode LTT x y
- MO_S_Le rep -> condIntCode LE x y
-
- MO_U_Gt rep -> condIntCode GU x y
- MO_U_Ge rep -> condIntCode GEU x y
- MO_U_Lt rep -> condIntCode LU x y
- MO_U_Le rep -> condIntCode LEU x y
-
- other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
-
-getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
-
-#elif powerpc_TARGET_ARCH
-
--- almost the same as everywhere else - but we need to
--- extend small integers to 32 bits first
-
-getCondCode (CmmMachOp mop [x, y])
- = case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
- MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
-
- MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
- MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
-
- MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
- MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
-
- other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-
-getCondCode other = panic "getCondCode(2)(powerpc)"
-
-
-#endif
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-
-#if alpha_TARGET_ARCH
-condIntCode = panic "MachCode.condIntCode: not on Alphas"
-condFltCode = panic "MachCode.condFltCode: not on Alphas"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- memory vs immediate
-condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
- Amode x_addr x_code <- getAmode x
- let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
- --
- return (CondCode False cond code)
-
--- anything vs zero, using a mask
--- TODO: Add some sanity checking!!!!
-condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
- | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
- = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs zero
-condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs operand
-condIntCode cond x y | isOperand y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL` y_code `snocOL`
- CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
- -- in
- return (CondCode False cond code)
-
--- anything vs anything
-condIntCode cond x y = do
- (y_reg, y_code) <- getNonClobberedReg y
- (x_op, x_code) <- getRegOrMem x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
- -- in
- return (CondCode False cond code)
-#endif
-
-#if i386_TARGET_ARCH
-condFltCode cond x y
- = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_reg, y_code) <- getSomeReg y
- let
- code = x_code `appOL` y_code `snocOL`
- GCMP cond x_reg y_reg
- -- The GCMP insn does the test and sets the zero flag if comparable
- -- and true. Hence we always supply EQQ as the condition to test.
- return (CondCode True EQQ code)
-#endif /* i386_TARGET_ARCH */
-
-#if x86_64_TARGET_ARCH
--- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
--- an operand, but the right must be a reg. We can probably do better
--- than this general case...
-condFltCode cond x y = do
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL`
- y_code `snocOL`
- CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
- -- NB(1): we need to use the unsigned comparison operators on the
- -- result of this comparison.
- -- in
- return (CondCode True (condToUnsigned cond) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntCode cond x (CmmLit (CmmInt y rep))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- let
- src2 = ImmInt (fromInteger y)
- code' = code `snocOL` SUB False True src1 (RIImm src2) g0
- return (CondCode False cond code')
-
-condIntCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 = code1 `appOL` code2 `snocOL`
- SUB False True src1 (RIReg src2) g0
- return (CondCode False cond code__2)
-
------------
-condFltCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType x
- pk2 = cmmExprType y
-
- code__2 =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- FCMP True (cmmTypeSize pk1) src1 src2
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- FCMP True FF64 tmp src2
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- FCMP True FF64 src1 tmp
- return (CondCode True cond code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
--- ###FIXME: I16 and I8!
-condIntCode cond x (CmmLit (CmmInt y rep))
- | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
- = do
- (src1, code) <- getSomeReg x
- let
- code' = code `snocOL`
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
- return (CondCode False cond code')
-
-condIntCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code' = code1 `appOL` code2 `snocOL`
- (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
- return (CondCode False cond code')
-
-condFltCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
- code'' = case cond of -- twiddle CR to handle unordered case
- GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
- LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
- _ -> code'
- where
- ltbit = 0 ; eqbit = 2 ; gtbit = 1
- return (CondCode True cond code'')
-
-#endif /* powerpc_TARGET_ARCH */
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business. Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers. If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side. This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
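---
--- A tiny standalone sketch (hypothetical names and simplified types,
--- not the real NCG code) of why the transfer can vanish: code for the
--- right hand side is a function of its destination, so assigning to a
--- free register needs no extra move, and only a Fixed result costs one:
---
--- data RegisterS = AnyS (Int -> [String]) -- code parameterised on dest
--- | FixedS Int [String] -- result pinned to a register
---
--- assignS :: Int -> RegisterS -> [String]
--- assignS dst (AnyS mk) = mk dst -- RHS lands in dst directly
--- assignS dst (FixedS src is) = is ++ ["mov r" ++ show src
--- ++ ", r" ++ show dst]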
-
-assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-assignIntCode pk (CmmLoad dst _) src
- = getNewRegNat IntRep `thenNat` \ tmp ->
- getAmode dst `thenNat` \ amode ->
- getRegister src `thenNat` \ register ->
- let
- code1 = amodeCode amode []
- dst__2 = amodeAddr amode
- code2 = registerCode register tmp []
- src__2 = registerName register tmp
- sz = primRepToSize pk
- code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
- in
- return code__2
-
-assignIntCode pk dst src
- = getRegister dst `thenNat` \ register1 ->
- getRegister src `thenNat` \ register2 ->
- let
- dst__2 = registerName register1 zeroh
- code = registerCode register2 dst__2
- src__2 = registerName register2 dst__2
- code__2 = if isFixed register2
- then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
- else code
- in
- return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- integer assignment to memory
-
--- specific case of adding/subtracting an integer to a particular address.
--- ToDo: catch other cases where we can use an operation directly on a memory
--- address.
-assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
- CmmLit (CmmInt i _)])
- | addr == addr2, pk /= II64 || is32BitInteger i,
- Just instr <- check op
- = do Amode amode code_addr <- getAmode addr
- let code = code_addr `snocOL`
- instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
- return code
- where
- check (MO_Add _) = Just ADD
- check (MO_Sub _) = Just SUB
- check _ = Nothing
- -- ToDo: more?
-
--- general case
-assignMem_IntCode pk addr src = do
- Amode addr code_addr <- getAmode addr
- (code_src, op_src) <- get_op_RI src
- let
- code = code_src `appOL`
- code_addr `snocOL`
- MOV pk op_src (OpAddr addr)
- -- NOTE: op_src is stable, so it will still be valid
- -- after code_addr. This may involve the introduction
- -- of an extra MOV to a temporary register, but we hope
- -- the register allocator will get rid of it.
- --
- return code
- where
- get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand) -- code, operand
- get_op_RI (CmmLit lit) | is32BitLit lit
- = return (nilOL, OpImm (litToImm lit))
- get_op_RI op
- = do (reg,code) <- getNonClobberedReg op
- return (code, OpReg reg)
-
-
--- Assign; dst is a reg, rhs is mem
-assignReg_IntCode pk reg (CmmLoad src _) = do
- load_code <- intLoadCode (MOV pk) src
- return (load_code (getRegisterReg reg))
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src = do
- code <- getAnyReg src
- return (code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- getAmode addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
-assignReg_IntCode pk reg src = do
- r <- getRegister src
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
- where
- dst = getRegisterReg reg
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- getAmode addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
--- dst is a reg, but src could be anything
-assignReg_IntCode pk reg src
- = do
- r <- getRegister src
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` MR dst freg
- where
- dst = getRegisterReg reg
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Floating-point assignments
-
-#if alpha_TARGET_ARCH
-
-assignFltCode pk (CmmLoad dst _) src
- = getNewRegNat pk `thenNat` \ tmp ->
- getAmode dst `thenNat` \ amode ->
- getRegister src `thenNat` \ register ->
- let
- code1 = amodeCode amode []
- dst__2 = amodeAddr amode
- code2 = registerCode register tmp []
- src__2 = registerName register tmp
- sz = primRepToSize pk
- code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
- in
- return code__2
-
-assignFltCode pk dst src
- = getRegister dst `thenNat` \ register1 ->
- getRegister src `thenNat` \ register2 ->
- let
- dst__2 = registerName register1 zeroh
- code = registerCode register2 dst__2
- src__2 = registerName register2 dst__2
- code__2 = if isFixed register2
- then code . mkSeqInstr (FMOV src__2 dst__2)
- else code
- in
- return code__2
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
- (src_reg, src_code) <- getNonClobberedReg src
- Amode addr addr_code <- getAmode addr
- let
- code = src_code `appOL`
- addr_code `snocOL`
- IF_ARCH_i386(GST pk src_reg addr,
- MOV pk (OpReg src_reg) (OpAddr addr))
- return code
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk reg src = do
- src_code <- getAnyReg src
- return (src_code (getRegisterReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
- Amode dst__2 code1 <- getAmode addr
- (src__2, code2) <- getSomeReg src
- tmp1 <- getNewRegNat pk
- let
- pk__2 = cmmExprType src
- code__2 = code1 `appOL` code2 `appOL`
- if sizeToWidth pk == typeWidth pk__2
- then unitOL (ST pk src__2 dst__2)
- else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
- , ST pk tmp1 dst__2]
- return code__2
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode pk dstCmmReg srcCmmExpr = do
- srcRegister <- getRegister srcCmmExpr
- let dstReg = getRegisterReg dstCmmReg
-
- return $ case srcRegister of
- Any _ code -> code dstReg
- Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
--- Easy, isn't it?
-assignMem_FltCode = assignMem_IntCode
-assignReg_FltCode = assignReg_IntCode
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating a non-local jump
-
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genJump (CmmLabel lbl)
- | isAsmTemp lbl = returnInstr (BR target)
- | otherwise = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
- where
- target = ImmCLbl lbl
-
-genJump tree
- = getRegister tree `thenNat` \ register ->
- getNewRegNat PtrRep `thenNat` \ tmp ->
- let
- dst = registerName register pv
- code = registerCode register pv
- target = registerName register pv
- in
- if isFixed register then
- returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
- else
- return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-genJump (CmmLoad mem pk) = do
- Amode target code <- getAmode mem
- return (code `snocOL` JMP (OpAddr target))
-
-genJump (CmmLit lit) = do
- return (unitOL (JMP (OpImm (litToImm lit))))
-
-genJump expr = do
- (reg,code) <- getSomeReg expr
- return (code `snocOL` JMP (OpReg reg))
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genJump (CmmLit (CmmLabel lbl))
- = return (toOL [CALL (Left target) 0 True, NOP])
- where
- target = ImmCLbl lbl
-
-genJump tree
- = do
- (target, code) <- getSomeReg tree
- return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-genJump (CmmLit (CmmLabel lbl))
- = return (unitOL $ JMP lbl)
-
-genJump tree
- = do
- (target,code) <- getSomeReg tree
- return (code `snocOL` MTCTR target `snocOL` BCTR [])
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Unconditional branches
-
-genBranch :: BlockId -> NatM InstrBlock
-
-genBranch = return . toOL . mkBranchInstr
-
--- -----------------------------------------------------------------------------
--- Conditional jumps
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions. We peek at the arguments to decide what kind of
-comparison to do.
-
-ALPHA: For comparisons with 0, we're laughing, because we can just do
-the desired conditional branch.
-
-I386: First, we have to ensure that the condition
-codes are set according to the supplied comparison operation.
-
-SPARC: First, we have to ensure that the condition codes are set
-according to the supplied comparison operation. We generate slightly
-different code for floating point comparisons, because a floating
-point operation cannot directly precede a @BF@. We assume the worst
-and fill that slot with a @NOP@.
-
-SPARC: Do not fill the delay slots here; you will confuse the register
-allocator.
--}
-
-
-genCondJump
- :: BlockId -- the branch target
- -> CmmExpr -- the condition on which to branch
- -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-genCondJump id (StPrim op [x, StInt 0])
- = getRegister x `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- value = registerName register tmp
- pk = registerRep register
- target = ImmCLbl lbl
- in
- returnSeq code [BI (cmpOp op) value target]
- where
- cmpOp CharGtOp = GTT
- cmpOp CharGeOp = GE
- cmpOp CharEqOp = EQQ
- cmpOp CharNeOp = NE
- cmpOp CharLtOp = LTT
- cmpOp CharLeOp = LE
- cmpOp IntGtOp = GTT
- cmpOp IntGeOp = GE
- cmpOp IntEqOp = EQQ
- cmpOp IntNeOp = NE
- cmpOp IntLtOp = LTT
- cmpOp IntLeOp = LE
- cmpOp WordGtOp = NE
- cmpOp WordGeOp = ALWAYS
- cmpOp WordEqOp = EQQ
- cmpOp WordNeOp = NE
- cmpOp WordLtOp = NEVER
- cmpOp WordLeOp = EQQ
- cmpOp AddrGtOp = NE
- cmpOp AddrGeOp = ALWAYS
- cmpOp AddrEqOp = EQQ
- cmpOp AddrNeOp = NE
- cmpOp AddrLtOp = NEVER
- cmpOp AddrLeOp = EQQ
-
-genCondJump lbl (StPrim op [x, StDouble 0.0])
- = getRegister x `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- value = registerName register tmp
- pk = registerRep register
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BF (cmpOp op) value target))
- where
- cmpOp FloatGtOp = GTT
- cmpOp FloatGeOp = GE
- cmpOp FloatEqOp = EQQ
- cmpOp FloatNeOp = NE
- cmpOp FloatLtOp = LTT
- cmpOp FloatLeOp = LE
- cmpOp DoubleGtOp = GTT
- cmpOp DoubleGeOp = GE
- cmpOp DoubleEqOp = EQQ
- cmpOp DoubleNeOp = NE
- cmpOp DoubleLtOp = LTT
- cmpOp DoubleLeOp = LE
-
-genCondJump lbl (StPrim op [x, y])
- | fltCmpOp op
- = trivialFCode pr instr x y `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- result = registerName register tmp
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BF cond result target))
- where
- pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
-
- fltCmpOp op = case op of
- FloatGtOp -> True
- FloatGeOp -> True
- FloatEqOp -> True
- FloatNeOp -> True
- FloatLtOp -> True
- FloatLeOp -> True
- DoubleGtOp -> True
- DoubleGeOp -> True
- DoubleEqOp -> True
- DoubleNeOp -> True
- DoubleLtOp -> True
- DoubleLeOp -> True
- _ -> False
- (instr, cond) = case op of
- FloatGtOp -> (FCMP TF LE, EQQ)
- FloatGeOp -> (FCMP TF LTT, EQQ)
- FloatEqOp -> (FCMP TF EQQ, NE)
- FloatNeOp -> (FCMP TF EQQ, EQQ)
- FloatLtOp -> (FCMP TF LTT, NE)
- FloatLeOp -> (FCMP TF LE, NE)
- DoubleGtOp -> (FCMP TF LE, EQQ)
- DoubleGeOp -> (FCMP TF LTT, EQQ)
- DoubleEqOp -> (FCMP TF EQQ, NE)
- DoubleNeOp -> (FCMP TF EQQ, EQQ)
- DoubleLtOp -> (FCMP TF LTT, NE)
- DoubleLeOp -> (FCMP TF LE, NE)
-
-genCondJump lbl (StPrim op [x, y])
- = trivialCode instr x y `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- result = registerName register tmp
- target = ImmCLbl lbl
- in
- return (code . mkSeqInstr (BI cond result target))
- where
- (instr, cond) = case op of
- CharGtOp -> (CMP LE, EQQ)
- CharGeOp -> (CMP LTT, EQQ)
- CharEqOp -> (CMP EQQ, NE)
- CharNeOp -> (CMP EQQ, EQQ)
- CharLtOp -> (CMP LTT, NE)
- CharLeOp -> (CMP LE, NE)
- IntGtOp -> (CMP LE, EQQ)
- IntGeOp -> (CMP LTT, EQQ)
- IntEqOp -> (CMP EQQ, NE)
- IntNeOp -> (CMP EQQ, EQQ)
- IntLtOp -> (CMP LTT, NE)
- IntLeOp -> (CMP LE, NE)
- WordGtOp -> (CMP ULE, EQQ)
- WordGeOp -> (CMP ULT, EQQ)
- WordEqOp -> (CMP EQQ, NE)
- WordNeOp -> (CMP EQQ, EQQ)
- WordLtOp -> (CMP ULT, NE)
- WordLeOp -> (CMP ULE, NE)
- AddrGtOp -> (CMP ULE, EQQ)
- AddrGeOp -> (CMP ULT, EQQ)
- AddrEqOp -> (CMP EQQ, NE)
- AddrNeOp -> (CMP EQQ, EQQ)
- AddrLtOp -> (CMP ULT, NE)
- AddrLeOp -> (CMP ULE, NE)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode _ cond code <- getCondCode bool
- return (code `snocOL` JXX cond id)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode is_float cond cond_code <- getCondCode bool
- if not is_float
- then
- return (cond_code `snocOL` JXX cond id)
- else do
- lbl <- getBlockIdNat
-
- -- see comment with condFltReg
- let code = case cond of
- NE -> or_unordered
- GU -> plain_test
- GEU -> plain_test
- _ -> and_ordered
-
- plain_test = unitOL (
- JXX cond id
- )
- or_unordered = toOL [
- JXX cond id,
- JXX PARITY id
- ]
- and_ordered = toOL [
- JXX PARITY lbl,
- JXX cond id,
- JXX ALWAYS lbl,
- NEWBLOCK lbl
- ]
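- -- or_unordered: NaN must compare as "not equal", so branch on either
- -- the condition or the parity (unordered) flag. and_ordered: parity
- -- set means unordered, so skip the conditional branch entirely.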
- return (cond_code `appOL` code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-genCondJump bid bool = do
- CondCode is_float cond code <- getCondCode bool
- return (
- code `appOL`
- toOL (
- if is_float
- then [NOP, BF cond False bid, NOP]
- else [BI cond False bid, NOP]
- )
- )
-
-#endif /* sparc_TARGET_ARCH */
-
-
-#if powerpc_TARGET_ARCH
-
-genCondJump id bool = do
- CondCode is_float cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating C calls
-
--- Now the biggest nightmare---calls. Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations. Apart from that, the code is easy.
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-genCCall
- :: CmmCallTarget -- function to call
- -> HintedCmmFormals -- where to put the result
- -> HintedCmmActuals -- arguments (of mixed type)
- -> NatM InstrBlock
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
--- ccallResultRegs was never filled in here; this Alpha code has
--- bit-rotted and is not compiled.
-
-genCCall fn cconv result_regs args
- = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
- `thenNat` \ ((unused,_), argCode) ->
- let
- nRegs = length allArgRegs - length unused
- code = asmSeqThen (map ($ []) argCode)
- in
- returnSeq code [
- LDA pv (AddrImm (ImmLab (ptext fn))),
- JSR ra (AddrReg pv) nRegs,
- LDGP gp (AddrReg ra)]
- where
- ------------------------
- {- Try to get a value into a specific register (or registers) for
- a call. The first 6 arguments go into the appropriate
- argument register (separate registers for integer and floating
- point arguments, but used in lock-step), and the remaining
- arguments are dumped to the stack, beginning at 0(sp). Our
- first argument is a pair of the list of remaining argument
- registers to be assigned for this call and the next stack
- offset to use for overflowing arguments. This way,
- @get_arg@ can be applied to all of a call's arguments using
- @mapAccumLNat@.
- -}
- get_arg
- :: ([(Reg,Reg)], Int) -- Argument registers and stack offset (accumulator)
- -> StixTree -- Current argument
- -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code
-
- -- We have to use up all of our argument registers first...
-
- get_arg ((iDst,fDst):dsts, offset) arg
- = getRegister arg `thenNat` \ register ->
- let
- reg = if isFloatType pk then fDst else iDst
- code = registerCode register reg
- src = registerName register reg
- pk = registerRep register
- in
- return (
- if isFloatType pk then
- ((dsts, offset), if isFixed register then
- code . mkSeqInstr (FMOV src fDst)
- else code)
- else
- ((dsts, offset), if isFixed register then
- code . mkSeqInstr (OR src (RIReg src) iDst)
- else code))
-
- -- Once we have run out of argument registers, we move to the
- -- stack...
-
- get_arg ([], offset) arg
- = getRegister arg `thenNat` \ register ->
- getNewRegNat (registerRep register)
- `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- pk = registerRep register
- sz = primRepToSize pk
- in
- return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
- -- write barrier compiles to no code on x86/x86-64;
- -- we keep it this long in order to prevent earlier optimisations.
-
--- we only cope with a single result for foreign calls
-genCCall (CmmPrim op) [CmmHinted r _] args = do
- l1 <- getNewLabelNat
- l2 <- getNewLabelNat
- case op of
- MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
- MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
-
- MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
- MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
-
- MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
- MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
-
- MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
- MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
-
- other_op -> outOfLineFloatOp op r args
- where
- actuallyInlineFloatOp instr size [CmmHinted x _]
- = do res <- trivialUFCode size (instr size) x
- any <- anyReg res
- return (any (getRegisterReg (CmmLocal r)))
-
-genCCall target dest_regs args = do
- let
- sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
-#if !darwin_TARGET_OS
- tot_arg_size = sum sizes
-#else
- raw_arg_size = sum sizes
- tot_arg_size = roundTo 16 raw_arg_size
- arg_pad_size = tot_arg_size - raw_arg_size
- delta0 <- getDeltaNat
- setDeltaNat (delta0 - arg_pad_size)
-#endif
-
- push_codes <- mapM push_arg (reverse args)
- delta <- getDeltaNat
-
- -- in
- -- deal with static vs dynamic call targets
- (callinsns,cconv) <-
- case target of
- -- CmmPrim -> ...
- CmmCallee (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) []), conv)
- where fn_imm = ImmCLbl lbl
- CmmCallee expr conv
- -> do { (dyn_c, dyn_r) <- get_op expr
- ; ASSERT( isWord32 (cmmExprType expr) )
- return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
-
- let push_code
-#if darwin_TARGET_OS
- | arg_pad_size /= 0
- = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
- DELTA (delta0 - arg_pad_size)]
- `appOL` concatOL push_codes
- | otherwise
-#endif
- = concatOL push_codes
- call = callinsns `appOL`
- toOL (
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- (if cconv == StdCallConv || tot_arg_size==0 then [] else
- [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
- ++
- [DELTA (delta + tot_arg_size)]
- )
- -- in
- setDeltaNat (delta + tot_arg_size)
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [CmmHinted dest _hint]
- | isFloatType ty = unitOL (GMOV fake0 r_dest)
- | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
- MOV II32 (OpReg edx) (OpReg r_dest_hi)]
- | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
- where
- ty = localRegType dest
- w = typeWidth ty
- r_dest_hi = getHiVRegFromLo r_dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
-
- return (push_code `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where
- arg_size :: CmmType -> Int -- Width in bytes
- arg_size ty = widthInBytes (typeWidth ty)
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
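- -- e.g. roundTo 16 0 == 0, roundTo 16 24 == 32: round x up to the
- -- next multiple of a (assumes a > 0 and x >= 0)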
-
-
- push_arg :: HintedCmmActual {-current argument-}
- -> NatM InstrBlock -- code
-
- push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
- | isWord64 arg_ty = do
- ChildCode64 code r_lo <- iselExpr64 arg
- delta <- getDeltaNat
- setDeltaNat (delta - 8)
- let
- r_hi = getHiVRegFromLo r_lo
- -- in
- return ( code `appOL`
- toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
- PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
- )
-
- | otherwise = do
- (code, reg) <- get_op arg
- delta <- getDeltaNat
- let size = arg_size arg_ty -- Byte size
- setDeltaNat (delta-size)
- if (isFloatType arg_ty)
- then return (code `appOL`
- toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
- DELTA (delta-size),
- GST (floatSize (typeWidth arg_ty))
- reg (AddrBaseIndex (EABaseReg esp)
- EAIndexNone
- (ImmInt 0))]
- )
- else return (code `snocOL`
- PUSH II32 (OpReg reg) `snocOL`
- DELTA (delta-size)
- )
- where
- arg_ty = cmmExprType arg
-
- ------------
- get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
- get_op op = do
- (reg,code) <- getSomeReg op
- return (code, reg)
-
-#endif /* i386_TARGET_ARCH */
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals
- -> NatM InstrBlock
-outOfLineFloatOp mop res args
- = do
- dflags <- getDynFlagsNat
- targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
- let target = CmmCallee targetExpr CCallConv
-
- if isFloat64 (localRegType res)
- then
- stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
- else do
- uq <- getUniqueNat
- let
- tmp = LocalReg uq f64
- -- in
- code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
- code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
- return (code1 `appOL` code2)
- where
- lbl = mkForeignLabel fn Nothing False IsFunction
-
- fn = case mop of
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
- MO_F32_Exp -> fsLit "expf"
- MO_F32_Log -> fsLit "logf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
- MO_F64_Exp -> fsLit "exp"
- MO_F64_Log -> fsLit "log"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
- MO_F64_Pwr -> fsLit "pow"
-
-#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
- -- write barrier compiles to no code on x86/x86-64;
- -- we keep it this long in order to prevent earlier optimisations.
-
-
-genCCall (CmmPrim op) [CmmHinted r _] args =
- outOfLineFloatOp op r args
-
-genCCall target dest_regs args = do
-
- -- load up the register arguments
- (stack_args, aregs, fregs, load_args_code)
- <- load_args args allArgRegs allFPArgRegs nilOL
-
- let
- fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
- int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
- arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
- -- for annotating the call instruction with
-
- sse_regs = length fp_regs_used
-
- tot_arg_size = arg_size * length stack_args
-
- -- On entry to the called function, %rsp should be aligned
- -- on a 16-byte boundary +8 (i.e. the first stack arg after
- -- the return address is 16-byte aligned). In STG land
- -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
- -- need to make sure we push a multiple of 16-bytes of args,
- -- plus the return address, to get the correct alignment.
- -- Urg, this is hard. We need to feed the delta back into
- -- the arg pushing code.
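- -- For example, three 8-byte stack args give tot_arg_size = 24;
- -- 24 `rem` 16 /= 0, so we drop %rsp by 8 more bytes below, making
- -- 32 bytes of outgoing args and restoring alignment at the CALL.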
- (real_size, adjust_rsp) <-
- if tot_arg_size `rem` 16 == 0
- then return (tot_arg_size, nilOL)
- else do -- we need to adjust...
- delta <- getDeltaNat
- setDeltaNat (delta-8)
- return (tot_arg_size+8, toOL [
- SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
- DELTA (delta-8)
- ])
-
- -- push the stack args, right to left
- push_code <- push_args (reverse stack_args) nilOL
- delta <- getDeltaNat
-
- -- deal with static vs dynamic call targets
- (callinsns,cconv) <-
- case target of
- -- CmmPrim -> ...
- CmmCallee (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) arg_regs), conv)
- where fn_imm = ImmCLbl lbl
- CmmCallee expr conv
- -> do (dyn_r, dyn_c) <- getSomeReg expr
- return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
-
- let
- -- The x86_64 ABI requires us to set %al to the number of SSE
- -- registers that contain arguments, if the called routine
- -- is a varargs function. We don't know whether it's a
- -- varargs function or not, so we have to assume it is.
- --
- -- It's not safe to omit this assignment, even if the number
- -- of SSE regs in use is zero. If %al is larger than 8
- -- on entry to a varargs function, seg faults ensue.
- assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
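- -- (a 32-bit MOV zero-extends its destination on x86_64, so this
- -- also clears the rest of %rax and leaves %al == n)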
-
- let call = callinsns `appOL`
- toOL (
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- (if cconv == StdCallConv || real_size==0 then [] else
- [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
- ++
- [DELTA (delta + real_size)]
- )
- -- in
- setDeltaNat (delta + real_size)
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [CmmHinted dest _hint] =
- case typeWidth rep of
- W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
- W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
- _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
- where
- rep = localRegType dest
- r_dest = getRegisterReg (CmmLocal dest)
- assign_code many = panic "genCCall.assign_code many"
-
- return (load_args_code `appOL`
- adjust_rsp `appOL`
- push_code `appOL`
- assign_eax sse_regs `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where
- arg_size = 8 -- always, at the moment
-
- load_args :: [CmmHinted CmmExpr]
- -> [Reg] -- int regs avail for args
- -> [Reg] -- FP regs avail for args
- -> InstrBlock
- -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
- load_args args [] [] code = return (args, [], [], code)
- -- no more regs to use
- load_args [] aregs fregs code = return ([], aregs, fregs, code)
- -- no more args to push
- load_args ((CmmHinted arg hint) : rest) aregs fregs code
- | isFloatType arg_rep =
- case fregs of
- [] -> push_this_arg
- (r:rs) -> do
- arg_code <- getAnyReg arg
- load_args rest aregs rs (code `appOL` arg_code r)
- | otherwise =
- case aregs of
- [] -> push_this_arg
- (r:rs) -> do
- arg_code <- getAnyReg arg
- load_args rest rs fregs (code `appOL` arg_code r)
- where
- arg_rep = cmmExprType arg
-
- push_this_arg = do
- (args',ars,frs,code') <- load_args rest aregs fregs code
- return ((CmmHinted arg hint):args', ars, frs, code')
-
- push_args [] code = return code
- push_args ((CmmHinted arg hint):rest) code
- | isFloatType arg_rep = do
- (arg_reg, arg_code) <- getSomeReg arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
- DELTA (delta-arg_size),
- MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
- push_args rest code'
-
- | otherwise = do
- -- we only ever generate word-sized function arguments. Promotion
- -- has already happened: our Int8# type is kept sign-extended
- -- in an Int#, for example.
- ASSERT(width == W64) return ()
- (arg_op, arg_code) <- getOperand arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- PUSH II64 arg_op,
- DELTA (delta-arg_size)]
- push_args rest code'
- where
- arg_rep = cmmExprType arg
- width = typeWidth arg_rep
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-{-
- The SPARC calling convention is an absolute
- nightmare. The first six 32-bit words of arguments are mapped into
- %o0 through %o5, and the remaining arguments are dumped to the
- stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
-
- If we have to put args on the stack, move %o6==%sp down by
- the number of words to go on the stack, to ensure there's enough space.
-
- According to Fraser and Hanson's lcc book, page 478, fig 17.2,
- 16 words above the stack pointer is a word for the address of
- a structure return value. I use this as a temporary location
- for moving values from float to int regs. Certainly it isn't
- safe to put anything in the 16 words starting at %sp, since
- this area can get trashed at any time due to window overflows
- caused by signal handlers.
-
- A final complication (if the above isn't enough) is that
- we can't blithely calculate the arguments one by one into
- %o0 .. %o5. Consider the following nested calls:
-
- fff a (fff b c)
-
- Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
- the inner call will itself use %o0, which trashes the value put there
- in preparation for the outer call. Upshot: we need to calculate the
- args into temporary regs, and move those to arg regs or onto the
- stack only immediately prior to the call proper. Sigh.
-
-genCCall
- :: CmmCallTarget -- function to call
- -> HintedCmmFormals -- where to put the result
- -> HintedCmmActuals -- arguments (of mixed type)
- -> NatM InstrBlock
-
--}
-
-
--- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
--- are guaranteed to take place before writes afterwards (unlike on PowerPC).
--- Ref: Section 8.4 of the SPARC V9 Architecture manual.
---
--- In the SPARC case we don't need a barrier.
---
-genCCall (CmmPrim (MO_WriteBarrier)) _ _
- = do return nilOL
-
-genCCall target dest_regs argsAndHints
- = do
- -- strip hints from the arg regs
- let args :: [CmmExpr]
- args = map hintlessCmm argsAndHints
-
-
- -- work out the arguments, and assign them to integer regs
- argcode_and_vregs <- mapM arg_to_int_vregs args
- let (argcodes, vregss) = unzip argcode_and_vregs
- let vregs = concat vregss
-
- let n_argRegs = length allArgRegs
- let n_argRegs_used = min (length vregs) n_argRegs
-
-
- -- deal with static vs dynamic call targets
- callinsns <- case target of
- CmmCallee (CmmLit (CmmLabel lbl)) conv ->
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- CmmCallee expr conv
- -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- CmmPrim mop
- -> do res <- outOfLineFloatOp mop
- lblOrMopExpr <- case res of
- Left lbl -> do
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- Right mopExpr -> do
- (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- return lblOrMopExpr
-
- let argcode = concatOL argcodes
-
- let (move_sp_down, move_sp_up)
- = let diff = length vregs - n_argRegs
- nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
- in if nn <= 0
- then (nilOL, nilOL)
- else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-
- let transfer_code
- = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
-
- return
- $ argcode `appOL`
- move_sp_down `appOL`
- transfer_code `appOL`
- callinsns `appOL`
- unitOL NOP `appOL`
- move_sp_up `appOL`
- assign_code dest_regs
-
-
--- | Generate code to calculate an argument, and move it into one
--- or two integer vregs.
-arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-arg_to_int_vregs arg
-
- -- If the expr produces a 64 bit int, then we can just use iselExpr64
- | isWord64 (cmmExprType arg)
- = do (ChildCode64 code r_lo) <- iselExpr64 arg
- let r_hi = getHiVRegFromLo r_lo
- return (code, [r_hi, r_lo])
-
- | otherwise
- = do (src, code) <- getSomeReg arg
- tmp <- getNewRegNat (cmmTypeSize $ cmmExprType arg)
- let pk = cmmExprType arg
-
- case cmmTypeSize pk of
-
- -- Load a 64 bit float return value into two integer regs.
- FF64 -> do
- v1 <- getNewRegNat II32
- v2 <- getNewRegNat II32
-
- let Just f0_high = fPair f0
-
- let code2 =
- code `snocOL`
- FMOV FF64 src f0 `snocOL`
- ST FF32 f0 (spRel 16) `snocOL`
- LD II32 (spRel 16) v1 `snocOL`
- ST FF32 f0_high (spRel 16) `snocOL`
- LD II32 (spRel 16) v2
-
- return (code2, [v1,v2])
-
- -- Load a 32 bit float return value into an integer reg
- FF32 -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- ST FF32 src (spRel 16) `snocOL`
- LD II32 (spRel 16) v1
-
- return (code2, [v1])
-
- -- Move an integer return value into its destination reg.
- other -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- OR False g0 (RIReg src) v1
-
- return (code2, [v1])
-
-
--- | Move args from the integer vregs into which they have been
--- marshalled, into %o0 .. %o5, and the rest onto the stack.
---
-move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
--- all args done
-move_final [] _ offset
- = []
-
--- out of aregs; move to stack
-move_final (v:vs) [] offset
- = ST II32 v (spRel offset)
- : move_final vs [] (offset+1)
-
--- move into an arg (%o[0..5]) reg
-move_final (v:vs) (a:az) offset
- = OR False g0 (RIReg v) a
- : move_final vs az offset
-
-
--- | Assign results returned from the call into their
--- destination regs.
---
-assign_code :: [CmmHinted LocalReg] -> OrdList Instr
-assign_code [] = nilOL
-
-assign_code [CmmHinted dest _hint]
- = let rep = localRegType dest
- width = typeWidth rep
- r_dest = getRegisterReg (CmmLocal dest)
-
- result
- | isFloatType rep
- , W32 <- width
- = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
-
- | isFloatType rep
- , W64 <- width
- = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
-
- | not $ isFloatType rep
- , W32 <- width
- = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
-
- | not $ isFloatType rep
- , W64 <- width
- , r_dest_hi <- getHiVRegFromLo r_dest
- = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
- , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
- in result
-
-
--- | Generate a call to implement an out-of-line floating point operation
-outOfLineFloatOp
- :: CallishMachOp
- -> NatM (Either CLabel CmmExpr)
-
-outOfLineFloatOp mop
- = do let functionName
- = outOfLineFloatOp_table mop
-
- dflags <- getDynFlagsNat
- mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
- $ mkForeignLabel functionName Nothing True IsFunction
-
- let mopLabelOrExpr
- = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
-
- return mopLabelOrExpr
-
-
--- | Decide what C function to use to implement a CallishMachOp
---
-outOfLineFloatOp_table
- :: CallishMachOp
- -> FastString
-
-outOfLineFloatOp_table mop
- = case mop of
- MO_F32_Exp -> fsLit "expf"
- MO_F32_Log -> fsLit "logf"
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
-
- MO_F64_Exp -> fsLit "exp"
- MO_F64_Log -> fsLit "log"
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Pwr -> fsLit "pow"
-
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
-
- other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
- (pprCallishMachOp mop)
-
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-#if darwin_TARGET_OS || linux_TARGET_OS
-{-
- The PowerPC calling convention for Darwin/Mac OS X
- is described in Apple's document
- "Inside Mac OS X - Mach-O Runtime Architecture".
-
- PowerPC Linux uses the System V Release 4 Calling Convention
- for PowerPC. It is described in the
- "System V Application Binary Interface PowerPC Processor Supplement".
-
- Both conventions are similar:
- Parameters may be passed in general-purpose registers starting at r3, in
- floating point registers starting at f1, or on the stack.
-
- But there are substantial differences:
- * The number of registers used for parameter passing and the exact set of
- nonvolatile registers differs (see MachRegs.lhs).
- * On Darwin, stack space is always reserved for parameters, even if they are
- passed in registers. The called routine may choose to save parameters from
- registers to the corresponding space on the stack.
- * On Darwin, a corresponding amount of GPRs is skipped when a floating point
- parameter is passed in an FPR.
- * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
- starting with an odd-numbered GPR. It may skip a GPR to achieve this.
- Darwin just treats an I64 like two separate II32s (high word first).
- * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
- 4-byte aligned like everything else on Darwin.
- * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
- PowerPC Linux does not agree, so neither do we.
-
- According to both conventions, the parameter area should be part of the
- caller's stack frame, allocated in the caller's prologue code (large enough
- to hold the parameter lists for all called routines). The NCG already
- uses the stack for register spilling, leaving 64 bytes free at the top.
- If we need a larger parameter area than that, we just allocate a new stack
- frame just before ccalling.
--}
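-
--- For example: with r3 already taken by an earlier argument, the free
--- GPRs are r4..r10 (an odd count), so under SysV an I64 skips r4 and
--- lands in the r5/r6 pair; Darwin would simply use r4/r5.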
-
-
-genCCall (CmmPrim MO_WriteBarrier) _ _
- = return $ unitOL LWSYNC
-
-genCCall target dest_regs argsAndHints
- = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
- -- we rely on argument promotion in the codeGen
- do
- (finalStack,passArgumentsCode,usedRegs) <- passArguments
- (zip args argReps)
- allArgRegs allFPArgRegs
- initialStackOffset
- (toOL []) []
-
- (labelOrExpr, reduceToFF32) <- case target of
- CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
- CmmCallee expr conv -> return (Right expr, False)
- CmmPrim mop -> outOfLineFloatOp mop
-
- let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
- codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
-
- case labelOrExpr of
- Left lbl -> do
- return ( codeBefore
- `snocOL` BL lbl usedRegs
- `appOL` codeAfter)
- Right dyn -> do
- (dynReg, dynCode) <- getSomeReg dyn
- return ( dynCode
- `snocOL` MTCTR dynReg
- `appOL` codeBefore
- `snocOL` BCTRL usedRegs
- `appOL` codeAfter)
- where
-#if darwin_TARGET_OS
- initialStackOffset = 24
- -- size of linkage area + size of arguments, in bytes
- stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
- map (widthInBytes . typeWidth) argReps
-#elif linux_TARGET_OS
- initialStackOffset = 8
- stackDelta finalStack = roundTo 16 finalStack
-#endif
- args = map hintlessCmm argsAndHints
- argReps = map cmmExprType args
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
-
- move_sp_down finalStack
- | delta > 64 =
- toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
- DELTA (-delta)]
- | otherwise = nilOL
- where delta = stackDelta finalStack
- move_sp_up finalStack
- | delta > 64 =
- toOL [ADD sp sp (RIImm (ImmInt delta)),
- DELTA 0]
- | otherwise = nilOL
- where delta = stackDelta finalStack
-
-
- passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
- passArguments ((arg,arg_ty):args) gprs fprs stackOffset
- accumCode accumUsed | isWord64 arg_ty =
- do
- ChildCode64 code vr_lo <- iselExpr64 arg
- let vr_hi = getHiVRegFromLo vr_lo
-
-#if darwin_TARGET_OS
- passArguments args
- (drop 2 gprs)
- fprs
- (stackOffset+8)
- (accumCode `appOL` code
- `snocOL` storeWord vr_hi gprs stackOffset
- `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
- ((take 2 gprs) ++ accumUsed)
- where
- storeWord vr (gpr:_) offset = MR gpr vr
- storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
-
-#elif linux_TARGET_OS
- let stackOffset' = roundTo 8 stackOffset
- stackCode = accumCode `appOL` code
- `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
- `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
- regCode hireg loreg =
- accumCode `appOL` code
- `snocOL` MR hireg vr_hi
- `snocOL` MR loreg vr_lo
-
- case gprs of
- hireg : loreg : regs | even (length gprs) ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _skipped : hireg : loreg : regs ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _ -> -- only one or no regs left
- passArguments args [] fprs (stackOffset'+8)
- stackCode accumUsed
-#endif
-
- passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
- | reg : _ <- regs = do
- register <- getRegister arg
- let code = case register of
- Fixed _ freg fcode -> fcode `snocOL` MR reg freg
- Any _ acode -> acode reg
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
-#if darwin_TARGET_OS
- -- The Darwin ABI requires that we reserve stack slots for register parameters
- (stackOffset + stackBytes)
-#elif linux_TARGET_OS
- -- ... the SysV ABI doesn't.
- stackOffset
-#endif
- (accumCode `appOL` code)
- (reg : accumUsed)
- | otherwise = do
- (vr, code) <- getSomeReg arg
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
- (stackOffset' + stackBytes)
- (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
- accumUsed
- where
-#if darwin_TARGET_OS
- -- stackOffset is at least 4-byte aligned
- -- The Darwin ABI is happy with that.
- stackOffset' = stackOffset
-#else
- -- ... the SysV ABI requires 8-byte alignment for doubles.
- stackOffset' | isFloatType rep && typeWidth rep == W64 =
- roundTo 8 stackOffset
- | otherwise = stackOffset
-#endif
- stackSlot = AddrRegImm sp (ImmInt stackOffset')
- (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
- II32 -> (1, 0, 4, gprs)
-#if darwin_TARGET_OS
- -- The Darwin ABI requires that we skip a corresponding number of GPRs when
- -- we use the FPRs.
- FF32 -> (1, 1, 4, fprs)
- FF64 -> (2, 1, 8, fprs)
-#elif linux_TARGET_OS
- -- ... the SysV ABI doesn't.
- FF32 -> (0, 1, 4, fprs)
- FF64 -> (0, 1, 8, fprs)
-#endif
-
- moveResult reduceToFF32 =
- case dest_regs of
- [] -> nilOL
- [CmmHinted dest _hint]
- | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
- | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
- | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
- MR r_dest r4]
- | otherwise -> unitOL (MR r_dest r3)
- where rep = cmmRegType (CmmLocal dest)
- r_dest = getRegisterReg (CmmLocal dest)
-
- outOfLineFloatOp mop =
- do
- dflags <- getDynFlagsNat
- mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
- mkForeignLabel functionName Nothing True IsFunction
- let mopLabelOrExpr = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
- return (mopLabelOrExpr, reduce)
- where
- (functionName, reduce) = case mop of
- MO_F32_Exp -> (fsLit "exp", True)
- MO_F32_Log -> (fsLit "log", True)
- MO_F32_Sqrt -> (fsLit "sqrt", True)
-
- MO_F32_Sin -> (fsLit "sin", True)
- MO_F32_Cos -> (fsLit "cos", True)
- MO_F32_Tan -> (fsLit "tan", True)
-
- MO_F32_Asin -> (fsLit "asin", True)
- MO_F32_Acos -> (fsLit "acos", True)
- MO_F32_Atan -> (fsLit "atan", True)
-
- MO_F32_Sinh -> (fsLit "sinh", True)
- MO_F32_Cosh -> (fsLit "cosh", True)
- MO_F32_Tanh -> (fsLit "tanh", True)
- MO_F32_Pwr -> (fsLit "pow", True)
-
- MO_F64_Exp -> (fsLit "exp", False)
- MO_F64_Log -> (fsLit "log", False)
- MO_F64_Sqrt -> (fsLit "sqrt", False)
-
- MO_F64_Sin -> (fsLit "sin", False)
- MO_F64_Cos -> (fsLit "cos", False)
- MO_F64_Tan -> (fsLit "tan", False)
-
- MO_F64_Asin -> (fsLit "asin", False)
- MO_F64_Acos -> (fsLit "acos", False)
- MO_F64_Atan -> (fsLit "atan", False)
-
- MO_F64_Sinh -> (fsLit "sinh", False)
- MO_F64_Cosh -> (fsLit "cosh", False)
- MO_F64_Tanh -> (fsLit "tanh", False)
- MO_F64_Pwr -> (fsLit "pow", False)
- other -> pprPanic "genCCall(ppc): unknown callish op"
- (pprCallishMachOp other)
-
-#endif /* darwin_TARGET_OS || linux_TARGET_OS */
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = do
- (reg,e_code) <- getSomeReg expr
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let
- jumpTable = map jumpTableEntryRel ids
-
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 wordWidth)
- jumpTableEntryRel (Just (BlockId id))
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel id
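- -- each entry is the difference (blockLabel - lbl), so adding the
- -- table base address back in recovers an absolute jump target;
- -- the table itself then needs no run-time relocation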
-
- op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg wORD_SIZE) (ImmInt 0))
-
-#if x86_64_TARGET_ARCH
-#if darwin_TARGET_OS
- -- on Mac OS X/x86_64, put the jump table in the text section
- -- to work around a limitation of the linker.
- -- ld64 is unable to handle the relocations for
- -- .quad L1 - L0
- -- if L0 is not preceded by a non-anonymous label in its section.
-
- code = e_code `appOL` t_code `appOL` toOL [
- ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
- LDATA Text (CmmDataLabel lbl : jumpTable)
- ]
-#else
- -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
- -- relocations, hence we only get 32-bit offsets in the jump
- -- table. As these offsets are always negative we need to properly
- -- sign extend them to 64-bit. This hack should be removed in
- -- conjunction with the hack in PprMach.hs/pprDataItem once
- -- binutils 2.17 is standard.
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- MOVSxL II32
- (OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg wORD_SIZE) (ImmInt 0)))
- (OpReg reg),
- ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
- ]
-#endif
-#else
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- ADD (intSize wordWidth) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
- ]
-#endif
- return code
- | otherwise
- = do
- (reg,e_code) <- getSomeReg expr
- lbl <- getNewLabelNat
- let
- jumpTable = map jumpTableEntry ids
- op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
- code = e_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- JMP_TBL op [ id | Just id <- ids ]
- ]
- -- in
- return code
-#elif powerpc_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = do
- (reg,e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- lbl <- getNewLabelNat
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let
- jumpTable = map jumpTableEntryRel ids
-
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 wordWidth)
- jumpTableEntryRel (Just (BlockId id))
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel id
-
- code = e_code `appOL` t_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- SLW tmp reg (RIImm (ImmInt 2)),
- LD II32 tmp (AddrRegReg tableReg tmp),
- ADD tmp tmp (RIReg tableReg),
- MTCTR tmp,
- BCTR [ id | Just id <- ids ]
- ]
- return code
- | otherwise
- = do
- (reg,e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- lbl <- getNewLabelNat
- let
- jumpTable = map jumpTableEntry ids
-
- code = e_code `appOL` toOL [
- LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
- SLW tmp reg (RIImm (ImmInt 2)),
- ADDIS tmp tmp (HA (ImmCLbl lbl)),
- LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
- MTCTR tmp,
- BCTR [ id | Just id <- ids ]
- ]
- return code
-#elif sparc_TARGET_ARCH
-genSwitch expr ids
- | opt_PIC
- = error "MachCodeGen: sparc genSwitch PIC not finished\n"
-
- | otherwise
- = do (e_reg, e_code) <- getSomeReg expr
-
- base_reg <- getNewRegNat II32
- offset_reg <- getNewRegNat II32
- dst <- getNewRegNat II32
-
- label <- getNewLabelNat
- let jumpTable = map jumpTableEntry ids
-
- return $ e_code `appOL`
- toOL
- -- the jump table
- [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
-
- -- load base of jump table
- , SETHI (HI (ImmCLbl label)) base_reg
- , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
-
-                -- the addresses in the table are 32 bits wide.
- , SLL e_reg (RIImm $ ImmInt 2) offset_reg
-
- -- load and jump to the destination
- , LD II32 (AddrRegReg base_reg offset_reg) dst
- , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
- , NOP ]
-
-#else
-#error "ToDo: genSwitch"
-#endif
-
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: Maybe BlockId -> CmmStatic
-jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
-jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel id
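-
--- For example, ids [Just b1, Nothing, Just b2] produces the table
--- [addr b1, 0, addr b2]; genSwitch indexes into it with the switch
--- value scaled by the word size.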
-
--- -----------------------------------------------------------------------------
--- Support bits
--- -----------------------------------------------------------------------------
-
-
--- -----------------------------------------------------------------------------
--- 'condIntReg' and 'condFltReg': condition codes into registers
-
--- Turn those condition codes into integers now (when they appear on
--- the right hand side of an assignment).
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-condIntReg = panic "MachCode.condIntReg (not on Alpha)"
-condFltReg = panic "MachCode.condFltReg (not on Alpha)"
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-condIntReg cond x y = do
- CondCode _ cond cond_code <- condIntCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
-#if i386_TARGET_ARCH
-
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-
-condFltReg cond x y = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp1 <- getNewRegNat wordSize
- tmp2 <- getNewRegNat wordSize
- let
- -- We have to worry about unordered operands (eg. comparisons
- -- against NaN). If the operands are unordered, the comparison
- -- sets the parity flag, carry flag and zero flag.
- -- All comparisons are supposed to return false for unordered
- -- operands except for !=, which returns true.
- --
- -- Optimisation: we don't have to test the parity flag if we
- -- know the test has already excluded the unordered case: eg >
- -- and >= test for a zero carry flag, which can only occur for
- -- ordered operands.
- --
- -- ToDo: by reversing comparisons we could avoid testing the
- -- parity flag in more cases.
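-        --
-        -- For example, NaN > 1 and NaN <= 1 must both come out False,
-        -- but NaN /= 1 must come out True: hence or_unordered for NE
-        -- and and_ordered for the remaining cases.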
-
- code dst =
- cond_code `appOL`
- (case cond of
- NE -> or_unordered dst
- GU -> plain_test dst
- GEU -> plain_test dst
- _ -> and_ordered dst)
-
- plain_test dst = toOL [
- SETCC cond (OpReg tmp1),
- MOVZxL II8 (OpReg tmp1) (OpReg dst)
- ]
- or_unordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC PARITY (OpReg tmp2),
- OR II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- and_ordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC NOTPARITY (OpReg tmp2),
- AND II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- -- in
- return (Any II32 code)
-
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg EQQ x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg NE x (CmmLit (CmmInt 0 d)) = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg NE x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg cond x y = do
- bid1@(BlockId lbl1) <- getBlockIdNat
- bid2@(BlockId lbl2) <- getBlockIdNat
- CondCode _ cond cond_code <- condIntCode cond x y
- let
- code__2 dst = cond_code `appOL` toOL [
- BI cond False bid1, NOP,
- OR False g0 (RIImm (ImmInt 0)) dst,
- BI ALWAYS False bid2, NOP,
- NEWBLOCK bid1,
- OR False g0 (RIImm (ImmInt 1)) dst,
- NEWBLOCK bid2]
- return (Any II32 code__2)
-
-condFltReg cond x y = do
- bid1@(BlockId lbl1) <- getBlockIdNat
- bid2@(BlockId lbl2) <- getBlockIdNat
- CondCode _ cond cond_code <- condFltCode cond x y
- let
- code__2 dst = cond_code `appOL` toOL [
- NOP,
- BF cond False bid1, NOP,
- OR False g0 (RIImm (ImmInt 0)) dst,
- BI ALWAYS False bid2, NOP,
- NEWBLOCK bid1,
- OR False g0 (RIImm (ImmInt 1)) dst,
- NEWBLOCK bid2]
- return (Any II32 code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-condReg getCond = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- CondCode _ cond cond_code <- getCond
- let
-{- code dst = cond_code `appOL` toOL [
- BCC cond lbl1,
- LI dst (ImmInt 0),
- BCC ALWAYS lbl2,
- NEWBLOCK lbl1,
- LI dst (ImmInt 1),
- BCC ALWAYS lbl2,
- NEWBLOCK lbl2
- ]-}
- code dst = cond_code
- `appOL` negate_code
- `appOL` toOL [
- MFCR dst,
- RLWINM dst dst (bit + 1) 31 31
- ]
-
- negate_code | do_negate = unitOL (CRNOR bit bit bit)
- | otherwise = nilOL
-
- (bit, do_negate) = case cond of
- LTT -> (0, False)
- LE -> (1, True)
- EQQ -> (2, False)
- GE -> (0, True)
- GTT -> (1, False)
-
- NE -> (2, True)
-
- LU -> (0, False)
- LEU -> (1, True)
- GEU -> (0, True)
- GU -> (1, False)
-
- return (Any II32 code)
-
-condIntReg cond x y = condReg (condIntCode cond x y)
-condFltReg cond x y = condReg (condFltCode cond x y)
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt: 'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-trivialCode
- :: Width -- Int only
- -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr)
- ,IF_ARCH_i386 ((Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- ,IF_ARCH_x86_64 ((Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- ,IF_ARCH_sparc((Reg -> RI -> Reg -> Instr)
- ,IF_ARCH_powerpc(Bool -> (Reg -> Reg -> RI -> Instr)
- ,)))))
- -> CmmExpr -> CmmExpr -- the two arguments
- -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialFCode
- :: Width -- Floating point only
- -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr)
- ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr)
- ,))))
- -> CmmExpr -> CmmExpr -- the two arguments
- -> NatM Register
-#endif
-
-trivialUCode
- :: Size
- -> IF_ARCH_alpha((RI -> Reg -> Instr)
- ,IF_ARCH_i386 ((Operand -> Instr)
- ,IF_ARCH_x86_64 ((Operand -> Instr)
- ,IF_ARCH_sparc((RI -> Reg -> Instr)
- ,IF_ARCH_powerpc((Reg -> Reg -> Instr)
- ,)))))
- -> CmmExpr -- the one argument
- -> NatM Register
-
-#ifndef powerpc_TARGET_ARCH
-trivialUFCode
- :: Size
- -> IF_ARCH_alpha((Reg -> Reg -> Instr)
- ,IF_ARCH_i386 ((Reg -> Reg -> Instr)
- ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr)
- ,IF_ARCH_sparc((Reg -> Reg -> Instr)
- ,))))
- -> CmmExpr -- the one argument
- -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-trivialCode instr x (StInt y)
- | fits8Bits y
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src1 = registerName register tmp
- src2 = ImmInt (fromInteger y)
- code__2 dst = code . mkSeqInstr (instr src1 (RIImm src2) dst)
- in
- return (Any IntRep code__2)
-
-trivialCode instr x y
- = getRegister x `thenNat` \ register1 ->
- getRegister y `thenNat` \ register2 ->
- getNewRegNat IntRep `thenNat` \ tmp1 ->
- getNewRegNat IntRep `thenNat` \ tmp2 ->
- let
- code1 = registerCode register1 tmp1 []
- src1 = registerName register1 tmp1
- code2 = registerCode register2 tmp2 []
- src2 = registerName register2 tmp2
- code__2 dst = asmSeqThen [code1, code2] .
- mkSeqInstr (instr src1 (RIReg src2) dst)
- in
- return (Any IntRep code__2)
-
-------------
-trivialUCode instr x
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (instr (RIReg src) dst)
- in
- return (Any IntRep code__2)
-
-------------
-trivialFCode _ instr x y
- = getRegister x `thenNat` \ register1 ->
- getRegister y `thenNat` \ register2 ->
- getNewRegNat FF64 `thenNat` \ tmp1 ->
- getNewRegNat FF64 `thenNat` \ tmp2 ->
- let
- code1 = registerCode register1 tmp1
- src1 = registerName register1 tmp1
-
- code2 = registerCode register2 tmp2
- src2 = registerName register2 tmp2
-
- code__2 dst = asmSeqThen [code1 [], code2 []] .
- mkSeqInstr (instr src1 src2 dst)
- in
- return (Any FF64 code__2)
-
-trivialUFCode _ instr x
- = getRegister x `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
- code__2 dst = code . mkSeqInstr (instr src dst)
- in
- return (Any FF64 code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-
-{-
-The Rules of the Game are:
-
-* You cannot assume anything about the destination register dst;
- it may be anything, including a fixed reg.
-
-* You may compute an operand into a fixed reg, but you may not
- subsequently change the contents of that fixed reg. If you
- want to do so, first copy the value either to a temporary
- or into dst. You are free to modify dst even if it happens
- to be a fixed reg -- that's not your problem.
-
-* You cannot assume that a fixed reg will stay live over an
- arbitrary computation. The same applies to the dst reg.
-
-* Temporary regs obtained from getNewRegNat are distinct from
- each other and from all other regs, and stay live over
- arbitrary computations.
-
---------------------
-
-SDM's version of The Rules:
-
-* If getRegister returns Any, that means it can generate correct
- code which places the result in any register, period. Even if that
- register happens to be read during the computation.
-
- Corollary #1: this means that if you are generating code for an
- operation with two arbitrary operands, you cannot assign the result
- of the first operand into the destination register before computing
- the second operand. The second operand might require the old value
- of the destination register.
-
- Corollary #2: A function might be able to generate more efficient
- code if it knows the destination register is a new temporary (and
- therefore not read by any of the sub-computations).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- (c) known registers (eg. %ecx is used by shifts)
- In particular, it may *not* modify global registers, unless the global
- register happens to be the destination register.
--}
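-
--- Concretely, for @x + y@ compiled into dst: evaluating x straight
--- into dst and then evaluating y would be wrong whenever y reads dst.
--- That is why genTrivialCode below computes the non-destination
--- operand first, saving it in a fresh temporary if it clashes.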
-
-trivialCode width instr (Just revinstr) (CmmLit lit_a) b
- | is32BitLit lit_a = do
- b_code <- getAnyReg b
- let
- code dst
- = b_code dst `snocOL`
- revinstr (OpImm (litToImm lit_a)) (OpReg dst)
- -- in
- return (Any (intSize width) code)
-
-trivialCode width instr maybe_revinstr a b
- = genTrivialCode (intSize width) instr a b
-
--- This is re-used for floating pt instructions too.
-genTrivialCode rep instr a b = do
- (b_op, b_code) <- getNonClobberedOperand b
- a_code <- getAnyReg a
- tmp <- getNewRegNat rep
- let
- -- We want the value of b to stay alive across the computation of a.
- -- But, we want to calculate a straight into the destination register,
- -- because the instruction only has two operands (dst := dst `op` src).
- -- The troublesome case is when the result of b is in the same register
- -- as the destination reg. In this case, we have to save b in a
- -- new temporary across the computation of a.
- code dst
- | dst `regClashesWithOp` b_op =
- b_code `appOL`
- unitOL (MOV rep b_op (OpReg tmp)) `appOL`
- a_code dst `snocOL`
- instr (OpReg tmp) (OpReg dst)
- | otherwise =
- b_code `appOL`
- a_code dst `snocOL`
- instr b_op (OpReg dst)
- -- in
- return (Any rep code)
-
-reg `regClashesWithOp` OpReg reg2 = reg == reg2
-reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-reg `regClashesWithOp` _ = False
-
------------
-
-trivialUCode rep instr x = do
- x_code <- getAnyReg x
- let
- code dst =
- x_code dst `snocOL`
- instr (OpReg dst)
- return (Any rep code)
-
------------
-
-#if i386_TARGET_ARCH
-
-trivialFCode width instr x y = do
- (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
- (y_reg, y_code) <- getSomeReg y
- let
- size = floatSize width
- code dst =
- x_code `appOL`
- y_code `snocOL`
- instr size x_reg y_reg dst
- return (Any size code)
-
-#endif
-
-#if x86_64_TARGET_ARCH
-trivialFCode pk instr x y
- = genTrivialCode size (instr size) x y
- where size = floatSize pk
-#endif
-
--------------
-
-trivialUFCode size instr x = do
- (x_reg, x_code) <- getSomeReg x
- let
- code dst =
- x_code `snocOL`
- instr x_reg dst
- -- in
- return (Any size code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-trivialCode pk instr x (CmmLit (CmmInt y d))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- tmp <- getNewRegNat II32
- let
- src2 = ImmInt (fromInteger y)
- code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
- return (Any II32 code__2)
-
-trivialCode pk instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
- let
- code__2 dst = code1 `appOL` code2 `snocOL`
- instr src1 (RIReg src2) dst
- return (Any II32 code__2)
-
-------------
-trivialFCode pk instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
- tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType x
- pk2 = cmmExprType y
-
- code__2 dst =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- instr (floatSize pk) src1 src2 dst
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- instr FF64 tmp src2 dst
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- instr FF64 src1 tmp dst
- return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
- code__2)
-
-------------
-trivialUCode size instr x = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat size
- let
- code__2 dst = code `snocOL` instr (RIReg src) dst
- return (Any size code__2)
-
--------------
-trivialUFCode pk instr x = do
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat pk
- let
- code__2 dst = code `snocOL` instr src dst
- return (Any pk code__2)
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-
-{-
-Wolfgang's PowerPC version of The Rules:
-
-A slightly modified version of The Rules to take advantage of the fact
-that PowerPC instructions work on all registers and don't implicitly
-clobber any fixed registers.
-
-* The only expression for which getRegister returns Fixed is (CmmReg reg).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- It may *not* modify global registers, unless the global
- register happens to be the destination register.
- It may not clobber any other registers. In fact, only ccalls clobber any
- fixed registers.
- Also, it may not modify the counter register (used by genCCall).
-
- Corollary: If a getRegister for a subexpression returns Fixed, you need
- not move it to a fresh temporary before evaluating the next subexpression.
- The Fixed register won't be modified.
- Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
-
-* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
- the value of the destination register.
--}
-
-trivialCode rep signed instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate rep signed y
- = do
- (src1, code1) <- getSomeReg x
- let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
- return (Any (intSize rep) code)
-
-trivialCode rep signed instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
- return (Any (intSize rep) code)
-
-trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm' size instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
- return (Any size code)
-
-trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
-
-trivialUCode rep instr x = do
- (src, code) <- getSomeReg x
- let code' dst = code `snocOL` instr dst src
- return (Any rep code')
-
--- There is no "remainder" instruction on the PPC, so we have to do
--- it the hard way.
--- The "div" parameter is the division instruction to use (DIVW or DIVWU)
-
-remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-remainderCode rep div x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `appOL` toOL [
- div dst src1 src2,
- MULLW dst dst (RIReg src2),
- SUBF dst dst src1
- ]
- return (Any (intSize rep) code)
-
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- Coercing to/from integer/floating-point...
-
--- When going to integer, we truncate (round towards 0).
-
--- @coerce(Int2FP|FP2Int)@ are more complicated integer/float
--- conversions. We have to store temporaries in memory to move
--- between the integer and the floating point register sets.
-
--- @coerceDbl2Flt@ and @coerceFlt2Dbl@ are done this way because we
--- pretend, on sparc at least, that double and float regs are separate
--- kinds, so the value has to be computed into one kind before being
--- explicitly "converted" to live in the other kind.
-
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-
-#if sparc_TARGET_ARCH
-coerceDbl2Flt :: CmmExpr -> NatM Register
-coerceFlt2Dbl :: CmmExpr -> NatM Register
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if alpha_TARGET_ARCH
-
-coerceInt2FP _ x
- = getRegister x `thenNat` \ register ->
- getNewRegNat IntRep `thenNat` \ reg ->
- let
- code = registerCode register reg
- src = registerName register reg
-
- code__2 dst = code . mkSeqInstrs [
- ST Q src (spRel 0),
- LD TF dst (spRel 0),
- CVTxy Q TF dst dst]
- in
- return (Any FF64 code__2)
-
--------------
-coerceFP2Int x
- = getRegister x `thenNat` \ register ->
- getNewRegNat FF64 `thenNat` \ tmp ->
- let
- code = registerCode register tmp
- src = registerName register tmp
-
- code__2 dst = code . mkSeqInstrs [
- CVTxy TF Q src tmp,
- ST TF tmp (spRel 0),
- LD Q dst (spRel 0)]
- in
- return (Any IntRep code__2)
-
-#endif /* alpha_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if i386_TARGET_ARCH
-
-coerceInt2FP from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> GITOF; W64 -> GITOD
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- return (Any (floatSize to) code)
-
-------------
-
-coerceFP2Int from to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case from of W32 -> GFTOI; W64 -> GDTOI
- code dst = x_code `snocOL` opc x_reg dst
- -- ToDo: works for non-II32 reps?
- -- in
- return (Any (intSize to) code)
-
-#endif /* i386_TARGET_ARCH */
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if x86_64_TARGET_ARCH
-
-coerceFP2Int from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (intSize to) code) -- works even if the destination rep is <II32
-
-coerceInt2FP from to x = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
- code dst = x_code `snocOL` opc x_op dst
- -- in
- return (Any (floatSize to) code) -- works even if the destination rep is <II32
-
-coerceFP2FP :: Width -> CmmExpr -> NatM Register
-coerceFP2FP to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
- code dst = x_code `snocOL` opc x_reg dst
- -- in
- return (Any (floatSize to) code)
-#endif
-
--- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if sparc_TARGET_ARCH
-
-coerceInt2FP width1 width2 x = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `appOL` toOL [
- ST (intSize width1) src (spRel (-2)),
- LD (intSize width1) (spRel (-2)) dst,
- FxTOy (intSize width1) (floatSize width2) dst dst]
- return (Any (floatSize $ width2) code__2)
-
-
--- | Coerce a floating point value to integer
---
--- NOTE: On sparc v9 there are no instructions to move a value from an
--- FP register directly to an int register, so we have to use a load/store.
---
-coerceFP2Int width1 width2 x
- = do let fsize1 = floatSize width1
- fsize2 = floatSize width2
-
- isize2 = intSize width2
-
- (fsrc, code) <- getSomeReg x
- fdst <- getNewRegNat fsize2
-
- let code2 dst
- = code
- `appOL` toOL
- -- convert float to int format, leaving it in a float reg.
- [ FxTOy fsize1 isize2 fsrc fdst
-
- -- store the int into mem, then load it back to move
- -- it into an actual int reg.
- , ST fsize2 fdst (spRel (-2))
- , LD isize2 (spRel (-2)) dst]
-
- return (Any isize2 code2)
-
-------------
-coerceDbl2Flt x = do
- (src, code) <- getSomeReg x
- return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
-
-------------
-coerceFlt2Dbl x = do
- (src, code) <- getSomeReg x
- return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
-
-#endif /* sparc_TARGET_ARCH */
-
-#if powerpc_TARGET_ARCH
-coerceInt2FP fromRep toRep x = do
- (src, code) <- getSomeReg x
- lbl <- getNewLabelNat
- itmp <- getNewRegNat II32
- ftmp <- getNewRegNat FF64
- dflags <- getDynFlagsNat
- dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
- Amode addr addr_code <- getAmode dynRef
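-    -- This is the classic IEEE-double trick: the static data below is
-    -- the double 2^52 + 2^31 (bits 0x43300000 80000000). Flipping the
-    -- sign bit of src and gluing 0x4330 on top builds the double
-    -- 2^52 + 2^31 + src, so the final FSUB leaves exactly src.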
- let
- code' dst = code `appOL` maybe_exts `appOL` toOL [
- LDATA ReadOnlyData
- [CmmDataLabel lbl,
- CmmStaticLit (CmmInt 0x43300000 W32),
- CmmStaticLit (CmmInt 0x80000000 W32)],
- XORIS itmp src (ImmInt 0x8000),
- ST II32 itmp (spRel 3),
- LIS itmp (ImmInt 0x4330),
- ST II32 itmp (spRel 2),
- LD FF64 ftmp (spRel 2)
- ] `appOL` addr_code `appOL` toOL [
- LD FF64 dst addr,
- FSUB FF64 dst ftmp dst
- ] `appOL` maybe_frsp dst
-
- maybe_exts = case fromRep of
- W8 -> unitOL $ EXTS II8 src src
- W16 -> unitOL $ EXTS II16 src src
- W32 -> nilOL
- maybe_frsp dst = case toRep of
- W32 -> unitOL $ FRSP dst dst
- W64 -> nilOL
- return (Any (floatSize toRep) code')
-
-coerceFP2Int fromRep toRep x = do
- -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat FF64
- let
- code' dst = code `appOL` toOL [
- -- convert to int in FP reg
- FCTIWZ tmp src,
- -- store value (64bit) from FP to stack
- ST FF64 tmp (spRel 2),
- -- read low word of value (high word is undefined)
- LD II32 dst (spRel 3)]
- return (Any (intSize toRep) code')
-#endif /* powerpc_TARGET_ARCH */
-
-
--- -----------------------------------------------------------------------------
--- eXTRA_STK_ARGS_HERE
-
--- We (allegedly) put the first six C-call arguments in registers;
--- where do we start putting the rest of them?
-
--- Moved from Instrs (SDM):
-
-#if alpha_TARGET_ARCH || sparc_TARGET_ARCH
-eXTRA_STK_ARGS_HERE :: Int
-eXTRA_STK_ARGS_HERE
- = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???))
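-        -- the SPARC value 23 presumably reflects the V8 frame layout:
-        -- 16 words of register-window save area, 1 word of struct-return
-        -- pointer and 6 words of register arguments occupy words 0..22.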
-#endif
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
index d19cda45f4..ed59d2bd0a 100644
--- a/compiler/nativeGen/NCGMonad.hs
+++ b/compiler/nativeGen/NCGMonad.hs
@@ -10,28 +10,43 @@ module NCGMonad (
NatM_State(..), mkNatM_State,
NatM, -- instance Monad
- initNat, addImportNat, getUniqueNat,
- mapAccumLNat, setDeltaNat, getDeltaNat,
- getBlockIdNat, getNewLabelNat, getNewRegNat, getNewRegPairNat,
- getPicBaseMaybeNat, getPicBaseNat, getDynFlagsNat
- ) where
+ initNat,
+ addImportNat,
+ getUniqueNat,
+ mapAccumLNat,
+ setDeltaNat,
+ getDeltaNat,
+ getBlockIdNat,
+ getNewLabelNat,
+ getNewRegNat,
+ getNewRegPairNat,
+ getPicBaseMaybeNat,
+ getPicBaseNat,
+ getDynFlagsNat
+)
+
+where
#include "HsVersions.h"
+import Reg
+import Size
+import TargetReg
+
import BlockId
import CLabel ( CLabel, mkAsmTempLabel )
-import Regs
import UniqSupply
import Unique ( Unique )
import DynFlags
-data NatM_State = NatM_State {
- natm_us :: UniqSupply,
- natm_delta :: Int,
- natm_imports :: [(CLabel)],
- natm_pic :: Maybe Reg,
- natm_dflags :: DynFlags
- }
+data NatM_State
+ = NatM_State {
+ natm_us :: UniqSupply,
+ natm_delta :: Int,
+ natm_imports :: [(CLabel)],
+ natm_pic :: Maybe Reg,
+ natm_dflags :: DynFlags
+ }
newtype NatM result = NatM (NatM_State -> (result, NatM_State))
@@ -39,22 +54,27 @@ unNat :: NatM a -> NatM_State -> (a, NatM_State)
unNat (NatM a) = a
mkNatM_State :: UniqSupply -> Int -> DynFlags -> NatM_State
-mkNatM_State us delta dflags = NatM_State us delta [] Nothing dflags
+mkNatM_State us delta dflags
+ = NatM_State us delta [] Nothing dflags
initNat :: NatM_State -> NatM a -> (a, NatM_State)
-initNat init_st m = case unNat m init_st of { (r,st) -> (r,st) }
+initNat init_st m
+ = case unNat m init_st of { (r,st) -> (r,st) }
+
instance Monad NatM where
(>>=) = thenNat
return = returnNat
+
thenNat :: NatM a -> (a -> NatM b) -> NatM b
thenNat expr cont
- = NatM $ \st -> case unNat expr st of
+ = NatM $ \st -> case unNat expr st of
(result, st') -> unNat (cont result) st'
returnNat :: a -> NatM a
-returnNat result = NatM $ \st -> (result, st)
+returnNat result
+ = NatM $ \st -> (result, st)
mapAccumLNat :: (acc -> x -> NatM (acc, y))
-> acc
@@ -75,43 +95,64 @@ getUniqueNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
getDynFlagsNat :: NatM DynFlags
-getDynFlagsNat = NatM $ \ (NatM_State us delta imports pic dflags) ->
- (dflags, (NatM_State us delta imports pic dflags))
+getDynFlagsNat
+ = NatM $ \ (NatM_State us delta imports pic dflags) ->
+ (dflags, (NatM_State us delta imports pic dflags))
+
getDeltaNat :: NatM Int
-getDeltaNat = NatM $ \ st -> (natm_delta st, st)
+getDeltaNat
+ = NatM $ \ st -> (natm_delta st, st)
+
setDeltaNat :: Int -> NatM ()
-setDeltaNat delta = NatM $ \ (NatM_State us _ imports pic dflags) ->
- ((), NatM_State us delta imports pic dflags)
+setDeltaNat delta
+ = NatM $ \ (NatM_State us _ imports pic dflags) ->
+ ((), NatM_State us delta imports pic dflags)
+
addImportNat :: CLabel -> NatM ()
-addImportNat imp = NatM $ \ (NatM_State us delta imports pic dflags) ->
- ((), NatM_State us delta (imp:imports) pic dflags)
+addImportNat imp
+ = NatM $ \ (NatM_State us delta imports pic dflags) ->
+ ((), NatM_State us delta (imp:imports) pic dflags)
+
getBlockIdNat :: NatM BlockId
-getBlockIdNat = do u <- getUniqueNat; return (BlockId u)
+getBlockIdNat
+ = do u <- getUniqueNat
+ return (BlockId u)
+
getNewLabelNat :: NatM CLabel
-getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u)
+getNewLabelNat
+ = do u <- getUniqueNat
+ return (mkAsmTempLabel u)
+
getNewRegNat :: Size -> NatM Reg
-getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep)
+getNewRegNat rep
+ = do u <- getUniqueNat
+ return (targetMkVReg u rep)
+
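+-- getNewRegPairNat is presumably used for 64-bit virtual values on
+-- 32-bit targets: 'lo' carries the low word and 'hi' the high word.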
getNewRegPairNat :: Size -> NatM (Reg,Reg)
-getNewRegPairNat rep = do
- u <- getUniqueNat
- let lo = mkVReg u rep; hi = getHiVRegFromLo lo
- return (lo,hi)
+getNewRegPairNat rep
+ = do u <- getUniqueNat
+ let lo = targetMkVReg u rep; hi = getHiVRegFromLo lo
+ return (lo,hi)
+
getPicBaseMaybeNat :: NatM (Maybe Reg)
-getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state))
+getPicBaseMaybeNat
+ = NatM (\state -> (natm_pic state, state))
+
getPicBaseNat :: Size -> NatM Reg
-getPicBaseNat rep = do
- mbPicBase <- getPicBaseMaybeNat
- case mbPicBase of
- Just picBase -> return picBase
- Nothing -> do
- reg <- getNewRegNat rep
- NatM (\state -> (reg, state { natm_pic = Just reg }))
+getPicBaseNat rep
+ = do mbPicBase <- getPicBaseMaybeNat
+ case mbPicBase of
+ Just picBase -> return picBase
+ Nothing
+ -> do
+ reg <- getNewRegNat rep
+ NatM (\state -> (reg, state { natm_pic = Just reg }))
diff --git a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PIC.hs
index a1e11d8a34..98e4f9f2d5 100644
--- a/compiler/nativeGen/PositionIndependentCode.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -1,24 +1,9 @@
-{-# OPTIONS -w #-}
--- The above warning suppression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module PositionIndependentCode (
- cmmMakeDynamicReference,
- ReferenceKind(..),
- needImportedSymbols,
- pprImportedSymbol,
- pprGotDeclaration,
- initializePicBase
- ) where
-
-#include "HsVersions.h"
-
{-
This module handles generation of position independent code and
dynamic-linking related issues for the native code generator.
+
+   This depends on both the architecture and the OS, so we define it here
+ instead of in one of the architecture specific modules.
Things outside this module which are related to this:
@@ -53,7 +38,30 @@ module PositionIndependentCode (
and ppc-linux).
-}
-#include "nativeGen/NCG.h"
+module PIC (
+ cmmMakeDynamicReference,
+ ReferenceKind(..),
+ needImportedSymbols,
+ pprImportedSymbol,
+ pprGotDeclaration,
+
+ initializePicBase_ppc,
+ initializePicBase_x86
+)
+
+where
+
+import qualified PPC.Instr as PPC
+import qualified PPC.Regs as PPC
+
+import qualified X86.Instr as X86
+
+import Platform
+import Instruction
+import Size
+import Reg
+import NCGMonad
+
import Cmm
import CLabel ( CLabel, pprCLabel,
@@ -61,13 +69,8 @@ import CLabel ( CLabel, pprCLabel,
dynamicLinkerLabelInfo, mkPicBaseLabel,
labelDynamic, externallyVisibleCLabel )
-#if linux_TARGET_OS
import CLabel ( mkForeignLabel )
-#endif
-import Regs
-import Instrs
-import NCGMonad ( NatM, getNewRegNat, getNewLabelNat )
import StaticFlags ( opt_PIC, opt_Static )
import BasicTypes
@@ -80,8 +83,7 @@ import DynFlags
import FastString
--- The most important function here is cmmMakeDynamicReference.
-
+--------------------------------------------------------------------------------
-- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
-- code. It does The Right Thing(tm) to convert the CmmLabel into a
-- position-independent, dynamic-linking-aware reference to the thing
@@ -94,10 +96,12 @@ import FastString
-- - addImportCmmOpt for the CmmOptM monad
-- - addImportNat for the NatM monad.
-data ReferenceKind = DataReference
- | CallReference
- | JumpReference
- deriving(Eq)
+data ReferenceKind
+ = DataReference
+ | CallReference
+ | JumpReference
+ deriving(Eq)
+
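+-- DataReference, CallReference and JumpReference may need different
+-- access methods: for example, dynamic-linker code stubs assume a
+-- regular call's stack layout, so they cannot be used for jumps.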
cmmMakeDynamicReference
:: Monad m => DynFlags
@@ -110,80 +114,98 @@ cmmMakeDynamicReference
cmmMakeDynamicReference dflags addImport referenceKind lbl
| Just _ <- dynamicLinkerLabelInfo lbl
= return $ CmmLit $ CmmLabel lbl -- already processed it, pass through
- | otherwise = case howToAccessLabel dflags referenceKind lbl of
+
+ | otherwise
+ = case howToAccessLabel
+ dflags
+ (platformArch $ targetPlatform dflags)
+ (platformOS $ targetPlatform dflags)
+ referenceKind lbl of
+
AccessViaStub -> do
let stub = mkDynamicLinkerLabel CodeStub lbl
addImport stub
return $ CmmLit $ CmmLabel stub
+
AccessViaSymbolPtr -> do
let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
addImport symbolPtr
- return $ CmmLoad (cmmMakePicReference symbolPtr) bWord
+ return $ CmmLoad (cmmMakePicReference dflags symbolPtr) bWord
+
AccessDirectly -> case referenceKind of
-- for data, we might have to make some calculations:
- DataReference -> return $ cmmMakePicReference lbl
+ DataReference -> return $ cmmMakePicReference dflags lbl
-- all currently supported processors support
-- PC-relative branch and call instructions,
-- so just jump there if it's a call or a jump
_ -> return $ CmmLit $ CmmLabel lbl
-
--- -------------------------------------------------------------------
-
+
+
+-- -----------------------------------------------------------------------------
-- Create a position independent reference to a label.
-- (but do not bother with dynamic linking).
-- We calculate the label's address by adding some (platform-dependent)
-- offset to our base register; this offset is calculated by
-- the function picRelative in the platform-dependent part below.
-cmmMakePicReference :: CLabel -> CmmExpr
-
-#if !mingw32_TARGET_OS
+cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr
+cmmMakePicReference dflags lbl
+
-- Windows doesn't need PIC,
-- everything gets relocated at runtime
+ | OSMinGW32 <- platformOS $ targetPlatform dflags
+ = CmmLit $ CmmLabel lbl
-cmmMakePicReference lbl
- | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordWidth) [
- CmmReg (CmmGlobal PicBaseReg),
- CmmLit $ picRelative lbl
- ]
- where
- absoluteLabel lbl = case dynamicLinkerLabelInfo lbl of
- Just (GotSymbolPtr, _) -> False
- Just (GotSymbolOffset, _) -> False
- _ -> True
-#endif
-cmmMakePicReference lbl = CmmLit $ CmmLabel lbl
+ | (opt_PIC || not opt_Static) && absoluteLabel lbl
+ = CmmMachOp (MO_Add wordWidth)
+ [ CmmReg (CmmGlobal PicBaseReg)
+ , CmmLit $ picRelative
+ (platformArch $ targetPlatform dflags)
+ (platformOS $ targetPlatform dflags)
+ lbl ]
--- ===================================================================
--- Platform dependent stuff
--- ===================================================================
+ | otherwise
+ = CmmLit $ CmmLabel lbl
+
+
+absoluteLabel :: CLabel -> Bool
+absoluteLabel lbl
+ = case dynamicLinkerLabelInfo lbl of
+ Just (GotSymbolPtr, _) -> False
+ Just (GotSymbolOffset, _) -> False
+ _ -> True
+
+--------------------------------------------------------------------------------
-- Knowledge about what special dynamic linker labels like symbol
-- pointers, code stubs and GOT offsets look like is located in the
-- module CLabel.
--- -------------------------------------------------------------------
-
-- We have to decide which labels need to be accessed
-- indirectly or via a piece of stub code.
+data LabelAccessStyle
+ = AccessViaStub
+ | AccessViaSymbolPtr
+ | AccessDirectly
-data LabelAccessStyle = AccessViaStub
- | AccessViaSymbolPtr
- | AccessDirectly
+howToAccessLabel
+ :: DynFlags -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle
-howToAccessLabel :: DynFlags -> ReferenceKind -> CLabel -> LabelAccessStyle
-
-#if mingw32_TARGET_OS
-- Windows
--
-- We need to access *exactly* those things that
-- are imported from a DLL via an __imp_* label.
-- There are no stubs for imported code.
+--
+howToAccessLabel dflags _ OSMinGW32 _ lbl
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
-howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
- | otherwise = AccessDirectly
-#elif darwin_TARGET_OS
-- Mach-O (Darwin, Mac OS X)
--
-- Indirect access is required in the following cases:
@@ -191,54 +213,48 @@ howToAccessLabel dflags _ lbl | labelDynamic (thisPackage dflags) lbl = AccessVi
-- * (not on x86_64) data from a different module, if we're generating PIC code
-- It is always possible to access something indirectly,
-- even when it's not necessary.
+--
+howToAccessLabel dflags arch OSDarwin DataReference lbl
+ -- data access to a dynamic library goes via a symbol pointer
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+        -- when generating PIC code, all cross-module data references
+        -- must go via a symbol pointer, too, because the assembler
+        -- cannot generate code for a label difference where one
+        -- label is undefined. Doesn't apply to x86_64.
+ -- Unfortunately, we don't know whether it's cross-module,
+ -- so we do it for all externally visible labels.
+ -- This is a slight waste of time and space, but otherwise
+ -- we'd need to pass the current Module all the way in to
+ -- this function.
+ | arch /= ArchX86_64
+ , opt_PIC && externallyVisibleCLabel lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
+howToAccessLabel dflags arch OSDarwin JumpReference lbl
+ -- dyld code stubs don't work for tailcalls because the
+ -- stack alignment is only right for regular calls.
+ -- Therefore, we have to go via a symbol pointer:
+ | arch == ArchX86 || arch == ArchX86_64
+ , labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
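+
+        -- if the guards fail we fall through to the general
+        -- Darwin cases below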
+
+
+howToAccessLabel dflags arch OSDarwin _ lbl
+ -- Code stubs are the usual method of choice for imported code;
+ -- not needed on x86_64 because Apple's new linker, ld64, generates
+ -- them automatically.
+ | arch /= ArchX86_64
+ , labelDynamic (thisPackage dflags) lbl
+ = AccessViaStub
+
+ | otherwise
+ = AccessDirectly
-howToAccessLabel dflags DataReference lbl
- -- data access to a dynamic library goes via a symbol pointer
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-
-#if !x86_64_TARGET_ARCH
- -- when generating PIC code, all cross-module data references must
- -- must go via a symbol pointer, too, because the assembler
- -- cannot generate code for a label difference where one
- -- label is undefined. Doesn't apply t x86_64.
- -- Unfortunately, we don't know whether it's cross-module,
- -- so we do it for all externally visible labels.
- -- This is a slight waste of time and space, but otherwise
- -- we'd need to pass the current Module all the way in to
- -- this function.
- | opt_PIC && externallyVisibleCLabel lbl = AccessViaSymbolPtr
-#endif
- | otherwise = AccessDirectly
-
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
- -- dyld code stubs don't work for tailcalls because the
- -- stack alignment is only right for regular calls.
- -- Therefore, we have to go via a symbol pointer:
-howToAccessLabel dflags JumpReference lbl
- | labelDynamic (thisPackage dflags) lbl
- = AccessViaSymbolPtr
-#endif
-
-howToAccessLabel dflags _ lbl
-#if !x86_64_TARGET_ARCH
- -- Code stubs are the usual method of choice for imported code;
- -- not needed on x86_64 because Apple's new linker, ld64, generates
- -- them automatically.
- | labelDynamic (thisPackage dflags) lbl
- = AccessViaStub
-#endif
- | otherwise
- = AccessDirectly
-
-
-#elif linux_TARGET_OS && powerpc64_TARGET_ARCH
--- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
-
-howToAccessLabel _ DataReference lbl = AccessViaSymbolPtr
-howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
-
-#elif linux_TARGET_OS
-- ELF (Linux)
--
-- ELF tries to pretend to the main application code that dynamic linking does
@@ -250,63 +266,82 @@ howToAccessLabel _ _ lbl = AccessDirectly -- actually, .label instead of label
-- from position independent code. It is also required from the main program
-- when dynamic libraries containing Haskell code are used.
-howToAccessLabel _ _ lbl
+howToAccessLabel _ ArchPPC_64 OSLinux kind _
+
+ -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
+ | DataReference <- kind
+ = AccessViaSymbolPtr
+
+ -- actually, .label instead of label
+ | otherwise
+ = AccessDirectly
+
+howToAccessLabel _ _ OSLinux _ _
-- no PIC -> the dynamic linker does everything for us;
-- if we don't dynamically link to Haskell code,
-- it actually manages to do so without messing things up.
- | not opt_PIC && opt_Static = AccessDirectly
-
-howToAccessLabel dflags DataReference lbl
- -- A dynamic label needs to be accessed via a symbol pointer.
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
-#if powerpc_TARGET_ARCH
+ | not opt_PIC && opt_Static
+ = AccessDirectly
+
+howToAccessLabel dflags arch OSLinux DataReference lbl
+ -- A dynamic label needs to be accessed via a symbol pointer.
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
-- For PowerPC32 -fPIC, we have to access even static data
-- via a symbol pointer (see below for an explanation why
-- PowerPC32 Linux is especially broken).
- | opt_PIC = AccessViaSymbolPtr
-#endif
- | otherwise = AccessDirectly
-
-
--- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
--- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
--- require the address of the GOT to be loaded into register %ebx on entry.
--- * The linker will take any reference to the symbol stub as a hint that
--- the label in question is a code label. When linking executables, this
--- will cause the linker to replace even data references to the label with
--- references to the symbol stub.
-
--- This leaves calling a (foreign) function from non-PIC code
--- (AccessDirectly, because we get an implicit symbol stub)
--- and calling functions from PIC code on non-i386 platforms (via a symbol stub)
-
-howToAccessLabel dflags CallReference lbl
- | labelDynamic (thisPackage dflags) lbl && not opt_PIC
- = AccessDirectly
-#if !i386_TARGET_ARCH
- | labelDynamic (thisPackage dflags) lbl && opt_PIC
- = AccessViaStub
-#endif
-
-howToAccessLabel dflags _ lbl
- | labelDynamic (thisPackage dflags) lbl = AccessViaSymbolPtr
- | otherwise = AccessDirectly
-#else
---
+ | arch == ArchPPC
+ , opt_PIC
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
+
+ -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
+ -- * on i386, the position-independent symbol stubs in the Procedure Linkage Table
+ -- require the address of the GOT to be loaded into register %ebx on entry.
+ -- * The linker will take any reference to the symbol stub as a hint that
+ -- the label in question is a code label. When linking executables, this
+ -- will cause the linker to replace even data references to the label with
+ -- references to the symbol stub.
+
+ -- This leaves calling a (foreign) function from non-PIC code
+ -- (AccessDirectly, because we get an implicit symbol stub)
+ -- and calling functions from PIC code on non-i386 platforms (via a symbol stub)
+
+howToAccessLabel dflags arch OSLinux CallReference lbl
+ | labelDynamic (thisPackage dflags) lbl && not opt_PIC
+ = AccessDirectly
+
+ | arch /= ArchX86
+ , labelDynamic (thisPackage dflags) lbl && opt_PIC
+ = AccessViaStub
+
+howToAccessLabel dflags _ OSLinux _ lbl
+ | labelDynamic (thisPackage dflags) lbl
+ = AccessViaSymbolPtr
+
+ | otherwise
+ = AccessDirectly
+
-- all other platforms
---
-howToAccessLabel _ _ _
- | not opt_PIC = AccessDirectly
- | otherwise = panic "howToAccessLabel: PIC not defined for this platform"
-#endif
+howToAccessLabel _ _ _ _ _
+ | not opt_PIC
+ = AccessDirectly
+
+ | otherwise
+ = panic "howToAccessLabel: PIC not defined for this platform"
+
+
-- -------------------------------------------------------------------
+-- | Says what we have to add to our 'PIC base register' in order to
+-- get the address of a label.
--- What do we have to add to our 'PIC base register' in order to
--- get the address of a label?
+picRelative :: Arch -> OS -> CLabel -> CmmLit
-picRelative :: CLabel -> CmmLit
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
-- Darwin, but not x86_64:
-- The PIC base register points to the PIC base label at the beginning
-- of the current CmmTop. We just have to use a label difference to
@@ -314,21 +349,21 @@ picRelative :: CLabel -> CmmLit
-- We have already made sure that all labels that are not from the current
-- module are accessed indirectly ('as' can't calculate differences between
-- undefined labels).
+picRelative arch OSDarwin lbl
+ | arch /= ArchX86_64
+ = CmmLabelDiffOff lbl mkPicBaseLabel 0
+
-picRelative lbl
- = CmmLabelDiffOff lbl mkPicBaseLabel 0
-
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
-- PowerPC Linux:
-- The PIC base register points to our fake GOT. Use a label difference
-- to get the offset.
-- We have made sure that *everything* is accessed indirectly, so this
-- is only used for offsets from the GOT to symbol pointers inside the
-- GOT.
-picRelative lbl
- = CmmLabelDiffOff lbl gotLabel 0
+picRelative ArchPPC OSLinux lbl
+ = CmmLabelDiffOff lbl gotLabel 0
+
-#elif linux_TARGET_OS || (darwin_TARGET_OS && x86_64_TARGET_ARCH)
-- Most Linux versions:
-- The PIC base register points to the GOT. Use foo@got for symbol
-- pointers, and foo@gotoff for everything else.
@@ -336,61 +371,116 @@ picRelative lbl
-- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
-- and a GotSymbolOffset label for other things.
-- For reasons of tradition, the symbol offset label is written as a plain label.
+picRelative arch os lbl
+ | os == OSLinux || (os == OSDarwin && arch == ArchX86_64)
+ = let result
+ | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
-picRelative lbl
- | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
- | otherwise
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+ | otherwise
+ = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
+
+ in result
+
+picRelative _ _ _
+ = panic "PositionIndependentCode.picRelative undefined for this platform"
-#else
-picRelative lbl = panic "PositionIndependentCode.picRelative"
-#endif
--- -------------------------------------------------------------------
--- What do we have to add to every assembly file we generate?
+--------------------------------------------------------------------------------
-- utility function for pretty-printing asm-labels,
-- copied from PprMach
-asmSDoc d = Outputable.withPprStyleDoc (
- Outputable.mkCodeStyle Outputable.AsmStyle) d
-pprCLabel_asm l = asmSDoc (pprCLabel l)
+--
+asmSDoc :: Outputable.SDoc -> Doc
+asmSDoc d
+ = Outputable.withPprStyleDoc
+ (Outputable.mkCodeStyle Outputable.AsmStyle) d
+
+pprCLabel_asm :: CLabel -> Doc
+pprCLabel_asm l
+ = asmSDoc (pprCLabel l)
+
+
+needImportedSymbols :: Arch -> OS -> Bool
+needImportedSymbols arch os
+ | os == OSDarwin
+ , arch /= ArchX86_64
+ = True
+
+ -- PowerPC Linux: -fPIC or -dynamic
+ | os == OSLinux
+ , arch == ArchPPC
+ = opt_PIC || not opt_Static
+
+ -- i386 (and others?): -dynamic but not -fPIC
+ | os == OSLinux
+ , arch /= ArchPPC_64
+ = not opt_Static && not opt_PIC
+
+ | otherwise
+ = False
+-- gotLabel
+-- The label used to refer to our "fake GOT" from
+-- position-independent code.
+gotLabel :: CLabel
+gotLabel
+ = mkForeignLabel -- HACK: it's not really foreign
+ (fsLit ".LCTOC1") Nothing False IsData
-#if darwin_TARGET_OS && !x86_64_TARGET_ARCH
-needImportedSymbols = True
+--------------------------------------------------------------------------------
-- We don't need to declare any offset tables.
-- However, for PIC on x86, we need a small helper function.
-#if i386_TARGET_ARCH
-pprGotDeclaration
- | opt_PIC
- = vcat [
- ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
- ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
- ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
- ptext (sLit "___i686.get_pc_thunk.ax:"),
- ptext (sLit "\tmovl (%esp), %eax"),
- ptext (sLit "\tret")
- ]
- | otherwise = Pretty.empty
-#else
-pprGotDeclaration = Pretty.empty
-#endif
+pprGotDeclaration :: Arch -> OS -> Doc
+pprGotDeclaration ArchX86 OSDarwin
+ | opt_PIC
+ = vcat [
+ ptext (sLit ".section __TEXT,__textcoal_nt,coalesced,no_toc"),
+ ptext (sLit ".weak_definition ___i686.get_pc_thunk.ax"),
+ ptext (sLit ".private_extern ___i686.get_pc_thunk.ax"),
+ ptext (sLit "___i686.get_pc_thunk.ax:"),
+ ptext (sLit "\tmovl (%esp), %eax"),
+ ptext (sLit "\tret") ]
+
+ | otherwise
+ = empty
+
+
+-- pprGotDeclaration
+-- Output whatever needs to be output once per .s file.
+-- The .LCTOC1 label is defined to point 32768 bytes into the table,
+-- to make the most of the PPC's 16-bit displacements.
+-- Only needed for PIC.
+pprGotDeclaration arch OSLinux
+ | arch /= ArchPPC_64
+ , not opt_PIC
+ = Pretty.empty
+
+ | arch /= ArchPPC_64
+ = vcat [
+ ptext (sLit ".section \".got2\",\"aw\""),
+ ptext (sLit ".LCTOC1 = .+32768") ]
+
+pprGotDeclaration _ _
+ = panic "pprGotDeclaration: no match"
+
+--------------------------------------------------------------------------------
-- On Darwin, we have to generate our own stub code for lazy binding..
-- For each processor architecture, there are two versions, one for PIC
-- and one for non-PIC.
--
-- Whenever you change something in this assembler output, make sure
-- the splitter in driver/split/ghc-split.lprl recognizes the new output
-pprImportedSymbol importedLbl
-#if powerpc_TARGET_ARCH
- | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case opt_PIC of
- False ->
+
+pprImportedSymbol :: Arch -> OS -> CLabel -> Doc
+pprImportedSymbol ArchPPC OSDarwin importedLbl
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
vcat [
ptext (sLit ".symbol_stub"),
ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -404,7 +494,7 @@ pprImportedSymbol importedLbl
<> ptext (sLit "$lazy_ptr)"),
ptext (sLit "\tbctr")
]
- True ->
+ True ->
vcat [
ptext (sLit ".section __TEXT,__picsymbolstub1,")
<> ptext (sLit "symbol_stubs,pure_instructions,32"),
@@ -424,16 +514,27 @@ pprImportedSymbol importedLbl
ptext (sLit "\tmtctr r12"),
ptext (sLit "\tbctr")
]
- $+$ vcat [
- ptext (sLit ".lazy_symbol_pointer"),
- ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long dyld_stub_binding_helper")
- ]
-#elif i386_TARGET_ARCH
- | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case opt_PIC of
- False ->
+ $+$ vcat [
+ ptext (sLit ".lazy_symbol_pointer"),
+ ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long dyld_stub_binding_helper")]
+
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext (sLit ".non_lazy_symbol_pointer"),
+ char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long\t0")]
+
+ | otherwise
+ = empty
+
+
+pprImportedSymbol ArchX86 OSDarwin importedLbl
+ | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = case opt_PIC of
+ False ->
vcat [
ptext (sLit ".symbol_stub"),
ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$stub:"),
@@ -446,7 +547,7 @@ pprImportedSymbol importedLbl
<> ptext (sLit "$lazy_ptr"),
ptext (sLit "\tjmp dyld_stub_binding_helper")
]
- True ->
+ True ->
vcat [
ptext (sLit ".section __TEXT,__picsymbolstub2,")
<> ptext (sLit "symbol_stubs,pure_instructions,25"),
@@ -464,27 +565,28 @@ pprImportedSymbol importedLbl
ptext (sLit "\tpushl %eax"),
ptext (sLit "\tjmp dyld_stub_binding_helper")
]
- $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr")
+ $+$ vcat [ ptext (sLit ".section __DATA, __la_sym_ptr")
<> (if opt_PIC then int 2 else int 3)
<> ptext (sLit ",lazy_symbol_pointers"),
- ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long L") <> pprCLabel_asm lbl
- <> ptext (sLit "$stub_binder")
- ]
-#endif
--- We also have to declare our symbol pointers ourselves:
- | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
- = vcat [
- ptext (sLit ".non_lazy_symbol_pointer"),
- char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
- ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
- ptext (sLit "\t.long\t0")
- ]
-
- | otherwise = empty
-
-#elif linux_TARGET_OS && !powerpc64_TARGET_ARCH
+ ptext (sLit "L") <> pprCLabel_asm lbl <> ptext (sLit "$lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long L") <> pprCLabel_asm lbl
+ <> ptext (sLit "$stub_binder")]
+
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = vcat [
+ ptext (sLit ".non_lazy_symbol_pointer"),
+ char 'L' <> pprCLabel_asm lbl <> ptext (sLit "$non_lazy_ptr:"),
+ ptext (sLit "\t.indirect_symbol") <+> pprCLabel_asm lbl,
+ ptext (sLit "\t.long\t0")]
+
+ | otherwise
+ = empty
+
+
+pprImportedSymbol _ OSDarwin _
+ = empty
+
-- ELF / Linux
--
@@ -514,64 +616,29 @@ pprImportedSymbol importedLbl
-- When needImportedSymbols is defined,
-- the NCG will keep track of all DynamicLinkerLabels it uses
-- and output each of them using pprImportedSymbol.
-#if powerpc_TARGET_ARCH
- -- PowerPC Linux: -fPIC or -dynamic
-needImportedSymbols = opt_PIC || not opt_Static
-#else
- -- i386 (and others?): -dynamic but not -fPIC
-needImportedSymbols = not opt_Static && not opt_PIC
-#endif
--- gotLabel
--- The label used to refer to our "fake GOT" from
--- position-independent code.
-gotLabel = mkForeignLabel -- HACK: it's not really foreign
- (fsLit ".LCTOC1") Nothing False IsData
+pprImportedSymbol ArchPPC_64 OSLinux _
+ = empty
--- pprGotDeclaration
--- Output whatever needs to be output once per .s file.
--- The .LCTOC1 label is defined to point 32768 bytes into the table,
--- to make the most of the PPC's 16-bit displacements.
--- Only needed for PIC.
-
-pprGotDeclaration
- | not opt_PIC = Pretty.empty
- | otherwise = vcat [
- ptext (sLit ".section \".got2\",\"aw\""),
- ptext (sLit ".LCTOC1 = .+32768")
- ]
-
--- We generate one .long/.quad literal for every symbol we import;
--- the dynamic linker will relocate those addresses.
-
-pprImportedSymbol importedLbl
- | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
- = vcat [
- ptext (sLit ".section \".got2\", \"aw\""),
- ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
- ptext symbolSize <+> pprCLabel_asm lbl
- ]
-
--- PLT code stubs are generated automatically by the dynamic linker.
- | otherwise = empty
- where
- symbolSize = case wordWidth of
+pprImportedSymbol _ OSLinux importedLbl
+ | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
+ = let symbolSize = case wordWidth of
W32 -> sLit "\t.long"
W64 -> sLit "\t.quad"
_ -> panic "Unknown wordRep in pprImportedSymbol"
-#else
-
--- For all other currently supported platforms, we don't need to do
--- anything at all.
+ in vcat [
+ ptext (sLit ".section \".got2\", \"aw\""),
+ ptext (sLit ".LC_") <> pprCLabel_asm lbl <> char ':',
+ ptext symbolSize <+> pprCLabel_asm lbl ]
-needImportedSymbols = False
-pprGotDeclaration = Pretty.empty
-pprImportedSymbol _ = empty
-#endif
+ -- PLT code stubs are generated automatically by the dynamic linker.
+ | otherwise = empty
--- -------------------------------------------------------------------
+pprImportedSymbol _ _ _
+ = panic "PIC.pprImportedSymbol: no match"
+--------------------------------------------------------------------------------
-- Generate code to calculate the address that should be put in the
-- PIC base register.
-- This is called by MachCodeGen for every CmmProc that accessed the
@@ -581,10 +648,6 @@ pprImportedSymbol _ = empty
-- It is assumed that the first NatCmmTop in the input list is a Proc
-- and the rest are CmmDatas.
-initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
-
-#if darwin_TARGET_OS
-
-- Darwin is simple: just fetch the address of a local label.
-- The FETCHPC pseudo-instruction is expanded to multiple instructions
-- during pretty-printing so that we don't have to deal with the
@@ -598,12 +661,7 @@ initializePicBase :: Reg -> [NatCmmTop] -> NatM [NatCmmTop]
-- call 1f
-- 1: popl %picReg
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
- = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
- where BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHPC picReg : insns)
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
-- Get a pointer to our own fake GOT, which is defined on a per-module basis.
-- This is exactly how GCC does it, and it's quite horrible:
@@ -612,7 +670,13 @@ initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
-- define in .text space right next to the proc. This .long literal contains
-- the (32-bit) offset from our local label to our global offset table
-- (.LCTOC1 aka gotOffLabel).
-initializePicBase picReg
+
+initializePicBase_ppc
+ :: Arch -> OS -> Reg
+ -> [NatCmmTop PPC.Instr]
+ -> NatM [NatCmmTop PPC.Instr]
+
+initializePicBase_ppc ArchPPC OSLinux picReg
(CmmProc info lab params (ListGraph blocks) : statics)
= do
gotOffLabel <- getNewLabelNat
@@ -624,16 +688,33 @@ initializePicBase picReg
mkPicBaseLabel
0)
]
- offsetToOffset = ImmConstantDiff (ImmCLbl gotOffLabel)
- (ImmCLbl mkPicBaseLabel)
- BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHPC picReg
- : LD wordSize tmp
- (AddrRegImm picReg offsetToOffset)
- : ADD picReg picReg (RIReg tmp)
+ offsetToOffset
+ = PPC.ImmConstantDiff
+ (PPC.ImmCLbl gotOffLabel)
+ (PPC.ImmCLbl mkPicBaseLabel)
+
+ BasicBlock bID insns
+ = head blocks
+
+ b' = BasicBlock bID (PPC.FETCHPC picReg
+ : PPC.LD PPC.archWordSize tmp
+ (PPC.AddrRegImm picReg offsetToOffset)
+ : PPC.ADD picReg picReg (PPC.RIReg tmp)
: insns)
+
return (CmmProc info lab params (ListGraph (b' : tail blocks)) : gotOffset : statics)
-#elif i386_TARGET_ARCH && linux_TARGET_OS
+
+initializePicBase_ppc ArchPPC OSDarwin picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
+ = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (PPC.FETCHPC picReg : insns)
+
+
+initializePicBase_ppc _ _ _ _
+ = panic "initializePicBase_ppc: not needed"
+
-- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
-- which pretty-prints as:
@@ -642,13 +723,24 @@ initializePicBase picReg
-- addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg
-- (See PprMach.lhs)
-initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics)
+initializePicBase_x86
+ :: Arch -> OS -> Reg
+ -> [NatCmmTop X86.Instr]
+ -> NatM [NatCmmTop X86.Instr]
+
+initializePicBase_x86 ArchX86 OSLinux picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
= return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
where BasicBlock bID insns = head blocks
- b' = BasicBlock bID (FETCHGOT picReg : insns)
+ b' = BasicBlock bID (X86.FETCHGOT picReg : insns)
+
+initializePicBase_x86 ArchX86 OSDarwin picReg
+ (CmmProc info lab params (ListGraph blocks) : statics)
+ = return (CmmProc info lab params (ListGraph (b':tail blocks)) : statics)
+
+ where BasicBlock bID insns = head blocks
+ b' = BasicBlock bID (X86.FETCHPC picReg : insns)
-#else
-initializePicBase picReg proc = panic "initializePicBase"
+initializePicBase_x86 _ _ _ _
+ = panic "initializePicBase_x86: not needed"
--- mingw32_TARGET_OS: not needed, won't be called
-#endif
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
new file mode 100644
index 0000000000..6661a3ec92
--- /dev/null
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -0,0 +1,1364 @@
+{-# OPTIONS -w #-}
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module PPC.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import PPC.Instr
+import PPC.Cond
+import PPC.Regs
+import PPC.RegInfo
+import NCGMonad
+import Instruction
+import PIC
+import Size
+import RegClass
+import Reg
+import Platform
+
+-- Our intermediate code:
+import BlockId
+import PprCmm ( pprExpr )
+import Cmm
+import CLabel
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import DynFlags
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Bits
+import Data.Int
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Top-level of the instruction selector
+
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal (pre-order?) yields the insns in the correct
+-- order.
+
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dflags (CmmProc info lab params (ListGraph blocks)) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+ os = platformOS $ targetPlatform dflags
+ case picBaseMb of
+ Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen dflags (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+ CmmReturn params ->
+ panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
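-- Editorial sketch (not part of the patch): both `snocOL` and `appOL`
-- are O(1), so long instruction sequences are appended cheaply and only
-- flattened to a plain list at the end. `demoBlock` is a hypothetical
-- name used for illustration.
demoBlock :: InstrBlock
demoBlock
    = unitOL (COMMENT (fsLit "prologue"))
        `appOL` toOL [ COMMENT (fsLit "body") ]
        `snocOL` COMMENT (fsLit "epilogue")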
+
+
+-- | Registers passed up the tree. If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
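-- Editorial examples (not part of the patch): both 32-bit encodings of
-- -1 are accepted, while 2^31 is correctly rejected.
is32BitExamples :: Bool
is32BitExamples
    =      is32BitInteger (-1)                  -- True
        && is32BitInteger 18446744073709551615  -- same bits as -1: True
        && not (is32BitInteger 0x80000000)      -- 2^31: False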
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+ = panic "PPC.CodeGen.mangleIndexTree: no match"
+
+-- -----------------------------------------------------------------------------
+-- Code gen for 64-bit arithmetic on 32-bit platforms
+
+{-
+Simple support for generating 64-bit code (ie, 64 bit values and 64
+bit assignments) on 32-bit platforms. Unlike the main code generator
+we merely shoot for generating working code as simply as possible, and
+pay little attention to code quality. Specifically, there is no
+attempt to deal cleverly with the fixed-vs-floating register
+distinction; all values are generated into (pairs of) floating
+registers, even if this would mean some redundant reg-reg moves as a
+result. Only one of the VRegUniques is returned, since it will be
+of the VRegUniqueLo form, and the upper-half VReg can be determined
+by applying getHiVRegFromLo to it.
+-}
+
+data ChildCode64 -- a.k.a "Register64"
+ = ChildCode64
+ InstrBlock -- code
+ Reg -- the lower 32-bit temporary which contains the
+ -- result; use getHiVRegFromLo to find the other
+ -- VRegUnique. Rules of this simplified insn
+ -- selection game are therefore that the returned
+ -- Reg may be modified
+
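-- Editorial sketch (hypothetical helper, not part of the patch): a
-- consumer recovers both halves from the single low register that
-- ChildCode64 carries.
both32 :: CmmExpr -> NatM (Reg, Reg, InstrBlock)
both32 expr = do
    ChildCode64 code rlo <- iselExpr64 expr
    return (rlo, getHiVRegFromLo rlo, code)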
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
+getI64Amodes addrTree = do
+ Amode hi_addr addr_code <- getAmode addrTree
+ case addrOffset hi_addr 4 of
+ Just lo_addr -> return (hi_addr, lo_addr, addr_code)
+ Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
+ return (AddrRegImm hi_ptr (ImmInt 0),
+ AddrRegImm hi_ptr (ImmInt 4),
+ code)
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Big-endian store
+ mov_hi = ST II32 rhi hi_addr
+ mov_lo = ST II32 rlo lo_addr
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst II32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MR r_dst_lo r_src_lo
+ mov_hi = MR r_dst_hi r_src_hi
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(powerpc): invalid lvalue"
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+ (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = LD II32 rhi hi_addr
+ mov_lo = LD II32 rlo lo_addr
+ return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+ = return (ChildCode64 nilOL (mkVReg vu II32))
+
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ half0 = fromIntegral (fromIntegral i :: Word16)
+        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
+        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
+        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
+
+ code = toOL [
+ LIS rlo (ImmInt half1),
+ OR rlo rlo (RIImm $ ImmInt half0),
+ LIS rhi (ImmInt half3),
+                OR rhi rhi (RIImm $ ImmInt half2)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ ADDC rlo r1lo r2lo,
+ ADDE rhi r1hi r2hi ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
+ (expr_reg,expr_code) <- getSomeReg expr
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = LI rhi (ImmInt 0)
+ mov_lo = MR rlo expr_reg
+ return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+iselExpr64 expr
+ = pprPanic "iselExpr64(powerpc)" (ppr expr)
+
+
+
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+
+getRegister (CmmLoad mem pk)
+ | not (isWord64 pk)
+ = do
+ Amode addr addr_code <- getAmode mem
+ let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk)
+ addr_code `snocOL` LD size dst addr
+ return (Any size code)
+ where size = cmmTypeSize pk
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
+
+-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
+ Amode addr addr_code <- getAmode mem
+ return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_Not rep -> triv_ucode_int rep NOT
+
+ MO_F_Neg w -> triv_ucode_float w FNEG
+ MO_S_Neg w -> triv_ucode_int w NEG
+
+ MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
+ MO_FF_Conv W32 W64 -> conversionNop FF64 x
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ MO_SS_Conv from to
+ | from == to -> conversionNop (intSize to) x
+
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_SS_Conv W32 to -> conversionNop (intSize to) x
+ MO_SS_Conv W16 W8 -> conversionNop II8 x
+ MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
+ MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
+
+ MO_UU_Conv from to
+ | from == to -> conversionNop (intSize to) x
+ -- narrowing is a nop: we treat the high bits as undefined
+ MO_UU_Conv W32 to -> conversionNop (intSize to) x
+ MO_UU_Conv W16 W8 -> conversionNop II8 x
+ MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
+ MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
+ _ -> panic "PPC.CodeGen.getRegister: no match"
+
+ where
+ triv_ucode_int width instr = trivialUCode (intSize width) instr x
+ triv_ucode_float width instr = trivialUCode (floatSize width) instr x
+
+ conversionNop new_size expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_size)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_F_Add w -> triv_float w FADD
+ MO_F_Sub w -> triv_float w FSUB
+ MO_F_Mul w -> triv_float w FMUL
+ MO_F_Quot w -> triv_float w FDIV
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+ MO_Add W32 ->
+ case y of
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+ -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
+ CmmLit lit
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code dst = srcCode `appOL` toOL [
+ ADDIS dst src (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ return (Any II32 code)
+ _ -> trivialCode W32 True ADD x y
+
+ MO_Add rep -> trivialCode rep True ADD x y
+ MO_Sub rep ->
+ case y of -- subfi ('subtract from' with immediate) doesn't exist
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
+ -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
+ _ -> trivialCodeNoImm' (intSize rep) SUBF y x
+
+ MO_Mul rep -> trivialCode rep True MULLW x y
+
+ MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
+
+ MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
+ MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"
+
+ MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
+ MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_And rep -> trivialCode rep False AND x y
+ MO_Or rep -> trivialCode rep False OR x y
+ MO_Xor rep -> trivialCode rep False XOR x y
+
+ MO_Shl rep -> trivialCode rep False SLW x y
+ MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
+ MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
+ _ -> panic "PPC.CodeGen.getRegister: no match"
+
+ where
+ triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
+ triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
+
+getRegister (CmmLit (CmmInt i rep))
+ | Just imm <- makeImmediate rep True i
+ = let
+ code dst = unitOL (LI dst imm)
+ in
+ return (Any (intSize rep) code)
+
+getRegister (CmmLit (CmmFloat f frep)) = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let size = floatSize frep
+ code dst =
+ LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f frep)]
+ `consOL` (addr_code `snocOL` LD size dst addr)
+ return (Any size code)
+
+getRegister (CmmLit lit)
+ = let rep = cmmLitType lit
+ imm = litToImm lit
+ code dst = toOL [
+ LIS dst (HA imm),
+ ADD dst dst (RIImm (LO imm))
+ ]
+ in return (Any (cmmTypeSize rep) code)
+
+getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
+
+-- | extendSExpr, extendUExpr: wrap an integer expression of width rep
+--   in a sign- or zero-extending conversion to W32.
+extendSExpr, extendUExpr :: Width -> CmmExpr -> CmmExpr
+extendSExpr W32 x = x
+extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
+extendUExpr W32 x = x
+extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
+
+-- -----------------------------------------------------------------------------
+-- The 'Amode' type: Memory addressing modes passed up the tree.
+
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
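-- Editorial sketch of the rule above (hypothetical helper, not part of
-- the patch): freeze an amode's effective address into a fresh register
-- so that unrelated code can safely run before the final use.
forceAmodeToReg :: AddrMode -> NatM (AddrMode, InstrBlock)
forceAmodeToReg addr = do
    tmp <- getNewRegNat II32
    let code = case addr of
                AddrRegReg r1 r2  -> unitOL (ADD tmp r1 (RIReg r2))
                AddrRegImm r1 imm -> unitOL (ADD tmp r1 (RIImm imm))
    return (AddrRegImm tmp (ImmInt 0), code)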
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate W32 True (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
+ | Just off <- makeImmediate W32 True i
+ = do
+ (reg, code) <- getSomeReg x
+ return (Amode (AddrRegImm reg off) code)
+
+ -- optimize addition with 32-bit immediate
+ -- (needed for PIC)
+getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
+ = do
+ tmp <- getNewRegNat II32
+ (src, srcCode) <- getSomeReg x
+ let imm = litToImm lit
+ code = srcCode `snocOL` ADDIS tmp src (HA imm)
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmLit lit)
+ = do
+ tmp <- getNewRegNat II32
+ let imm = litToImm lit
+ code = unitOL (LIS tmp (HA imm))
+ return (Amode (AddrRegImm tmp (LO imm)) code)
+
+getAmode (CmmMachOp (MO_Add W32) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+
+
+-- The 'CondCode' type: Condition codes passed up the tree.
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- almost the same as everywhere else - but we need to
+-- extend small integers to 32 bit first
+
+getCondCode (CmmMachOp mop [x, y])
+ = case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
+ MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
+
+ MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
+ MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
+
+ MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
+ MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
+
+ other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
+
+getCondCode other = panic "getCondCode(2)(powerpc)"
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- ###FIXME: I16 and I8!
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ code' = code `snocOL`
+ (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL`
+ (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
+ return (CondCode False cond code')
+
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
+ code'' = case cond of -- twiddle CR to handle unordered case
+ GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
+ LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
+ _ -> code'
+ where
+ ltbit = 0 ; eqbit = 2 ; gtbit = 1
+ return (CondCode True cond code'')
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode _ reg src
+ = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` MR dst freg
+ where
+ dst = getRegisterReg reg
+
+
+
+-- Easy, isn't it?
+assignMem_FltCode = assignMem_IntCode
+assignReg_FltCode = assignReg_IntCode
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (unitOL $ JMP lbl)
+
+genJump tree
+ = do
+ (target,code) <- getSomeReg tree
+ return (code `snocOL` MTCTR target `snocOL` BCTR [])
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` BCC cond id)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+
+#if darwin_TARGET_OS || linux_TARGET_OS
+{-
+ The PowerPC calling convention for Darwin/Mac OS X
+ is described in Apple's document
+ "Inside Mac OS X - Mach-O Runtime Architecture".
+
+ PowerPC Linux uses the System V Release 4 Calling Convention
+ for PowerPC. It is described in the
+ "System V Application Binary Interface PowerPC Processor Supplement".
+
+ Both conventions are similar:
+ Parameters may be passed in general-purpose registers starting at r3, in
+ floating point registers starting at f1, or on the stack.
+
+ But there are substantial differences:
+ * The number of registers used for parameter passing and the exact set of
+ nonvolatile registers differs (see MachRegs.lhs).
+ * On Darwin, stack space is always reserved for parameters, even if they are
+ passed in registers. The called routine may choose to save parameters from
+ registers to the corresponding space on the stack.
+ * On Darwin, a corresponding amount of GPRs is skipped when a floating point
+ parameter is passed in an FPR.
+ * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
+ starting with an odd-numbered GPR. It may skip a GPR to achieve this.
+ Darwin just treats an I64 like two separate II32s (high word first).
+ * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
+ 4-byte aligned like everything else on Darwin.
+ * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
+ PowerPC Linux does not agree, so neither do we.
+
+ According to both conventions, the parameter area should be part of the
+ caller's stack frame, allocated in the caller's prologue code (large enough
+ to hold the parameter lists for all called routines). The NCG already
+ uses the stack for register spilling, leaving 64 bytes free at the top.
+ If we need a larger parameter area than that, we just allocate a new stack
+ frame just before ccalling.
+-}
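{-
Editorial worked example (our reading of the rules above; the register
assignments are illustrative, not part of the patch). For a call
    f(I32 a, FF64 d, I64 x)
Darwin passes  a in r3; d in f1, skipping r4 and r5; x in r6/r7
(high word first), and still reserves stack space for all three.
SysV passes    a in r3; d in f1 with no GPRs skipped; x must start at
an odd-numbered GPR, so r4 is skipped and x goes in r5/r6.
-}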
+
+
+genCCall (CmmPrim MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall target dest_regs argsAndHints
+ = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
+ -- we rely on argument promotion in the codeGen
+ do
+ (finalStack,passArgumentsCode,usedRegs) <- passArguments
+ (zip args argReps)
+ allArgRegs allFPArgRegs
+ initialStackOffset
+ (toOL []) []
+
+ (labelOrExpr, reduceToFF32) <- case target of
+ CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
+ CmmCallee expr conv -> return (Right expr, False)
+ CmmPrim mop -> outOfLineFloatOp mop
+
+ let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
+ codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
+
+ case labelOrExpr of
+ Left lbl -> do
+ return ( codeBefore
+ `snocOL` BL lbl usedRegs
+ `appOL` codeAfter)
+ Right dyn -> do
+ (dynReg, dynCode) <- getSomeReg dyn
+ return ( dynCode
+ `snocOL` MTCTR dynReg
+ `appOL` codeBefore
+ `snocOL` BCTRL usedRegs
+ `appOL` codeAfter)
+ where
+#if darwin_TARGET_OS
+ initialStackOffset = 24
+ -- size of linkage area + size of arguments, in bytes
+ stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
+ map (widthInBytes . typeWidth) argReps
+#elif linux_TARGET_OS
+ initialStackOffset = 8
+ stackDelta finalStack = roundTo 16 finalStack
+#endif
+ args = map hintlessCmm argsAndHints
+ argReps = map cmmExprType args
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
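        -- Editorial examples: roundTo 16 24 == 32, roundTo 16 32 == 32.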
+
+ move_sp_down finalStack
+ | delta > 64 =
+ toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
+ DELTA (-delta)]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+ move_sp_up finalStack
+ | delta > 64 =
+ toOL [ADD sp sp (RIImm (ImmInt delta)),
+ DELTA 0]
+ | otherwise = nilOL
+ where delta = stackDelta finalStack
+
+
+ passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
+ passArguments ((arg,arg_ty):args) gprs fprs stackOffset
+ accumCode accumUsed | isWord64 arg_ty =
+ do
+ ChildCode64 code vr_lo <- iselExpr64 arg
+ let vr_hi = getHiVRegFromLo vr_lo
+
+#if darwin_TARGET_OS
+ passArguments args
+ (drop 2 gprs)
+ fprs
+ (stackOffset+8)
+ (accumCode `appOL` code
+ `snocOL` storeWord vr_hi gprs stackOffset
+ `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
+ ((take 2 gprs) ++ accumUsed)
+ where
+ storeWord vr (gpr:_) offset = MR gpr vr
+ storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
+
+#elif linux_TARGET_OS
+ let stackOffset' = roundTo 8 stackOffset
+ stackCode = accumCode `appOL` code
+ `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
+ `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
+ regCode hireg loreg =
+ accumCode `appOL` code
+ `snocOL` MR hireg vr_hi
+ `snocOL` MR loreg vr_lo
+
+ case gprs of
+ hireg : loreg : regs | even (length gprs) ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _skipped : hireg : loreg : regs ->
+ passArguments args regs fprs stackOffset
+ (regCode hireg loreg) (hireg : loreg : accumUsed)
+ _ -> -- only one or no regs left
+ passArguments args [] fprs (stackOffset'+8)
+ stackCode accumUsed
+#endif
+
+ passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
+ | reg : _ <- regs = do
+ register <- getRegister arg
+ let code = case register of
+ Fixed _ freg fcode -> fcode `snocOL` MR reg freg
+ Any _ acode -> acode reg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we reserve stack slots for register parameters
+ (stackOffset + stackBytes)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ stackOffset
+#endif
+ (accumCode `appOL` code)
+ (reg : accumUsed)
+ | otherwise = do
+ (vr, code) <- getSomeReg arg
+ passArguments args
+ (drop nGprs gprs)
+ (drop nFprs fprs)
+ (stackOffset' + stackBytes)
+ (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
+ accumUsed
+ where
+#if darwin_TARGET_OS
+ -- stackOffset is at least 4-byte aligned
+ -- The Darwin ABI is happy with that.
+ stackOffset' = stackOffset
+#else
+ -- ... the SysV ABI requires 8-byte alignment for doubles.
+ stackOffset' | isFloatType rep && typeWidth rep == W64 =
+ roundTo 8 stackOffset
+ | otherwise = stackOffset
+#endif
+ stackSlot = AddrRegImm sp (ImmInt stackOffset')
+ (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
+ II32 -> (1, 0, 4, gprs)
+#if darwin_TARGET_OS
+ -- The Darwin ABI requires that we skip a corresponding number of GPRs when
+ -- we use the FPRs.
+ FF32 -> (1, 1, 4, fprs)
+ FF64 -> (2, 1, 8, fprs)
+#elif linux_TARGET_OS
+ -- ... the SysV ABI doesn't.
+ FF32 -> (0, 1, 4, fprs)
+ FF64 -> (0, 1, 8, fprs)
+#endif
+
+ moveResult reduceToFF32 =
+ case dest_regs of
+ [] -> nilOL
+ [CmmHinted dest _hint]
+ | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
+ | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
+ | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
+ MR r_dest r4]
+ | otherwise -> unitOL (MR r_dest r3)
+ where rep = cmmRegType (CmmLocal dest)
+ r_dest = getRegisterReg (CmmLocal dest)
+
+ outOfLineFloatOp mop =
+ do
+ dflags <- getDynFlagsNat
+ mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
+ mkForeignLabel functionName Nothing True
+ let mopLabelOrExpr = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+ return (mopLabelOrExpr, reduce)
+ where
+ (functionName, reduce) = case mop of
+ MO_F32_Exp -> (fsLit "exp", True)
+ MO_F32_Log -> (fsLit "log", True)
+ MO_F32_Sqrt -> (fsLit "sqrt", True)
+
+ MO_F32_Sin -> (fsLit "sin", True)
+ MO_F32_Cos -> (fsLit "cos", True)
+ MO_F32_Tan -> (fsLit "tan", True)
+
+ MO_F32_Asin -> (fsLit "asin", True)
+ MO_F32_Acos -> (fsLit "acos", True)
+ MO_F32_Atan -> (fsLit "atan", True)
+
+ MO_F32_Sinh -> (fsLit "sinh", True)
+ MO_F32_Cosh -> (fsLit "cosh", True)
+ MO_F32_Tanh -> (fsLit "tanh", True)
+ MO_F32_Pwr -> (fsLit "pow", True)
+
+ MO_F64_Exp -> (fsLit "exp", False)
+ MO_F64_Log -> (fsLit "log", False)
+ MO_F64_Sqrt -> (fsLit "sqrt", False)
+
+ MO_F64_Sin -> (fsLit "sin", False)
+ MO_F64_Cos -> (fsLit "cos", False)
+ MO_F64_Tan -> (fsLit "tan", False)
+
+ MO_F64_Asin -> (fsLit "asin", False)
+ MO_F64_Acos -> (fsLit "acos", False)
+ MO_F64_Atan -> (fsLit "atan", False)
+
+ MO_F64_Sinh -> (fsLit "sinh", False)
+ MO_F64_Cosh -> (fsLit "cosh", False)
+ MO_F64_Tanh -> (fsLit "tanh", False)
+ MO_F64_Pwr -> (fsLit "pow", False)
+ other -> pprPanic "genCCall(ppc): unknown callish op"
+ (pprCallishMachOp other)
+
+#else /* darwin_TARGET_OS || linux_TARGET_OS */
+genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
+#endif
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ (tableReg,t_code) <- getSomeReg $ dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordWidth)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ LD II32 tmp (AddrRegReg tableReg tmp),
+ ADD tmp tmp (RIReg tableReg),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ SLW tmp reg (RIImm (ImmInt 2)),
+ ADDIS tmp tmp (HA (ImmCLbl lbl)),
+ LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
+ MTCTR tmp,
+ BCTR [ id | Just id <- ids ]
+ ]
+ return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condReg :: NatM CondCode -> NatM Register
+condReg getCond = do
+ CondCode _ cond cond_code <- getCond
+ let
+{- code dst = cond_code `appOL` toOL [
+ BCC cond lbl1,
+ LI dst (ImmInt 0),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl1,
+ LI dst (ImmInt 1),
+ BCC ALWAYS lbl2,
+ NEWBLOCK lbl2
+ ]-}
+ code dst = cond_code
+ `appOL` negate_code
+ `appOL` toOL [
+ MFCR dst,
+ RLWINM dst dst (bit + 1) 31 31
+ ]
+
+ negate_code | do_negate = unitOL (CRNOR bit bit bit)
+ | otherwise = nilOL
+
+ (bit, do_negate) = case cond of
+ LTT -> (0, False)
+ LE -> (1, True)
+ EQQ -> (2, False)
+ GE -> (0, True)
+ GTT -> (1, False)
+
+ NE -> (2, True)
+
+ LU -> (0, False)
+ LEU -> (1, True)
+ GEU -> (0, True)
+ GU -> (1, False)
+                    _       -> panic "PPC.CodeGen.condReg: no match"
+
+ return (Any II32 code)
+
+condIntReg cond x y = condReg (condIntCode cond x y)
+condFltReg cond x y = condReg (condFltCode cond x y)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+
+{-
+Wolfgang's PowerPC version of The Rules:
+
+A slightly modified version of The Rules to take advantage of the fact
+that PowerPC instructions work on all registers and don't implicitly
+clobber any fixed registers.
+
+* The only expression for which getRegister returns Fixed is (CmmReg reg).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ It may *not* modify global registers, unless the global
+ register happens to be the destination register.
+ It may not clobber any other registers. In fact, only ccalls clobber any
+ fixed registers.
+ Also, it may not modify the counter register (used by genCCall).
+
+ Corollary: If a getRegister for a subexpression returns Fixed, you need
+ not move it to a fresh temporary before evaluating the next subexpression.
+ The Fixed register won't be modified.
+ Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
+
+* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
+ the value of the destination register.
+-}
+
+trivialCode
+ :: Width
+ -> Bool
+ -> (Reg -> Reg -> RI -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+trivialCode rep signed instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate rep signed y
+ = do
+ (src1, code1) <- getSomeReg x
+ let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
+ return (Any (intSize rep) code)
+
+trivialCode rep _ instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
+ return (Any (intSize rep) code)
+
+trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm' size instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
+ return (Any size code)
+
+trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
+
+
+trivialUCode
+ :: Size
+ -> (Reg -> Reg -> Instr)
+ -> CmmExpr
+ -> NatM Register
+trivialUCode rep instr x = do
+ (src, code) <- getSomeReg x
+ let code' dst = code `snocOL` instr dst src
+ return (Any rep code')
+
+-- There is no "remainder" instruction on the PPC, so we have to do
+-- it the hard way.
+-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+
+remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep div x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let code dst = code1 `appOL` code2 `appOL` toOL [
+ div dst src1 src2,
+ MULLW dst dst (RIReg src2),
+ SUBF dst dst src1
+ ]
+ return (Any (intSize rep) code)
+
+
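-- Editorial note on the bit trick used below (our reading, not part of
-- the patch): the doubleword 0x43300000_80000000 is the IEEE-754 double
-- 2^52 + 2^31. Overwriting its low word with (src XOR 0x80000000), i.e.
-- src + 2^31 viewed as an unsigned 32-bit value, produces the double
-- 2^52 + 2^31 + src; subtracting the original constant therefore leaves
-- exactly (double) src for any signed 32-bit src.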
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP fromRep toRep x = do
+ (src, code) <- getSomeReg x
+ lbl <- getNewLabelNat
+ itmp <- getNewRegNat II32
+ ftmp <- getNewRegNat FF64
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let
+ code' dst = code `appOL` maybe_exts `appOL` toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x43300000 W32),
+ CmmStaticLit (CmmInt 0x80000000 W32)],
+ XORIS itmp src (ImmInt 0x8000),
+ ST II32 itmp (spRel 3),
+ LIS itmp (ImmInt 0x4330),
+ ST II32 itmp (spRel 2),
+ LD FF64 ftmp (spRel 2)
+ ] `appOL` addr_code `appOL` toOL [
+ LD FF64 dst addr,
+ FSUB FF64 dst ftmp dst
+ ] `appOL` maybe_frsp dst
+
+ maybe_exts = case fromRep of
+ W8 -> unitOL $ EXTS II8 src src
+ W16 -> unitOL $ EXTS II16 src src
+ W32 -> nilOL
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+ maybe_frsp dst
+ = case toRep of
+ W32 -> unitOL $ FRSP dst dst
+ W64 -> nilOL
+ _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
+
+ return (Any (floatSize toRep) code')
+
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int _ toRep x = do
+ -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat FF64
+ let
+ code' dst = code `appOL` toOL [
+ -- convert to int in FP reg
+ FCTIWZ tmp src,
+ -- store value (64bit) from FP to stack
+ ST FF64 tmp (spRel 2),
+ -- read low word of value (high word is undefined)
+ LD II32 dst (spRel 3)]
+ return (Any (intSize toRep) code')
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs
new file mode 100644
index 0000000000..7345ee5f1d
--- /dev/null
+++ b/compiler/nativeGen/PPC/Cond.hs
@@ -0,0 +1,62 @@
+
+module PPC.Cond (
+ Cond(..),
+ condNegate,
+ condUnsigned,
+ condToSigned,
+ condToUnsigned,
+)
+
+where
+
+import Panic
+
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ deriving Eq
+
+
+condNegate :: Cond -> Cond
+condNegate ALWAYS = panic "condNegate: ALWAYS"
+condNegate EQQ = NE
+condNegate GE = LTT
+condNegate GEU = LU
+condNegate GTT = LE
+condNegate GU = LEU
+condNegate LE = GTT
+condNegate LEU = GU
+condNegate LTT = GE
+condNegate LU = GEU
+condNegate NE = EQQ
+
+-- Condition utils
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
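-- An editorial sanity check (hypothetical, not part of the patch):
-- condNegate is an involution on every condition except ALWAYS.
condNegateInvolutive :: Bool
condNegateInvolutive
    = and [ condNegate (condNegate c) == c
          | c <- [EQQ, GE, GEU, GTT, GU, LE, LEU, LTT, LU, NE] ]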
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 85aa494ba4..55affc6e1e 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -10,49 +10,50 @@
#include "nativeGen/NCG.h"
module PPC.Instr (
- Cond(..),
- condNegate,
+ archWordSize,
RI(..),
- Instr(..)
+ Instr(..),
+ maxSpillSlots
)
where
-import BlockId
import PPC.Regs
-import RegsBase
+import PPC.Cond
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import Constants (rESERVED_C_STACK_BYTES)
+import BlockId
import Cmm
-import Outputable
import FastString
import CLabel
+import Outputable
+import FastBool
+
+--------------------------------------------------------------------------------
+-- Size of a PPC memory address.
+--
+archWordSize :: Size
+archWordSize = II32
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- deriving Eq
-
-
-condNegate :: Cond -> Cond
-condNegate ALWAYS = panic "condNegate: ALWAYS"
-condNegate EQQ = NE
-condNegate GE = LTT
-condNegate GEU = LU
-condNegate GTT = LE
-condNegate GU = LEU
-condNegate LE = GTT
-condNegate LEU = GU
-condNegate LTT = GE
-condNegate LU = GEU
-condNegate NE = EQQ
+
+-- | Instruction instance for powerpc
+instance Instruction Instr where
+ regUsageOfInstr = ppc_regUsageOfInstr
+ patchRegsOfInstr = ppc_patchRegsOfInstr
+ isJumpishInstr = ppc_isJumpishInstr
+ jumpDestsOfInstr = ppc_jumpDestsOfInstr
+ patchJumpInstr = ppc_patchJumpInstr
+ mkSpillInstr = ppc_mkSpillInstr
+ mkLoadInstr = ppc_mkLoadInstr
+ takeDeltaInstr = ppc_takeDeltaInstr
+ isMetaInstr = ppc_isMetaInstr
+ mkRegRegMoveInstr = ppc_mkRegRegMoveInstr
+ takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
+ mkJumpInstr = ppc_mkJumpInstr
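-- Editorial sketch (hypothetical helper, not part of the patch; assumes
-- the class methods share the signatures of their ppc_* implementations):
-- register-allocator code can now be written once against the class.
spillThenReload :: Instruction instr => Reg -> Int -> Int -> [instr]
spillThenReload reg delta slot
    = [ mkSpillInstr reg delta slot, mkLoadInstr reg delta slot ]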
-- -----------------------------------------------------------------------------
@@ -85,12 +86,6 @@ data Instr
-- benefit of subsequent passes
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-- Loads and stores.
| LD Size Reg AddrMode -- Load size, dst, src
| LA Size Reg AddrMode -- Load arithmetic size, dst, src
@@ -165,3 +160,293 @@ data Instr
-- bcl to next insn, mflr reg
| LWSYNC -- memory barrier
+
+
+-- | Get the registers that are being used by this instruction.
+-- regUsage doesn't need to do any trickery for jumps and such.
+-- Just state precisely the regs read and written by that insn.
+-- The consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+ppc_regUsageOfInstr :: Instr -> RegUsage
+ppc_regUsageOfInstr instr
+ = case instr of
+ LD _ reg addr -> usage (regAddr addr, [reg])
+ LA _ reg addr -> usage (regAddr addr, [reg])
+ ST _ reg addr -> usage (reg : regAddr addr, [])
+ STU _ reg addr -> usage (reg : regAddr addr, [])
+ LIS reg _ -> usage ([], [reg])
+ LI reg _ -> usage ([], [reg])
+ MR reg1 reg2 -> usage ([reg2], [reg1])
+ CMP _ reg ri -> usage (reg : regRI ri,[])
+ CMPL _ reg ri -> usage (reg : regRI ri,[])
+ BCC _ _ -> noUsage
+ BCCFAR _ _ -> noUsage
+ MTCTR reg -> usage ([reg],[])
+ BCTR _ -> noUsage
+ BL _ params -> usage (params, callClobberedRegs)
+ BCTRL params -> usage (params, callClobberedRegs)
+ ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
+ SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
+ MULLW_MayOflo reg1 reg2 reg3
+ -> usage ([reg2,reg3], [reg1])
+ AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
+ EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
+ NEG reg1 reg2 -> usage ([reg2], [reg1])
+ NOT reg1 reg2 -> usage ([reg2], [reg1])
+ SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ RLWINM reg1 reg2 _ _ _
+ -> usage ([reg2], [reg1])
+ FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1])
+ FNEG r1 r2 -> usage ([r2], [r1])
+ FCMP r1 r2 -> usage ([r1,r2], [])
+ FCTIWZ r1 r2 -> usage ([r2], [r1])
+ FRSP r1 r2 -> usage ([r2], [r1])
+ MFCR reg -> usage ([], [reg])
+ MFLR reg -> usage ([], [reg])
+ FETCHPC reg -> usage ([], [reg])
+ _ -> noUsage
+ where
+ usage (src, dst) = RU (filter interesting src)
+ (filter interesting dst)
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
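-- Editorial example: ppc_regUsageOfInstr (ADD r1 r2 (RIReg r3)) yields
-- RU [r2, r3] [r1], modulo the `interesting` filter below.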
+interesting :: Reg -> Bool
+interesting (VirtualRegI _) = True
+interesting (VirtualRegHi _) = True
+interesting (VirtualRegF _) = True
+interesting (VirtualRegD _) = True
+interesting (RealReg i) = isFastTrue (freeReg i)
+
+
+
+
+-- | Apply a given mapping to all the register references in this
+-- instruction.
+ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+ppc_patchRegsOfInstr instr env
+ = case instr of
+ LD sz reg addr -> LD sz (env reg) (fixAddr addr)
+ LA sz reg addr -> LA sz (env reg) (fixAddr addr)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+ STU sz reg addr -> STU sz (env reg) (fixAddr addr)
+ LIS reg imm -> LIS (env reg) imm
+ LI reg imm -> LI (env reg) imm
+ MR reg1 reg2 -> MR (env reg1) (env reg2)
+ CMP sz reg ri -> CMP sz (env reg) (fixRI ri)
+ CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri)
+ BCC cond lbl -> BCC cond lbl
+ BCCFAR cond lbl -> BCCFAR cond lbl
+ MTCTR reg -> MTCTR (env reg)
+ BCTR targets -> BCTR targets
+ BL imm argRegs -> BL imm argRegs -- argument regs
+ BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
+ ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
+ ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
+ ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
+ ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
+ SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
+ MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
+ DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
+ DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
+ MULLW_MayOflo reg1 reg2 reg3
+ -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
+ AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
+ OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
+ XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
+ XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
+ EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
+ NEG reg1 reg2 -> NEG (env reg1) (env reg2)
+ NOT reg1 reg2 -> NOT (env reg1) (env reg2)
+ SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
+ SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
+ SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
+ RLWINM reg1 reg2 sh mb me
+ -> RLWINM (env reg1) (env reg2) sh mb me
+ FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
+ FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3)
+ FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3)
+ FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3)
+ FNEG r1 r2 -> FNEG (env r1) (env r2)
+ FCMP r1 r2 -> FCMP (env r1) (env r2)
+ FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
+ FRSP r1 r2 -> FRSP (env r1) (env r2)
+ MFCR reg -> MFCR (env reg)
+ MFLR reg -> MFLR (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+ _ -> instr
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+
+--------------------------------------------------------------------------------
+-- | Checks whether this instruction is a jump/branch instruction,
+-- that is, one that can change the flow of control in a way that
+-- the register allocator needs to worry about.
+ppc_isJumpishInstr :: Instr -> Bool
+ppc_isJumpishInstr instr
+ = case instr of
+ BCC{} -> True
+ BCCFAR{} -> True
+ BCTR{} -> True
+ BCTRL{} -> True
+ BL{} -> True
+ JMP{} -> True
+ _ -> False
+
+
+-- | Find the block ids that this jump/branch instruction can
+-- transfer control to.
+ppc_jumpDestsOfInstr :: Instr -> [BlockId]
+ppc_jumpDestsOfInstr insn
+ = case insn of
+ BCC _ id -> [id]
+ BCCFAR _ id -> [id]
+ BCTR targets -> targets
+ _ -> []
+
+
+-- | Change the destination of this jump instruction.
+-- Used in the linear allocator when adding fixup blocks for join
+-- points.
+ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+ppc_patchJumpInstr insn patchF
+ = case insn of
+ BCC cc id -> BCC cc (patchF id)
+ BCCFAR cc id -> BCCFAR cc (patchF id)
+ BCTR _ -> error "Cannot patch BCTR"
+ _ -> insn
+
+
+-- -----------------------------------------------------------------------------
+
+-- | An instruction to spill a register into a spill slot.
+ppc_mkSpillInstr
+ :: Reg -- register to spill
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+ppc_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let sz = case regClass reg of
+ RcInteger -> II32
+ RcDouble -> FF64
+ _ -> panic "PPC.Instr.mkSpillInstr: no match"
+ in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
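+
+-- Illustrative numbers only: spilling to slot 0 with a current stack
+-- delta of -64 stores at sp + 128, since slot 0 lives at byte offset
+-- 64 and off - delta = 64 - (-64) = 128.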
+
+
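+-- | An instruction to reload a register from a spill slot.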
+ppc_mkLoadInstr
+ :: Reg -- register to load
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+ppc_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let sz = case regClass reg of
+ RcInteger -> II32
+ RcDouble -> FF64
+ _ -> panic "PPC.Instr.mkLoadInstr: no match"
+ in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+
+
+spillSlotSize :: Int
+spillSlotSize = 8
+
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
+
+-- Convert a spill slot number to an unsigned *byte* offset. Each
+-- architecture decides for itself whether it spills above or below
+-- the C stack pointer.
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + spillSlotSize * slot
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ ( text "invalid spill location: " <> int slot
+ $$ text "maxSpillSlots: " <> int maxSpillSlots)
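+
+-- A quick check of the layout above: spillSlotToOffset 0 == 64 and
+-- spillSlotToOffset 1 == 72, since spillSlotSize is 8.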
+
+
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+ppc_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+ppc_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
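+-- | Check whether this instruction is a meta instruction, i.e. one
+-- that exists only to carry information and emits no real machine
+-- instructions.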
+ppc_isMetaInstr
+ :: Instr
+ -> Bool
+
+ppc_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+-- | Copy the value in a register to another one.
+-- Must work for all register classes.
+ppc_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+ppc_mkRegRegMoveInstr src dst
+ = MR dst src
+
+
+-- | Make an unconditional jump instruction.
+-- For architectures with branch delay slots, it's ok to put
+-- a NOP after the jump. Don't fill the delay slot with an
+-- instruction that references regs or you'll confuse the
+-- linear allocator.
+ppc_mkJumpInstr
+ :: BlockId
+ -> [Instr]
+
+ppc_mkJumpInstr id
+ = [BCC ALWAYS id]
+
+
+-- | Take the source and destination from this reg -> reg move instruction,
+-- or Nothing if it's not one.
+ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
+ppc_takeRegRegMoveInstr _ = Nothing
+
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index ac83600f9c..f12d32a9b0 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module PPC.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
pprDataItem,
- pprInstr
)
where
@@ -20,26 +23,134 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
-import RegsBase
-import PprBase
import PPC.Regs
import PPC.Instr
+import PPC.Cond
+import PprBase
+import Instruction
+import Size
+import Reg
+import RegClass
import BlockId
import Cmm
-import CLabel ( mkAsmTempLabel )
+import CLabel
import Unique ( pprUnique )
import Pretty
import FastString
import qualified Outputable
-import Outputable ( panic )
+import Outputable ( Outputable, panic )
-import Data.Word(Word32)
+import Data.Word
import Data.Bits
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+-- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+ ptext (sLit ".align ") <> int pow2
+ where
+ pow2 = log2 bytes
+
+ log2 :: Int -> Int -- cache the common ones
+ log2 1 = 0
+ log2 2 = 1
+ log2 4 = 2
+ log2 8 = 3
+ log2 n = 1 + log2 (n `quot` 2)
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
+
+
pprUserReg :: Reg -> Doc
pprUserReg = pprReg
@@ -255,7 +366,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
-
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -271,6 +382,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprReg reg]
+-}
pprInstr (LD sz reg addr) = hcat [
char '\t',
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs
index ea882a0d23..b2806c74d1 100644
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ b/compiler/nativeGen/PPC/RegInfo.hs
@@ -7,27 +7,14 @@
-----------------------------------------------------------------------------
module PPC.RegInfo (
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
+ mkVReg,
- JumpDest(..),
+ JumpDest,
canShortcut,
shortcutJump,
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- spillSlotSize,
- maxSpillSlots,
- spillSlotToOffset
+ shortcutStatic,
+ regDotColor
)
where
@@ -35,203 +22,29 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
-import BlockId
-import RegsBase
import PPC.Regs
import PPC.Instr
-import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage = RU [] []
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
- SPILL reg _ -> usage ([reg], [])
- RELOAD _ reg -> usage ([], [reg])
-
- LD _ reg addr -> usage (regAddr addr, [reg])
- LA _ reg addr -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- STU _ reg addr -> usage (reg : regAddr addr, [])
- LIS reg _ -> usage ([], [reg])
- LI reg _ -> usage ([], [reg])
- MR reg1 reg2 -> usage ([reg2], [reg1])
- CMP _ reg ri -> usage (reg : regRI ri,[])
- CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ -> noUsage
- BCCFAR _ _ -> noUsage
- MTCTR reg -> usage ([reg],[])
- BCTR _ -> noUsage
- BL _ params -> usage (params, callClobberedRegs)
- BCTRL params -> usage (params, callClobberedRegs)
- ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- ADDC reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- ADDE reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
- SUBF reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- DIVW reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- DIVWU reg1 reg2 reg3-> usage ([reg2,reg3], [reg1])
- MULLW_MayOflo reg1 reg2 reg3
- -> usage ([reg2,reg3], [reg1])
- AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
- EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
- NEG reg1 reg2 -> usage ([reg2], [reg1])
- NOT reg1 reg2 -> usage ([reg2], [reg1])
- SLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SRW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SRAW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- RLWINM reg1 reg2 _ _ _
- -> usage ([reg2], [reg1])
- FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FNEG r1 r2 -> usage ([r2], [r1])
- FCMP r1 r2 -> usage ([r1,r2], [])
- FCTIWZ r1 r2 -> usage ([r2], [r1])
- FRSP r1 r2 -> usage ([r2], [r1])
- MFCR reg -> usage ([], [reg])
- MFLR reg -> usage ([], [reg])
- FETCHPC reg -> usage ([], [reg])
- _ -> noUsage
- where
- usage (src, dst) = RU (filter interesting src)
- (filter interesting dst)
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
-
- regRI (RIReg r) = [r]
- regRI _ = []
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI _) = True
-interesting (VirtualRegHi _) = True
-interesting (VirtualRegF _) = True
-interesting (VirtualRegD _) = True
-interesting (RealReg i) = isFastTrue (freeReg i)
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- LD sz reg addr -> LD sz (env reg) (fixAddr addr)
- LA sz reg addr -> LA sz (env reg) (fixAddr addr)
- ST sz reg addr -> ST sz (env reg) (fixAddr addr)
- STU sz reg addr -> STU sz (env reg) (fixAddr addr)
- LIS reg imm -> LIS (env reg) imm
- LI reg imm -> LI (env reg) imm
- MR reg1 reg2 -> MR (env reg1) (env reg2)
- CMP sz reg ri -> CMP sz (env reg) (fixRI ri)
- CMPL sz reg ri -> CMPL sz (env reg) (fixRI ri)
- BCC cond lbl -> BCC cond lbl
- BCCFAR cond lbl -> BCCFAR cond lbl
- MTCTR reg -> MTCTR (env reg)
- BCTR targets -> BCTR targets
- BL imm argRegs -> BL imm argRegs -- argument regs
- BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
- ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
- ADDC reg1 reg2 reg3-> ADDC (env reg1) (env reg2) (env reg3)
- ADDE reg1 reg2 reg3-> ADDE (env reg1) (env reg2) (env reg3)
- ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
- SUBF reg1 reg2 reg3-> SUBF (env reg1) (env reg2) (env reg3)
- MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
- DIVW reg1 reg2 reg3-> DIVW (env reg1) (env reg2) (env reg3)
- DIVWU reg1 reg2 reg3-> DIVWU (env reg1) (env reg2) (env reg3)
- MULLW_MayOflo reg1 reg2 reg3
- -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
- AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
- OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
- XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
- XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
- EXTS sz reg1 reg2 -> EXTS sz (env reg1) (env reg2)
- NEG reg1 reg2 -> NEG (env reg1) (env reg2)
- NOT reg1 reg2 -> NOT (env reg1) (env reg2)
- SLW reg1 reg2 ri -> SLW (env reg1) (env reg2) (fixRI ri)
- SRW reg1 reg2 ri -> SRW (env reg1) (env reg2) (fixRI ri)
- SRAW reg1 reg2 ri -> SRAW (env reg1) (env reg2) (fixRI ri)
- RLWINM reg1 reg2 sh mb me
- -> RLWINM (env reg1) (env reg2) sh mb me
- FADD sz r1 r2 r3 -> FADD sz (env r1) (env r2) (env r3)
- FSUB sz r1 r2 r3 -> FSUB sz (env r1) (env r2) (env r3)
- FMUL sz r1 r2 r3 -> FMUL sz (env r1) (env r2) (env r3)
- FDIV sz r1 r2 r3 -> FDIV sz (env r1) (env r2) (env r3)
- FNEG r1 r2 -> FNEG (env r1) (env r2)
- FCMP r1 r2 -> FCMP (env r1) (env r2)
- FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
- FRSP r1 r2 -> FRSP (env r1) (env r2)
- MFCR reg -> MFCR (env reg)
- MFLR reg -> MFLR (env reg)
- FETCHPC reg -> FETCHPC (env reg)
- _ -> instr
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
+import RegClass
+import Reg
+import Size
+import BlockId
+import Cmm
+import CLabel
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- BCC _ id -> id : acc
- BCCFAR _ id -> id : acc
- BCTR targets -> targets ++ acc
- _ -> acc
-
-
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
--- We can't just use jumpDests above because the jump might take its arg,
--- so the instr won't contain a blockid.
---
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- BCC{} -> True
- BCCFAR{} -> True
- BCTR{} -> True
- BCTRL{} -> True
- BL{} -> True
- JMP{} -> True
- _ -> False
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- BCC cc id
- | id == old -> BCC cc new
-
- BCCFAR cc id
- | id == old -> BCCFAR cc new
-
- BCTR _ -> error "Cannot patch BCTR"
+import Outputable
+import Unique
- _ -> insn
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size) = VirtualRegI u
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
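+
+-- Note that FF32 also yields a VirtualRegD: the PPC backend keeps
+-- single-precision values in double-precision registers, rounding
+-- with FRSP where needed, so the wider vreg is the safe choice.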
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MR dst src) = Just (src,dst)
-isRegRegMove _ = Nothing
data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -243,71 +56,39 @@ shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump _ other = other
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
- :: Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let sz = case regClass reg of
- RcInteger -> II32
- RcDouble -> FF64
- RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match"
- in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+ = other_static
-mkLoadInstr
- :: Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let sz = case regClass reg of
- RcInteger -> II32
- RcDouble -> FF64
- RcFloat -> panic "PPC.RegInfo.mkSpillInstr: no match"
- in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+shortBlockId
+ :: (BlockId -> Maybe JumpDest)
+ -> BlockId
+ -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-mkRegRegMoveInstr src dst
- = MR dst src
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id = [BCC ALWAYS id]
-
-
-
-spillSlotSize :: Int
-spillSlotSize = 8
-
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
- | slot >= 0 && slot < maxSpillSlots
- = 64 + spillSlotSize * slot
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int maxSpillSlots)
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index d6993b210b..80c68dd096 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -5,16 +5,6 @@
-- -----------------------------------------------------------------------------
module PPC.Regs (
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediates
Imm(..),
strImmLit,
@@ -42,7 +32,10 @@ module PPC.Regs (
-- horror show
freeReg,
- globalRegMaybe
+ globalRegMaybe,
+ get_GlobalReg_reg_or_addr,
+ allocatableRegs
+
)
where
@@ -51,78 +44,22 @@ where
#include "HsVersions.h"
#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
import Outputable ( Outputable(..), pprPanic, panic )
import qualified Outputable
-import Unique
import Constants
import FastBool
import Data.Word ( Word8, Word16, Word32 )
import Data.Int ( Int8, Int16, Int32 )
--- sizes -----------------------------------------------------------------------
--- For these three, the "size" also gives the int/float
--- distinction, because the instructions for int/float
--- differ only in their suffices
-data Size
- = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
- deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _ = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty
- | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8 = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size) = VirtualRegI u
- | otherwise
- = case size of
- FF32 -> VirtualRegD u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-
-- immediates ------------------------------------------------------------------
data Imm
@@ -490,7 +427,7 @@ freeReg REG_Hp = fastBool False
#ifdef REG_HpLim
freeReg REG_HpLim = fastBool False
#endif
-freeReg n = fastBool True
+freeReg _ = fastBool True
-- | Returns 'Nothing' if this global register is not stored
@@ -582,3 +519,26 @@ freeReg _ = 0#
globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined"
#endif /* powerpc_TARGET_ARCH */
+
+
+-- We map STG registers onto appropriate CmmExprs. Either they map to
+-- real machine registers, or they are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
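+
+-- For example, on a target where REG_Sp is defined, asking for Sp
+-- yields (Left r) for the real register r holding it; a register kept
+-- in the register table yields (Right addr) via get_GlobalReg_addr.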
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- That is, these are the regs we are prepared to let the register
+-- allocator map VRegs to.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
diff --git a/compiler/nativeGen/Platform.hs b/compiler/nativeGen/Platform.hs
new file mode 100644
index 0000000000..8b01f5cbb3
--- /dev/null
+++ b/compiler/nativeGen/Platform.hs
@@ -0,0 +1,92 @@
+
+-- | A description of the platform we're compiling for.
+-- Used by the native code generator.
+-- In the future, this module should be the only one that references
+-- the evil #defines for each TARGET_ARCH and TARGET_OS
+--
+module Platform (
+ Platform(..),
+ Arch(..),
+ OS(..),
+
+ defaultTargetPlatform
+)
+
+where
+
+#include "HsVersions.h"
+
+
+-- | Contains enough information for the native code generator to emit
+-- code for this platform.
+data Platform
+ = Platform
+ { platformArch :: Arch
+ , platformOS :: OS }
+
+
+-- | Architectures that the native code generator knows about.
+-- TODO: It might be nice to extend these constructors with information
+-- about what instruction set extensions an architecture might support.
+--
+data Arch
+ = ArchAlpha
+ | ArchX86
+ | ArchX86_64
+ | ArchPPC
+ | ArchPPC_64
+ | ArchSPARC
+ deriving (Show, Eq)
+
+
+-- | Operating systems that the native code generator knows about.
+-- Having OSUnknown should produce a sensible default, but no promises.
+data OS
+ = OSUnknown
+ | OSLinux
+ | OSDarwin
+ | OSSolaris
+ | OSMinGW32
+ deriving (Show, Eq)
+
+
+-- | This is the target platform as far as the #ifdefs are concerned.
+-- These are set in includes/ghcplatform.h by the autoconf scripts
+defaultTargetPlatform :: Platform
+defaultTargetPlatform
+ = Platform defaultTargetArch defaultTargetOS
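+
+-- An illustrative client (prettyArch is hypothetical, not part of
+-- this module) can now dispatch on the architecture without #ifdefs:
+--
+-- prettyArch :: Platform -> String
+-- prettyArch platform
+--  = case platformArch platform of
+--         ArchPPC -> "powerpc"
+--         ArchX86 -> "i386"
+--         other   -> show other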
+
+
+-- | Move the evil TARGET_ARCH #ifdefs into Haskell land.
+defaultTargetArch :: Arch
+#if alpha_TARGET_ARCH
+defaultTargetArch = ArchAlpha
+#elif i386_TARGET_ARCH
+defaultTargetArch = ArchX86
+#elif x86_64_TARGET_ARCH
+defaultTargetArch = ArchX86_64
+#elif powerpc_TARGET_ARCH
+defaultTargetArch = ArchPPC
+#elif powerpc64_TARGET_ARCH
+defaultTargetArch = ArchPPC_64
+#elif sparc_TARGET_ARCH
+defaultTargetArch = ArchSPARC
+#else
+#error "Platform.defaultTargetArch: undefined"
+#endif
+
+
+-- | Move the evil TARGET_OS #ifdefs into Haskell land.
+defaultTargetOS :: OS
+#if linux_TARGET_OS
+defaultTargetOS = OSLinux
+#elif darwin_TARGET_OS
+defaultTargetOS = OSDarwin
+#elif solaris_TARGET_OS
+defaultTargetOS = OSSolaris
+#elif mingw32_TARGET_OS
+defaultTargetOS = OSMinGW32
+#else
+defaultTargetOS = OSUnknown
+#endif
+
diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs
deleted file mode 100644
index 532d852752..0000000000
--- a/compiler/nativeGen/PprMach.hs
+++ /dev/null
@@ -1,183 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
--- We start with the @pprXXX@s with some cross-platform commonality
--- (e.g., 'pprReg'); we conclude with the no-commonality monster,
--- 'pprInstr'.
-
-#include "nativeGen/NCG.h"
-
-module PprMach (
- pprNatCmmTop, pprBasicBlock, pprSectionHeader, pprData,
- pprInstr, pprSize, pprUserReg, pprImm
- ) where
-
-#include "HsVersions.h"
-
-import PprBase
-
-import BlockId
-import Cmm
-import Regs -- may differ per-platform
-import Instrs
-import Regs
-
-import CLabel ( CLabel, pprCLabel, externallyVisibleCLabel,
- labelDynamic, mkAsmTempLabel, entryLblToInfoLbl )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel ( mkDeadStripPreventer )
-#endif
-
-import Panic ( panic )
-import Unique ( pprUnique )
-import Pretty
-import FastString
-import qualified Outputable
-import Outputable ( Outputable, pprPanic, ppr, docToSDoc)
-
-import Data.Array.ST
-import Data.Word ( Word8 )
-import Control.Monad.ST
-import Data.Char ( chr, ord )
-import Data.Maybe ( isJust )
-
-
-#if alpha_TARGET_ARCH
-import Alpha.Ppr
-#elif powerpc_TARGET_ARCH
-import PPC.Ppr
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Ppr
-#elif sparc_TARGET_ARCH
-import SPARC.Ppr
-#else
-#error "Regs: not defined for this architecture"
-#endif
-
-
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
-
-pprNatCmmTop :: NatCmmTop -> Doc
-pprNatCmmTop (CmmData section dats) =
- pprSectionHeader section $$ vcat (map pprData dats)
-
- -- special case for split markers:
-pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
-
-pprNatCmmTop (CmmProc info lbl params (ListGraph blocks)) =
- pprSectionHeader Text $$
- (if null info then -- blocks guaranteed not null, so label needed
- pprLabel lbl
- else
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
- pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
- <> char ':' $$
-#endif
- vcat (map pprData info) $$
- pprLabel (entryLblToInfoLbl lbl)
- ) $$
- vcat (map pprBasicBlock blocks)
- -- above: Even the first block gets a label, because with branch-chain
- -- elimination, it might be the target of a goto.
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
- -- If we are using the .subsections_via_symbols directive
- -- (available on recent versions of Darwin),
- -- we have to make sure that there is some kind of reference
- -- from the entry code to a label on the _top_ of of the info table,
- -- so that the linker will not think it is unreferenced and dead-strip
- -- it. That's why the label is called a DeadStripPreventer (_dsp).
- $$ if not (null info)
- then text "\t.long "
- <+> pprCLabel_asm (entryLblToInfoLbl lbl)
- <+> char '-'
- <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
- else empty
-#endif
-
-
-pprBasicBlock :: NatBasicBlock -> Doc
-pprBasicBlock (BasicBlock (BlockId id) instrs) =
- pprLabel (mkAsmTempLabel id) $$
- vcat (map pprInstr instrs)
-
-
-pprData :: CmmStatic -> Doc
-pprData (CmmAlign bytes) = pprAlign bytes
-pprData (CmmDataLabel lbl) = pprLabel lbl
-pprData (CmmString str) = pprASCII str
-pprData (CmmUninitialised bytes) = ptext (sLit s) <> int bytes
- where s =
-#if defined(solaris2_TARGET_OS)
- ".skip "
-#else
- ".space "
-#endif
-pprData (CmmStaticLit lit) = pprDataItem lit
-
-pprGloblDecl :: CLabel -> Doc
-pprGloblDecl lbl
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
- (sLit ".globl ")) <>
- pprCLabel_asm lbl
-
-pprTypeAndSizeDecl :: CLabel -> Doc
-pprTypeAndSizeDecl lbl
-#if linux_TARGET_OS
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = ptext (sLit ".type ") <>
- pprCLabel_asm lbl <> ptext (sLit ", @object")
-#else
- = empty
-#endif
-
-pprLabel :: CLabel -> Doc
-pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
-
-
-pprASCII str
- = vcat (map do1 str) $$ do1 0
- where
- do1 :: Word8 -> Doc
- do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
-
-pprAlign bytes =
- IF_ARCH_alpha(ptext (sLit ".align ") <> int pow2,
- IF_ARCH_i386(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
- IF_ARCH_x86_64(ptext (sLit ".align ") <> int IF_OS_darwin(pow2,bytes),
- IF_ARCH_sparc(ptext (sLit ".align ") <> int bytes,
- IF_ARCH_powerpc(ptext (sLit ".align ") <> int pow2,)))))
- where
- pow2 = log2 bytes
-
- log2 :: Int -> Int -- cache the common ones
- log2 1 = 0
- log2 2 = 1
- log2 4 = 2
- log2 8 = 3
- log2 n = 1 + log2 (n `quot` 2)
-
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
- ppr instr = Outputable.docToSDoc $ pprInstr instr
-
-
-
-
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
new file mode 100644
index 0000000000..1a341bbdda
--- /dev/null
+++ b/compiler/nativeGen/Reg.hs
@@ -0,0 +1,113 @@
+
+-- | An architecture independent description of a register.
+-- This needs to stay architecture independent because it is used
+-- by NCGMonad and the register allocators, which are shared
+-- by all architectures.
+--
+module Reg (
+ RegNo,
+ Reg(..),
+ isRealReg,
+ unRealReg,
+ isVirtualReg,
+ renameVirtualReg,
+ getHiVRegFromLo
+)
+
+where
+
+import Outputable
+import Unique
+import Panic
+
+-- | An identifier for a real machine register.
+type RegNo
+ = Int
+
+-- RealRegs are machine regs which are available for allocation, in
+-- the usual way. We know what class they are, because that's part of
+-- the processor's architecture.
+
+-- VirtualRegs are virtual registers. The register allocator will
+-- eventually have to map them into RealRegs, or into spill slots.
+--
+-- VirtualRegs are allocated on the fly, usually to represent a single
+-- value in the abstract assembly code (i.e. dynamic registers are
+-- usually single assignment).
+--
+-- With the new register allocator, the
+-- single assignment restriction isn't necessary to get correct code,
+-- although a better register allocation will result if single
+-- assignment is used -- because the allocator maps a VirtualReg into
+-- a single RealReg, even if the VirtualReg has multiple live ranges.
+
+-- Virtual regs can be of either class, so that info is attached.
+data Reg
+ = RealReg {-# UNPACK #-} !RegNo
+ | VirtualRegI {-# UNPACK #-} !Unique
+ | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
+ | VirtualRegF {-# UNPACK #-} !Unique
+ | VirtualRegD {-# UNPACK #-} !Unique
+ deriving (Eq, Ord)
+
+
+-- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
+-- in the register allocator.
+instance Uniquable Reg where
+ getUnique (RealReg i) = mkUnique 'C' i
+ getUnique (VirtualRegI u) = u
+ getUnique (VirtualRegHi u) = u
+ getUnique (VirtualRegF u) = u
+ getUnique (VirtualRegD u) = u
+
+
+-- | Print a reg in a generic manner
+-- If you want the architecture specific names, then use the pprReg
+-- function from the appropriate Ppr module.
+instance Outputable Reg where
+ ppr reg
+ = case reg of
+ RealReg i -> text "%r" <> int i
+ VirtualRegI u -> text "%vI_" <> pprUnique u
+ VirtualRegHi u -> text "%vHi_" <> pprUnique u
+ VirtualRegF u -> text "%vF_" <> pprUnique u
+ VirtualRegD u -> text "%vD_" <> pprUnique u
+
+
+
+isRealReg :: Reg -> Bool
+isRealReg = not . isVirtualReg
+
+-- | Take the RegNo from a real reg
+unRealReg :: Reg -> RegNo
+unRealReg (RealReg i) = i
+unRealReg _ = panic "unRealReg on VirtualReg"
+
+isVirtualReg :: Reg -> Bool
+isVirtualReg (RealReg _) = False
+isVirtualReg (VirtualRegI _) = True
+isVirtualReg (VirtualRegHi _) = True
+isVirtualReg (VirtualRegF _) = True
+isVirtualReg (VirtualRegD _) = True
+
+
+renameVirtualReg :: Unique -> Reg -> Reg
+renameVirtualReg u r
+ = case r of
+ RealReg _ -> error "renameVirtualReg: can't change unique on a real reg"
+ VirtualRegI _ -> VirtualRegI u
+ VirtualRegHi _ -> VirtualRegHi u
+ VirtualRegF _ -> VirtualRegF u
+ VirtualRegD _ -> VirtualRegD u
+
+-- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
+-- when supplied with the vreg for the lower-half of the quantity.
+-- (NB. Not reversible).
+getHiVRegFromLo :: Reg -> Reg
+getHiVRegFromLo (VirtualRegI u)
+ = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
+
+getHiVRegFromLo _
+ = panic "RegsBase.getHiVRegFromLo"
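+
+-- Schematically: if the low half of a 64-bit value lives in %vI_u,
+-- then the high half lives in %vHi_u', where u' is u retagged
+-- with 'H'.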
+
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
index 18e4b0edd1..8521e92601 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
@@ -8,11 +8,11 @@ module RegAlloc.Graph.Coalesce (
where
-import Cmm
-import Regs
import RegAlloc.Liveness
-import RegAllocInfo
+import Instruction
+import Reg
+import Cmm
import Bag
import UniqFM
import UniqSet
@@ -26,7 +26,11 @@ import Data.List
-- then the mov only serves to join live ranges. The two regs can be renamed to be
-- the same and the move instruction safely erased.
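+--
+-- For example, a PPC (MR v2 v1) where v1 dies and v2 is born lets us
+-- rename v2 to v1 throughout and drop the MR entirely.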
-regCoalesce :: [LiveCmmTop] -> UniqSM [LiveCmmTop]
+regCoalesce
+ :: Instruction instr
+ => [LiveCmmTop instr]
+ -> UniqSM [LiveCmmTop instr]
+
regCoalesce code
= do
let joins = foldl' unionBags emptyBag
@@ -57,7 +61,11 @@ sinkReg fm r
-- During a mov, if the source reg dies and the destination reg is born
-- then we can rename the two regs to the same thing and eliminate the move.
--
-slurpJoinMovs :: LiveCmmTop -> Bag (Reg, Reg)
+slurpJoinMovs
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> Bag (Reg, Reg)
+
slurpJoinMovs live
= slurpCmm emptyBag live
where
@@ -68,7 +76,7 @@ slurpJoinMovs live
slurpLI rs (Instr _ Nothing) = rs
slurpLI rs (Instr instr (Just live))
- | Just (r1, r2) <- isRegRegMove instr
+ | Just (r1, r2) <- takeRegRegMoveInstr instr
, elementOfUniqSet r1 $ liveDieRead live
, elementOfUniqSet r2 $ liveBorn live
@@ -80,4 +88,7 @@ slurpJoinMovs live
| otherwise
= rs
+ slurpLI rs SPILL{} = rs
+ slurpLI rs RELOAD{} = rs
+
diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs
index fe99aba120..2e584617e9 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Main.hs
@@ -5,8 +5,7 @@
--
module RegAlloc.Graph.Main (
- regAlloc,
- regDotColor
+ regAlloc
)
where
@@ -17,9 +16,12 @@ import RegAlloc.Graph.Spill
import RegAlloc.Graph.SpillClean
import RegAlloc.Graph.SpillCost
import RegAlloc.Graph.Stats
-import Regs
-import Instrs
-import PprMach
+import RegAlloc.Graph.TrivColorable
+import Instruction
+import TargetReg
+import RegClass
+import Reg
+
import UniqSupply
import UniqSet
@@ -43,18 +45,26 @@ maxSpinCount = 10
-- | The top level of the graph coloring register allocator.
--
regAlloc
- :: DynFlags
+ :: (Outputable instr, Instruction instr)
+ => DynFlags
-> UniqFM (UniqSet Reg) -- ^ the registers we can use for allocation
-> UniqSet Int -- ^ the set of available spill slots.
- -> [LiveCmmTop] -- ^ code annotated with liveness information.
- -> UniqSM ( [NatCmmTop], [RegAllocStats] )
+ -> [LiveCmmTop instr] -- ^ code annotated with liveness information.
+ -> UniqSM ( [NatCmmTop instr], [RegAllocStats instr] )
-- ^ code with registers allocated and stats for each stage of
-- allocation
regAlloc dflags regsFree slotsFree code
= do
+ -- TODO: the regClass function is currently hard coded to the default target
+ -- architecture. Would prefer to determine this from dflags.
+ -- There are other uses of targetRegClass later in this module.
+ let triv = trivColorable targetRegClass
+
(code_final, debug_codeGraphs, _)
- <- regAlloc_spin dflags 0 trivColorable regsFree slotsFree [] code
+ <- regAlloc_spin dflags 0
+ triv
+ regsFree slotsFree [] code
return ( code_final
, reverse debug_codeGraphs )
@@ -74,7 +84,7 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
$ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
( text "It looks like the register allocator is stuck in an infinite loop."
$$ text "max cycles = " <> int maxSpinCount
- $$ text "regsFree = " <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg)
+ $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
$ uniqSetToList $ unionManyUniqSets $ eltsUFM regsFree)
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
@@ -139,12 +149,12 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
-- clean out unneeded SPILL/RELOADs
let code_spillclean = map cleanSpills code_patched
- -- strip off liveness information
- let code_nat = map stripLive code_spillclean
+ -- strip off liveness information,
+ -- and rewrite SPILL/RELOAD pseudos into real instructions along the way
+ let code_final = map stripLive code_spillclean
- -- rewrite SPILL/RELOAD pseudos into real instructions
- let spillNatTop = mapGenBlockTop spillNatBlock
- let code_final = map spillNatTop code_nat
+-- let spillNatTop = mapGenBlockTop spillNatBlock
+-- let code_final = map spillNatTop code_nat
-- record what happened in this stage for debugging
let stat =
@@ -213,7 +223,8 @@ regAlloc_spin dflags spinCount triv regsFree slotsFree debug_codeGraphs code
-- | Build a graph from the liveness and coalesce information in this code.
buildGraph
- :: [LiveCmmTop]
+ :: Instruction instr
+ => [LiveCmmTop instr]
-> UniqSM (Color.Graph Reg RegClass Reg)
buildGraph code
@@ -248,8 +259,8 @@ graphAddConflictSet set graph
= let reals = filterUFM isRealReg set
virtuals = filterUFM (not . isRealReg) set
- graph1 = Color.addConflicts virtuals regClass graph
- graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 regClass r2)
+ graph1 = Color.addConflicts virtuals targetRegClass graph
+ graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 targetRegClass r2)
graph1
[ (a, b)
| a <- uniqSetToList virtuals
@@ -276,13 +287,14 @@ graphAddCoalesce (r1, r2) graph
| otherwise
= Color.addCoalesce (regWithClass r1) (regWithClass r2) graph
- where regWithClass r = (r, regClass r)
+ where regWithClass r = (r, targetRegClass r)
-- | Patch registers in code using the reg -> reg mapping in this graph.
patchRegsFromGraph
- :: Color.Graph Reg RegClass Reg
- -> LiveCmmTop -> LiveCmmTop
+ :: (Outputable instr, Instruction instr)
+ => Color.Graph Reg RegClass Reg
+ -> LiveCmmTop instr -> LiveCmmTop instr
patchRegsFromGraph graph code
= let
@@ -303,7 +315,7 @@ patchRegsFromGraph graph code
= pprPanic "patchRegsFromGraph: register mapping failed."
( text "There is no node in the graph for register " <> ppr reg
$$ ppr code
- $$ Color.dotGraph (\_ -> text "white") trivColorable graph)
+ $$ Color.dotGraph (\_ -> text "white") (trivColorable targetRegClass) graph)
in patchEraseLive patchF code
diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
index b5a645188f..e6e5622a02 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
@@ -10,9 +10,8 @@ module RegAlloc.Graph.Spill (
where
import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
+import Instruction
+import Reg
import Cmm
import State
@@ -35,11 +34,12 @@ import Data.Maybe
-- address the spill slot directly.
--
regSpill
- :: [LiveCmmTop] -- ^ the code
+ :: Instruction instr
+ => [LiveCmmTop instr] -- ^ the code
-> UniqSet Int -- ^ available stack slots
-> UniqSet Reg -- ^ the regs to spill
-> UniqSM
+ ([LiveCmmTop instr] -- code with spill instructions
+ ([LiveCmmTop instr] -- code will spill instructions
, UniqSet Int -- left over slots
, SpillStats ) -- stats about what happened during spilling
@@ -75,6 +75,20 @@ regSpill_block regSlotMap (BasicBlock i instrs)
= do instrss' <- mapM (regSpill_instr regSlotMap) instrs
return $ BasicBlock i (concat instrss')
+
+regSpill_instr
+ :: Instruction instr
+ => UniqFM Int
+ -> LiveInstr instr -> SpillM [LiveInstr instr]
+
+-- | The thing we're spilling shouldn't already have spills or reloads in it.
+regSpill_instr _ SPILL{}
+ = panic "regSpill_instr: unexpected SPILL"
+
+regSpill_instr _ RELOAD{}
+ = panic "regSpill_instr: unexpected RELOAD"
+
+
regSpill_instr _ li@(Instr _ Nothing)
= do return [li]
@@ -82,7 +96,7 @@ regSpill_instr regSlotMap
(Instr instr (Just _))
= do
-- work out which regs are read and written in this instr
- let RU rlRead rlWritten = regUsage instr
+ let RU rlRead rlWritten = regUsageOfInstr instr
-- sometimes a register is listed as being read more than once,
-- nub this so we don't end up inserting two lots of spill code.
@@ -109,9 +123,9 @@ regSpill_instr regSlotMap
let postfixes = concat mPostfixes
-- final code
- let instrs' = map (\i -> Instr i Nothing) prefixes
- ++ [ Instr instr3 Nothing ]
- ++ map (\i -> Instr i Nothing) postfixes
+ let instrs' = prefixes
+ ++ [Instr instr3 Nothing]
+ ++ postfixes
return
{- $ pprTrace "* regSpill_instr spill"
@@ -139,6 +153,7 @@ spillRead regSlotMap instr reg
| otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
+
spillWrite regSlotMap instr reg
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -152,6 +167,7 @@ spillWrite regSlotMap instr reg
| otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
+
spillModify regSlotMap instr reg
| Just slot <- lookupUFM regSlotMap reg
= do (instr', nReg) <- patchInstr reg instr
@@ -168,19 +184,25 @@ spillModify regSlotMap instr reg
-- | rewrite uses of this virtual reg in an instr to use a different virtual reg
-patchInstr :: Reg -> Instr -> SpillM (Instr, Reg)
+patchInstr
+ :: Instruction instr
+ => Reg -> instr -> SpillM (instr, Reg)
+
patchInstr reg instr
= do nUnique <- newUnique
let nReg = renameVirtualReg nUnique reg
let instr' = patchReg1 reg nReg instr
return (instr', nReg)
-patchReg1 :: Reg -> Reg -> Instr -> Instr
+patchReg1
+ :: Instruction instr
+ => Reg -> Reg -> instr -> instr
+
patchReg1 old new instr
= let patchF r
| r == old = new
| otherwise = r
- in patchRegs instr patchF
+ in patchRegsOfInstr instr patchF
------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
index b68648bdaf..4f129c468a 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
@@ -29,13 +29,12 @@ module RegAlloc.Graph.SpillClean (
)
where
-import BlockId
import RegAlloc.Liveness
-import RegAllocInfo
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import Reg
+import BlockId
+import Cmm
import UniqSet
import UniqFM
import Unique
@@ -51,12 +50,19 @@ type Slot = Int
-- | Clean out unneeded spill\/reloads from this top level thing.
-cleanSpills :: LiveCmmTop -> LiveCmmTop
+cleanSpills
+ :: Instruction instr
+ => LiveCmmTop instr -> LiveCmmTop instr
+
cleanSpills cmm
= evalState (cleanSpin 0 cmm) initCleanS
-- | do one pass of cleaning
-cleanSpin :: Int -> LiveCmmTop -> CleanM LiveCmmTop
+cleanSpin
+ :: Instruction instr
+ => Int
+ -> LiveCmmTop instr
+ -> CleanM (LiveCmmTop instr)
{-
cleanSpin spinCount code
@@ -103,7 +109,11 @@ cleanSpin spinCount code
-- | Clean one basic block
-cleanBlockForward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockForward
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
cleanBlockForward (BasicBlock blockId instrs)
= do
-- see if we have a valid association for the entry to this block
@@ -116,7 +126,11 @@ cleanBlockForward (BasicBlock blockId instrs)
return $ BasicBlock blockId instrs_reload
-cleanBlockBackward :: LiveBasicBlock -> CleanM LiveBasicBlock
+cleanBlockBackward
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> CleanM (LiveBasicBlock instr)
+
cleanBlockBackward (BasicBlock blockId instrs)
= do instrs_spill <- cleanBackward emptyUniqSet [] instrs
return $ BasicBlock blockId instrs_spill
@@ -130,11 +144,12 @@ cleanBlockBackward (BasicBlock blockId instrs)
-- then we don't need to do the reload.
--
cleanForward
- :: BlockId -- ^ the block that we're currently in
- -> Assoc Store -- ^ two store locations are associated if they have the same value
- -> [LiveInstr] -- ^ acc
- -> [LiveInstr] -- ^ instrs to clean (in backwards order)
- -> CleanM [LiveInstr] -- ^ cleaned instrs (in forward order)
+ :: Instruction instr
+ => BlockId -- ^ the block that we're currently in
+ -> Assoc Store -- ^ two store locations are associated if they have the same value
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ instrs to clean (in backwards order)
+ -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order)
cleanForward _ _ acc []
= return acc
@@ -142,19 +157,19 @@ cleanForward _ _ acc []
-- write out live range joins via spill slots to just a spill and a reg-reg move
-- hopefully the spill will also be cleaned in the next pass
--
-cleanForward blockId assoc acc (Instr i1 live1 : Instr i2 _ : instrs)
+cleanForward blockId assoc acc (li1 : li2 : instrs)
- | SPILL reg1 slot1 <- i1
- , RELOAD slot2 reg2 <- i2
+ | SPILL reg1 slot1 <- li1
+ , RELOAD slot2 reg2 <- li2
, slot1 == slot2
= do
modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
cleanForward blockId assoc acc
- (Instr i1 live1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
+ (li1 : Instr (mkRegRegMoveInstr reg1 reg2) Nothing : instrs)
cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
- | Just (r1, r2) <- isRegRegMove i1
+ | Just (r1, r2) <- takeRegRegMoveInstr i1
= if r1 == r2
-- erase any left over nop reg reg moves while we're here
-- this will also catch any nop moves that the "write out live range joins" case above
@@ -170,38 +185,50 @@ cleanForward blockId assoc acc (li@(Instr i1 _) : instrs)
cleanForward blockId assoc' (li : acc) instrs
-cleanForward blockId assoc acc (li@(Instr instr _) : instrs)
+cleanForward blockId assoc acc (li : instrs)
-- update association due to the spill
- | SPILL reg slot <- instr
+ | SPILL reg slot <- li
= let assoc' = addAssoc (SReg reg) (SSlot slot)
$ delAssoc (SSlot slot)
$ assoc
in cleanForward blockId assoc' (li : acc) instrs
-- clean a reload instr
- | RELOAD{} <- instr
+ | RELOAD{} <- li
= do (assoc', mli) <- cleanReload blockId assoc li
case mli of
Nothing -> cleanForward blockId assoc' acc instrs
Just li' -> cleanForward blockId assoc' (li' : acc) instrs
-- remember the association over a jump
- | targets <- jumpDests instr []
+ | Instr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
, not $ null targets
= do mapM_ (accJumpValid assoc) targets
cleanForward blockId assoc (li : acc) instrs
-- writing to a reg changes its value.
- | RU _ written <- regUsage instr
+ | Instr instr _ <- li
+ , RU _ written <- regUsageOfInstr instr
= let assoc' = foldr delAssoc assoc (map SReg $ nub written)
in cleanForward blockId assoc' (li : acc) instrs
+-- bogus, to stop pattern match warning
+cleanForward _ _ _ _
+ = panic "RegAlloc.Graph.SpillClean.cleanForward: no match"
+
-- | Try and rewrite a reload instruction to something more pleasing
--
-cleanReload :: BlockId -> Assoc Store -> LiveInstr -> CleanM (Assoc Store, Maybe LiveInstr)
-cleanReload blockId assoc li@(Instr (RELOAD slot reg) _)
+cleanReload
+ :: Instruction instr
+ => BlockId
+ -> Assoc Store
+ -> LiveInstr instr
+ -> CleanM (Assoc Store, Maybe (LiveInstr instr))
+
+cleanReload blockId assoc li@(RELOAD slot reg)
-- if the reg we're reloading already has the same value as the slot
-- then we can erase the instruction outright
@@ -264,10 +291,10 @@ cleanReload _ _ _
-- we should really be updating the noReloads set as we cross jumps also.
--
cleanBackward
- :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from
- -> [LiveInstr] -- ^ acc
- -> [LiveInstr] -- ^ instrs to clean (in forwards order)
- -> CleanM [LiveInstr] -- ^ cleaned instrs (in backwards order)
+ :: UniqSet Int -- ^ slots that have been spilled, but not reloaded from
+ -> [LiveInstr instr] -- ^ acc
+ -> [LiveInstr instr] -- ^ instrs to clean (in forwards order)
+ -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in backwards order)
cleanBackward noReloads acc lis
@@ -277,15 +304,15 @@ cleanBackward noReloads acc lis
cleanBackward' _ _ acc []
= return acc
-cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
+cleanBackward' reloadedBy noReloads acc (li : instrs)
-- if nothing ever reloads from this slot then we don't need the spill
- | SPILL _ slot <- instr
+ | SPILL _ slot <- li
, Nothing <- lookupUFM reloadedBy (SSlot slot)
= do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
cleanBackward noReloads acc instrs
- | SPILL _ slot <- instr
+ | SPILL _ slot <- li
= if elementOfUniqSet slot noReloads
-- we can erase this spill because the slot won't be read until after the next one
@@ -299,7 +326,7 @@ cleanBackward' reloadedBy noReloads acc (li@(Instr instr _) : instrs)
cleanBackward noReloads' (li : acc) instrs
-- if we reload from a slot then it's no longer unused
- | RELOAD slot _ <- instr
+ | RELOAD slot _ <- li
, noReloads' <- delOneFromUniqSet noReloads slot
= cleanBackward noReloads' (li : acc) instrs
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
index 1d37cf71d6..d4dd75a4b7 100644
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
@@ -16,14 +16,16 @@ module RegAlloc.Graph.SpillCost (
where
-import GraphBase
import RegAlloc.Liveness
-import RegAllocInfo
-import Instrs
-import Regs
+import Instruction
+import RegClass
+import Reg
+
+import GraphBase
+
+
import BlockId
import Cmm
-
import UniqFM
import UniqSet
import Outputable
@@ -62,7 +64,8 @@ plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2)
-- and the number of instructions it was live on entry to (lifetime)
--
slurpSpillCostInfo
- :: LiveCmmTop
+ :: (Outputable instr, Instruction instr)
+ => LiveCmmTop instr
-> SpillCostInfo
slurpSpillCostInfo cmm
@@ -89,11 +92,14 @@ slurpSpillCostInfo cmm
= return ()
-- skip over comment and delta pseudo instrs
- countLIs rsLive (Instr instr Nothing : lis)
- | COMMENT{} <- instr
+ countLIs rsLive (SPILL{} : lis)
+ = countLIs rsLive lis
+
+ countLIs rsLive (RELOAD{} : lis)
= countLIs rsLive lis
- | DELTA{} <- instr
+ countLIs rsLive (Instr instr Nothing : lis)
+ | isMetaInstr instr
= countLIs rsLive lis
| otherwise
@@ -106,7 +112,7 @@ slurpSpillCostInfo cmm
mapM_ incLifetime $ uniqSetToList rsLiveEntry
-- increment counts for what regs were read/written from
- let (RU read written) = regUsage instr
+ let (RU read written) = regUsageOfInstr instr
mapM_ incUses $ filter (not . isRealReg) $ nub read
mapM_ incDefs $ filter (not . isRealReg) $ nub written
@@ -226,8 +232,11 @@ lifeMapFromSpillCostInfo info
-- | Work out the degree of this node, that is, the number of its neighbors that have the same class.
-nodeDegree :: Graph Reg RegClass Reg -> Reg -> Int
-nodeDegree graph reg
+nodeDegree
+ :: (Reg -> RegClass)
+ -> Graph Reg RegClass Reg -> Reg -> Int
+
+nodeDegree regClass graph reg
| Just node <- lookupUFM (graphMap graph) reg
, virtConflicts <- length $ filter (\r -> regClass r == regClass reg)
$ uniqSetToList $ nodeConflicts node
@@ -238,12 +247,17 @@ nodeDegree graph reg
-- | Show a spill cost record, including the degree from the graph and the final calculated spill cost.
-pprSpillCostRecord :: Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
-pprSpillCostRecord graph (reg, uses, defs, life)
+pprSpillCostRecord
+ :: (Reg -> RegClass)
+ -> (Reg -> SDoc)
+ -> Graph Reg RegClass Reg -> SpillCostRecord -> SDoc
+
+pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life)
= hsep
- [ ppr reg
+ [ pprReg reg
, ppr uses
, ppr defs
, ppr life
- , ppr $ nodeDegree graph reg
- , text $ show $ (fromIntegral (uses + defs) / fromIntegral (nodeDegree graph reg) :: Float) ]
+ , ppr $ nodeDegree regClass graph reg
+ , text $ show $ (fromIntegral (uses + defs)
+ / fromIntegral (nodeDegree regClass graph reg) :: Float) ]
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
index 8082f9e975..5e3dd3265b 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
@@ -5,7 +5,6 @@
module RegAlloc.Graph.Stats (
RegAllocStats (..),
- regDotColor,
pprStats,
pprStatsSpills,
@@ -22,13 +21,13 @@ where
import qualified GraphColor as Color
import RegAlloc.Liveness
-import RegAllocInfo
import RegAlloc.Graph.Spill
import RegAlloc.Graph.SpillCost
-import Regs
-import Instrs
-import Cmm
+import Instruction
+import RegClass
+import Reg
+import Cmm
import Outputable
import UniqFM
import UniqSet
@@ -36,11 +35,11 @@ import State
import Data.List
-data RegAllocStats
+data RegAllocStats instr
-- initial graph
= RegAllocStatsStart
- { raLiveCmm :: [LiveCmmTop] -- ^ initial code, with liveness
+ { raLiveCmm :: [LiveCmmTop instr] -- ^ initial code, with liveness
, raGraph :: Color.Graph Reg RegClass Reg -- ^ the initial, uncolored graph
, raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill
@@ -50,35 +49,35 @@ data RegAllocStats
, raCoalesced :: UniqFM Reg -- ^ the regs that were coalesced
, raSpillStats :: SpillStats -- ^ spiller stats
, raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for
- , raSpilled :: [LiveCmmTop] } -- ^ code with spill instructions added
+ , raSpilled :: [LiveCmmTop instr] } -- ^ code with spill instructions added
-- a successful coloring
| RegAllocStatsColored
{ raGraph :: Color.Graph Reg RegClass Reg -- ^ the uncolored graph
, raGraphColored :: Color.Graph Reg RegClass Reg -- ^ the coalesced and colored graph
, raCoalesced :: UniqFM Reg -- ^ the regs that were coalesced
- , raPatched :: [LiveCmmTop] -- ^ code with vregs replaced by hregs
- , raSpillClean :: [LiveCmmTop] -- ^ code with unneeded spill\/reloads cleaned out
- , raFinal :: [NatCmmTop] -- ^ final code
+ , raPatched :: [LiveCmmTop instr] -- ^ code with vregs replaced by hregs
+ , raSpillClean :: [LiveCmmTop instr] -- ^ code with unneeded spill\/reloads cleaned out
+ , raFinal :: [NatCmmTop instr] -- ^ final code
, raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code
-instance Outputable RegAllocStats where
+instance Outputable instr => Outputable (RegAllocStats instr) where
ppr (s@RegAllocStatsStart{})
= text "# Start"
$$ text "# Native code with liveness information."
$$ ppr (raLiveCmm s)
$$ text ""
- $$ text "# Initial register conflict graph."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text "# Initial register conflict graph."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
ppr (s@RegAllocStatsSpill{})
= text "# Spill"
- $$ text "# Register conflict graph."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
- $$ text ""
+-- $$ text "# Register conflict graph."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text ""
$$ (if (not $ isNullUFM $ raCoalesced s)
then text "# Registers coalesced."
@@ -86,9 +85,9 @@ instance Outputable RegAllocStats where
$$ text ""
else empty)
- $$ text "# Spill costs. reg uses defs lifetime degree cost"
- $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
- $$ text ""
+-- $$ text "# Spill costs. reg uses defs lifetime degree cost"
+-- $$ vcat (map (pprSpillCostRecord (raGraph s)) $ eltsUFM $ raSpillCosts s)
+-- $$ text ""
$$ text "# Spills inserted."
$$ ppr (raSpillStats s)
@@ -101,13 +100,13 @@ instance Outputable RegAllocStats where
ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
= text "# Colored"
- $$ text "# Register conflict graph (initial)."
- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
- $$ text ""
+-- $$ text "# Register conflict graph (initial)."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraph s)
+-- $$ text ""
- $$ text "# Register conflict graph (colored)."
- $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
- $$ text ""
+-- $$ text "# Register conflict graph (colored)."
+-- $$ Color.dotGraph regDotColor trivColorable (raGraphColored s)
+-- $$ text ""
$$ (if (not $ isNullUFM $ raCoalesced s)
then text "# Registers coalesced."
@@ -133,7 +132,7 @@ instance Outputable RegAllocStats where
$$ text ""
-- | Do all the different analysis on this list of RegAllocStats
-pprStats :: [RegAllocStats] -> Color.Graph Reg RegClass Reg -> SDoc
+pprStats :: [RegAllocStats instr] -> Color.Graph Reg RegClass Reg -> SDoc
pprStats stats graph
= let outSpills = pprStatsSpills stats
outLife = pprStatsLifetimes stats
@@ -145,7 +144,7 @@ pprStats stats graph
-- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
pprStatsSpills
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsSpills stats
= let
@@ -163,7 +162,7 @@ pprStatsSpills stats
-- | Dump a table of how long vregs tend to live for in the initial code.
pprStatsLifetimes
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsLifetimes stats
= let info = foldl' plusSpillCostInfo zeroSpillCostInfo
@@ -191,7 +190,7 @@ binLifetimeCount fm
-- | Dump a table of how many conflicts vregs tend to have in the initial code.
pprStatsConflict
- :: [RegAllocStats] -> SDoc
+ :: [RegAllocStats instr] -> SDoc
pprStatsConflict stats
= let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
@@ -208,7 +207,7 @@ pprStatsConflict stats
-- | For every vreg, dump how many conflicts it has and its lifetime
-- good for making a scatter plot.
pprStatsLifeConflict
- :: [RegAllocStats]
+ :: [RegAllocStats instr]
-> Color.Graph Reg RegClass Reg -- ^ global register conflict graph
-> SDoc
@@ -238,7 +237,10 @@ pprStatsLifeConflict stats graph
-- | Count spill/reload/reg-reg moves.
-- Lets us see how well the register allocator has done.
--
-countSRMs :: LiveCmmTop -> (Int, Int, Int)
+countSRMs
+ :: Instruction instr
+ => LiveCmmTop instr -> (Int, Int, Int)
+
countSRMs cmm
= execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
@@ -246,16 +248,17 @@ countSRM_block (BasicBlock i instrs)
= do instrs' <- mapM countSRM_instr instrs
return $ BasicBlock i instrs'
-countSRM_instr li@(Instr instr _)
- | SPILL _ _ <- instr
+countSRM_instr li
+ | SPILL _ _ <- li
= do modify $ \(s, r, m) -> (s + 1, r, m)
return li
- | RELOAD _ _ <- instr
+ | RELOAD _ _ <- li
= do modify $ \(s, r, m) -> (s, r + 1, m)
return li
- | Just _ <- isRegRegMove instr
+ | Instr instr _ <- li
+ , Just _ <- takeRegRegMoveInstr instr
= do modify $ \(s, r, m) -> (s, r, m + 1)
return li
@@ -266,77 +269,9 @@ countSRM_instr li@(Instr instr _)
addSRM (s1, r1, m1) (s2, r2, m2)
= (s1+s2, r1+r2, m1+m2)
------
--- Register colors for drawing conflict graphs
--- Keep this out of MachRegs.hs because it's specific to the graph coloring allocator.
-
-
--- reg colors for x86
-#if i386_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just str = lookupUFM regColors reg
- in text str
-
-regColors
- = listToUFM
- $ [ (eax, "#00ff00")
- , (ebx, "#0000ff")
- , (ecx, "#00ffff")
- , (edx, "#0080ff")
-
- , (fake0, "#ff00ff")
- , (fake1, "#ff00aa")
- , (fake2, "#aa00ff")
- , (fake3, "#aa00aa")
- , (fake4, "#ff0055")
- , (fake5, "#5500ff") ]
-
-
--- reg colors for x86_64
-#elif x86_64_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = let Just str = lookupUFM regColors reg
- in text str
-
-regColors
- = listToUFM
- $ [ (rax, "#00ff00"), (eax, "#00ff00")
- , (rbx, "#0000ff"), (ebx, "#0000ff")
- , (rcx, "#00ffff"), (ecx, "#00ffff")
- , (rdx, "#0080ff"), (edx, "#00ffff")
- , (r8, "#00ff80")
- , (r9, "#008080")
- , (r10, "#0040ff")
- , (r11, "#00ff40")
- , (r12, "#008040")
- , (r13, "#004080")
- , (r14, "#004040")
- , (r15, "#002080") ]
-
- ++ zip (map RealReg [16..31]) (repeat "red")
-
-
--- reg colors for ppc
-#elif powerpc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- RcDouble -> text "green"
-
-#elif sparc_TARGET_ARCH
-regDotColor :: Reg -> SDoc
-regDotColor reg
- = case regClass reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- RcDouble -> text "green"
-#else
-#error ToDo: regDotColor
-#endif
+
+
+
{-
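The dotGraph dumps above are stubbed out because regDotColor has left this generic module (the per-arch colour tables deleted above presumably move into the arch-specific modules this patch introduces). A hedged sketch of how the dumps could come back once the colouring function is threaded in as a parameter, mirroring what was done for pprSpillCostRecord:

    -- Sketch only: accept the arch-specific pieces as arguments rather
    -- than importing them, then rebuild the dot dump as before.
    pprConflictGraphDot
            :: (Reg -> SDoc)                -- arch-specific regDotColor
            -> (Reg -> RegClass)            -- arch-specific register class
            -> Color.Graph Reg RegClass Reg -> SDoc

    pprConflictGraphDot regDotColor regClass graph
            = Color.dotGraph regDotColor (trivColorable regClass) graph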
diff --git a/compiler/nativeGen/Regs.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
index 5239520708..6a7211dd06 100644
--- a/compiler/nativeGen/Regs.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
@@ -1,164 +1,21 @@
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1994-2004
---
--- Machine-specific info about registers.
---
--- Also includes stuff about immediate operands, which are
--- often/usually quite entangled with registers.
---
--- -----------------------------------------------------------------------------
-
-#include "nativeGen/NCG.h"
-
-module Regs (
- --------------------------------
- -- Generic things, shared by all architectures.
- module RegsBase,
- getHiVRegFromLo,
- get_GlobalReg_reg_or_addr,
- allocatableRegs,
- allocatableRegsInClass,
- trivColorable,
-
- --------------------------------
- -- Things that are defined by the arch specific module.
- --
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
- -- immediates
- Imm(..),
- strImmLit,
- litToImm,
-
- -- addressing modes
- AddrMode(..),
- addrOffset,
-
- -- registers
- spRel,
- argRegs,
- allArgRegs,
- callClobberedRegs,
- allMachRegNos,
- regClass,
- showReg,
-
- -- machine specific things
-#if powerpc_TARGET_ARCH
- allFPArgRegs,
- fits16Bits,
- makeImmediate,
- fReg,
- sp, r3, r4, r27, r28, f1, f20, f21,
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
- EABase(..), EAIndex(..), addrModeRegs,
-
- eax, ebx, ecx, edx, esi, edi, ebp, esp,
- fake0, fake1, fake2, fake3, fake4, fake5,
- rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
- r8, r9, r10, r11, r12, r13, r14, r15,
- xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
- xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
- xmm,
-
- ripRel,
- allFPArgRegs,
-#elif sparc_TARGET_ARCH
- fpRel,
- fits13Bits,
- largeOffsetError,
- gReg, iReg, lReg, oReg, fReg,
- fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
- nCG_FirstFloatReg,
-#endif
- -- horror show
- freeReg,
- globalRegMaybe
-)
+module RegAlloc.Graph.TrivColorable (
+ trivColorable,
+)
where
#include "HsVersions.h"
-#include "../includes/MachRegs.h"
-
-import Cmm
-import CgUtils ( get_GlobalReg_addr )
-import Outputable ( Outputable(..), pprPanic )
-import qualified Outputable
-import Panic
-import Unique
-import UniqSet
-import FastTypes
-import FastBool
-import UniqFM
-
-
-import RegsBase
-
-#if alpha_TARGET_ARCH
-import Alpha.Regs
-#elif powerpc_TARGET_ARCH
-import PPC.Regs
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.Regs
-#elif sparc_TARGET_ARCH
-import SPARC.Regs
-#else
-#error "Regs: not defined for this architecture"
-#endif
+import RegClass
+import Reg
+import GraphBase
-instance Show Reg where
- show (RealReg i) = showReg i
- show (VirtualRegI u) = "%vI_" ++ show u
- show (VirtualRegHi u) = "%vHi_" ++ show u
- show (VirtualRegF u) = "%vF_" ++ show u
- show (VirtualRegD u) = "%vD_" ++ show u
-
-instance Outputable Reg where
- ppr r = Outputable.text (show r)
-
-
--- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
--- when supplied with the vreg for the lower-half of the quantity.
--- (NB. Not reversible).
-getHiVRegFromLo :: Reg -> Reg
-getHiVRegFromLo (VirtualRegI u)
- = VirtualRegHi (newTagUnique u 'H') -- makes a pseudo-unique with tag 'H'
-
-getHiVRegFromLo other
- = pprPanic "getHiVRegFromLo" (ppr other)
-
--- -----------------------------------------------------------------------------
--- Global registers
-
--- We map STG registers onto appropriate CmmExprs. Either they map
--- to real machine registers or stored as offsets from BaseReg. Given
--- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
--- register it is in, on this platform, or a CmmExpr denoting the
--- address in the register table holding it.
--- (See also get_GlobalReg_addr in CgUtils.)
-
-get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
-get_GlobalReg_reg_or_addr mid
- = case globalRegMaybe mid of
- Just rr -> Left rr
- Nothing -> Right (get_GlobalReg_addr mid)
-
+import UniqFM
+import FastTypes
+{-
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs to which we are prepared to let the
-- register allocator attempt to map VRegs.
@@ -187,7 +44,7 @@ allocatableRegsDouble :: Int
allocatableRegsDouble
= length $ filter (\r -> regClass r == RcDouble)
$ map RealReg allocatableRegs
-
+-}
-- trivColorable ---------------------------------------------------------------
@@ -277,8 +134,11 @@ worst n classN classC
#error ToDo: define ALLOCATABLE_REGS_INTEGER and ALLOCATABLE_REGS_DOUBLE
#endif
-trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
-trivColorable _ conflicts exclusions
+trivColorable
+ :: (Reg -> RegClass)
+ -> Triv Reg RegClass Reg
+
+trivColorable regClass _ conflicts exclusions
= {-# SCC "trivColorable" #-}
let
isSqueesed cI cF ufm
@@ -314,5 +174,3 @@ trivColorable _ conflicts exclusions
(# True, _, _ #)
-> False
-
-
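With the class function abstracted out, a backend can close over its own mapping once and hand the resulting triviality test to the colourer. A one-line sketch, assuming the targetRegClass from the TargetReg module that the linear allocator below imports:

    -- Sketch only: the per-target triviality test is just a partial
    -- application of the generic one.
    targetTrivColorable :: Triv Reg RegClass Reg
    targetTrivColorable = trivColorable targetRegClass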
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs
index 60d0175c94..45fd640804 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Base.hs
@@ -21,7 +21,7 @@ where
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.StackMap
import RegAlloc.Liveness
-import Regs
+import Reg
import Outputable
import Unique
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
index bee8c98c61..b357160c96 100644
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
@@ -5,7 +5,8 @@ module RegAlloc.Linear.FreeRegs (
releaseReg,
initFreeRegs,
getFreeRegs,
- allocateReg
+ allocateReg,
+ maxSpillSlots
)
#include "HsVersions.h"
@@ -27,12 +28,15 @@ where
#if defined(powerpc_TARGET_ARCH)
import RegAlloc.Linear.PPC.FreeRegs
+import PPC.Instr (maxSpillSlots)
#elif defined(sparc_TARGET_ARCH)
import RegAlloc.Linear.SPARC.FreeRegs
+import SPARC.Instr (maxSpillSlots)
#elif defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
import RegAlloc.Linear.X86.FreeRegs
+import X86.Instr (maxSpillSlots)
#else
#error "RegAlloc.Linear.FreeRegs not defined for this architecture."
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
index d3f821b8aa..7d2cbcd7a7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
@@ -19,13 +19,11 @@ import RegAlloc.Linear.State
import RegAlloc.Linear.Base
import RegAlloc.Linear.FreeRegs
import RegAlloc.Liveness
+import Instruction
+import Reg
import BlockId
-import Instrs
-import Regs
-import RegAllocInfo
import Cmm hiding (RegSet)
-
import Digraph
import Outputable
import Unique
@@ -37,39 +35,41 @@ import UniqSet
-- vregs are in the correct regs for its destination.
--
joinToTargets
- :: BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ :: Instruction instr
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
-> BlockId -- ^ id of the current block
- -> Instr -- ^ branch instr on the end of the source block.
+ -> instr -- ^ branch instr on the end of the source block.
- -> RegM ([NatBasicBlock] -- fresh blocks of fixup code.
- , Instr) -- the original branch instruction, but maybe patched to jump
+ -> RegM ([NatBasicBlock instr] -- fresh blocks of fixup code.
+ , instr) -- the original branch instruction, but maybe patched to jump
-- to a fixup block first.
joinToTargets block_live id instr
-- we only need to worry about jump instructions.
- | not $ isJumpish instr
+ | not $ isJumpishInstr instr
= return ([], instr)
| otherwise
- = joinToTargets' block_live [] id instr (jumpDests instr [])
+ = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
-----
joinToTargets'
- :: BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
+ :: Instruction instr
+ => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
-- that are known to be live on the entry to each block.
- -> [NatBasicBlock] -- ^ acc blocks of fixup code.
+ -> [NatBasicBlock instr] -- ^ acc blocks of fixup code.
-> BlockId -- ^ id of the current block
- -> Instr -- ^ branch instr on the end of the source block.
+ -> instr -- ^ branch instr on the end of the source block.
-> [BlockId] -- ^ branch destinations still to consider.
- -> RegM ( [NatBasicBlock]
- , Instr)
+ -> RegM ( [NatBasicBlock instr]
+ , instr)
-- no more targets to consider. all done.
joinToTargets' _ new_blocks _ instr []
@@ -173,7 +173,7 @@ joinToTargets_again
-- then that will jump to our original destination.
fixup_block_id <- getUniqueR
let block = BasicBlock (BlockId fixup_block_id)
- $ fixUpInstrs ++ mkBranchInstr dest
+ $ fixUpInstrs ++ mkJumpInstr dest
{- pprTrace
("joinToTargets: fixup code is:")
@@ -187,7 +187,11 @@ joinToTargets_again
-- patch the original branch instruction so it goes to our
-- fixup block instead.
- _ -> let instr' = patchJump instr dest (BlockId fixup_block_id)
+ _ -> let instr' = patchJumpInstr instr
+ (\bid -> if bid == dest
+ then BlockId fixup_block_id
+ else bid)
+
in joinToTargets' block_live (block : new_blocks) block_id instr' dests
@@ -256,7 +260,9 @@ expandNode vreg src dst
-- destinations. We have eliminated any possibility of single-node
-- cycles in expandNode above.
--
-handleComponent :: Int -> Instr -> SCC (Unique, Loc, [Loc]) -> RegM [Instr]
+handleComponent
+ :: Instruction instr
+ => Int -> instr -> SCC (Unique, Loc, [Loc]) -> RegM [instr]
-- If the graph is acyclic then we won't get the swapping problem below.
-- In this case we can just do the moves directly, and avoid having to
@@ -305,11 +311,12 @@ handleComponent _ _ (CyclicSCC _)
-- | Move a vreg between these two locations.
--
makeMove
- :: Int -- ^ current C stack delta.
+ :: Instruction instr
+ => Int -- ^ current C stack delta.
-> Unique -- ^ unique of the vreg that we're moving.
-> Loc -- ^ source location.
-> Loc -- ^ destination location.
- -> RegM Instr -- ^ move instruction.
+ -> RegM instr -- ^ move instruction.
makeMove _ vreg (InReg src) (InReg dst)
= do recordSpill (SpillJoinRR vreg)
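For reference, the slice of the Instruction class that this module now leans on, reconstructed from the call sites above; the real signatures live in nativeGen/Instruction, so treat this as a sketch rather than the definitive class:

    -- Sketch, inferred from the call sites in this hunk.
    class Instruction instr where
            isJumpishInstr   :: instr -> Bool
            jumpDestsOfInstr :: instr -> [BlockId]
            patchJumpInstr   :: instr -> (BlockId -> BlockId) -> instr
            mkJumpInstr      :: BlockId -> [instr]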
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index bfd9ca543e..47529d2c96 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -96,14 +96,14 @@ import RegAlloc.Linear.StackMap
import RegAlloc.Linear.FreeRegs
import RegAlloc.Linear.Stats
import RegAlloc.Linear.JoinToTargets
+import TargetReg
import RegAlloc.Liveness
+import Instruction
+import Reg
-- import PprMach
import BlockId
-import Regs
-import Instrs
-import RegAllocInfo
import Cmm hiding (RegSet)
import Digraph
@@ -112,7 +112,6 @@ import UniqSet
import UniqFM
import UniqSupply
import Outputable
-import FastString
import Data.Maybe
import Data.List
@@ -126,8 +125,9 @@ import Control.Monad
-- Allocate registers
regAlloc
- :: LiveCmmTop
- -> UniqSM (NatCmmTop, Maybe RegAllocStats)
+ :: (Outputable instr, Instruction instr)
+ => LiveCmmTop instr
+ -> UniqSM (NatCmmTop instr, Maybe RegAllocStats)
regAlloc (CmmData sec d)
= return
@@ -171,10 +171,11 @@ regAlloc (CmmProc _ _ _ _)
-- an entry in the block map or it is the first block.
--
linearRegAlloc
- :: BlockId -- ^ the first block
+ :: (Outputable instr, Instruction instr)
+ => BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
- -> [SCC LiveBasicBlock] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock], RegAllocStats)
+ -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats)
linearRegAlloc first_id block_live sccs
= do us <- getUs
@@ -234,9 +235,10 @@ process first_id block_live (b@(BasicBlock id _) : blocks) next_round accum
-- | Do register allocation on this basic block
--
processBlock
- :: BlockMap RegSet -- ^ live regs on entry to each basic block
- -> LiveBasicBlock -- ^ block to do register allocation on
- -> RegM [NatBasicBlock] -- ^ block with registers allocated
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ live regs on entry to each basic block
+ -> LiveBasicBlock instr -- ^ block to do register allocation on
+ -> RegM [NatBasicBlock instr] -- ^ block with registers allocated
processBlock block_live (BasicBlock id instrs)
= do initBlock id
@@ -265,20 +267,21 @@ initBlock id
-- | Do allocation for a sequence of instructions.
linearRA
- :: BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
- -> [Instr] -- ^ accumulator for instructions already processed.
- -> [NatBasicBlock] -- ^ accumulator for blocks of fixup code.
- -> BlockId -- ^ id of the current block, for debugging.
- -> [LiveInstr] -- ^ liveness annotated instructions in this block.
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
+ -> BlockId -- ^ id of the current block, for debugging.
+ -> [LiveInstr instr] -- ^ liveness annotated instructions in this block.
- -> RegM ( [Instr] -- instructions after register allocation
- , [NatBasicBlock]) -- fresh blocks of fixup code.
+ -> RegM ( [instr] -- instructions after register allocation
+ , [NatBasicBlock instr]) -- fresh blocks of fixup code.
linearRA _ accInstr accFixup _ []
= return
- ( reverse accInstr -- instrs need to be returned in the correct order.
- , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
+ ( reverse accInstr -- instrs need to be returned in the correct order.
+ , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
linearRA block_live accInstr accFixups id (instr:instrs)
@@ -291,21 +294,24 @@ linearRA block_live accInstr accFixups id (instr:instrs)
-- | Do allocation for a single instruction.
raInsn
- :: BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
- -> [Instr] -- ^ accumulator for instructions already processed.
- -> BlockId -- ^ the id of the current block, for debugging
- -> LiveInstr -- ^ the instr to have its regs allocated, with liveness info.
+ :: (Outputable instr, Instruction instr)
+ => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
+ -> [instr] -- ^ accumulator for instructions already processed.
+ -> BlockId -- ^ the id of the current block, for debugging
+ -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info.
-> RegM
- ( [Instr] -- new instructions
- , [NatBasicBlock]) -- extra fixup blocks
+ ( [instr] -- new instructions
+ , [NatBasicBlock instr]) -- extra fixup blocks
-raInsn _ new_instrs _ (Instr (COMMENT _) Nothing)
- = return (new_instrs, [])
+raInsn _ new_instrs _ (Instr ii Nothing)
+ | Just n <- takeDeltaInstr ii
+ = do setDeltaR n
+ return (new_instrs, [])
+
+raInsn _ new_instrs _ (Instr ii Nothing)
+ | isMetaInstr ii
+ = return (new_instrs, [])
-raInsn _ new_instrs _ (Instr (DELTA n) Nothing)
- = do
- setDeltaR n
- return (new_instrs, [])
raInsn block_live new_instrs id (Instr instr (Just live))
= do
@@ -318,7 +324,7 @@ raInsn block_live new_instrs id (Instr instr (Just live))
-- then we can eliminate the instruction.
-- (we can't eliminate it if the source register is on the stack, because
-- we do not want to use one spill slot for different virtual registers)
- case isRegRegMove instr of
+ case takeRegRegMoveInstr instr of
Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
isVirtualReg dst,
not (dst `elemUFM` assig),
@@ -354,7 +360,7 @@ raInsn _ _ _ instr
genRaInsn block_live new_instrs block_id instr r_dying w_dying =
- case regUsage instr of { RU read written ->
+ case regUsageOfInstr instr of { RU read written ->
case partition isRealReg written of { (real_written1,virt_written) ->
do
let
@@ -410,7 +416,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
(t,r) <- zip virt_read r_allocd
++ zip virt_written w_allocd ]
- patched_instr = patchRegs adjusted_instr patchLookup
+ patched_instr = patchRegsOfInstr adjusted_instr patchLookup
patchLookup x = case lookupUFM patch_map x of
Nothing -> x
Just y -> y
@@ -424,7 +430,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying =
-- erase reg->reg moves where the source and destination are the same.
-- If the src temp didn't die in this instr but happened to be allocated
-- to the same real reg as the destination, then we can erase the move anyway.
- let squashed_instr = case isRegRegMove patched_instr of
+ let squashed_instr = case takeRegRegMoveInstr patched_instr of
Just (src, dst)
| src == dst -> []
_ -> [patched_instr]
@@ -473,10 +479,11 @@ for allocateRegs on the temps *written*,
-}
saveClobberedTemps
- :: [RegNo] -- real registers clobbered by this instruction
- -> [Reg] -- registers which are no longer live after this insn
- -> RegM [Instr] -- return: instructions to spill any temps that will
- -- be clobbered.
+ :: Instruction instr
+ => [RegNo] -- real registers clobbered by this instruction
+ -> [Reg] -- registers which are no longer live after this insn
+ -> RegM [instr] -- return: instructions to spill any temps that will
+ -- be clobbered.
saveClobberedTemps [] _ = return [] -- common case
saveClobberedTemps clobbered dying = do
@@ -498,7 +505,7 @@ saveClobberedTemps clobbered dying = do
recordSpill (SpillClobber temp)
let new_assign = addToUFM assig temp (InBoth reg slot)
- clobber new_assign (spill : COMMENT (fsLit "spill clobber") : instrs) rest
+ clobber new_assign (spill : {- COMMENT (fsLit "spill clobber") : -} instrs) rest
clobberRegs :: [RegNo] -> RegM ()
clobberRegs [] = return () -- common case
@@ -533,12 +540,13 @@ clobberRegs clobbered = do
-- the list of free registers and free stack slots.
allocateRegsAndSpill
- :: Bool -- True <=> reading (load up spilled regs)
+ :: Instruction instr
+ => Bool -- True <=> reading (load up spilled regs)
-> [Reg] -- don't push these out
- -> [Instr] -- spill insns
+ -> [instr] -- spill insns
-> [RegNo] -- real registers allocated (accum.)
-> [Reg] -- temps to allocate
- -> RegM ([Instr], [RegNo])
+ -> RegM ([instr], [RegNo])
allocateRegsAndSpill _ _ spills alloc []
= return (spills,reverse alloc)
@@ -563,7 +571,7 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
loc -> do
freeregs <- getFreeRegsR
- case getFreeRegs (regClass r) freeregs of
+ case getFreeRegs (targetRegClass r) freeregs of
-- case (2): we have a free register
my_reg:_ -> {- pprTrace "alloc" (ppr r <+> ppr my_reg <+> ppr freeClass) $ -}
@@ -582,10 +590,10 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
keep' = map getUnique keep
candidates1 = [ (temp,reg,mem)
| (temp, InBoth reg mem) <- ufmToList assig,
- temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ]
candidates2 = [ (temp,reg)
| (temp, InReg reg) <- ufmToList assig,
- temp `notElem` keep', regClass (RealReg reg) == regClass r ]
+ temp `notElem` keep', targetRegClass (RealReg reg) == targetRegClass r ]
-- in
ASSERT2(not (null candidates1 && null candidates2),
text (show freeregs) <+> ppr r <+> ppr assig) do
@@ -622,8 +630,8 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
(spill_insn, slot) <- spillR (RealReg my_reg) temp_to_push_out
let spill_store = (if reading then id else reverse)
- [ COMMENT (fsLit "spill alloc")
- , spill_insn ]
+ [ -- COMMENT (fsLit "spill alloc")
+ spill_insn ]
-- record that this temp was spilled
recordSpill (SpillAlloc temp_to_push_out)
@@ -643,18 +651,19 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) = do
-- | Load up a spilled temporary if we need to.
loadTemp
- :: Bool
+ :: Instruction instr
+ => Bool
-> Reg -- the temp being loaded
-> Maybe Loc -- the current location of this temp
-> RegNo -- the hreg to load the temp into
- -> [Instr]
- -> RegM [Instr]
+ -> [instr]
+ -> RegM [instr]
loadTemp True vreg (Just (InMem slot)) hreg spills
= do
insn <- loadR (RealReg hreg) slot
recordSpill (SpillLoad $ getUnique vreg)
- return $ COMMENT (fsLit "spill load") : insn : spills
+ return $ {- COMMENT (fsLit "spill load") : -} insn : spills
loadTemp _ _ _ _ spills =
return spills
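The allocator no longer matches on arch constructors like COMMENT and DELTA; it queries the instruction through takeDeltaInstr and isMetaInstr instead. A sketch of how a backend whose instruction type keeps the old constructors (as the deleted Instrs code did) might implement the two methods; the constructor names are carried over from the removed code, not confirmed for every port:

    -- Sketch only: plausible method bodies for a backend that keeps the
    -- COMMENT/DELTA constructors of the old Instrs module.
    takeDeltaInstr :: Instr -> Maybe Int
    takeDeltaInstr (DELTA i) = Just i
    takeDeltaInstr _         = Nothing

    isMetaInstr :: Instr -> Bool
    isMetaInstr COMMENT{} = True
    isMetaInstr DELTA{}   = True
    isMetaInstr _         = False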
diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
index 6d8809df52..878bfe313a 100644
--- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.PPC.FreeRegs
where
-import Regs
+import PPC.Regs
+import RegClass
+import Reg
import Outputable
diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
index aa716b585f..5514056318 100644
--- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.SPARC.FreeRegs
where
-import Regs
+import SPARC.Regs
+import RegClass
+import Reg
import Outputable
import FastBool
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index 56569415bb..62bf6adb2a 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -19,7 +19,7 @@ module RegAlloc.Linear.StackMap (
where
-import RegAllocInfo (maxSpillSlots)
+import RegAlloc.Linear.FreeRegs
import Outputable
import UniqFM
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs
index 94a8f7b52e..b9f7049844 100644
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/State.hs
@@ -34,11 +34,8 @@ import RegAlloc.Linear.StackMap
import RegAlloc.Linear.Base
import RegAlloc.Linear.FreeRegs
import RegAlloc.Liveness
-
-
-import Instrs
-import Regs
-import RegAllocInfo
+import Instruction
+import Reg
import Unique
import UniqSupply
@@ -85,14 +82,19 @@ makeRAStats state
{ ra_spillInstrs = binSpillReasons (ra_spills state) }
-spillR :: Reg -> Unique -> RegM (Instr, Int)
+spillR :: Instruction instr
+ => Reg -> Unique -> RegM (instr, Int)
+
spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack} ->
let (stack',slot) = getStackSlotFor stack temp
instr = mkSpillInstr reg delta slot
in
(# s{ra_stack=stack'}, (instr,slot) #)
-loadR :: Reg -> Int -> RegM Instr
+
+loadR :: Instruction instr
+ => Reg -> Int -> RegM instr
+
loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
(# s, mkLoadInstr reg delta slot #)
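spillR and loadR now build their instructions through the class methods mkSpillInstr and mkLoadInstr rather than arch-specific code. From the call sites here and in stripLiveBlock below, the argument order appears to be register, current stack delta, then slot; a sketch of the implied signatures (an assumption, not the definitive class):

    -- Sketch, inferred from the call sites: reg, delta, slot.
    mkSpillInstr :: Reg -> Int -> Int -> instr
    mkLoadInstr  :: Reg -> Int -> Int -> instr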
diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
index 95bf8ede82..137168e942 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
@@ -8,9 +8,8 @@ where
import RegAlloc.Linear.Base
import RegAlloc.Liveness
+import Instruction
-import RegAllocInfo
-import Instrs
import Cmm (GenBasicBlock(..))
import UniqFM
@@ -36,7 +35,10 @@ binSpillReasons reasons
-- | Count reg-reg moves remaining in this code.
-countRegRegMovesNat :: NatCmmTop -> Int
+countRegRegMovesNat
+ :: Instruction instr
+ => NatCmmTop instr -> Int
+
countRegRegMovesNat cmm
= execState (mapGenBlockTopM countBlock cmm) 0
where
@@ -45,7 +47,7 @@ countRegRegMovesNat cmm
return b
countInstr instr
- | Just _ <- isRegRegMove instr
+ | Just _ <- takeRegRegMoveInstr instr
= do modify (+ 1)
return instr
@@ -54,7 +56,10 @@ countRegRegMovesNat cmm
-- | Pretty print some RegAllocStats
-pprStats :: [NatCmmTop] -> [RegAllocStats] -> SDoc
+pprStats
+ :: Instruction instr
+ => [NatCmmTop instr] -> [RegAllocStats] -> SDoc
+
pprStats code statss
= let -- sum up all the instrs inserted by the spiller
spills = foldl' (plusUFM_C (zipWith (+)))
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
index 1306deb159..eedaca8cc0 100644
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
@@ -3,7 +3,9 @@
module RegAlloc.Linear.X86.FreeRegs
where
-import Regs
+import X86.Regs
+import RegClass
+import Reg
import Data.Word
import Data.Bits
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 8445034ab9..8faab5af92 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -20,7 +20,7 @@ module RegAlloc.Liveness (
mapBlockTop, mapBlockTopM,
mapGenBlockTop, mapGenBlockTopM,
stripLive,
- spillNatBlock,
+ stripLiveBlock,
slurpConflicts,
slurpReloadCoalesce,
eraseDeltasLive,
@@ -30,12 +30,13 @@ module RegAlloc.Liveness (
) where
+
+import Reg
+import Instruction
+
import BlockId
-import Regs
-import Instrs
-import PprMach
-import RegAllocInfo
import Cmm hiding (RegSet)
+import PprCmm()
import Digraph
import Outputable
@@ -65,18 +66,25 @@ emptyBlockMap = emptyBlockEnv
-- | A top level thing which carries liveness information.
-type LiveCmmTop
+type LiveCmmTop instr
= GenCmmTop
CmmStatic
LiveInfo
- (ListGraph (GenBasicBlock LiveInstr))
+ (ListGraph (GenBasicBlock (LiveInstr instr)))
-- the "instructions" here are actually more blocks,
-- single blocks are acyclic
-- multiple blocks are taken to be cyclic.
-- | An instruction with liveness information.
-data LiveInstr
- = Instr Instr (Maybe Liveness)
+data LiveInstr instr
+ = Instr instr (Maybe Liveness)
+
+ -- | spill this reg to a stack slot
+ | SPILL Reg Int
+
+ -- | reload this reg from a stack slot
+ | RELOAD Int Reg
+
-- | Liveness information.
-- The regs which die are ones which are no longer live in the *next* instruction
@@ -100,11 +108,28 @@ data LiveInfo
(BlockMap RegSet) -- argument locals live on entry to this block
-- | A basic block with liveness information.
-type LiveBasicBlock
- = GenBasicBlock LiveInstr
+type LiveBasicBlock instr
+ = GenBasicBlock (LiveInstr instr)
+
+
+instance Outputable instr
+ => Outputable (LiveInstr instr) where
+ ppr (SPILL reg slot)
+ = hcat [
+ ptext (sLit "\tSPILL"),
+ char ' ',
+ ppr reg,
+ comma,
+ ptext (sLit "SLOT") <> parens (int slot)]
+
+ ppr (RELOAD slot reg)
+ = hcat [
+ ptext (sLit "\tRELOAD"),
+ char ' ',
+ ptext (sLit "SLOT") <> parens (int slot),
+ comma,
+ ppr reg]
-
-instance Outputable LiveInstr where
ppr (Instr instr Nothing)
= ppr instr
@@ -120,8 +145,7 @@ instance Outputable LiveInstr where
where pprRegs :: SDoc -> RegSet -> SDoc
pprRegs name regs
| isEmptyUniqSet regs = empty
- | otherwise = name <> (hcat $ punctuate space $ map (docToSDoc . pprUserReg) $ uniqSetToList regs)
-
+ | otherwise = name <> (hcat $ punctuate space $ map ppr $ uniqSetToList regs)
instance Outputable LiveInfo where
ppr (LiveInfo static firstId liveOnEntry)
@@ -130,11 +154,12 @@ instance Outputable LiveInfo where
$$ text "# liveOnEntry = " <> ppr liveOnEntry
+
-- | map a function across all the basic blocks in this code
--
mapBlockTop
- :: (LiveBasicBlock -> LiveBasicBlock)
- -> LiveCmmTop -> LiveCmmTop
+ :: (LiveBasicBlock instr -> LiveBasicBlock instr)
+ -> LiveCmmTop instr -> LiveCmmTop instr
mapBlockTop f cmm
= evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
@@ -144,8 +169,8 @@ mapBlockTop f cmm
--
mapBlockTopM
:: Monad m
- => (LiveBasicBlock -> m LiveBasicBlock)
- -> LiveCmmTop -> m LiveCmmTop
+ => (LiveBasicBlock instr -> m (LiveBasicBlock instr))
+ -> LiveCmmTop instr -> m (LiveCmmTop instr)
mapBlockTopM _ cmm@(CmmData{})
= return cmm
@@ -187,7 +212,11 @@ mapGenBlockTopM f (CmmProc header label params (ListGraph blocks))
-- Slurping of conflicts and moves is wrapped up together so we don't have
-- to make two passes over the same code when we want to build the graph.
--
-slurpConflicts :: LiveCmmTop -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+slurpConflicts
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> (Bag (UniqSet Reg), Bag (Reg, Reg))
+
slurpConflicts live
= slurpCmm (emptyBag, emptyBag) live
@@ -205,12 +234,20 @@ slurpConflicts live
= (consBag rsLiveEntry conflicts, moves)
| otherwise
- = panic "RegLiveness.slurpBlock: bad block"
+ = panic "Liveness.slurpConflicts: bad block"
slurpLIs rsLive (conflicts, moves) []
= (consBag rsLive conflicts, moves)
- slurpLIs rsLive rs (Instr _ Nothing : lis) = slurpLIs rsLive rs lis
+ slurpLIs rsLive rs (Instr _ Nothing : lis)
+ = slurpLIs rsLive rs lis
+
+ -- we're not expecting to be slurping conflicts from spilled code
+ slurpLIs _ _ (SPILL _ _ : _)
+ = panic "Liveness.slurpConflicts: unexpected SPILL"
+
+ slurpLIs _ _ (RELOAD _ _ : _)
+ = panic "Liveness.slurpConflicts: unexpected RELOAD"
slurpLIs rsLiveEntry (conflicts, moves) (Instr instr (Just live) : lis)
= let
@@ -234,7 +271,7 @@ slurpConflicts live
--
rsConflicts = unionUniqSets rsLiveNext rsOrphans
- in case isRegRegMove instr of
+ in case takeRegRegMoveInstr instr of
Just rr -> slurpLIs rsLiveNext
( consBag rsConflicts conflicts
, consBag rr moves) lis
@@ -254,7 +291,11 @@ slurpConflicts live
-- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
--
--
-slurpReloadCoalesce :: LiveCmmTop -> Bag (Reg, Reg)
+slurpReloadCoalesce
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> Bag (Reg, Reg)
+
slurpReloadCoalesce live
= slurpCmm emptyBag live
@@ -285,23 +326,24 @@ slurpReloadCoalesce live
(_, mMoves) <- mapAccumLM slurpLI slotMap instrs
return $ listToBag $ catMaybes mMoves
- slurpLI :: UniqFM Reg -- current slotMap
- -> LiveInstr
+ slurpLI :: Instruction instr
+ => UniqFM Reg -- current slotMap
+ -> LiveInstr instr
-> State (UniqFM [UniqFM Reg]) -- blockId -> [slot -> reg]
-- for tracking slotMaps across jumps
( UniqFM Reg -- new slotMap
, Maybe (Reg, Reg)) -- maybe a new coalesce edge
- slurpLI slotMap (Instr instr _)
+ slurpLI slotMap li
-- remember what reg was stored into the slot
- | SPILL reg slot <- instr
+ | SPILL reg slot <- li
, slotMap' <- addToUFM slotMap slot reg
= return (slotMap', Nothing)
-- add an edge between this reg and the last one stored into the slot
- | RELOAD slot reg <- instr
+ | RELOAD slot reg <- li
= case lookupUFM slotMap slot of
Just reg2
| reg /= reg2 -> return (slotMap, Just (reg, reg2))
@@ -310,7 +352,8 @@ slurpReloadCoalesce live
Nothing -> return (slotMap, Nothing)
-- if we hit a jump, remember the current slotMap
- | targets <- jumpDests instr []
+ | Instr instr _ <- li
+ , targets <- jumpDestsOfInstr instr
, not $ null targets
= do mapM_ (accSlotMap slotMap) targets
return (slotMap, Nothing)
@@ -340,7 +383,11 @@ slurpReloadCoalesce live
-- | Strip away liveness information, yielding NatCmmTop
-stripLive :: LiveCmmTop -> NatCmmTop
+stripLive
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> NatCmmTop instr
+
stripLive live
= stripCmm live
@@ -349,26 +396,26 @@ stripLive live
= CmmProc info label params
(ListGraph $ concatMap stripComp comps)
- stripComp (BasicBlock _ blocks) = map stripBlock blocks
- stripBlock (BasicBlock i instrs) = BasicBlock i (map stripLI instrs)
- stripLI (Instr instr _) = instr
+ stripComp (BasicBlock _ blocks) = map stripLiveBlock blocks
--- | Make real spill instructions out of SPILL, RELOAD pseudos
+-- | Strip away liveness information from a basic block,
+-- and make real spill instructions out of SPILL, RELOAD pseudos along the way.
-spillNatBlock :: NatBasicBlock -> NatBasicBlock
-spillNatBlock (BasicBlock i is)
+stripLiveBlock
+ :: Instruction instr
+ => LiveBasicBlock instr
+ -> NatBasicBlock instr
+
+stripLiveBlock (BasicBlock i lis)
= BasicBlock i instrs'
+
where (instrs', _)
- = runState (spillNat [] is) 0
+ = runState (spillNat [] lis) 0
spillNat acc []
= return (reverse acc)
- spillNat acc (DELTA i : instrs)
- = do put i
- spillNat acc instrs
-
spillNat acc (SPILL reg slot : instrs)
= do delta <- get
spillNat (mkSpillInstr reg delta slot : acc) instrs
@@ -377,22 +424,28 @@ spillNatBlock (BasicBlock i is)
= do delta <- get
spillNat (mkLoadInstr reg delta slot : acc) instrs
- spillNat acc (instr : instrs)
+ spillNat acc (Instr instr _ : instrs)
+ | Just i <- takeDeltaInstr instr
+ = do put i
+ spillNat acc instrs
+
+ spillNat acc (Instr instr _ : instrs)
= spillNat (instr : acc) instrs
-- | Erase Delta instructions.
-eraseDeltasLive :: LiveCmmTop -> LiveCmmTop
+eraseDeltasLive
+ :: Instruction instr
+ => LiveCmmTop instr
+ -> LiveCmmTop instr
+
eraseDeltasLive cmm
= mapBlockTop eraseBlock cmm
where
- isDelta (DELTA _) = True
- isDelta _ = False
-
eraseBlock (BasicBlock id lis)
= BasicBlock id
- $ filter (\(Instr i _) -> not $ isDelta i)
+ $ filter (\(Instr i _) -> not $ isJust $ takeDeltaInstr i)
$ lis
@@ -401,8 +454,9 @@ eraseDeltasLive cmm
-- also erase reg -> reg moves when the destination dies in this instr.
patchEraseLive
- :: (Reg -> Reg)
- -> LiveCmmTop -> LiveCmmTop
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveCmmTop instr -> LiveCmmTop instr
patchEraseLive patchF cmm
= patchCmm cmm
@@ -427,7 +481,7 @@ patchEraseLive patchF cmm
patchInstrs (li : lis)
| Instr i (Just live) <- li'
- , Just (r1, r2) <- isRegRegMove i
+ , Just (r1, r2) <- takeRegRegMoveInstr i
, eatMe r1 r2 live
= patchInstrs lis
@@ -451,30 +505,38 @@ patchEraseLive patchF cmm
-- | Patch registers in this LiveInstr, including the liveness information.
--
patchRegsLiveInstr
- :: (Reg -> Reg)
- -> LiveInstr -> LiveInstr
+ :: Instruction instr
+ => (Reg -> Reg)
+ -> LiveInstr instr -> LiveInstr instr
patchRegsLiveInstr patchF li
= case li of
Instr instr Nothing
- -> Instr (patchRegs instr patchF) Nothing
+ -> Instr (patchRegsOfInstr instr patchF) Nothing
Instr instr (Just live)
-> Instr
- (patchRegs instr patchF)
+ (patchRegsOfInstr instr patchF)
(Just live
{ -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
liveBorn = mkUniqSet $ map patchF $ uniqSetToList $ liveBorn live
, liveDieRead = mkUniqSet $ map patchF $ uniqSetToList $ liveDieRead live
, liveDieWrite = mkUniqSet $ map patchF $ uniqSetToList $ liveDieWrite live })
+ SPILL reg slot
+ -> SPILL (patchF reg) slot
+
+ RELOAD slot reg
+ -> RELOAD slot (patchF reg)
+
---------------------------------------------------------------------------------
-- Annotate code with register liveness information
--
regLiveness
- :: NatCmmTop
- -> UniqSM LiveCmmTop
+ :: Instruction instr
+ => NatCmmTop instr
+ -> UniqSM (LiveCmmTop instr)
regLiveness (CmmData i d)
= returnUs $ CmmData i d
@@ -501,11 +563,15 @@ regLiveness (CmmProc info lbl params (ListGraph blocks@(first : _)))
lbl params (ListGraph liveBlocks)
-sccBlocks :: [NatBasicBlock] -> [SCC NatBasicBlock]
+sccBlocks
+ :: Instruction instr
+ => [NatBasicBlock instr]
+ -> [SCC (NatBasicBlock instr)]
+
sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
where
- getOutEdges :: [Instr] -> [BlockId]
- getOutEdges instrs = foldl' (\a x -> jumpDests x a) [] instrs
+ getOutEdges :: Instruction instr => [instr] -> [BlockId]
+ getOutEdges instrs = concat $ map jumpDestsOfInstr instrs
graph = [ (block, getUnique id, map getUnique (getOutEdges instrs))
| block@(BasicBlock id instrs) <- blocks ]
@@ -515,12 +581,13 @@ sccBlocks blocks = stronglyConnCompFromEdgedVertices graph
-- Computing liveness
computeLiveness
- :: [SCC NatBasicBlock]
- -> ([SCC LiveBasicBlock], -- instructions annotated with list of registers
- -- which are "dead after this instruction".
- BlockMap RegSet) -- blocks annontated with set of live registers
- -- on entry to the block.
-
+ :: Instruction instr
+ => [SCC (NatBasicBlock instr)]
+ -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers
+ -- which are "dead after this instruction".
+ BlockMap RegSet) -- blocks annotated with set of live registers
+ -- on entry to the block.
+
-- NOTE: on entry, the SCCs are in "reverse" order: later blocks may transfer
-- control to earlier ones only. The SCCs returned are in the *opposite*
-- order, which is exactly what we want for the next pass.
@@ -530,10 +597,12 @@ computeLiveness sccs
livenessSCCs
- :: BlockMap RegSet
- -> [SCC LiveBasicBlock] -- accum
- -> [SCC NatBasicBlock]
- -> ([SCC LiveBasicBlock], BlockMap RegSet)
+ :: Instruction instr
+ => BlockMap RegSet
+ -> [SCC (LiveBasicBlock instr)] -- accum
+ -> [SCC (NatBasicBlock instr)]
+ -> ( [SCC (LiveBasicBlock instr)]
+ , BlockMap RegSet)
livenessSCCs blockmap done [] = (done, blockmap)
@@ -561,8 +630,11 @@ livenessSCCs blockmap done
(a, panic "RegLiveness.livenessSCCs")
- linearLiveness :: BlockMap RegSet -> [NatBasicBlock]
- -> (BlockMap RegSet, [LiveBasicBlock])
+ linearLiveness
+ :: Instruction instr
+ => BlockMap RegSet -> [NatBasicBlock instr]
+ -> (BlockMap RegSet, [LiveBasicBlock instr])
+
linearLiveness = mapAccumL livenessBlock
-- probably the least efficient way to compare two
@@ -578,9 +650,10 @@ livenessSCCs blockmap done
-- | Annotate a basic block with register liveness information.
--
livenessBlock
- :: BlockMap RegSet
- -> NatBasicBlock
- -> (BlockMap RegSet, LiveBasicBlock)
+ :: Instruction instr
+ => BlockMap RegSet
+ -> NatBasicBlock instr
+ -> (BlockMap RegSet, LiveBasicBlock instr)
livenessBlock blockmap (BasicBlock block_id instrs)
= let
@@ -598,8 +671,9 @@ livenessBlock blockmap (BasicBlock block_id instrs)
-- filling in when regs are born
livenessForward
- :: RegSet -- regs live on this instr
- -> [LiveInstr] -> [LiveInstr]
+ :: Instruction instr
+ => RegSet -- regs live on this instr
+ -> [LiveInstr instr] -> [LiveInstr instr]
livenessForward _ [] = []
livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
@@ -607,7 +681,7 @@ livenessForward rsLiveEntry (li@(Instr instr mLive) : lis)
= li : livenessForward rsLiveEntry lis
| Just live <- mLive
- , RU _ written <- regUsage instr
+ , RU _ written <- regUsageOfInstr instr
= let
-- Regs that are written to but weren't live on entry to this instruction
-- are recorded as being born here.
@@ -628,11 +702,12 @@ livenessForward _ _ = panic "RegLiveness.livenessForward: no match"
-- filling in when regs die, and what regs are live across each instruction
livenessBack
- :: RegSet -- regs live on this instr
+ :: Instruction instr
+ => RegSet -- regs live on this instr
-> BlockMap RegSet -- regs live on entry to other BBs
- -> [LiveInstr] -- instructions (accum)
- -> [Instr] -- instructions
- -> (RegSet, [LiveInstr])
+ -> [LiveInstr instr] -- instructions (accum)
+ -> [instr] -- instructions
+ -> (RegSet, [LiveInstr instr])
livenessBack liveregs _ done [] = (liveregs, done)
@@ -640,32 +715,37 @@ livenessBack liveregs blockmap acc (instr : instrs)
= let (liveregs', instr') = liveness1 liveregs blockmap instr
in livenessBack liveregs' blockmap (instr' : acc) instrs
--- don't bother tagging comments or deltas with liveness
-liveness1 :: RegSet -> BlockMap RegSet -> Instr -> (RegSet, LiveInstr)
-liveness1 liveregs _ (instr@COMMENT{})
- = (liveregs, Instr instr Nothing)
-liveness1 liveregs _ (instr@DELTA{})
+-- don't bother tagging comments or deltas with liveness
+liveness1
+ :: Instruction instr
+ => RegSet
+ -> BlockMap RegSet
+ -> instr
+ -> (RegSet, LiveInstr instr)
+
+liveness1 liveregs _ instr
+ | isMetaInstr instr
= (liveregs, Instr instr Nothing)
liveness1 liveregs blockmap instr
- | not_a_branch
- = (liveregs1, Instr instr
+ | not_a_branch
+ = (liveregs1, Instr instr
(Just $ Liveness
{ liveBorn = emptyUniqSet
, liveDieRead = mkUniqSet r_dying
, liveDieWrite = mkUniqSet w_dying }))
- | otherwise
- = (liveregs_br, Instr instr
+ | otherwise
+ = (liveregs_br, Instr instr
(Just $ Liveness
{ liveBorn = emptyUniqSet
, liveDieRead = mkUniqSet r_dying_br
, liveDieWrite = mkUniqSet w_dying }))
- where
- RU read written = regUsage instr
+ where
+ RU read written = regUsageOfInstr instr
-- registers that were written here are dead going backwards.
-- registers that were read here are live going backwards.
@@ -682,7 +762,7 @@ liveness1 liveregs blockmap instr
-- union in the live regs from all the jump destinations of this
-- instruction.
- targets = jumpDests instr [] -- where we go from here
+ targets = jumpDestsOfInstr instr -- where we go from here
not_a_branch = null targets
targetLiveRegs target
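Since SPILL and RELOAD are now constructors of LiveInstr itself, the pseudos can be inspected without knowing anything about the underlying instruction type. A small sketch (hypothetical helper, not in the patch; foldl' is from Data.List) that counts them in a block:

    -- Hypothetical: tally spill/reload pseudos in a live block. Works for
    -- any instr because the constructors belong to LiveInstr.
    countSpillPseudos :: LiveBasicBlock instr -> (Int, Int)
    countSpillPseudos (BasicBlock _ lis)
            = foldl' count (0, 0) lis
            where   count (s, r) (SPILL  _ _) = (s + 1, r)
                    count (s, r) (RELOAD _ _) = (s, r + 1)
                    count acc    _            = acc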
diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs
deleted file mode 100644
index f0cb8b5c12..0000000000
--- a/compiler/nativeGen/RegAllocInfo.hs
+++ /dev/null
@@ -1,94 +0,0 @@
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
------------------------------------------------------------------------------
---
--- Machine-specific parts of the register allocator
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
-
-module RegAllocInfo (
- -- shared code
- shortcutStatic,
-
- -- machine specific
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
-
- JumpDest,
- canShortcut,
- shortcutJump,
-
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- maxSpillSlots,
- spillSlotToOffset
- ) where
-
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
-import BlockId
-import Cmm
-import CLabel
-import Instrs
-import Regs
-import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-#if alpha_TARGET_ARCH
-import Alpha.RegInfo
-
-#elif i386_TARGET_ARCH || x86_64_TARGET_ARCH
-import X86.RegInfo
-
-#elif powerpc_TARGET_ARCH
-import PPC.RegInfo
-
-#elif sparc_TARGET_ARCH
-import SPARC.RegInfo
-
-#endif
-
-
--- Here because it knows about JumpDest
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- | Just uq <- maybeAsmTemp lab
- = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- | Just uq <- maybeAsmTemp lbl1
- = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
- -- slightly dodgy, we're ignoring the second label, but this
- -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic fn other_static
- = other_static
-
-shortBlockId fn blockid@(BlockId uq) =
- case fn blockid of
- Nothing -> mkAsmTempLabel uq
- Just (DestBlockId blockid') -> shortBlockId fn blockid'
- Just (DestImm (ImmCLbl lbl)) -> lbl
- _other -> panic "shortBlockId"
-
-
-
-
-
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
new file mode 100644
index 0000000000..8b6b2d4160
--- /dev/null
+++ b/compiler/nativeGen/RegClass.hs
@@ -0,0 +1,31 @@
+
+-- | An architecture-independent description of a register's class.
+module RegClass
+ ( RegClass (..) )
+
+where
+
+import Outputable
+import Unique
+
+
+-- | The class of a register.
+-- Used in the register allocator.
+-- We treat all registers in a class as being interchangeable.
+--
+data RegClass
+ = RcInteger
+ | RcFloat
+ | RcDouble
+ deriving Eq
+
+
+instance Uniquable RegClass where
+ getUnique RcInteger = mkUnique 'L' 0
+ getUnique RcFloat = mkUnique 'L' 1
+ getUnique RcDouble = mkUnique 'L' 2
+
+instance Outputable RegClass where
+ ppr RcInteger = Outputable.text "I"
+ ppr RcFloat = Outputable.text "F"
+ ppr RcDouble = Outputable.text "D"
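The Uniquable instance is what lets the allocators key UniqFM and UniqSet structures by class. A throwaway illustration (hypothetical helper, not in the patch):

    -- Hypothetical: tally registers per class, keyed via the RegClass
    -- Uniquable instance above (addToUFM_C and emptyUFM are from UniqFM,
    -- foldl' from Data.List).
    classCounts :: (Reg -> RegClass) -> [Reg] -> UniqFM Int
    classCounts regClass regs
            = foldl' (\fm r -> addToUFM_C (+) fm (regClass r) 1) emptyUFM regs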
diff --git a/compiler/nativeGen/RegsBase.hs b/compiler/nativeGen/RegsBase.hs
deleted file mode 100644
index 00c87cb917..0000000000
--- a/compiler/nativeGen/RegsBase.hs
+++ /dev/null
@@ -1,105 +0,0 @@
-
-module RegsBase (
- RegNo,
- Reg(..),
- isRealReg,
- unRealReg,
- isVirtualReg,
- renameVirtualReg,
-
- RegClass(..)
-)
-
-where
-
-import Outputable ( Outputable(..) )
-import qualified Outputable
-import Panic
-import Unique
-
--- ---------------------------------------------------------------------------
--- Registers
-
--- RealRegs are machine regs which are available for allocation, in
--- the usual way. We know what class they are, because that's part of
--- the processor's architecture.
-
--- VirtualRegs are virtual registers. The register allocator will
--- eventually have to map them into RealRegs, or into spill slots.
--- VirtualRegs are allocated on the fly, usually to represent a single
--- value in the abstract assembly code (i.e. dynamic registers are
--- usually single assignment). With the new register allocator, the
--- single assignment restriction isn't necessary to get correct code,
--- although a better register allocation will result if single
--- assignment is used -- because the allocator maps a VirtualReg into
--- a single RealReg, even if the VirtualReg has multiple live ranges.
-
--- Virtual regs can be of either class, so that info is attached.
-
-type RegNo
- = Int
-
-data Reg
- = RealReg {-# UNPACK #-} !RegNo
- | VirtualRegI {-# UNPACK #-} !Unique
- | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
- | VirtualRegF {-# UNPACK #-} !Unique
- | VirtualRegD {-# UNPACK #-} !Unique
- deriving (Eq, Ord)
-
-
--- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
--- in the register allocator.
-instance Uniquable Reg where
- getUnique (RealReg i) = mkUnique 'C' i
- getUnique (VirtualRegI u) = u
- getUnique (VirtualRegHi u) = u
- getUnique (VirtualRegF u) = u
- getUnique (VirtualRegD u) = u
-
-
-isRealReg :: Reg -> Bool
-isRealReg = not . isVirtualReg
-
--- | Take the RegNo from a real reg
-unRealReg :: Reg -> RegNo
-unRealReg (RealReg i) = i
-unRealReg _ = panic "unRealReg on VirtualReg"
-
-isVirtualReg :: Reg -> Bool
-isVirtualReg (RealReg _) = False
-isVirtualReg (VirtualRegI _) = True
-isVirtualReg (VirtualRegHi _) = True
-isVirtualReg (VirtualRegF _) = True
-isVirtualReg (VirtualRegD _) = True
-
-
-renameVirtualReg :: Unique -> Reg -> Reg
-renameVirtualReg u r
- = case r of
- RealReg _ -> error "renameVirtualReg: can't change unique on a real reg"
- VirtualRegI _ -> VirtualRegI u
- VirtualRegHi _ -> VirtualRegHi u
- VirtualRegF _ -> VirtualRegF u
- VirtualRegD _ -> VirtualRegD u
-
-
--- RegClass --------------------------------------------------------------------
-data RegClass
- = RcInteger
- | RcFloat
- | RcDouble
- deriving Eq
-
-instance Uniquable RegClass where
- getUnique RcInteger = mkUnique 'L' 0
- getUnique RcFloat = mkUnique 'L' 1
- getUnique RcDouble = mkUnique 'L' 2
-
-instance Outputable RegClass where
- ppr RcInteger = Outputable.text "I"
- ppr RcFloat = Outputable.text "F"
- ppr RcDouble = Outputable.text "D"
-
-
-
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
new file mode 100644
index 0000000000..d921c12e7a
--- /dev/null
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -0,0 +1,1545 @@
+{-# OPTIONS -w #-}
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+module SPARC.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import SPARC.Instr
+import SPARC.Cond
+import SPARC.Regs
+import SPARC.RegInfo
+import Instruction
+import Size
+import Reg
+import PIC
+import NCGMonad
+
+-- Our intermediate code:
+import BlockId
+import Cmm
+import CLabel
+
+-- The rest:
+import BasicTypes
+import StaticFlags ( opt_PIC )
+import OrdList
+import qualified Outputable as O
+import Outputable
+import FastString
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Int
+import DynFlags
+
+-- | Top level code generation
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen _
+ (CmmProc info lab params (ListGraph blocks))
+ = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+
+-- picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ let tops = proc : concat statics
+
+-- case picBaseMb of
+-- Just picBase -> initializePicBase picBase tops
+-- Nothing -> return tops
+
+ return tops
+
+
+cmmTopCodeGen _ (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+ | isWord64 ty -> assignReg_I64Code reg src
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+ | isWord64 ty -> assignMem_I64Code addr src
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg _ -> genJump arg
+
+ CmmReturn _
+ -> panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
+
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+-- Reg is the lower 32-bit temporary which contains the result.
+-- Use getHiVRegFromLo to find the other VRegUnique.
+--
+-- The rules of this simplified insn selection game are therefore that
+-- the returned Reg may be modified.
+--
+data ChildCode64
+ = ChildCode64
+ InstrBlock
+ Reg
+
+
+-- | Registers passed up the tree. If the Cmm code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to a CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
+
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
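+--
+-- For example, is32BitInteger 18446744073709551615 is True: truncated
+-- to Int64 the argument reads as -1, which certainly fits in 32 bits.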
+
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
+mangleIndexTree _
+ = panic "SPARC.CodeGen.mangleIndexTree: no match"
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ (src, code) <- getSomeReg addrTree
+ let
+ rhi = getHiVRegFromLo rlo
+ -- Big-endian store
+ mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
+ mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
+
+ return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst (cmmTypeSize pk)
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (vcode `snocOL` mov_hi `snocOL` mov_lo)
+assignReg_I64Code _ _
+ = panic "assignReg_I64Code(sparc): invalid lvalue"
+
+
+-- | Produce code to compute a 64 bit value, returning the vreg holding
+-- the low 32 bits; use getHiVRegFromLo to find its partner.
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+
+-- Load a 64 bit word
+iselExpr64 (CmmLoad addrTree ty)
+ | isWord64 ty
+ = do Amode amode addr_code <- getAmode addrTree
+ let result
+
+ | AddrRegReg r1 r2 <- amode
+ = do rlo <- getNewRegNat II32
+ tmp <- getNewRegNat II32
+ let rhi = getHiVRegFromLo rlo
+
+ return $ ChildCode64
+ ( addr_code
+ `appOL` toOL
+ [ ADD False False r1 (RIReg r2) tmp
+ , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
+ , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
+ rlo
+
+ | AddrRegImm r1 (ImmInt i) <- amode
+ = do rlo <- getNewRegNat II32
+ let rhi = getHiVRegFromLo rlo
+
+ return $ ChildCode64
+ ( addr_code
+ `appOL` toOL
+ [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
+ , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
+ rlo
+
+ result
+
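+-- Note that SPARC is big-endian, so in the loads above the high word of
+-- a 64-bit value lives at offset 0 and the low word at offset 4, matching
+-- the big-endian store in assignMem_I64Code.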
+
+-- Add a literal to a 64 bit integer
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
+ = do ChildCode64 code1 r1_lo <- iselExpr64 e1
+ let r1_hi = getHiVRegFromLo r1_lo
+
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ return $ ChildCode64
+ ( code1
+ `appOL` toOL
+ [ ADD False False r1_lo (RIImm (ImmInteger i)) r_dst_lo
+ , ADD True False r1_hi (RIReg g0) r_dst_hi ])
+ r_dst_lo
+
+
+-- Addition of II64
+iselExpr64 (CmmMachOp (MO_Add width) [e1, e2])
+ = do ChildCode64 code1 r1_lo <- iselExpr64 e1
+ let r1_hi = getHiVRegFromLo r1_lo
+
+ ChildCode64 code2 r2_lo <- iselExpr64 e2
+ let r2_hi = getHiVRegFromLo r2_lo
+
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ let code = code1
+ `appOL` code2
+ `appOL` toOL
+ [ ADD False False r1_lo (RIReg r2_lo) r_dst_lo
+ , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
+
+ return $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) | isWord64 ty = do
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_lo = mkVReg uq II32
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = mkMOV r_src_lo r_dst_lo
+ mov_hi = mkMOV r_src_hi r_dst_hi
+ mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
+ return (
+ ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
+ )
+
+-- Convert something into II64
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
+ = do
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+
+ -- compute expr and load it into r_dst_lo
+ (a_reg, a_code) <- getSomeReg expr
+
+ let code = a_code
+ `appOL` toOL
+ [ mkRegRegMoveInstr g0 r_dst_hi -- clear high 32 bits
+ , mkRegRegMoveInstr a_reg r_dst_lo ]
+
+ return $ ChildCode64 code r_dst_lo
+
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(sparc)" (ppr expr)
+
+
+-- | Compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+
+--
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+
+
+-- Load a literal float into a float register.
+-- The actual literal is stored in a new data area, and we load it
+-- at runtime.
+getRegister (CmmLit (CmmFloat f W32)) = do
+
+ -- a label for the new data area
+ lbl <- getNewLabelNat
+ tmp <- getNewRegNat II32
+
+ let code dst = toOL [
+ -- the data area
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f W32)],
+
+ -- load the literal
+ SETHI (HI (ImmCLbl lbl)) tmp,
+ LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
+
+ return (Any FF32 code)
+
+getRegister (CmmLit (CmmFloat d W64)) = do
+ lbl <- getNewLabelNat
+ tmp <- getNewRegNat II32
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d W64)],
+ SETHI (HI (ImmCLbl lbl)) tmp,
+ LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
+ return (Any FF64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+ MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
+ MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
+
+ MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x
+ MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x
+
+ MO_FF_Conv W64 W32 -> coerceDbl2Flt x
+ MO_FF_Conv W32 W64 -> coerceFlt2Dbl x
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ -- Conversions which are a nop on sparc
+ MO_UU_Conv from to
+ | from == to -> conversionNop (intSize to) x
+ MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
+ MO_UU_Conv W32 to -> conversionNop (intSize to) x
+ MO_SS_Conv W32 to -> conversionNop (intSize to) x
+
+ MO_UU_Conv W8 to@W32 -> conversionNop (intSize to) x
+ MO_UU_Conv W16 to@W32 -> conversionNop (intSize to) x
+ MO_UU_Conv W8 to@W16 -> conversionNop (intSize to) x
+
+ -- sign extension
+ MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
+ MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
+ MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
+
+ other_op -> panic ("Unknown unary mach op: " ++ show mop)
+ where
+
+ -- | sign extend and widen
+ integerExtend
+ :: Width -- ^ width of source expression
+ -> Width -- ^ width of result
+ -> CmmExpr -- ^ source expression
+ -> NatM Register
+
+ integerExtend from to expr
+ = do -- load the expr into some register
+ (reg, e_code) <- getSomeReg expr
+ tmp <- getNewRegNat II32
+ let bitCount
+ = case (from, to) of
+ (W8, W32) -> 24
+ (W16, W32) -> 16
+ (W8, W16) -> 24
+ let code dst
+ = e_code
+
+ -- local shift word left to load the sign bit
+ `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
+
+ -- arithmetic shift right to sign extend
+ `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
+
+ return (Any (intSize to) code)
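+ -- For example, extending W8 to W32: the byte 0x80 becomes 0x80000000
+ -- after the SLL by 24, and the SRA by 24 then yields 0xffffff80 = -128.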
+
+
+ conversionNop new_rep expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_rep)
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+ = case mop of
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt W32 -> condIntReg GTT x y
+ MO_U_Ge W32 -> condIntReg GE x y
+ MO_U_Lt W32 -> condIntReg LTT x y
+ MO_U_Le W32 -> condIntReg LE x y
+
+ MO_U_Gt W16 -> condIntReg GU x y
+ MO_U_Ge W16 -> condIntReg GEU x y
+ MO_U_Lt W16 -> condIntReg LU x y
+ MO_U_Le W16 -> condIntReg LEU x y
+
+ MO_Add W32 -> trivialCode W32 (ADD False False) x y
+ MO_Sub W32 -> trivialCode W32 (SUB False False) x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_S_Quot W32 -> idiv True False x y
+ MO_U_Quot W32 -> idiv False False x y
+
+ MO_S_Rem W32 -> irem True x y
+ MO_U_Rem W32 -> irem False x y
+
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_F_Add w -> trivialFCode w FADD x y
+ MO_F_Sub w -> trivialFCode w FSUB x y
+ MO_F_Mul w -> trivialFCode w FMUL x y
+ MO_F_Quot w -> trivialFCode w FDIV x y
+
+ MO_And rep -> trivialCode rep (AND False) x y
+ MO_Or rep -> trivialCode rep (OR False) x y
+ MO_Xor rep -> trivialCode rep (XOR False) x y
+
+ MO_Mul rep -> trivialCode rep (SMUL False) x y
+
+ MO_Shl rep -> trivialCode rep SLL x y
+ MO_U_Shr rep -> trivialCode rep SRL x y
+ MO_S_Shr rep -> trivialCode rep SRA x y
+
+{-
+ MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
+ [promote x, promote y])
+ where promote x = CmmMachOp MO_F32_to_Dbl [x]
+ MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64
+ [x, y])
+-}
+ other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ -- idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y])
+
+
+ -- | Generate an integer division instruction.
+ idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned division with a 32 bit numerator,
+ -- we can just clear the Y register.
+ idiv False cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- For _signed_ division with a 32 bit numerator,
+ -- we have to sign extend the numerator into the Y register.
+ idiv True cc x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
+ , SRA tmp (RIImm (ImmInt 16)) tmp
+
+ , WRY tmp g0
+ , SDIV cc a_reg (RIReg b_reg) dst]
+
+ return (Any II32 code)
+
+
+ -- | Do an integer remainder.
+ --
+ -- NOTE: The SPARC v8 architecture manual says that integer division
+ -- instructions _may_ generate a remainder, depending on the implementation.
+ -- If so it is _recommended_ that the remainder is placed in the Y register.
+ --
+ -- The UltraSparc 2007 manual says Y is _undefined_ after division.
+ --
+ -- The SPARC T2 doesn't store the remainder, not sure about the others.
+ -- It's probably best not to worry about it, and just generate our own
+ -- remainders.
+ --
+ irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+ -- For unsigned operands:
+ -- Division is between a 64 bit numerator and a 32 bit denominator,
+ -- so we still have to clear the Y register.
+ irem False x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ WRY g0 g0
+ , UDIV False a_reg (RIReg b_reg) tmp_reg
+ , UMUL False tmp_reg (RIReg b_reg) tmp_reg
+ , SUB False False a_reg (RIReg tmp_reg) dst]
+
+ return (Any II32 code)
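+ -- In other words dst = a - (a `quot` b) * b: the quotient and multiply
+ -- are done explicitly rather than trusting Y to hold the remainder.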
+
+
+ -- For signed operands:
+ -- Make sure to sign extend into the Y register, or the remainder
+ -- will have the wrong sign when the numerator is negative.
+ --
+ -- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
+ -- not the full 32. Not sure why this is, something to do with overflow?
+ -- If anyone cares enough about the speed of signed remainder they
+ -- can work it out themselves (then tell me). -- BL 2009/01/20
+
+ irem True x y = do
+ (a_reg, a_code) <- getSomeReg x
+ (b_reg, b_code) <- getSomeReg y
+
+ tmp1_reg <- getNewRegNat II32
+ tmp2_reg <- getNewRegNat II32
+
+ let code dst
+ = a_code
+ `appOL` b_code
+ `appOL` toOL
+ [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
+ , WRY tmp1_reg g0
+
+ , SDIV False a_reg (RIReg b_reg) tmp2_reg
+ , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
+ , SUB False False a_reg (RIReg tmp2_reg) dst]
+
+ return (Any II32 code)
+
+
+ imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getSomeReg a
+ (b_reg, b_code) <- getSomeReg b
+ res_lo <- getNewRegNat II32
+ res_hi <- getNewRegNat II32
+ let
+ shift_amt = case rep of
+ W32 -> 31
+ W64 -> 63
+ _ -> panic "shift_amt"
+ code dst = a_code `appOL` b_code `appOL`
+ toOL [
+ SMUL False a_reg (RIReg b_reg) res_lo,
+ RDY res_hi,
+ SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+ SUB False False res_lo (RIReg res_hi) dst
+ ]
+ return (Any II32 code)
+
+getRegister (CmmLoad mem pk) = do
+ Amode src code <- getAmode mem
+ let
+ code__2 dst = code `snocOL` LD (cmmTypeSize pk) src dst
+ return (Any (cmmTypeSize pk) code__2)
+
+getRegister (CmmLit (CmmInt i _))
+ | fits13Bits i
+ = let
+ src = ImmInt (fromInteger i)
+ code dst = unitOL (OR False g0 (RIImm src) dst)
+ in
+ return (Any II32 code)
+
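+-- A general 32 bit literal: SETHI fills in the high 22 bits of the
+-- destination, and the OR with the %lo part supplies the remaining
+-- 10 low bits.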
+getRegister (CmmLit lit)
+ = let imm = litToImm lit
+ code dst = toOL [
+ SETHI (HI imm) dst,
+ OR False dst (RIImm (LO imm)) dst]
+ in return (Any II32 code)
+
+
+
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits (-i)
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (-(fromInteger i))
+ return (Amode (AddrRegImm reg off) code)
+
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
+ | fits13Bits i
+ = do
+ (reg, code) <- getSomeReg x
+ let
+ off = ImmInt (fromInteger i)
+ return (Amode (AddrRegImm reg off) code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ let
+ code = codeX `appOL` codeY
+ return (Amode (AddrRegReg regX regY) code)
+
+getAmode (CmmLit lit)
+ = do
+ let imm__2 = litToImm lit
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+
+ let code = toOL [ SETHI (HI imm__2) tmp1
+ , OR False tmp1 (RIImm (LO imm__2)) tmp2]
+
+ return (Amode (AddrRegReg tmp2 g0) code)
+
+getAmode other
+ = do
+ (reg, code) <- getSomeReg other
+ let
+ off = ImmInt 0
+ return (Amode (AddrRegImm reg off) code)
+
+
+getCondCode :: CmmExpr -> NatM CondCode
+getCondCode (CmmMachOp mop [x, y])
+ =
+ case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+ other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other = pprPanic "getCondCode(sparc)" (ppr other)
+
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
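+-- SPARC has no separate compare instruction: the synthetic 'cmp x, y'
+-- is really 'subcc x, y, %g0', a subtract that discards its result and
+-- only sets the condition codes. That is what the 'SUB False True .. g0'
+-- forms below generate.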
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condIntCode cond x (CmmLit (CmmInt y rep))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ let
+ src2 = ImmInt (fromInteger y)
+ code' = code `snocOL` SUB False True src1 (RIImm src2) g0
+ return (CondCode False cond code')
+
+condIntCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ code__2 = code1 `appOL` code2 `snocOL`
+ SUB False True src1 (RIReg src2) g0
+ return (CondCode False cond code__2)
+
+
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+condFltCode cond x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp <- getNewRegNat FF64
+ let
+ promote x = FxTOy FF32 FF64 x tmp
+
+ pk1 = cmmExprType x
+ pk2 = cmmExprType y
+
+ code__2 =
+ if pk1 `cmmEqType` pk2 then
+ code1 `appOL` code2 `snocOL`
+ FCMP True (cmmTypeSize pk1) src1 src2
+ else if typeWidth pk1 == W32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ FCMP True FF64 tmp src2
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ FCMP True FF64 src1 tmp
+ return (CondCode True cond code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_IntCode pk addr src = do
+ (srcReg, code) <- getSomeReg src
+ Amode dstAddr addr_code <- getAmode addr
+ return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
+
+
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode pk reg src = do
+ r <- getRegister src
+ return $ case r of
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ where
+ dst = getRegisterReg reg
+
+
+
+-- Floating point assignment to memory
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_FltCode pk addr src = do
+ Amode dst__2 code1 <- getAmode addr
+ (src__2, code2) <- getSomeReg src
+ tmp1 <- getNewRegNat pk
+ let
+ pk__2 = cmmExprType src
+ code__2 = code1 `appOL` code2 `appOL`
+ if sizeToWidth pk == typeWidth pk__2
+ then unitOL (ST pk src__2 dst__2)
+ else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
+ , ST pk tmp1 dst__2]
+ return code__2
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode pk dstCmmReg srcCmmExpr = do
+ srcRegister <- getRegister srcCmmExpr
+ let dstReg = getRegisterReg dstCmmReg
+
+ return $ case srcRegister of
+ Any _ code -> code dstReg
+ Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
+
+
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLit (CmmLabel lbl))
+ = return (toOL [CALL (Left target) 0 True, NOP])
+ where
+ target = ImmCLbl lbl
+
+genJump tree
+ = do
+ (target, code) <- getSomeReg tree
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+SPARC: First, we have to ensure that the condition codes are set
+according to the supplied comparison operation. We generate slightly
+different code for floating point comparisons, because a floating
+point operation cannot directly precede a @BF@. We assume the worst
+and fill that slot with a @NOP@.
+
+SPARC: Do not fill the delay slots here; you will confuse the register
+allocator.
+-}
+
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+
+
+genCondJump bid bool = do
+ CondCode is_float cond code <- getCondCode bool
+ return (
+ code `appOL`
+ toOL (
+ if is_float
+ then [NOP, BF cond False bid, NOP]
+ else [BI cond False bid, NOP]
+ )
+ )
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+{-
+ The SPARC calling convention is an absolute
+ nightmare. The first 6x32 bits of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+ 16 words above the stack pointer is a word for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+
+-}
+
+
+-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
+-- are guaranteed to take place before writes afterwards (unlike on PowerPC).
+-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
+--
+-- In the SPARC case we don't need a barrier.
+--
+genCCall (CmmPrim (MO_WriteBarrier)) _ _
+ = return nilOL
+
+genCCall target dest_regs argsAndHints
+ = do
+ -- strip hints from the arg regs
+ let args :: [CmmExpr]
+ args = map hintlessCmm argsAndHints
+
+
+ -- work out the arguments, and assign them to integer regs
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let (argcodes, vregss) = unzip argcode_and_vregs
+ let vregs = concat vregss
+
+ let n_argRegs = length allArgRegs
+ let n_argRegs_used = min (length vregs) n_argRegs
+
+
+ -- deal with static vs dynamic call targets
+ callinsns <- case target of
+ CmmCallee (CmmLit (CmmLabel lbl)) conv ->
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ CmmCallee expr conv
+ -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ CmmPrim mop
+ -> do res <- outOfLineFloatOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ return lblOrMopExpr
+
+ let argcode = concatOL argcodes
+
+ let (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
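+ -- For example, nine argument words with six arg regs leaves diff = 3,
+ -- which is rounded up to nn = 4 words (16 bytes), keeping %sp 8-byte
+ -- aligned.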
+
+ let transfer_code
+ = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
+
+ return
+ $ argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up `appOL`
+ assign_code dest_regs
+
+
+-- | Generate code to calculate an argument, and move it into one
+-- or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+ -- If the expr produces a 64 bit int, then we can just use iselExpr64
+ | isWord64 (cmmExprType arg)
+ = do (ChildCode64 code r_lo) <- iselExpr64 arg
+ let r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+
+ | otherwise
+ = do (src, code) <- getSomeReg arg
+ let pk = cmmExprType arg
+
+ case cmmTypeSize pk of
+
+ -- Move a 64 bit float argument into two integer regs.
+ FF64 -> do
+ v1 <- getNewRegNat II32
+ v2 <- getNewRegNat II32
+
+ let Just f0_high = fPair f0
+
+ let code2 =
+ code `snocOL`
+ FMOV FF64 src f0 `snocOL`
+ ST FF32 f0 (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1 `snocOL`
+ ST FF32 f0_high (spRel 16) `snocOL`
+ LD II32 (spRel 16) v2
+
+ return (code2, [v1,v2])
+
+ -- Move a 32 bit float argument into an integer reg.
+ FF32 -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ ST FF32 src (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1
+
+ return (code2, [v1])
+
+ -- Move an integer argument into an integer reg.
+ other -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ OR False g0 (RIReg src) v1
+
+ return (code2, [v1])
+
+
+-- | Move args from the integer vregs into which they have been
+-- marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ offset
+ = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset
+ = ST II32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
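+-- For example, with eight argument words the first six moves target
+-- %o0..%o5, and the last two are stored at word offsets 23 and 24,
+-- i.e. [%sp+92] and [%sp+96], matching the calling convention comment
+-- in genCCall.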
+
+
+-- | Assign results returned from the call into their
+-- destination regs.
+--
+assign_code :: [CmmHinted LocalReg] -> OrdList Instr
+assign_code [] = nilOL
+
+assign_code [CmmHinted dest _hint]
+ = let rep = localRegType dest
+ width = typeWidth rep
+ r_dest = getRegisterReg (CmmLocal dest)
+
+ result
+ | isFloatType rep
+ , W32 <- width
+ = unitOL $ FMOV FF32 (RealReg $ fReg 0) r_dest
+
+ | isFloatType rep
+ , W64 <- width
+ = unitOL $ FMOV FF64 (RealReg $ fReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W32 <- width
+ = unitOL $ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W64 <- width
+ , r_dest_hi <- getHiVRegFromLo r_dest
+ = toOL [ mkRegRegMoveInstr (RealReg $ oReg 0) r_dest_hi
+ , mkRegRegMoveInstr (RealReg $ oReg 1) r_dest]
+ in result
+
+assign_code _
+ = panic "SPARC.CodeGen.assign_code: many return values not supported"
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineFloatOp
+ :: CallishMachOp
+ -> NatM (Either CLabel CmmExpr)
+
+outOfLineFloatOp mop
+ = do let functionName
+ = outOfLineFloatOp_table mop
+
+ dflags <- getDynFlagsNat
+ mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
+ $ mkForeignLabel functionName Nothing True IsFunction
+
+ let mopLabelOrExpr
+ = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+
+ return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineFloatOp_table
+ :: CallishMachOp
+ -> FastString
+
+outOfLineFloatOp_table mop
+ = case mop of
+ MO_F32_Exp -> fsLit "expf"
+ MO_F32_Log -> fsLit "logf"
+ MO_F32_Sqrt -> fsLit "sqrtf"
+ MO_F32_Pwr -> fsLit "powf"
+
+ MO_F32_Sin -> fsLit "sinf"
+ MO_F32_Cos -> fsLit "cosf"
+ MO_F32_Tan -> fsLit "tanf"
+
+ MO_F32_Asin -> fsLit "asinf"
+ MO_F32_Acos -> fsLit "acosf"
+ MO_F32_Atan -> fsLit "atanf"
+
+ MO_F32_Sinh -> fsLit "sinhf"
+ MO_F32_Cosh -> fsLit "coshf"
+ MO_F32_Tanh -> fsLit "tanhf"
+
+ MO_F64_Exp -> fsLit "exp"
+ MO_F64_Log -> fsLit "log"
+ MO_F64_Sqrt -> fsLit "sqrt"
+ MO_F64_Pwr -> fsLit "pow"
+
+ MO_F64_Sin -> fsLit "sin"
+ MO_F64_Cos -> fsLit "cos"
+ MO_F64_Tan -> fsLit "tan"
+
+ MO_F64_Asin -> fsLit "asin"
+ MO_F64_Acos -> fsLit "acos"
+ MO_F64_Atan -> fsLit "atan"
+
+ MO_F64_Sinh -> fsLit "sinh"
+ MO_F64_Cosh -> fsLit "cosh"
+ MO_F64_Tanh -> fsLit "tanh"
+
+ other -> pprPanic "outOfLineFloatOp(sparc): Unknown callish mach op "
+ (pprCallishMachOp mop)
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+genSwitch expr ids
+ | opt_PIC
+ = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+
+ | otherwise
+ = do (e_reg, e_code) <- getSomeReg expr
+
+ base_reg <- getNewRegNat II32
+ offset_reg <- getNewRegNat II32
+ dst <- getNewRegNat II32
+
+ label <- getNewLabelNat
+ let jumpTable = map jumpTableEntry ids
+
+ return $ e_code `appOL`
+ toOL
+ -- the jump table
+ [ LDATA ReadOnlyData (CmmDataLabel label : jumpTable)
+
+ -- load base of jump table
+ , SETHI (HI (ImmCLbl label)) base_reg
+ , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+
+ -- the addrs in the table are 32 bits wide.
+ , SLL e_reg (RIImm $ ImmInt 2) offset_reg
+
+ -- load and jump to the destination
+ , LD II32 (AddrRegReg base_reg offset_reg) dst
+ , JMP_TBL (AddrRegImm dst (ImmInt 0)) [i | Just i <- ids]
+ , NOP ]
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
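+-- The EQQ and NE cases below use the carry flag: 'SUB False True .. g0'
+-- is subcc, which sets the carry iff the compared value is non-zero.
+-- 'SUB True False g0 (-1) dst' is then subx, computing 0 - (-1) - carry
+-- = 1 - carry, so dst gets 1 exactly when the value was zero. The NE
+-- cases instead read the carry back directly with addx
+-- ('ADD True False g0 0 dst').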
+condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any II32 code__2)
+
+condIntReg EQQ x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ SUB True False g0 (RIImm (ImmInt (-1))) dst]
+ return (Any II32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 d)) = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ code__2 dst = code `appOL` toOL [
+ SUB False True g0 (RIReg src) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any II32 code__2)
+
+condIntReg NE x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `appOL` toOL [
+ XOR False src1 (RIReg src2) dst,
+ SUB False True g0 (RIReg dst) g0,
+ ADD True False g0 (RIImm (ImmInt 0)) dst]
+ return (Any II32 code__2)
+
+condIntReg cond x y = do
+ bid1@(BlockId lbl1) <- getBlockIdNat
+ bid2@(BlockId lbl2) <- getBlockIdNat
+ CondCode _ cond cond_code <- condIntCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ BI cond False bid1, NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False bid2, NOP,
+ NEWBLOCK bid1,
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK bid2]
+ return (Any II32 code__2)
+
+condFltReg cond x y = do
+ bid1@(BlockId lbl1) <- getBlockIdNat
+ bid2@(BlockId lbl2) <- getBlockIdNat
+ CondCode _ cond cond_code <- condFltCode cond x y
+ let
+ code__2 dst = cond_code `appOL` toOL [
+ NOP,
+ BF cond False bid1, NOP,
+ OR False g0 (RIImm (ImmInt 0)) dst,
+ BI ALWAYS False bid2, NOP,
+ NEWBLOCK bid1,
+ OR False g0 (RIImm (ImmInt 1)) dst,
+ NEWBLOCK bid2]
+ return (Any II32 code__2)
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching a CmmInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode
+ :: Width -> (Reg -> RI -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+
+trivialCode pk instr x (CmmLit (CmmInt y d))
+ | fits13Bits y
+ = do
+ (src1, code) <- getSomeReg x
+ tmp <- getNewRegNat II32
+ let
+ src2 = ImmInt (fromInteger y)
+ code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+ return (Any II32 code__2)
+
+trivialCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat II32
+ tmp2 <- getNewRegNat II32
+ let
+ code__2 dst = code1 `appOL` code2 `snocOL`
+ instr src1 (RIReg src2) dst
+ return (Any II32 code__2)
+
+------------
+trivialFCode
+ :: Width -> (Size -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+
+trivialFCode pk instr x y = do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ tmp1 <- getNewRegNat (cmmTypeSize $ cmmExprType x)
+ tmp2 <- getNewRegNat (cmmTypeSize $ cmmExprType y)
+ tmp <- getNewRegNat FF64
+ let
+ promote x = FxTOy FF32 FF64 x tmp
+
+ pk1 = cmmExprType x
+ pk2 = cmmExprType y
+
+ code__2 dst =
+ if pk1 `cmmEqType` pk2 then
+ code1 `appOL` code2 `snocOL`
+ instr (floatSize pk) src1 src2 dst
+ else if typeWidth pk1 == W32 then
+ code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+ instr FF64 tmp src2 dst
+ else
+ code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+ instr FF64 src1 tmp dst
+ return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
+ code__2)
+
+------------
+trivialUCode
+ :: Size -> (RI -> Reg -> Instr)
+ -> CmmExpr -> NatM Register
+
+trivialUCode size instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat size
+ let
+ code__2 dst = code `snocOL` instr (RIReg src) dst
+ return (Any size code__2)
+
+-------------
+trivialUFCode
+ :: Size -> (Reg -> Reg -> Instr)
+ -> CmmExpr -> NatM Register
+
+trivialUFCode pk instr x = do
+ (src, code) <- getSomeReg x
+ tmp <- getNewRegNat pk
+ let
+ code__2 dst = code `snocOL` instr src dst
+ return (Any pk code__2)
+
+
+
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+
+
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP width1 width2 x = do
+ (src, code) <- getSomeReg x
+ let
+ code__2 dst = code `appOL` toOL [
+ ST (intSize width1) src (spRel (-2)),
+ LD (intSize width1) (spRel (-2)) dst,
+ FxTOy (intSize width1) (floatSize width2) dst dst]
+ return (Any (floatSize width2) code__2)
+
+
+-- | Coerce a floating point value to integer
+--
+-- NOTE: On sparc v9 there are no instructions to move a value from an
+-- FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int width1 width2 x
+ = do let fsize1 = floatSize width1
+ fsize2 = floatSize width2
+
+ isize2 = intSize width2
+
+ (fsrc, code) <- getSomeReg x
+ fdst <- getNewRegNat fsize2
+
+ let code2 dst
+ = code
+ `appOL` toOL
+ -- convert float to int format, leaving it in a float reg.
+ [ FxTOy fsize1 isize2 fsrc fdst
+
+ -- store the int into mem, then load it back to move
+ -- it into an actual int reg.
+ , ST fsize2 fdst (spRel (-2))
+ , LD isize2 (spRel (-2)) dst]
+
+ return (Any isize2 code2)
+
+------------
+coerceDbl2Flt x = do
+ (src, code) <- getSomeReg x
+ return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
+
+------------
+coerceFlt2Dbl x = do
+ (src, code) <- getSomeReg x
+ return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
+
+
+
+-- eXTRA_STK_ARGS_HERE
+
+-- We (allegedly) put the first six C-call arguments in registers;
+-- where do we start putting the rest of them?
+
+-- Moved from Instrs (SDM):
+
+eXTRA_STK_ARGS_HERE :: Int
+eXTRA_STK_ARGS_HERE
+ = 23
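+-- (23 words * 4 bytes = 92 bytes, matching the [%sp+92] starting offset
+-- for stack arguments described in the genCCall comment above.)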
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs
new file mode 100644
index 0000000000..d0f12efcf5
--- /dev/null
+++ b/compiler/nativeGen/SPARC/Cond.hs
@@ -0,0 +1,53 @@
+
+module SPARC.Cond (
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned
+)
+
+where
+
+-- | Branch condition codes.
+data Cond
+ = ALWAYS
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | NEVER
+ | POS
+ | VC
+ | VS
+ deriving Eq
+
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9c332317ea..6dc6477f9c 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -10,56 +10,55 @@
#include "nativeGen/NCG.h"
module SPARC.Instr (
- Cond(..),
RI(..),
Instr(..),
- riZero,
- fpRelEA,
- moveSp,
- fPair,
+ maxSpillSlots
)
where
-import BlockId
-import RegsBase
import SPARC.Regs
+import SPARC.Cond
+import Instruction
+import RegClass
+import Reg
+import Size
+
+import BlockId
import Cmm
import Outputable
-import Constants ( wORD_SIZE )
+import Constants (rESERVED_C_STACK_BYTES)
import FastString
+import FastBool
import GHC.Exts
--- | Branch condition codes.
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | NEVER
- | POS
- | VC
- | VS
- deriving Eq
-
-
-- | Register or immediate
data RI
= RIReg Reg
| RIImm Imm
--- | SPARC isntruction set.
+-- | 'Instruction' instance for the SPARC instruction set.
+instance Instruction Instr where
+ regUsageOfInstr = sparc_regUsageOfInstr
+ patchRegsOfInstr = sparc_patchRegsOfInstr
+ isJumpishInstr = sparc_isJumpishInstr
+ jumpDestsOfInstr = sparc_jumpDestsOfInstr
+ patchJumpInstr = sparc_patchJumpInstr
+ mkSpillInstr = sparc_mkSpillInstr
+ mkLoadInstr = sparc_mkLoadInstr
+ takeDeltaInstr = sparc_takeDeltaInstr
+ isMetaInstr = sparc_isMetaInstr
+ mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
+ takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
+ mkJumpInstr = sparc_mkJumpInstr
+
+
+-- | SPARC instruction set.
+-- Not complete. This is only the ones we need.
+--
data Instr
-- meta ops --------------------------------------------------
@@ -78,12 +77,6 @@ data Instr
-- specify current stack offset for benefit of subsequent passes.
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-- real instrs -----------------------------------------------
-- Loads and stores.
| LD Size AddrMode Reg -- size, src, dst
@@ -157,39 +150,290 @@ data Instr
| CALL (Either Imm Reg) Int Bool -- target, args, terminal
--- | Check if a RI represents a zero value.
--- - a literal zero
--- - register %g0, which is always zero.
+-- | regUsage returns the sets of src and destination registers used
+-- by a particular instruction. Machine registers that are
+-- pre-allocated to stgRegs are filtered out, because they are
+-- uninteresting from a register allocation standpoint. (We wouldn't
+-- want them to end up on the free list!) As far as we are concerned,
+-- the fixed registers simply don't exist (for allocation purposes,
+-- anyway).
+
+-- regUsage doesn't need to do any trickery for jumps and such. Just
+-- state precisely the regs read and written by that insn. The
+-- consequences of control flow transfers, as far as register
+-- allocation goes, are taken care of by the register allocator.
+--
+sparc_regUsageOfInstr :: Instr -> RegUsage
+sparc_regUsageOfInstr instr
+ = case instr of
+ LD _ addr reg -> usage (regAddr addr, [reg])
+ ST _ reg addr -> usage (reg : regAddr addr, [])
+ ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ RDY rd -> usage ([], [rd])
+ WRY r1 r2 -> usage ([r1, r2], [])
+ AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
+ SETHI _ reg -> usage ([], [reg])
+ FABS _ r1 r2 -> usage ([r1], [r2])
+ FADD _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FCMP _ _ r1 r2 -> usage ([r1, r2], [])
+ FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FMOV _ r1 r2 -> usage ([r1], [r2])
+ FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FNEG _ r1 r2 -> usage ([r1], [r2])
+ FSQRT _ r1 r2 -> usage ([r1], [r2])
+ FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3])
+ FxTOy _ _ r1 r2 -> usage ([r1], [r2])
+
+ JMP addr -> usage (regAddr addr, [])
+ JMP_TBL addr _ -> usage (regAddr addr, [])
+
+ CALL (Left _ ) _ True -> noUsage
+ CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs)
+ CALL (Right reg) _ True -> usage ([reg], [])
+ CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
+ _ -> noUsage
+
+ where
+ usage (src, dst)
+ = RU (filter interesting src) (filter interesting dst)
+
+ regAddr (AddrRegReg r1 r2) = [r1, r2]
+ regAddr (AddrRegImm r1 _) = [r1]
+
+ regRI (RIReg r) = [r]
+ regRI _ = []
+
+
+-- | Interesting regs are virtuals, or ones that are allocatable
+-- by the register allocator.
+interesting :: Reg -> Bool
+interesting reg
+ = case reg of
+ VirtualRegI _ -> True
+ VirtualRegHi _ -> True
+ VirtualRegF _ -> True
+ VirtualRegD _ -> True
+ RealReg i -> isFastTrue (freeReg i)
+
+
+
+-- | Apply a given mapping to all the register references in this instruction.
+sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+sparc_patchRegsOfInstr instr env = case instr of
+ LD sz addr reg -> LD sz (fixAddr addr) (env reg)
+ ST sz reg addr -> ST sz (env reg) (fixAddr addr)
+
+ ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
+ SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
+ UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
+ SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
+ UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
+ SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
+ RDY rd -> RDY (env rd)
+ WRY r1 r2 -> WRY (env r1) (env r2)
+ AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
+ ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
+ OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
+ ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
+ XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
+ XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
+ SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
+ SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
+ SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
+
+ SETHI imm reg -> SETHI imm (env reg)
+
+ FABS s r1 r2 -> FABS s (env r1) (env r2)
+ FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
+ FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
+ FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
+ FMOV s r1 r2 -> FMOV s (env r1) (env r2)
+ FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
+ FNEG s r1 r2 -> FNEG s (env r1) (env r2)
+ FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
+ FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
+ FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
+
+ JMP addr -> JMP (fixAddr addr)
+ JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids
+
+ CALL (Left i) n t -> CALL (Left i) n t
+ CALL (Right r) n t -> CALL (Right (env r)) n t
+ _ -> instr
+
+ where
+ fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
+ fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
+
+ fixRI (RIReg r) = RIReg (env r)
+ fixRI other = other
+
+
+--------------------------------------------------------------------------------
+sparc_isJumpishInstr :: Instr -> Bool
+sparc_isJumpishInstr instr
+ = case instr of
+ BI{} -> True
+ BF{} -> True
+ JMP{} -> True
+ JMP_TBL{} -> True
+ CALL{} -> True
+ _ -> False
+
+sparc_jumpDestsOfInstr :: Instr -> [BlockId]
+sparc_jumpDestsOfInstr insn
+ = case insn of
+ BI _ _ id -> [id]
+ BF _ _ id -> [id]
+ JMP_TBL _ ids -> ids
+ _ -> []
+
+
+sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+sparc_patchJumpInstr insn patchF
+ = case insn of
+ BI cc annul id -> BI cc annul (patchF id)
+ BF cc annul id -> BF cc annul (patchF id)
+ _ -> insn
+
+
+--------------------------------------------------------------------------------
+-- | Make a spill instruction.
+-- On SPARC we spill below the frame pointer, leaving 2 words per spill slot.
+sparc_mkSpillInstr
+ :: Reg -- ^ register to spill
+ -> Int -- ^ current stack delta
+ -> Int -- ^ spill slot to use
+ -> Instr
+
+sparc_mkSpillInstr reg _ slot
+ = let off = spillSlotToOffset slot
+ off_w = 1 + (off `div` 4)
+ sz = case regClass reg of
+ RcInteger -> II32
+ RcFloat -> FF32
+ RcDouble -> FF64
+
+ in ST sz reg (fpRel (negate off_w))
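+-- For example, spill slot 0 is at byte offset 64, so off_w = 17 and the
+-- store lands at [%fp - 68].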
+
+
+-- | Make a spill reload instruction.
+sparc_mkLoadInstr
+ :: Reg -- ^ register to load
+ -> Int -- ^ current stack delta
+ -> Int -- ^ spill slot to use
+ -> Instr
+
+sparc_mkLoadInstr reg _ slot
+ = let off = spillSlotToOffset slot
+ off_w = 1 + (off `div` 4)
+ sz = case regClass reg of
+ RcInteger -> II32
+ RcFloat -> FF32
+ RcDouble -> FF64
+
+ in LD sz (fpRel (- off_w)) reg
+
+-- | Convert a spill slot number to a *byte* offset, with no sign.
--
-riZero :: RI -> Bool
-riZero (RIImm (ImmInt 0)) = True
-riZero (RIImm (ImmInteger 0)) = True
-riZero (RIReg (RealReg 0)) = True
-riZero _ = False
+spillSlotToOffset :: Int -> Int
+spillSlotToOffset slot
+ | slot >= 0 && slot < maxSpillSlots
+ = 64 + spillSlotSize * slot
+
+ | otherwise
+ = pprPanic "spillSlotToOffset:"
+ ( text "invalid spill location: " <> int slot
+ $$ text "maxSpillSlots: " <> int maxSpillSlots)
+
+
+-- | We need 8 bytes because our largest registers are 64 bit.
+spillSlotSize :: Int
+spillSlotSize = 8
--- | Calculate the effective address which would be used by the
--- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
--- alas -- can't have fpRelEA here because of module dependencies.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
- = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+-- | The maximum number of spill slots available on the C stack.
+-- If we use up all of the slots, then we're screwed.
+maxSpillSlots :: Int
+maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
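+--
+-- For instance, if rESERVED_C_STACK_BYTES were 16k (a value assumed here
+-- purely for illustration), this would give (16384 - 64) / 8 - 1 = 2039
+-- slots.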
--- | Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
- = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+--------------------------------------------------------------------------------
+-- | See if this instruction is telling us the current C stack delta
+sparc_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+sparc_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
+sparc_isMetaInstr
+ :: Instr
+ -> Bool
+
+sparc_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+-- | Make a reg-reg move instruction.
+-- On SPARC v8 there are no instructions to move directly between
+-- floating point and integer regs. If we need to do that then we
+-- have to go via memory.
+--
+sparc_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+sparc_mkRegRegMoveInstr src dst
+ = case regClass src of
+ RcInteger -> ADD False False src (RIReg g0) dst
+ RcDouble -> FMOV FF64 src dst
+ RcFloat -> FMOV FF32 src dst
+
+
+-- | Check whether an instruction represents a reg-reg move.
+-- The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- by assigning the src and dest temporaries to the same real register.
+--
+sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+sparc_takeRegRegMoveInstr instr
+ = case instr of
+ ADD False False src (RIReg src2) dst
+ | g0 == src2 -> Just (src, dst)
+
+ FMOV FF64 src dst -> Just (src, dst)
+ FMOV FF32 src dst -> Just (src, dst)
+ _ -> Nothing
--- | Produce the second-half-of-a-double register given the first half.
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n)
- | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
+-- | Make an unconditional branch instruction.
+sparc_mkJumpInstr
+ :: BlockId
+ -> [Instr]
-fPair (VirtualRegD u)
- = Just (VirtualRegHi u)
+sparc_mkJumpInstr id
+ = [BI ALWAYS False id
+ , NOP] -- fill the branch delay slot.
-fPair _
- = trace ("MachInstrs.fPair: can't get high half of supposed double reg ")
- Nothing
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 7d64df1b15..a0d5fffce1 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module SPARC.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
- pprDataItem,
- pprInstr
+ pprDataItem
)
where
@@ -20,20 +23,119 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import PprBase
-import RegsBase
import SPARC.Regs
+import SPARC.RegInfo
import SPARC.Instr
+import SPARC.Cond
+import Instruction
+import Reg
+import Size
+import PprBase
import BlockId
import Cmm
-
import CLabel
-import Panic ( panic )
import Unique ( pprUnique )
+import qualified Outputable
+import Outputable (Outputable, panic)
import Pretty
import FastString
+import Data.Word
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+ -- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+pprAlign bytes =
+ ptext (sLit ".align ") <> int bytes
+
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
-- | Pretty print a register.
@@ -101,12 +203,13 @@ pprSize :: Size -> Doc
pprSize x
= ptext
(case x of
- II8 -> sLit "ub"
- II16 -> sLit "uh"
- II32 -> sLit ""
- II64 -> sLit "d"
- FF32 -> sLit ""
- FF64 -> sLit "d")
+ II8 -> sLit "ub"
+ II16 -> sLit "uh"
+ II32 -> sLit ""
+ II64 -> sLit "d"
+ FF32 -> sLit ""
+ FF64 -> sLit "d"
+ _ -> panic "SPARC.Ppr.pprSize: no match")
-- | Pretty print a size for an instruction suffix.
@@ -120,7 +223,8 @@ pprStSize x
II32 -> sLit ""
II64 -> sLit "x"
FF32 -> sLit ""
- FF64 -> sLit "d")
+ FF64 -> sLit "d"
+ _ -> panic "SPARC.Ppr.pprSize: no match")
-- | Pretty print a condition code.
@@ -258,6 +362,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -273,7 +378,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprReg reg]
-
+-}
-- a clumsy hack for now, to handle possible double alignment problems
-- even clumsier, to allow for RegReg regs that show when doing indexed
diff --git a/compiler/nativeGen/SPARC/RegInfo.hs b/compiler/nativeGen/SPARC/RegInfo.hs
index 8f8a977ac7..025e302556 100644
--- a/compiler/nativeGen/SPARC/RegInfo.hs
+++ b/compiler/nativeGen/SPARC/RegInfo.hs
@@ -8,241 +8,115 @@
-----------------------------------------------------------------------------
module SPARC.RegInfo (
- -- machine specific
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
+ mkVReg,
+
+ riZero,
+ fpRelEA,
+ moveSp,
+ fPair,
+
+ shortcutStatic,
+ regDotColor,
JumpDest(..),
canShortcut,
- shortcutJump,
-
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
- spillSlotSize,
- maxSpillSlots,
- spillSlotToOffset
+ shortcutJump,
)
where
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-
import SPARC.Instr
import SPARC.Regs
-import RegsBase
+import RegClass
+import Reg
+import Size
+import Constants (wORD_SIZE)
+import Cmm
+import CLabel
import BlockId
import Outputable
-import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- | Represents what regs are read and written to in an instruction.
---
-data RegUsage
- = RU [Reg] -- regs read from
- [Reg] -- regs written to
-
-
--- | No regs read or written to.
-noUsage :: RegUsage
-noUsage = RU [] []
-
-
--- | regUsage returns the sets of src and destination registers used
--- by a particular instruction. Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint. (We wouldn't
--- want them to end up on the free list!) As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such. Just
--- state precisely the regs read and written by that insn. The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
---
-regUsage :: Instr -> RegUsage
-regUsage instr
- = case instr of
- SPILL reg _ -> usage ([reg], [])
- RELOAD _ reg -> usage ([], [reg])
-
- LD _ addr reg -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- RDY rd -> usage ([], [rd])
- WRY r1 r2 -> usage ([r1, r2], [])
- AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SETHI _ reg -> usage ([], [reg])
- FABS _ r1 r2 -> usage ([r1], [r2])
- FADD _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FCMP _ _ r1 r2 -> usage ([r1, r2], [])
- FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FMOV _ r1 r2 -> usage ([r1], [r2])
- FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FNEG _ r1 r2 -> usage ([r1], [r2])
- FSQRT _ r1 r2 -> usage ([r1], [r2])
- FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FxTOy _ _ r1 r2 -> usage ([r1], [r2])
-
- JMP addr -> usage (regAddr addr, [])
- JMP_TBL addr _ -> usage (regAddr addr, [])
-
- CALL (Left _ ) _ True -> noUsage
- CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs)
- CALL (Right reg) _ True -> usage ([reg], [])
- CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
- _ -> noUsage
-
- where
- usage (src, dst)
- = RU (filter interesting src) (filter interesting dst)
-
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
+import Unique
- regRI (RIReg r) = [r]
- regRI _ = []
+-- | Make a virtual reg with this size.
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size)
+ = VirtualRegI u
--- | Interesting regs are virtuals, or ones that are allocatable
--- by the register allocator.
-interesting :: Reg -> Bool
-interesting reg
- = case reg of
- VirtualRegI _ -> True
- VirtualRegHi _ -> True
- VirtualRegF _ -> True
- VirtualRegD _ -> True
- RealReg i -> isFastTrue (freeReg i)
-
-
-
--- | Apply a given mapping to tall the register references in this instruction.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- LD sz addr reg -> LD sz (fixAddr addr) (env reg)
- ST sz reg addr -> ST sz (env reg) (fixAddr addr)
-
- ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
- SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
- UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
- SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
- UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
- SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
- RDY rd -> RDY (env rd)
- WRY r1 r2 -> WRY (env r1) (env r2)
- AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
- ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
- OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
- ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
- XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
- XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
- SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
- SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
- SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
-
- SETHI imm reg -> SETHI imm (env reg)
-
- FABS s r1 r2 -> FABS s (env r1) (env r2)
- FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
- FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
- FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
- FMOV s r1 r2 -> FMOV s (env r1) (env r2)
- FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
- FNEG s r1 r2 -> FNEG s (env r1) (env r2)
- FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
- FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
- FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
-
- JMP addr -> JMP (fixAddr addr)
- JMP_TBL addr ids -> JMP_TBL (fixAddr addr) ids
-
- CALL (Left i) n t -> CALL (Left i) n t
- CALL (Right r) n t -> CALL (Right (env r)) n t
- _ -> instr
-
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- BI _ _ id -> id : acc
- BF _ _ id -> id : acc
- JMP_TBL _ ids -> ids ++ acc
- _other -> acc
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegF u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
--- | Check whether a particular instruction is a jump, branch or call instruction (jumpish)
--- We can't just use jumpDests above because the jump might take its arg,
--- so the instr won't contain a blockid.
+-- | Check if an RI represents a zero value.
+-- - a literal zero
+-- - register %g0, which is always zero.
--
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- BI{} -> True
- BF{} -> True
- JMP{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- BI cc annul id
- | id == old -> BI cc annul new
-
- BF cc annul id
- | id == old -> BF cc annul new
-
- _other -> insn
-
+riZero :: RI -> Bool
+riZero (RIImm (ImmInt 0)) = True
+riZero (RIImm (ImmInteger 0)) = True
+riZero (RIReg (RealReg 0)) = True
+riZero _ = False
+
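riZero's clauses can be exercised directly; a minimal sketch, assuming only the constructors above (the helper name is ours, not part of the patch):

    -- Hedged sanity checks for riZero; RealReg 0 is %g0 on SPARC.
    riZeroChecks :: Bool
    riZeroChecks = and
        [ riZero (RIImm (ImmInt 0))         -- literal zero, Int form
        , riZero (RIImm (ImmInteger 0))     -- literal zero, Integer form
        , riZero (RIReg (RealReg 0))        -- %g0 always reads as zero
        , not (riZero (RIImm (ImmInt 1)))
        ]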
+
+-- | Calculate the effective address which would be used by the
+-- corresponding fpRel sequence. fpRel is in MachRegs.lhs,
+-- alas -- can't have fpRelEA here because of module dependencies.
+fpRelEA :: Int -> Reg -> Instr
+fpRelEA n dst
+ = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
+
+
+-- | Code to shift the stack pointer by n words.
+moveSp :: Int -> Instr
+moveSp n
+ = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
+
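Both helpers expand to plain ADD forms; with wORD_SIZE = 4 on 32-bit SPARC, a hedged sketch of what they produce:

    -- moveSp (-1) == ADD False False sp (RIImm (ImmInt (-4))) sp   -- %sp down one word
    -- fpRelEA 2 r == ADD False False fp (RIImm (ImmInt 8)) r       -- r := %fp + 8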
+
+-- | Produce the second-half-of-a-double register given the first half.
+fPair :: Reg -> Maybe Reg
+fPair (RealReg n)
+ | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
+
+fPair (VirtualRegD u)
+ = Just (VirtualRegHi u)
+
+fPair _
+ = trace ("MachInstrs.fPair: can't get high half of supposed double reg ")
+ Nothing
+
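Doubles occupy even/odd float-register pairs on SPARC, so fPair is only defined on even float regs (numbered from 32 here) and on double virtuals; a quick hedged sketch, assuming Reg's Eq instance:

    fPairChecks :: Bool
    fPairChecks = and
        [ fPair (RealReg 32) == Just (RealReg 33)   -- %f0 pairs with %f1
        , fPair (RealReg 33) == Nothing             -- odd half: falls through (with a trace)
        ]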
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+shortcutStatic _ other_static
+ = other_static
+
+shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
+
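A worked sketch of what shortcutStatic buys jump tables: if block uq is an empty block that only jumps on to lbl', the table entry is rewritten to target lbl' directly (fn stands for whatever shortcut map the allocator built):

    -- Given: fn (BlockId uq) == Just (DestImm (ImmCLbl lbl'))
    -- Then:  shortcutStatic fn (CmmStaticLit (CmmLabel (mkAsmTempLabel uq)))
    --          == CmmStaticLit (CmmLabel lbl')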
+
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = case regClass reg of
+ RcInteger -> text "blue"
+ RcFloat -> text "red"
+ RcDouble -> text "green"
@@ -253,108 +127,3 @@ canShortcut _ = Nothing
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump _ other = other
-
-
-
--- | Make a spill instruction.
--- On SPARC we spill below frame pointer leaving 2 words/spill
-mkSpillInstr
- :: Reg -- ^ register to spill
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-mkSpillInstr reg _ slot
- = let off = spillSlotToOffset slot
- off_w = 1 + (off `div` 4)
- sz = case regClass reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in ST sz reg (fpRel (negate off_w))
-
-
--- | Make a spill reload instruction.
-mkLoadInstr
- :: Reg -- ^ register to load
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-mkLoadInstr reg _ slot
- = let off = spillSlotToOffset slot
- off_w = 1 + (off `div` 4)
- sz = case regClass reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in LD sz (fpRel (- off_w)) reg
-
-
--- | Make a reg-reg move instruction.
--- On SPARC v8 there are no instructions to move directly between
--- floating point and integer regs. If we need to do that then we
--- have to go via memory.
---
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-
-mkRegRegMoveInstr src dst
- = case regClass src of
- RcInteger -> ADD False False src (RIReg g0) dst
- RcDouble -> FMOV FF64 src dst
- RcFloat -> FMOV FF32 src dst
-
-
--- | Check whether an instruction represents a reg-reg move.
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
---
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove instr
- = case instr of
- ADD False False src (RIReg src2) dst
- | g0 == src2 -> Just (src, dst)
-
- FMOV FF64 src dst -> Just (src, dst)
- FMOV FF32 src dst -> Just (src, dst)
- _ -> Nothing
-
-
--- | Make an unconditional branch instruction.
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id
- = [BI ALWAYS False id
- , NOP] -- fill the branch delay slot.
-
-
--- | TODO: Why do we need 8 bytes per slot?? -BL 2009/02
-spillSlotSize :: Int
-spillSlotSize = 8
-
-
--- | The maximum number of spill slots available on the C stack.
--- If we use up all of the slots, then we're screwed.
-maxSpillSlots :: Int
-maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
-
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
---
-spillSlotToOffset :: Int -> Int
-spillSlotToOffset slot
- | slot >= 0 && slot < maxSpillSlots
- = 64 + spillSlotSize * slot
-
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int maxSpillSlots)
-
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
index 987fc2da14..1fb6a01b87 100644
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ b/compiler/nativeGen/SPARC/Regs.hs
@@ -5,17 +5,6 @@
-- -----------------------------------------------------------------------------
module SPARC.Regs (
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediate values
Imm(..),
strImmLit,
@@ -39,113 +28,33 @@ module SPARC.Regs (
fits13Bits,
largeOffsetError,
gReg, iReg, lReg, oReg, fReg,
- fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f26, f27,
+ fp, sp, g0, g1, g2, o0, o1, f0, f6, f8, f22, f26, f27,
nCG_FirstFloatReg,
- -- horror show
+ -- allocatable
freeReg,
- globalRegMaybe
+ allocatableRegs,
+ globalRegMaybe,
+
+ get_GlobalReg_reg_or_addr
)
where
-#include "nativeGen/NCG.h"
-#include "HsVersions.h"
-#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
-import Outputable ( Outputable(..), pprPanic, panic )
+import Outputable ( panic )
import qualified Outputable
-import Unique
import Constants
import FastBool
--- sizes -----------------------------------------------------------------------
-
--- | A 'Size' also includes format information, such as whether
--- the word is signed or unsigned.
---
-data Size
- = II8 -- byte (signed)
- | II16 -- halfword (signed, 2 bytes)
- | II32 -- word (4 bytes)
- | II64 -- word (8 bytes)
- | FF32 -- IEEE single-precision floating pt
- | FF64 -- IEEE single-precision floating pt
- deriving Eq
-
-
--- | Get the integer size of this width.
-intSize :: Width -> Size
-intSize width
- = case width of
- W8 -> II8
- W16 -> II16
- W32 -> II32
- W64 -> II64
- other -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Get the float size of this width.
-floatSize :: Width -> Size
-floatSize width
- = case width of
- W32 -> FF32
- W64 -> FF64
- other -> pprPanic "SPARC.Regs.intSize" (ppr other)
-
-
--- | Check if a size represents a floating point value.
-isFloatSize :: Size -> Bool
-isFloatSize size
- = case size of
- FF32 -> True
- FF64 -> True
- _ -> False
-
-
--- | Size of a machine word.
--- This is big enough to hold a pointer.
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
--- | Convert a Cmm type to a Size.
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty
- | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
--- | Get the Width of a Size.
-sizeToWidth :: Size -> Width
-sizeToWidth size
- = case size of
- II8 -> W8
- II16 -> W16
- II32 -> W32
- II64 -> W64
- FF32 -> W32
- FF64 -> W64
-
-
--- | Make a virtual reg with this size.
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size)
- = VirtualRegI u
-
- | otherwise
- = case size of
- FF32 -> VirtualRegF u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-- immediates ------------------------------------------------------------------
@@ -390,48 +299,13 @@ o1 = RealReg (oReg 1)
f0 = RealReg (fReg 0)
+-- | We use the first few float regs as double precision.
+--	This is the RegNo of the first float reg we use as single precision.
+--
nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg NCG_FirstFloatReg
-#else
-nCG_FirstFloatReg :: RegNo
-nCG_FirstFloatReg = unRealReg f22
-#endif
-
-
--- horror show -----------------------------------------------------------------
-#if sparc_TARGET_ARCH
-#define g0 0
-#define g1 1
-#define g2 2
-#define g3 3
-#define g4 4
-#define g5 5
-#define g6 6
-#define g7 7
-#define o0 8
-#define o1 9
-#define o2 10
-#define o3 11
-#define o4 12
-#define o5 13
-#define o6 14
-#define o7 15
-#define l0 16
-#define l1 17
-#define l2 18
-#define l3 19
-#define l4 20
-#define l5 21
-#define l6 22
-#define l7 23
-#define i0 24
-#define i1 25
-#define i2 26
-#define i3 27
-#define i4 28
-#define i5 29
-#define i6 30
-#define i7 31
+nCG_FirstFloatReg = 54
+
+
-- | Check whether a machine register is free for allocation.
-- This needs to match the info in includes/MachRegs.h otherwise modules
@@ -445,7 +319,11 @@ freeReg regno
-- %g1(r1) - %g4(r4) are allocable -----------------
-freeReg :: RegNo -> FastBool
+ -- %g5(r5) - %g7(r7)
+ -- are reserved for the OS
+ 5 -> fastBool False
+ 6 -> fastBool False
+ 7 -> fastBool False
-- %o0(r8) - %o5(r13) are allocable ----------------
@@ -507,7 +385,15 @@ freeReg :: RegNo -> FastBool
-- regs not matched above are allocable.
_ -> fastBool True
-
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs to which we are prepared to let the
+-- register allocator map VRegs.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
-- | Returns Just the real register that a global register is stored in.
@@ -539,15 +425,20 @@ globalRegMaybe gg
Hp -> Just (RealReg 27) -- %i3
HpLim -> Just (RealReg 28) -- %i4
-globalRegMaybe :: GlobalReg -> Maybe Reg
-
-
-
-#else
-
-freeReg _ = 0#
-globalRegMaybe = panic "SPARC.Regs.globalRegMaybe: not defined"
+ BaseReg -> Just (RealReg 25) -- %i1
+
+ _ -> Nothing
-#endif
+-- We map STG registers onto appropriate CmmExprs. Either they map
+-- to real machine registers or are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
diff --git a/compiler/nativeGen/Size.hs b/compiler/nativeGen/Size.hs
new file mode 100644
index 0000000000..3be5430e82
--- /dev/null
+++ b/compiler/nativeGen/Size.hs
@@ -0,0 +1,103 @@
+-- | Sizes on this architecture
+-- A Size is a combination of width and class
+--
+-- TODO: Rename this to "Format" instead of "Size" to reflect
+-- the fact that it represents floating point vs integer.
+--
+-- TODO: Signed vs unsigned?
+--
+-- TODO: This module is currently shared by all architectures because
+--	NCGMonad needs to know about it to make a VReg. It would be better
+--	to have architecture-specific formats, and do the overloading
+-- properly. eg SPARC doesn't care about FF80.
+--
+module Size (
+ Size(..),
+ intSize,
+ floatSize,
+ isFloatSize,
+ cmmTypeSize,
+ sizeToWidth
+)
+
+where
+
+import Cmm
+import Outputable
+
+-- It looks very like the old MachRep, but it's now of purely local
+-- significance, here in the native code generator. You can change it
+-- without global consequences.
+--
+-- A major use is as an opcode qualifier; thus the opcode
+-- mov.l a b
+-- might be encoded
+-- MOV II32 a b
+-- where the Size field encodes the ".l" part.
+
+-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
+-- here. I've removed them from the x86 version, we'll see what happens --SDM
+
+-- ToDo: quite a few occurrences of Size could usefully be replaced by Width
+
+data Size
+ = II8
+ | II16
+ | II32
+ | II64
+ | FF32
+ | FF64
+ | FF80
+ deriving (Show, Eq)
+
+
+-- | Get the integer size of this width.
+intSize :: Width -> Size
+intSize width
+ = case width of
+ W8 -> II8
+ W16 -> II16
+ W32 -> II32
+ W64 -> II64
+ other -> pprPanic "Size.intSize" (ppr other)
+
+
+-- | Get the float size of this width.
+floatSize :: Width -> Size
+floatSize width
+ = case width of
+ W32 -> FF32
+ W64 -> FF64
+ other -> pprPanic "Size.floatSize" (ppr other)
+
+
+-- | Check if a size represents a floating point value.
+isFloatSize :: Size -> Bool
+isFloatSize size
+ = case size of
+ FF32 -> True
+ FF64 -> True
+ FF80 -> True
+ _ -> False
+
+
+-- | Convert a Cmm type to a Size.
+cmmTypeSize :: CmmType -> Size
+cmmTypeSize ty
+ | isFloatType ty = floatSize (typeWidth ty)
+ | otherwise = intSize (typeWidth ty)
+
+
+-- | Get the Width of a Size.
+sizeToWidth :: Size -> Width
+sizeToWidth size
+ = case size of
+ II8 -> W8
+ II16 -> W16
+ II32 -> W32
+ II64 -> W64
+ FF32 -> W32
+ FF64 -> W64
+ FF80 -> W80
+
+
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs
new file mode 100644
index 0000000000..2643b00d87
--- /dev/null
+++ b/compiler/nativeGen/TargetReg.hs
@@ -0,0 +1,101 @@
+
+-- | Hard wired things related to registers.
+--	This module is what prevents the native code generator from being able to
+-- emit code for non-host architectures.
+--
+-- TODO: Do a better job of the overloading, and eliminate this module.
+-- We'd probably do better with a Register type class, and hook this to
+-- Instruction somehow.
+--
+-- TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable
+
+module TargetReg (
+ targetRegClass,
+ targetMkVReg,
+ targetWordSize,
+ targetRegDotColor
+)
+
+where
+
+#include "HsVersions.h"
+
+import Reg
+import RegClass
+import Size
+
+import CmmExpr (wordWidth)
+import Outputable
+import Unique
+
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+import qualified X86.Regs as X86
+import qualified X86.RegInfo as X86
+
+#elif powerpc_TARGET_ARCH
+import qualified PPC.Regs as PPC
+import qualified PPC.RegInfo as PPC
+
+#elif sparc_TARGET_ARCH
+import qualified SPARC.Regs as SPARC
+import qualified SPARC.RegInfo as SPARC
+
+
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+-- x86 -------------------------------------------------------------------------
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = X86.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = X86.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = X86.regDotColor
+
+
+-- ppc -------------------------------------------------------------------------
+#elif powerpc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = PPC.regClass
+
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = PPC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = PPC.regDotColor
+
+
+-- sparc -----------------------------------------------------------------------
+#elif sparc_TARGET_ARCH
+targetRegClass :: Reg -> RegClass
+targetRegClass = SPARC.regClass
+
+-- | Size of a machine word.
+-- This is big enough to hold a pointer.
+targetWordSize :: Size
+targetWordSize = intSize wordWidth
+
+targetMkVReg :: Unique -> Size -> Reg
+targetMkVReg = SPARC.mkVReg
+
+targetRegDotColor :: Reg -> SDoc
+targetRegDotColor = SPARC.regDotColor
+
+--------------------------------------------------------------------------------
+#else
+#error "RegAlloc.Graph.TargetReg: not defined"
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
new file mode 100644
index 0000000000..43495a45a5
--- /dev/null
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -0,0 +1,2313 @@
+{-# OPTIONS -w #-}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
+-----------------------------------------------------------------------------
+--
+-- Generating machine code (instruction selection)
+--
+-- (c) The University of Glasgow 1996-2004
+--
+-----------------------------------------------------------------------------
+
+-- This is a big module, but, if you pay attention to
+-- (a) the sectioning, (b) the type signatures, and
+-- (c) the #if blah_TARGET_ARCH things, the
+-- structure should not be too overwhelming.
+
+module X86.CodeGen (
+ cmmTopCodeGen,
+ InstrBlock
+)
+
+where
+
+#include "HsVersions.h"
+#include "nativeGen/NCG.h"
+#include "MachDeps.h"
+
+-- NCG stuff:
+import X86.Instr
+import X86.Cond
+import X86.Regs
+import X86.RegInfo
+import X86.Ppr
+import Instruction
+import PIC
+import NCGMonad
+import Size
+import Reg
+import RegClass
+import Platform
+
+-- Our intermediate code:
+import BasicTypes
+import BlockId
+import PprCmm ( pprExpr )
+import Cmm
+import CLabel
+import ClosureInfo ( C_SRT(..) )
+
+-- The rest:
+import StaticFlags ( opt_PIC )
+import ForeignCall ( CCallConv(..) )
+import OrdList
+import Pretty
+import qualified Outputable as O
+import Outputable
+import FastString
+import FastBool ( isFastTrue )
+import Constants ( wORD_SIZE )
+import DynFlags
+
+import Debug.Trace ( trace )
+
+import Control.Monad ( mapAndUnzipM )
+import Data.Maybe ( fromJust )
+import Data.Bits
+import Data.Word
+import Data.Int
+
+
+cmmTopCodeGen
+ :: DynFlags
+ -> RawCmmTop
+ -> NatM [NatCmmTop Instr]
+
+cmmTopCodeGen dynflags
+ (CmmProc info lab params (ListGraph blocks)) = do
+ (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
+ picBaseMb <- getPicBaseMaybeNat
+ let proc = CmmProc info lab params (ListGraph $ concat nat_blocks)
+ tops = proc : concat statics
+ os = platformOS $ targetPlatform dynflags
+
+ case picBaseMb of
+ Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
+ Nothing -> return tops
+
+cmmTopCodeGen _ (CmmData sec dat) = do
+ return [CmmData sec dat] -- no translation, we just use CmmStatic
+
+
+basicBlockCodeGen
+ :: CmmBasicBlock
+ -> NatM ( [NatBasicBlock Instr]
+ , [NatCmmTop Instr])
+
+basicBlockCodeGen (BasicBlock id stmts) = do
+ instrs <- stmtsToInstrs stmts
+ -- code generation may introduce new basic block boundaries, which
+ -- are indicated by the NEWBLOCK instruction. We must split up the
+ -- instruction stream into basic blocks again. Also, we extract
+ -- LDATAs here too.
+ let
+ (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
+
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+ -- in
+ return (BasicBlock id top : other_blocks, statics)
+
+
+stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
+stmtsToInstrs stmts
+ = do instrss <- mapM stmtToInstrs stmts
+ return (concatOL instrss)
+
+
+stmtToInstrs :: CmmStmt -> NatM InstrBlock
+stmtToInstrs stmt = case stmt of
+ CmmNop -> return nilOL
+ CmmComment s -> return (unitOL (COMMENT s))
+
+ CmmAssign reg src
+ | isFloatType ty -> assignReg_FltCode size reg src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignReg_I64Code reg src
+#endif
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
+
+ CmmStore addr src
+ | isFloatType ty -> assignMem_FltCode size addr src
+#if WORD_SIZE_IN_BITS==32
+ | isWord64 ty -> assignMem_I64Code addr src
+#endif
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
+
+ CmmCall target result_regs args _ _
+ -> genCCall target result_regs args
+
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg params -> genJump arg
+ CmmReturn params ->
+ panic "stmtToInstrs: return statement should have been cps'd away"
+
+
+--------------------------------------------------------------------------------
+-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
+-- They are really trees of insns to facilitate fast appending, where a
+-- left-to-right traversal yields the insns in the correct order.
+--
+type InstrBlock
+ = OrdList Instr
+
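Since an InstrBlock is an OrdList, appends are O(1) and the tree is only flattened at the end; the stitching idiom used throughout this module, as a one-line sketch (the helper name is ours):

    -- Join two code trees, then tack one instruction onto the right end.
    stitch :: InstrBlock -> InstrBlock -> Instr -> InstrBlock
    stitch code1 code2 instr = code1 `appOL` code2 `snocOL` instr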
+
+-- | Condition codes passed up the tree.
+--
+data CondCode
+ = CondCode Bool Cond InstrBlock
+
+
+-- | a.k.a "Register64"
+-- Reg is the lower 32-bit temporary which contains the result.
+-- Use getHiVRegFromLo to find the other VRegUnique.
+--
+-- The rule of this simplified insn-selection game is therefore that
+-- the returned Reg may be modified.
+--
+data ChildCode64
+ = ChildCode64
+ InstrBlock
+ Reg
+
+
+-- | Registers passed up the tree. If the stix code forces the register
+-- to live in a pre-decided machine register, it comes out as @Fixed@;
+-- otherwise, it comes out as @Any@, and the parent can decide which
+-- register to put it in.
+--
+data Register
+ = Fixed Size Reg InstrBlock
+ | Any Size (Reg -> InstrBlock)
+
+
+swizzleRegisterRep :: Register -> Size -> Register
+swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
+swizzleRegisterRep (Any _ codefn) size = Any size codefn
+
+
+-- | Grab the Reg for a CmmReg
+getRegisterReg :: CmmReg -> Reg
+
+getRegisterReg (CmmLocal (LocalReg u pk))
+ = mkVReg u (cmmTypeSize pk)
+
+getRegisterReg (CmmGlobal mid)
+ = case get_GlobalReg_reg_or_addr mid of
+ Left (RealReg rrno) -> RealReg rrno
+ _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
+ -- By this stage, the only MagicIds remaining should be the
+ -- ones which map to a real machine register on this
+ -- platform. Hence ...
+
+
+-- | Memory addressing modes passed up the tree.
+data Amode
+ = Amode AddrMode InstrBlock
+
+{-
+Now, given a tree (the argument to an CmmLoad) that references memory,
+produce a suitable addressing mode.
+
+A Rule of the Game (tm) for Amodes: use of the addr bit must
+immediately follow use of the code part, since the code part puts
+values in registers which the addr then refers to. So you can't put
+anything in between, lest it overwrite some of those registers. If
+you need to do some other computation between the code part and use of
+the addr bit, first store the effective address from the amode in a
+temporary, then do the other computation, and then use the temporary:
+
+ code
+ LEA amode, tmp
+ ... other computation ...
+ ... (tmp) ...
+-}
+
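When the rule above cannot be honoured, the escape hatch is exactly the LEA-into-a-temporary trick; a minimal sketch using only functions from this module (the helper name is ours, not part of the patch):

    -- Pin the effective address in a fresh register so the amode
    -- survives arbitrary intervening computation.
    pinAmode :: CmmExpr -> NatM (Reg, InstrBlock)
    pinAmode e = do
        Amode addr code <- getAmode e
        tmp <- getNewRegNat archWordSize
        return (tmp, code `snocOL` LEA archWordSize (OpAddr addr) (OpReg tmp))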
+
+-- | Check whether an integer will fit in 32 bits.
+-- A CmmInt is intended to be truncated to the appropriate
+-- number of bits, so here we truncate it to Int64. This is
+-- important because e.g. -1 as a CmmInt might be either
+-- -1 or 18446744073709551615.
+--
+is32BitInteger :: Integer -> Bool
+is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
+ where i64 = fromIntegral i :: Int64
+
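The Int64 truncation is what makes the negative cases come out right; hedged checks of the edge cases (the helper name is ours):

    is32BitChecks :: Bool
    is32BitChecks = and
        [ is32BitInteger (-1)                   -- fits, trivially
        , is32BitInteger 18446744073709551615   -- same 64-bit pattern as -1, so it fits too
        , not (is32BitInteger 0x80000000)       -- 2^31 does not fit in a signed 32-bit word
        ]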
+
+-- | Convert a BlockId to some CmmStatic data
+jumpTableEntry :: Maybe BlockId -> CmmStatic
+jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
+jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel)
+ where blockLabel = mkAsmTempLabel id
+
+
+-- -----------------------------------------------------------------------------
+-- General things for putting together code sequences
+
+-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
+-- CmmExprs into CmmRegOff?
+mangleIndexTree :: CmmExpr -> CmmExpr
+mangleIndexTree (CmmRegOff reg off)
+ = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+ where width = typeWidth (cmmRegType reg)
+
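For instance (a sketch of the rewrite, not new behaviour): mangleIndexTree (CmmRegOff reg 8) yields CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt 8 w)] with w = typeWidth (cmmRegType reg); the getAmode rules below then fold the addition back into an addressing mode.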
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed _ reg code ->
+ return (reg, code)
+
+
+
+
+
+assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
+assignMem_I64Code addrTree valueTree = do
+ Amode addr addr_code <- getAmode addrTree
+ ChildCode64 vcode rlo <- iselExpr64 valueTree
+ let
+ rhi = getHiVRegFromLo rlo
+
+ -- Little-endian store
+ mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
+ mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
+ -- in
+ return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
+
+
+assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
+assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
+ ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
+ let
+ r_dst_lo = mkVReg u_dst II32
+ r_dst_hi = getHiVRegFromLo r_dst_lo
+ r_src_hi = getHiVRegFromLo r_src_lo
+ mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
+ mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
+ -- in
+ return (
+ vcode `snocOL` mov_lo `snocOL` mov_hi
+ )
+
+assignReg_I64Code lvalue valueTree
+ = panic "assignReg_I64Code(i386): invalid lvalue"
+
+
+
+
+iselExpr64 :: CmmExpr -> NatM ChildCode64
+iselExpr64 (CmmLit (CmmInt i _)) = do
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ code = toOL [
+ MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
+ ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
+ Amode addr addr_code <- getAmode addrTree
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
+ mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
+ -- in
+ return (
+ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
+ )
+
+iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
+ = return (ChildCode64 nilOL (mkVReg vu II32))
+
+-- we handle addition, but rather badly
+iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r = fromIntegral (fromIntegral i :: Word32)
+ q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32)
+ r1hi = getHiVRegFromLo r1lo
+ code = code1 `appOL`
+ toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+ ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
+ MOV II32 (OpReg r1hi) (OpReg rhi),
+ ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+ ADD II32 (OpReg r2lo) (OpReg rlo),
+ MOV II32 (OpReg r1hi) (OpReg rhi),
+ ADC II32 (OpReg r2hi) (OpReg rhi) ]
+ -- in
+ return (ChildCode64 code rlo)
+
+iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
+ fn <- getAnyReg expr
+ r_dst_lo <- getNewRegNat II32
+ let r_dst_hi = getHiVRegFromLo r_dst_lo
+ code = fn r_dst_lo
+ return (
+ ChildCode64 (code `snocOL`
+ MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
+ r_dst_lo
+ )
+
+iselExpr64 expr
+ = pprPanic "iselExpr64(i386)" (ppr expr)
+
+
+
+--------------------------------------------------------------------------------
+getRegister :: CmmExpr -> NatM Register
+
+#if !x86_64_TARGET_ARCH
+ -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
+ -- register, it can only be used for rip-relative addressing.
+getRegister (CmmReg (CmmGlobal PicBaseReg))
+ = do
+ reg <- getPicBaseNat archWordSize
+ return (Fixed archWordSize reg nilOL)
+#endif
+
+getRegister (CmmReg reg)
+ = return (Fixed (cmmTypeSize (cmmRegType reg))
+ (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _)
+ = getRegister (mangleIndexTree tree)
+
+
+#if WORD_SIZE_IN_BITS==32
+ -- for 32-bit architectures, support some 64 -> 32 bit conversions:
+ -- TO_W_(x), TO_W_(x >> 32)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+ [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+ ChildCode64 code rlo <- iselExpr64 x
+ return $ Fixed II32 rlo code
+
+#endif
+
+
+
+
+#if i386_TARGET_ARCH
+
+getRegister (CmmLit (CmmFloat f W32)) = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f W32)]
+ `consOL` (addr_code `snocOL`
+ GLD FF32 addr dst)
+ -- in
+ return (Any FF32 code)
+
+
+getRegister (CmmLit (CmmFloat d W64))
+ | d == 0.0
+ = let code dst = unitOL (GLDZ dst)
+ in return (Any FF64 code)
+
+ | d == 1.0
+ = let code dst = unitOL (GLD1 dst)
+ in return (Any FF64 code)
+
+ | otherwise = do
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+ Amode addr addr_code <- getAmode dynRef
+ let code dst =
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat d W64)]
+ `consOL` (addr_code `snocOL`
+ GLD FF64 addr dst)
+ -- in
+ return (Any FF64 code)
+
+#endif /* i386_TARGET_ARCH */
+
+
+
+
+#if x86_64_TARGET_ARCH
+getRegister (CmmLit (CmmFloat 0.0 w)) = do
+ let size = floatSize w
+ code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
+ -- I don't know why there are xorpd, xorps, and pxor instructions.
+ -- They all appear to do the same thing --SDM
+ return (Any size code)
+
+getRegister (CmmLit (CmmFloat f w)) = do
+ lbl <- getNewLabelNat
+ let code dst = toOL [
+ LDATA ReadOnlyData
+ [CmmDataLabel lbl,
+ CmmStaticLit (CmmFloat f w)],
+ MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ ]
+ -- in
+ return (Any size code)
+ where size = floatSize w
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II8) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II8) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II16) addr
+ return (Any II32 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II16) addr
+ return (Any II32 code)
+
+
+#if x86_64_TARGET_ARCH
+
+-- catch simple cases of zero- or sign-extended load
+getRegister (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II8) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II8) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVZxL II16) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II16) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do
+ code <- intLoadCode (MOVSxL II32) addr
+ return (Any II64 code)
+
+getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+ CmmLit displacement])
+ = return $ Any II64 (\dst -> unitOL $
+ LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
+
+getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ code dst = x_code dst `appOL` toOL [
+ -- This is how gcc does it, so it can't be that bad:
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x80000000 W32),
+ CmmStaticLit (CmmInt 0 W32),
+ CmmStaticLit (CmmInt 0 W32),
+ CmmStaticLit (CmmInt 0 W32)
+ ],
+ XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorps, so we need the 128-bit constant
+ -- ToDo: rip-relative
+ ]
+ --
+ return (Any FF32 code)
+
+getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do
+ x_code <- getAnyReg x
+ lbl <- getNewLabelNat
+ let
+ -- This is how gcc does it, so it can't be that bad:
+ code dst = x_code dst `appOL` toOL [
+ LDATA ReadOnlyData16 [
+ CmmAlign 16,
+ CmmDataLabel lbl,
+ CmmStaticLit (CmmInt 0x8000000000000000 W64),
+ CmmStaticLit (CmmInt 0 W64)
+ ],
+ -- gcc puts an unpck here. Wonder if we need it.
+ XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
+ -- xorpd, so we need the 128-bit constant
+ ]
+ --
+ return (Any FF64 code)
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+ = case mop of
+#if i386_TARGET_ARCH
+ MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x
+ MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x
+#endif
+
+ MO_S_Neg w -> triv_ucode NEGI (intSize w)
+ MO_F_Neg w -> triv_ucode NEGI (floatSize w)
+ MO_Not w -> triv_ucode NOT (intSize w)
+
+ -- Nop conversions
+ MO_UU_Conv W32 W8 -> toI8Reg W32 x
+ MO_SS_Conv W32 W8 -> toI8Reg W32 x
+ MO_UU_Conv W16 W8 -> toI8Reg W16 x
+ MO_SS_Conv W16 W8 -> toI8Reg W16 x
+ MO_UU_Conv W32 W16 -> toI16Reg W32 x
+ MO_SS_Conv W32 W16 -> toI16Reg W32 x
+
+#if x86_64_TARGET_ARCH
+ MO_UU_Conv W64 W32 -> conversionNop II64 x
+ MO_SS_Conv W64 W32 -> conversionNop II64 x
+ MO_UU_Conv W64 W16 -> toI16Reg W64 x
+ MO_SS_Conv W64 W16 -> toI16Reg W64 x
+ MO_UU_Conv W64 W8 -> toI8Reg W64 x
+ MO_SS_Conv W64 W8 -> toI8Reg W64 x
+#endif
+
+ MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+ MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
+
+ -- widenings
+ MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
+ MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
+ MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
+
+ MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
+ MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
+ MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
+
+#if x86_64_TARGET_ARCH
+ MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x
+ MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x
+ MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x
+ MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x
+ MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x
+ MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x
+ -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
+ -- However, we don't want the register allocator to throw it
+ -- away as an unnecessary reg-to-reg move, so we keep it in
+ -- the form of a movzl and print it as a movl later.
+#endif
+
+#if i386_TARGET_ARCH
+ MO_FF_Conv W32 W64 -> conversionNop FF64 x
+ MO_FF_Conv W64 W32 -> conversionNop FF32 x
+#else
+ MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
+ MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
+#endif
+
+ MO_FS_Conv from to -> coerceFP2Int from to x
+ MO_SF_Conv from to -> coerceInt2FP from to x
+
+ other -> pprPanic "getRegister" (pprMachOp mop)
+ where
+ triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
+ triv_ucode instr size = trivialUCode size (instr size) x
+
+ -- signed or unsigned extension.
+ integerExtend :: Width -> Width
+ -> (Size -> Operand -> Operand -> Instr)
+ -> CmmExpr -> NatM Register
+ integerExtend from to instr expr = do
+ (reg,e_code) <- if from == W8 then getByteReg expr
+ else getSomeReg expr
+ let
+ code dst =
+ e_code `snocOL`
+ instr (intSize from) (OpReg reg) (OpReg dst)
+ return (Any (intSize to) code)
+
+ toI8Reg :: Width -> CmmExpr -> NatM Register
+ toI8Reg new_rep expr
+ = do codefn <- getAnyReg expr
+ return (Any (intSize new_rep) codefn)
+ -- HACK: use getAnyReg to get a byte-addressable register.
+ -- If the source was a Fixed register, this will add the
+ -- mov instruction to put it into the desired destination.
+ -- We're assuming that the destination won't be a fixed
+ -- non-byte-addressable register; it won't be, because all
+ -- fixed registers are word-sized.
+
+ toI16Reg = toI8Reg -- for now
+
+ conversionNop :: Size -> CmmExpr -> NatM Register
+ conversionNop new_size expr
+ = do e_code <- getRegister expr
+ return (swizzleRegisterRep e_code new_size)
+
+
+getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
+ = case mop of
+ MO_F_Eq w -> condFltReg EQQ x y
+ MO_F_Ne w -> condFltReg NE x y
+ MO_F_Gt w -> condFltReg GTT x y
+ MO_F_Ge w -> condFltReg GE x y
+ MO_F_Lt w -> condFltReg LTT x y
+ MO_F_Le w -> condFltReg LE x y
+
+ MO_Eq rep -> condIntReg EQQ x y
+ MO_Ne rep -> condIntReg NE x y
+
+ MO_S_Gt rep -> condIntReg GTT x y
+ MO_S_Ge rep -> condIntReg GE x y
+ MO_S_Lt rep -> condIntReg LTT x y
+ MO_S_Le rep -> condIntReg LE x y
+
+ MO_U_Gt rep -> condIntReg GU x y
+ MO_U_Ge rep -> condIntReg GEU x y
+ MO_U_Lt rep -> condIntReg LU x y
+ MO_U_Le rep -> condIntReg LEU x y
+
+#if i386_TARGET_ARCH
+ MO_F_Add w -> trivialFCode w GADD x y
+ MO_F_Sub w -> trivialFCode w GSUB x y
+ MO_F_Quot w -> trivialFCode w GDIV x y
+ MO_F_Mul w -> trivialFCode w GMUL x y
+#endif
+
+#if x86_64_TARGET_ARCH
+ MO_F_Add w -> trivialFCode w ADD x y
+ MO_F_Sub w -> trivialFCode w SUB x y
+ MO_F_Quot w -> trivialFCode w FDIV x y
+ MO_F_Mul w -> trivialFCode w MUL x y
+#endif
+
+ MO_Add rep -> add_code rep x y
+ MO_Sub rep -> sub_code rep x y
+
+ MO_S_Quot rep -> div_code rep True True x y
+ MO_S_Rem rep -> div_code rep True False x y
+ MO_U_Quot rep -> div_code rep False True x y
+ MO_U_Rem rep -> div_code rep False False x y
+
+ MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+ MO_Mul rep -> triv_op rep IMUL
+ MO_And rep -> triv_op rep AND
+ MO_Or rep -> triv_op rep OR
+ MO_Xor rep -> triv_op rep XOR
+
+ {- Shift ops on x86s have constraints on their source, it
+ either has to be Imm, CL or 1
+ => trivialCode is not restrictive enough (sigh.)
+ -}
+ MO_Shl rep -> shift_code rep SHL x y {-False-}
+ MO_U_Shr rep -> shift_code rep SHR x y {-False-}
+ MO_S_Shr rep -> shift_code rep SAR x y {-False-}
+
+ other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
+ where
+ --------------------
+ triv_op width instr = trivialCode width op (Just op) x y
+ where op = instr (intSize width)
+
+ imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ imulMayOflo rep a b = do
+ (a_reg, a_code) <- getNonClobberedReg a
+ b_code <- getAnyReg b
+ let
+ shift_amt = case rep of
+ W32 -> 31
+ W64 -> 63
+ _ -> panic "shift_amt"
+
+ size = intSize rep
+ code = a_code `appOL` b_code eax `appOL`
+ toOL [
+ IMUL2 size (OpReg a_reg), -- result in %edx:%eax
+ SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
+ -- sign extend lower part
+ SUB size (OpReg edx) (OpReg eax)
+ -- compare against upper
+ -- eax==0 if high part == sign extended low part
+ ]
+ -- in
+ return (Fixed size eax code)
+
+ --------------------
+ shift_code :: Width
+ -> (Size -> Operand -> Operand -> Instr)
+ -> CmmExpr
+ -> CmmExpr
+ -> NatM Register
+
+ {- Case1: shift length as immediate -}
+ shift_code width instr x y@(CmmLit lit) = do
+ x_code <- getAnyReg x
+ let
+ size = intSize width
+ code dst
+ = x_code dst `snocOL`
+ instr size (OpImm (litToImm lit)) (OpReg dst)
+ -- in
+ return (Any size code)
+
+ {- Case2: shift length is complex (non-immediate)
+ * y must go in %ecx.
+ * we cannot do y first *and* put its result in %ecx, because
+ %ecx might be clobbered by x.
+ * if we do y second, then x cannot be
+ in a clobbered reg. Also, we cannot clobber x's reg
+ with the instruction itself.
+ * so we can either:
+ - do y first, put its result in a fresh tmp, then copy it to %ecx later
+ - do y second and put its result into %ecx. x gets placed in a fresh
+	  tmp. This is likely to be better, because the reg alloc can
+ eliminate this reg->reg move here (it won't eliminate the other one,
+ because the move is into the fixed %ecx).
+ -}
+ shift_code width instr x y{-amount-} = do
+ x_code <- getAnyReg x
+ let size = intSize width
+ tmp <- getNewRegNat size
+ y_code <- getAnyReg y
+ let
+ code = x_code tmp `appOL`
+ y_code ecx `snocOL`
+ instr size (OpReg ecx) (OpReg tmp)
+ -- in
+ return (Fixed size tmp code)
+
+ --------------------
+ add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ add_code rep x (CmmLit (CmmInt y _))
+ | is32BitInteger y = add_int rep x y
+ add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
+ where size = intSize rep
+
+ --------------------
+ sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ sub_code rep x (CmmLit (CmmInt y _))
+ | is32BitInteger (-y) = add_int rep x (-y)
+ sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
+
+ -- our three-operand add instruction:
+ add_int width x y = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ size = intSize width
+ imm = ImmInt (fromInteger y)
+ code dst
+ = x_code `snocOL`
+ LEA size
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
+ (OpReg dst)
+ --
+ return (Any size code)
+
+ ----------------------
+ div_code width signed quotient x y = do
+ (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
+ x_code <- getAnyReg x
+ let
+ size = intSize width
+ widen | signed = CLTD size
+ | otherwise = XOR size (OpReg edx) (OpReg edx)
+
+ instr | signed = IDIV
+ | otherwise = DIV
+
+ code = y_code `appOL`
+ x_code eax `appOL`
+ toOL [widen, instr size y_op]
+
+ result | quotient = eax
+ | otherwise = edx
+
+ -- in
+ return (Fixed size result code)
+
+
+getRegister (CmmLoad mem pk)
+ | isFloatType pk
+ = do
+ Amode src mem_code <- getAmode mem
+ let
+ size = cmmTypeSize pk
+ code dst = mem_code `snocOL`
+ IF_ARCH_i386(GLD size src dst,
+ MOV size (OpAddr src) (OpReg dst))
+ return (Any size code)
+
+#if i386_TARGET_ARCH
+getRegister (CmmLoad mem pk)
+ | not (isWord64 pk)
+ = do
+ code <- intLoadCode instr mem
+ return (Any size code)
+ where
+ width = typeWidth pk
+ size = intSize width
+ instr = case width of
+ W8 -> MOVZxL II8
+ _other -> MOV size
+ -- We always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+#endif
+
+#if x86_64_TARGET_ARCH
+-- Simpler memory load code on x86_64
+getRegister (CmmLoad mem pk)
+ = do
+ code <- intLoadCode (MOV size) mem
+ return (Any size code)
+ where size = intSize $ typeWidth pk
+#endif
+
+getRegister (CmmLit (CmmInt 0 width))
+ = let
+ size = intSize width
+
+ -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
+ adj_size = case size of II64 -> II32; _ -> size
+ size1 = IF_ARCH_i386( size, adj_size )
+ code dst
+ = unitOL (XOR size1 (OpReg dst) (OpReg dst))
+ in
+ return (Any size code)
+
+#if x86_64_TARGET_ARCH
+ -- optimisation for loading small literals on x86_64: take advantage
+ -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
+ -- instruction forms are shorter.
+getRegister (CmmLit lit)
+ | isWord64 (cmmLitType lit), not (isBigLit lit)
+ = let
+ imm = litToImm lit
+ code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
+ in
+ return (Any II64 code)
+ where
+ isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
+ isBigLit _ = False
+ -- note1: not the same as (not.is32BitLit), because that checks for
+ -- signed literals that fit in 32 bits, but we want unsigned
+ -- literals here.
+ -- note2: all labels are small, because we're assuming the
+ -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+
+getRegister (CmmLit lit)
+ = let
+ size = cmmTypeSize (cmmLitType lit)
+ imm = litToImm lit
+ code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
+ in
+ return (Any size code)
+
+getRegister other = pprPanic "getRegister(x86)" (ppr other)
+
+
+intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
+ -> NatM (Reg -> InstrBlock)
+intLoadCode instr mem = do
+ Amode src mem_code <- getAmode mem
+ return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
+
+-- Compute an expression into *any* register, adding the appropriate
+-- move instruction if necessary.
+getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
+getAnyReg expr = do
+ r <- getRegister expr
+ anyReg r
+
+anyReg :: Register -> NatM (Reg -> InstrBlock)
+anyReg (Any _ code) = return code
+anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
+
+-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
+-- Fixed registers might not be byte-addressable, so we make sure we've
+-- got a temporary, inserting an extra reg copy if necessary.
+getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
+#if x86_64_TARGET_ARCH
+getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
+#else
+getByteReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ | isVirtualReg reg -> return (reg,code)
+ | otherwise -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ -- ToDo: could optimise slightly by checking for byte-addressable
+ -- real registers, but that will happen very rarely if at all.
+#endif
+
+-- Another variant: this time we want the result in a register that cannot
+-- be modified by code to evaluate an arbitrary expression.
+getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getNonClobberedReg expr = do
+ r <- getRegister expr
+ case r of
+ Any rep code -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code tmp)
+ Fixed rep reg code
+ -- only free regs can be clobbered
+ | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
+ tmp <- getNewRegNat rep
+ return (tmp, code `snocOL` reg2reg rep reg tmp)
+ | otherwise ->
+ return (reg, code)
+
+reg2reg :: Size -> Reg -> Reg -> Instr
+reg2reg size src dst
+#if i386_TARGET_ARCH
+ | isFloatSize size = GMOV src dst
+#endif
+ | otherwise = MOV size (OpReg src) (OpReg dst)
+
+
+
+--------------------------------------------------------------------------------
+getAmode :: CmmExpr -> NatM Amode
+getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
+
+#if x86_64_TARGET_ARCH
+
+getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
+ CmmLit displacement])
+ = return $ Amode (ripRel (litToImm displacement)) nilOL
+
+#endif
+
+
+-- This is all just ridiculous, since it carefully undoes
+-- what mangleIndexTree has just done.
+getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
+ | is32BitLit lit
+ -- ASSERT(rep == II32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (-(fromInteger i))
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
+ | is32BitLit lit
+ -- ASSERT(rep == II32)???
+ = do (x_reg, x_code) <- getSomeReg x
+ let off = ImmInt (fromInteger i)
+ return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
+
+-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
+-- recognised by the next rule.
+getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
+ b@(CmmLit _)])
+ = getAmode (CmmMachOp (MO_Add rep) [b,a])
+
+getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
+ [y, CmmLit (CmmInt shift _)]])
+ | shift == 0 || shift == 1 || shift == 2 || shift == 3
+ = x86_complex_amode x y shift 0
+
+getAmode (CmmMachOp (MO_Add rep)
+ [x, CmmMachOp (MO_Add _)
+ [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
+ CmmLit (CmmInt offset _)]])
+  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
+	&& is32BitInteger offset
+ = x86_complex_amode x y shift offset
+
+getAmode (CmmMachOp (MO_Add rep) [x,y])
+ = x86_complex_amode x y 0 0
+
+getAmode (CmmLit lit) | is32BitLit lit
+ = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
+
+getAmode expr = do
+ (reg,code) <- getSomeReg expr
+ return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
+
+
+x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
+x86_complex_amode base index shift offset
+ = do (x_reg, x_code) <- getNonClobberedReg base
+ -- x must be in a temp, because it has to stay live over y_code
+	-- we could compare x_reg and y_reg and do something better here...
+ (y_reg, y_code) <- getSomeReg index
+ let
+ code = x_code `appOL` y_code
+	    scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8  -- renamed from 'base' to avoid shadowing the argument
+	return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
+ code)
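+-- A worked example (illustrative only): an addition such as
+--     CmmMachOp (MO_Add w) [x, CmmMachOp (MO_Shl w) [y, CmmLit (CmmInt 3 w)]]
+-- reaches here as 'x86_complex_amode x y 3 0' and is rendered as the
+-- addressing mode 0(x_reg,y_reg,8), i.e. base + index*8.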
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- getOperand: sometimes any operand will do.
+
+-- getNonClobberedOperand: the value of the operand will remain valid across
+-- the computation of an arbitrary expression, unless the expression
+-- is computed directly into a register which the operand refers to
+-- (see trivialCode where this function is used for an example).
+
+getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getNonClobberedOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getNonClobberedOperand (CmmLit lit)
+ | is32BitLit lit && not (isFloatType (cmmLitType lit)) =
+ return (OpImm (litToImm lit), nilOL)
+getNonClobberedOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ (src',save_code) <-
+ if (amodeCouldBeClobbered src)
+ then do
+ tmp <- getNewRegNat archWordSize
+ return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
+                               unitOL (LEA archWordSize (OpAddr src) (OpReg tmp)))
+ else
+ return (src, nilOL)
+ return (OpAddr src', save_code `appOL` mem_code)
+getNonClobberedOperand e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+amodeCouldBeClobbered :: AddrMode -> Bool
+amodeCouldBeClobbered amode = any regClobbered (addrModeRegs amode)
+
+regClobbered :: Reg -> Bool
+regClobbered (RealReg rr) = isFastTrue (freeReg rr)
+regClobbered _ = False
+
+-- getOperand: the operand is not required to remain valid across the
+-- computation of an arbitrary expression.
+getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
+#if x86_64_TARGET_ARCH
+getOperand (CmmLit lit)
+ | isSuitableFloatingPointLit lit = do
+ lbl <- getNewLabelNat
+ let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
+ CmmStaticLit lit])
+ return (OpAddr (ripRel (ImmCLbl lbl)), code)
+#endif
+getOperand (CmmLit lit)
+ | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do
+ return (OpImm (litToImm lit), nilOL)
+getOperand (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getOperand e = do
+ (reg, code) <- getSomeReg e
+ return (OpReg reg, code)
+
+isOperand :: CmmExpr -> Bool
+isOperand (CmmLoad _ _) = True
+isOperand (CmmLit lit) = is32BitLit lit
+ || isSuitableFloatingPointLit lit
+isOperand _ = False
+
+-- if we want a floating-point literal as an operand, we can
+-- use it directly from memory. However, if the literal is
+-- zero, we're better off generating it into a register using
+-- xor.
+isSuitableFloatingPointLit :: CmmLit -> Bool
+isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
+isSuitableFloatingPointLit _ = False
+
+getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
+getRegOrMem (CmmLoad mem pk)
+ | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do
+ Amode src mem_code <- getAmode mem
+ return (OpAddr src, mem_code)
+getRegOrMem e = do
+ (reg, code) <- getNonClobberedReg e
+ return (OpReg reg, code)
+
+is32BitLit :: CmmLit -> Bool
+#if x86_64_TARGET_ARCH
+is32BitLit (CmmInt i W64) = is32BitInteger i
+   -- assume that labels are in the range 0-2^31-1: this assumes the
+   -- small memory model (see gcc docs, -mcmodel=small).
+#endif
+is32BitLit _ = True
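+-- For example, (CmmInt 0x7fffffff W64) fits in a signed 32-bit
+-- immediate field, while (CmmInt 0x80000000 W64) does not and must be
+-- loaded into a register first.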
+
+
+
+
+-- Set up a condition code for a conditional branch.
+
+getCondCode :: CmmExpr -> NatM CondCode
+
+-- yes, they really do seem to want exactly the same!
+
+getCondCode (CmmMachOp mop [x, y])
+ =
+ case mop of
+ MO_F_Eq W32 -> condFltCode EQQ x y
+ MO_F_Ne W32 -> condFltCode NE x y
+ MO_F_Gt W32 -> condFltCode GTT x y
+ MO_F_Ge W32 -> condFltCode GE x y
+ MO_F_Lt W32 -> condFltCode LTT x y
+ MO_F_Le W32 -> condFltCode LE x y
+
+ MO_F_Eq W64 -> condFltCode EQQ x y
+ MO_F_Ne W64 -> condFltCode NE x y
+ MO_F_Gt W64 -> condFltCode GTT x y
+ MO_F_Ge W64 -> condFltCode GE x y
+ MO_F_Lt W64 -> condFltCode LTT x y
+ MO_F_Le W64 -> condFltCode LE x y
+
+ MO_Eq rep -> condIntCode EQQ x y
+ MO_Ne rep -> condIntCode NE x y
+
+ MO_S_Gt rep -> condIntCode GTT x y
+ MO_S_Ge rep -> condIntCode GE x y
+ MO_S_Lt rep -> condIntCode LTT x y
+ MO_S_Le rep -> condIntCode LE x y
+
+ MO_U_Gt rep -> condIntCode GU x y
+ MO_U_Ge rep -> condIntCode GEU x y
+ MO_U_Lt rep -> condIntCode LU x y
+ MO_U_Le rep -> condIntCode LEU x y
+
+        other -> pprPanic "X86.getCondCode" (ppr (CmmMachOp mop [x,y]))
+
+getCondCode other = pprPanic "X86.getCondCode(2)" (ppr other)
+
+
+
+
+-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
+-- passed back up the tree.
+
+condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+-- memory vs immediate
+condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do
+ Amode x_addr x_code <- getAmode x
+ let
+ imm = litToImm lit
+ code = x_code `snocOL`
+ CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero, using a mask
+-- TODO: Add some sanity checking!!!!
+condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
+ | (CmmLit lit@(CmmInt mask pk2)) <- o2, is32BitLit lit
+ = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
+-- anything vs zero
+condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
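+-- (TEST r r sets the zero and sign flags exactly as a comparison of r
+-- against zero would, with a shorter encoding than CMP $0, r.)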
+
+-- anything vs operand
+condIntCode cond x y | isOperand y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg)
+ -- in
+ return (CondCode False cond code)
+
+-- anything vs anything
+condIntCode cond x y = do
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getRegOrMem x
+ let
+ code = y_code `appOL`
+ x_code `snocOL`
+ CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op
+ -- in
+ return (CondCode False cond code)
+
+
+
+--------------------------------------------------------------------------------
+condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
+
+#if i386_TARGET_ARCH
+condFltCode cond x y
+  = ASSERT(cond `elem` [EQQ, NE, LE, LTT, GE, GTT]) do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ code = x_code `appOL` y_code `snocOL`
+ GCMP cond x_reg y_reg
+ -- The GCMP insn does the test and sets the zero flag if comparable
+ -- and true. Hence we always supply EQQ as the condition to test.
+ return (CondCode True EQQ code)
+
+#elif x86_64_TARGET_ARCH
+-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
+-- an operand, but the right must be a reg. We can probably do better
+-- than this general case...
+condFltCode cond x y = do
+ (x_reg, x_code) <- getNonClobberedReg x
+ (y_op, y_code) <- getOperand y
+ let
+ code = x_code `appOL`
+ y_code `snocOL`
+ CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg)
+ -- NB(1): we need to use the unsigned comparison operators on the
+ -- result of this comparison.
+ -- in
+ return (CondCode True (condToUnsigned cond) code)
+
+#else
+condFltCode = panic "X86.condFltCode: not defined"
+
+#endif
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating assignments
+
+-- Assignments are really at the heart of the whole code generation
+-- business. Almost all top-level nodes of any real importance are
+-- assignments, which correspond to loads, stores, or register
+-- transfers. If we're really lucky, some of the register transfers
+-- will go away, because we can use the destination register to
+-- complete the code generation for the right hand side. This only
+-- fails when the right hand side is forced into a fixed register
+-- (e.g. the result of a call).
+
+assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
+assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
+
+
+-- integer assignment to memory
+
+-- specific case of adding/subtracting an integer to a particular address.
+-- ToDo: catch other cases where we can use an operation directly on a memory
+-- address.
+assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
+ CmmLit (CmmInt i _)])
+ | addr == addr2, pk /= II64 || is32BitInteger i,
+ Just instr <- check op
+ = do Amode amode code_addr <- getAmode addr
+ let code = code_addr `snocOL`
+ instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
+ return code
+ where
+ check (MO_Add _) = Just ADD
+ check (MO_Sub _) = Just SUB
+ check _ = Nothing
+ -- ToDo: more?
+
+-- general case
+assignMem_IntCode pk addr src = do
+ Amode addr code_addr <- getAmode addr
+ (code_src, op_src) <- get_op_RI src
+ let
+ code = code_src `appOL`
+ code_addr `snocOL`
+ MOV pk op_src (OpAddr addr)
+ -- NOTE: op_src is stable, so it will still be valid
+ -- after code_addr. This may involve the introduction
+ -- of an extra MOV to a temporary register, but we hope
+ -- the register allocator will get rid of it.
+ --
+ return code
+ where
+        get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operand
+ get_op_RI (CmmLit lit) | is32BitLit lit
+ = return (nilOL, OpImm (litToImm lit))
+ get_op_RI op
+ = do (reg,code) <- getNonClobberedReg op
+ return (code, OpReg reg)
+
+
+-- Assign; dst is a reg, rhs is mem
+assignReg_IntCode pk reg (CmmLoad src _) = do
+ load_code <- intLoadCode (MOV pk) src
+ return (load_code (getRegisterReg reg))
+
+-- dst is a reg, but src could be anything
+assignReg_IntCode pk reg src = do
+ code <- getAnyReg src
+ return (code (getRegisterReg reg))
+
+
+-- Floating point assignment to memory
+assignMem_FltCode pk addr src = do
+ (src_reg, src_code) <- getNonClobberedReg src
+ Amode addr addr_code <- getAmode addr
+ let
+ code = src_code `appOL`
+ addr_code `snocOL`
+ IF_ARCH_i386(GST pk src_reg addr,
+ MOV pk (OpReg src_reg) (OpAddr addr))
+ return code
+
+-- Floating point assignment to a register/temporary
+assignReg_FltCode pk reg src = do
+ src_code <- getAnyReg src
+ return (src_code (getRegisterReg reg))
+
+
+genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+
+genJump (CmmLoad mem pk) = do
+ Amode target code <- getAmode mem
+ return (code `snocOL` JMP (OpAddr target))
+
+genJump (CmmLit lit) = do
+ return (unitOL (JMP (OpImm (litToImm lit))))
+
+genJump expr = do
+ (reg,code) <- getSomeReg expr
+ return (code `snocOL` JMP (OpReg reg))
+
+
+-- -----------------------------------------------------------------------------
+-- Unconditional branches
+
+genBranch :: BlockId -> NatM InstrBlock
+genBranch = return . toOL . mkJumpInstr
+
+
+
+-- -----------------------------------------------------------------------------
+-- Conditional jumps
+
+{-
+Conditional jumps are always to local labels, so we can use branch
+instructions. We peek at the arguments to decide what kind of
+comparison to do.
+
+I386: First, we have to ensure that the condition
+codes are set according to the supplied comparison operation.
+-}
+
+genCondJump
+ :: BlockId -- the branch target
+ -> CmmExpr -- the condition on which to branch
+ -> NatM InstrBlock
+
+#if i386_TARGET_ARCH
+genCondJump id bool = do
+ CondCode _ cond code <- getCondCode bool
+ return (code `snocOL` JXX cond id)
+
+#elif x86_64_TARGET_ARCH
+genCondJump id bool = do
+ CondCode is_float cond cond_code <- getCondCode bool
+ if not is_float
+ then
+ return (cond_code `snocOL` JXX cond id)
+ else do
+ lbl <- getBlockIdNat
+
+ -- see comment with condFltReg
+ let code = case cond of
+ NE -> or_unordered
+ GU -> plain_test
+ GEU -> plain_test
+ _ -> and_ordered
+
+ plain_test = unitOL (
+ JXX cond id
+ )
+ or_unordered = toOL [
+ JXX cond id,
+ JXX PARITY id
+ ]
+ and_ordered = toOL [
+ JXX PARITY lbl,
+ JXX cond id,
+ JXX ALWAYS lbl,
+ NEWBLOCK lbl
+ ]
+ return (cond_code `appOL` code)
+
+#else
+genCondJump = panic "X86.genCondJump: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+-- Now the biggest nightmare---calls. Most of the nastiness is buried in
+-- @get_arg@, which moves the arguments to the correct registers/stack
+-- locations. Apart from that, the code is easy.
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> HintedCmmFormals -- where to put the result
+ -> HintedCmmActuals -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if i386_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+        -- write barrier compiles to no code on x86/x86-64;
+        -- we keep it in the Cmm this late only to stop earlier
+        -- optimisations from moving memory operations across it.
+
+-- we only cope with a single result for foreign calls
+genCCall (CmmPrim op) [CmmHinted r _] args = do
+ l1 <- getNewLabelNat
+ l2 <- getNewLabelNat
+ case op of
+ MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
+ MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
+
+ MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
+ MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
+
+ MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
+ MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
+
+ MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
+ MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
+
+ other_op -> outOfLineFloatOp op r args
+ where
+ actuallyInlineFloatOp instr size [CmmHinted x _]
+ = do res <- trivialUFCode size (instr size) x
+ any <- anyReg res
+ return (any (getRegisterReg (CmmLocal r)))
+
+genCCall target dest_regs args = do
+ let
+ sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args)
+#if !darwin_TARGET_OS
+ tot_arg_size = sum sizes
+#else
+ raw_arg_size = sum sizes
+ tot_arg_size = roundTo 16 raw_arg_size
+ arg_pad_size = tot_arg_size - raw_arg_size
+ delta0 <- getDeltaNat
+ setDeltaNat (delta0 - arg_pad_size)
+#endif
+
+ push_codes <- mapM push_arg (reverse args)
+ delta <- getDeltaNat
+
+ -- in
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmCallee (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) []), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmCallee expr conv
+ -> do { (dyn_c, dyn_r) <- get_op expr
+ ; ASSERT( isWord32 (cmmExprType expr) )
+ return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
+
+ let push_code
+#if darwin_TARGET_OS
+ | arg_pad_size /= 0
+ = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
+ DELTA (delta0 - arg_pad_size)]
+ `appOL` concatOL push_codes
+ | otherwise
+#endif
+ = concatOL push_codes
+ call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || tot_arg_size==0 then [] else
+ [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + tot_arg_size)]
+ )
+ -- in
+ setDeltaNat (delta + tot_arg_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [CmmHinted dest _hint]
+ | isFloatType ty = unitOL (GMOV fake0 r_dest)
+ | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
+ MOV II32 (OpReg edx) (OpReg r_dest_hi)]
+ | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
+ where
+ ty = localRegType dest
+ w = typeWidth ty
+ r_dest_hi = getHiVRegFromLo r_dest
+ r_dest = getRegisterReg (CmmLocal dest)
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (push_code `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size :: CmmType -> Int -- Width in bytes
+ arg_size ty = widthInBytes (typeWidth ty)
+
+ roundTo a x | x `mod` a == 0 = x
+ | otherwise = x + a - (x `mod` a)
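+        -- e.g. roundTo 16 20 == 32 and roundTo 16 16 == 16, preserving
+        -- the 16-byte stack alignment Darwin requires.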
+
+
+ push_arg :: HintedCmmActual {-current argument-}
+ -> NatM InstrBlock -- code
+
+ push_arg (CmmHinted arg _hint) -- we don't need the hints on x86
+ | isWord64 arg_ty = do
+ ChildCode64 code r_lo <- iselExpr64 arg
+ delta <- getDeltaNat
+ setDeltaNat (delta - 8)
+ let
+ r_hi = getHiVRegFromLo r_lo
+ -- in
+ return ( code `appOL`
+ toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
+ PUSH II32 (OpReg r_lo), DELTA (delta - 8),
+ DELTA (delta-8)]
+ )
+
+ | otherwise = do
+ (code, reg) <- get_op arg
+ delta <- getDeltaNat
+ let size = arg_size arg_ty -- Byte size
+ setDeltaNat (delta-size)
+ if (isFloatType arg_ty)
+ then return (code `appOL`
+ toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
+ DELTA (delta-size),
+ GST (floatSize (typeWidth arg_ty))
+ reg (AddrBaseIndex (EABaseReg esp)
+ EAIndexNone
+ (ImmInt 0))]
+ )
+ else return (code `snocOL`
+ PUSH II32 (OpReg reg) `snocOL`
+ DELTA (delta-size)
+ )
+ where
+ arg_ty = cmmExprType arg
+
+ ------------
+ get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg
+ get_op op = do
+ (reg,code) <- getSomeReg op
+ return (code, reg)
+
+#elif x86_64_TARGET_ARCH
+
+genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
+        -- write barrier compiles to no code on x86/x86-64;
+        -- we keep it in the Cmm this late only to stop earlier
+        -- optimisations from moving memory operations across it.
+
+
+genCCall (CmmPrim op) [CmmHinted r _] args =
+ outOfLineFloatOp op r args
+
+genCCall target dest_regs args = do
+
+ -- load up the register arguments
+ (stack_args, aregs, fregs, load_args_code)
+ <- load_args args allArgRegs allFPArgRegs nilOL
+
+ let
+ fp_regs_used = reverse (drop (length fregs) (reverse allFPArgRegs))
+ int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
+ arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
+            -- for annotating the call instruction with the argument
+            -- registers it reads
+
+ sse_regs = length fp_regs_used
+
+ tot_arg_size = arg_size * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+ -- need to make sure we push a multiple of 16-bytes of args,
+ -- plus the return address, to get the correct alignment.
+ -- Urg, this is hard. We need to feed the delta back into
+ -- the arg pushing code.
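+    -- Worked example: a single stack arg gives tot_arg_size == 8, so we
+    -- subtract 8 first; 8 bytes of padding plus the 8-byte argument make
+    -- 16, and the call's pushed return address then leaves %rsp on a
+    -- 16-byte boundary +8, as required.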
+ (real_size, adjust_rsp) <-
+ if tot_arg_size `rem` 16 == 0
+ then return (tot_arg_size, nilOL)
+ else do -- we need to adjust...
+ delta <- getDeltaNat
+ setDeltaNat (delta-8)
+ return (tot_arg_size+8, toOL [
+ SUB II64 (OpImm (ImmInt 8)) (OpReg rsp),
+ DELTA (delta-8)
+ ])
+
+ -- push the stack args, right to left
+ push_code <- push_args (reverse stack_args) nilOL
+ delta <- getDeltaNat
+
+ -- deal with static vs dynamic call targets
+ (callinsns,cconv) <-
+ case target of
+ -- CmmPrim -> ...
+ CmmCallee (CmmLit (CmmLabel lbl)) conv
+ -> -- ToDo: stdcall arg sizes
+ return (unitOL (CALL (Left fn_imm) arg_regs), conv)
+ where fn_imm = ImmCLbl lbl
+ CmmCallee expr conv
+ -> do (dyn_r, dyn_c) <- getSomeReg expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
+
+ let
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
+ assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
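+        -- e.g. a varargs call such as printf with two Double arguments
+        -- would have %al set to 2 here.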
+
+ let call = callinsns `appOL`
+ toOL (
+ -- Deallocate parameters after call for ccall;
+ -- but not for stdcall (callee does it)
+ (if cconv == StdCallConv || real_size==0 then [] else
+ [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)])
+ ++
+ [DELTA (delta + real_size)]
+ )
+ -- in
+ setDeltaNat (delta + real_size)
+
+ let
+ -- assign the results, if necessary
+ assign_code [] = nilOL
+ assign_code [CmmHinted dest _hint] =
+ case typeWidth rep of
+ W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
+          W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
+ _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
+ where
+ rep = localRegType dest
+ r_dest = getRegisterReg (CmmLocal dest)
+ assign_code many = panic "genCCall.assign_code many"
+
+ return (load_args_code `appOL`
+ adjust_rsp `appOL`
+ push_code `appOL`
+ assign_eax sse_regs `appOL`
+ call `appOL`
+ assign_code dest_regs)
+
+ where
+ arg_size = 8 -- always, at the mo
+
+ load_args :: [CmmHinted CmmExpr]
+ -> [Reg] -- int regs avail for args
+ -> [Reg] -- FP regs avail for args
+ -> InstrBlock
+ -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock)
+ load_args args [] [] code = return (args, [], [], code)
+ -- no more regs to use
+ load_args [] aregs fregs code = return ([], aregs, fregs, code)
+ -- no more args to push
+ load_args ((CmmHinted arg hint) : rest) aregs fregs code
+ | isFloatType arg_rep =
+ case fregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest aregs rs (code `appOL` arg_code r)
+ | otherwise =
+ case aregs of
+ [] -> push_this_arg
+ (r:rs) -> do
+ arg_code <- getAnyReg arg
+ load_args rest rs fregs (code `appOL` arg_code r)
+ where
+ arg_rep = cmmExprType arg
+
+ push_this_arg = do
+ (args',ars,frs,code') <- load_args rest aregs fregs code
+ return ((CmmHinted arg hint):args', ars, frs, code')
+
+ push_args [] code = return code
+ push_args ((CmmHinted arg hint):rest) code
+ | isFloatType arg_rep = do
+ (arg_reg, arg_code) <- getSomeReg arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` arg_code `appOL` toOL [
+ SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
+ DELTA (delta-arg_size),
+ MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))]
+ push_args rest code'
+
+ | otherwise = do
+ -- we only ever generate word-sized function arguments. Promotion
+ -- has already happened: our Int8# type is kept sign-extended
+ -- in an Int#, for example.
+ ASSERT(width == W64) return ()
+ (arg_op, arg_code) <- getOperand arg
+ delta <- getDeltaNat
+ setDeltaNat (delta-arg_size)
+ let code' = code `appOL` arg_code `appOL` toOL [
+ PUSH II64 arg_op,
+ DELTA (delta-arg_size)]
+ push_args rest code'
+ where
+ arg_rep = cmmExprType arg
+ width = typeWidth arg_rep
+
+#else
+genCCall = panic "X86.genCCall: not defined"
+
+#endif /* x86_64_TARGET_ARCH */
+
+
+
+
+outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock
+outOfLineFloatOp mop res args
+ = do
+ dflags <- getDynFlagsNat
+ targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
+ let target = CmmCallee targetExpr CCallConv
+
+ if isFloat64 (localRegType res)
+ then
+ stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn)
+ else do
+ uq <- getUniqueNat
+ let
+ tmp = LocalReg uq f64
+ -- in
+ code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn)
+ code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+ return (code1 `appOL` code2)
+ where
+ lbl = mkForeignLabel fn Nothing False IsFunction
+
+ fn = case mop of
+ MO_F32_Sqrt -> fsLit "sqrtf"
+ MO_F32_Sin -> fsLit "sinf"
+ MO_F32_Cos -> fsLit "cosf"
+ MO_F32_Tan -> fsLit "tanf"
+ MO_F32_Exp -> fsLit "expf"
+ MO_F32_Log -> fsLit "logf"
+
+ MO_F32_Asin -> fsLit "asinf"
+ MO_F32_Acos -> fsLit "acosf"
+ MO_F32_Atan -> fsLit "atanf"
+
+ MO_F32_Sinh -> fsLit "sinhf"
+ MO_F32_Cosh -> fsLit "coshf"
+ MO_F32_Tanh -> fsLit "tanhf"
+ MO_F32_Pwr -> fsLit "powf"
+
+ MO_F64_Sqrt -> fsLit "sqrt"
+ MO_F64_Sin -> fsLit "sin"
+ MO_F64_Cos -> fsLit "cos"
+ MO_F64_Tan -> fsLit "tan"
+ MO_F64_Exp -> fsLit "exp"
+ MO_F64_Log -> fsLit "log"
+
+ MO_F64_Asin -> fsLit "asin"
+ MO_F64_Acos -> fsLit "acos"
+ MO_F64_Atan -> fsLit "atan"
+
+ MO_F64_Sinh -> fsLit "sinh"
+ MO_F64_Cosh -> fsLit "cosh"
+ MO_F64_Tanh -> fsLit "tanh"
+ MO_F64_Pwr -> fsLit "pow"
+
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating a table-branch
+
+genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
+
+genSwitch expr ids
+ | opt_PIC
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ dflags <- getDynFlagsNat
+ dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
+    (tableReg,t_code) <- getSomeReg dynRef
+ let
+ jumpTable = map jumpTableEntryRel ids
+
+ jumpTableEntryRel Nothing
+ = CmmStaticLit (CmmInt 0 wordWidth)
+ jumpTableEntryRel (Just (BlockId id))
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
+ where blockLabel = mkAsmTempLabel id
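+            -- Each entry is thus the offset (blockLabel - lbl) from the
+            -- start of the table, so the table contains no absolute
+            -- addresses and needs no load-time relocation.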
+
+ op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0))
+
+#if x86_64_TARGET_ARCH
+#if darwin_TARGET_OS
+ -- on Mac OS X/x86_64, put the jump table in the text section
+ -- to work around a limitation of the linker.
+ -- ld64 is unable to handle the relocations for
+ -- .quad L1 - L0
+ -- if L0 is not preceded by a non-anonymous label in its section.
+
+ code = e_code `appOL` t_code `appOL` toOL [
+ ADD (intSize wordWidth) op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ],
+ LDATA Text (CmmDataLabel lbl : jumpTable)
+ ]
+#else
+ -- HACK: On x86_64 binutils<2.17 is only able to generate PC32
+ -- relocations, hence we only get 32-bit offsets in the jump
+ -- table. As these offsets are always negative we need to properly
+ -- sign extend them to 64-bit. This hack should be removed in
+ -- conjunction with the hack in PprMach.hs/pprDataItem once
+ -- binutils 2.17 is standard.
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ MOVSxL II32
+ (OpAddr (AddrBaseIndex (EABaseReg tableReg)
+ (EAIndex reg wORD_SIZE) (ImmInt 0)))
+ (OpReg reg),
+ ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+#endif
+#else
+ code = e_code `appOL` t_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ ADD (intSize wordWidth) op (OpReg tableReg),
+ JMP_TBL (OpReg tableReg) [ id | Just id <- ids ]
+ ]
+#endif
+ return code
+ | otherwise
+ = do
+ (reg,e_code) <- getSomeReg expr
+ lbl <- getNewLabelNat
+ let
+ jumpTable = map jumpTableEntry ids
+ op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg wORD_SIZE) (ImmCLbl lbl))
+ code = e_code `appOL` toOL [
+ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable),
+ JMP_TBL op [ id | Just id <- ids ]
+ ]
+ -- in
+ return code
+
+
+-- -----------------------------------------------------------------------------
+-- 'condIntReg' and 'condFltReg': condition codes into registers
+
+-- Turn those condition codes into integers now (when they appear on
+-- the right hand side of an assignment).
+--
+-- (If applicable) Do not fill the delay slots here; you will confuse the
+-- register allocator.
+
+condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+condIntReg cond x y = do
+ CondCode _ cond cond_code <- condIntCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
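+-- e.g. (x < y) compiles roughly to (AT&T operand order):
+--     CMP y x ; SETCC LTT tmp ; MOVZxL tmp dst
+-- leaving 0 or 1 in dst.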
+
+
+
+condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+ tmp <- getNewRegNat II8
+ let
+ code dst = cond_code `appOL` toOL [
+ SETCC cond (OpReg tmp),
+ MOVZxL II8 (OpReg tmp) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+#elif x86_64_TARGET_ARCH
+condFltReg cond x y = do
+ CondCode _ cond cond_code <- condFltCode cond x y
+  tmp1 <- getNewRegNat archWordSize
+  tmp2 <- getNewRegNat archWordSize
+ let
+ -- We have to worry about unordered operands (eg. comparisons
+ -- against NaN). If the operands are unordered, the comparison
+ -- sets the parity flag, carry flag and zero flag.
+ -- All comparisons are supposed to return false for unordered
+ -- operands except for !=, which returns true.
+ --
+ -- Optimisation: we don't have to test the parity flag if we
+ -- know the test has already excluded the unordered case: eg >
+ -- and >= test for a zero carry flag, which can only occur for
+ -- ordered operands.
+ --
+ -- ToDo: by reversing comparisons we could avoid testing the
+ -- parity flag in more cases.
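+            -- For example: if either operand is NaN the comparison sets
+            -- PF, so and_ordered yields 0 (correct for <, <=, ==, etc.)
+            -- while or_unordered yields 1 (correct for /=).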
+
+ code dst =
+ cond_code `appOL`
+ (case cond of
+ NE -> or_unordered dst
+ GU -> plain_test dst
+ GEU -> plain_test dst
+ _ -> and_ordered dst)
+
+ plain_test dst = toOL [
+ SETCC cond (OpReg tmp1),
+ MOVZxL II8 (OpReg tmp1) (OpReg dst)
+ ]
+ or_unordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC PARITY (OpReg tmp2),
+ OR II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ and_ordered dst = toOL [
+ SETCC cond (OpReg tmp1),
+ SETCC NOTPARITY (OpReg tmp2),
+ AND II8 (OpReg tmp1) (OpReg tmp2),
+ MOVZxL II8 (OpReg tmp2) (OpReg dst)
+ ]
+ -- in
+ return (Any II32 code)
+
+#else
+condFltReg = panic "X86.condFltReg: not defined"
+
+#endif
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching a CmmInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+
+{-
+The Rules of the Game are:
+
+* You cannot assume anything about the destination register dst;
+ it may be anything, including a fixed reg.
+
+* You may compute an operand into a fixed reg, but you may not
+ subsequently change the contents of that fixed reg. If you
+ want to do so, first copy the value either to a temporary
+ or into dst. You are free to modify dst even if it happens
+ to be a fixed reg -- that's not your problem.
+
+* You cannot assume that a fixed reg will stay live over an
+ arbitrary computation. The same applies to the dst reg.
+
+* Temporary regs obtained from getNewRegNat are distinct from
+ each other and from all other regs, and stay live over
+ arbitrary computations.
+
+--------------------
+
+SDM's version of The Rules:
+
+* If getRegister returns Any, that means it can generate correct
+ code which places the result in any register, period. Even if that
+ register happens to be read during the computation.
+
+ Corollary #1: this means that if you are generating code for an
+ operation with two arbitrary operands, you cannot assign the result
+ of the first operand into the destination register before computing
+ the second operand. The second operand might require the old value
+ of the destination register.
+
+ Corollary #2: A function might be able to generate more efficient
+ code if it knows the destination register is a new temporary (and
+ therefore not read by any of the sub-computations).
+
+* If getRegister returns Any, then the code it generates may modify only:
+ (a) fresh temporaries
+ (b) the destination register
+ (c) known registers (eg. %ecx is used by shifts)
+ In particular, it may *not* modify global registers, unless the global
+ register happens to be the destination register.
+-}
+
+trivialCode width instr (Just revinstr) (CmmLit lit_a) b
+ | is32BitLit lit_a = do
+ b_code <- getAnyReg b
+ let
+ code dst
+ = b_code dst `snocOL`
+ revinstr (OpImm (litToImm lit_a)) (OpReg dst)
+ -- in
+ return (Any (intSize width) code)
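+-- e.g. for (lit + b), ADD serves as its own reversed instruction, so we
+-- can compute b straight into dst and then issue 'ADD $lit, dst' rather
+-- than first materialising the literal in a register.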
+
+trivialCode width instr maybe_revinstr a b
+ = genTrivialCode (intSize width) instr a b
+
+-- This is re-used for floating pt instructions too.
+genTrivialCode rep instr a b = do
+ (b_op, b_code) <- getNonClobberedOperand b
+ a_code <- getAnyReg a
+ tmp <- getNewRegNat rep
+ let
+ -- We want the value of b to stay alive across the computation of a.
+ -- But, we want to calculate a straight into the destination register,
+ -- because the instruction only has two operands (dst := dst `op` src).
+ -- The troublesome case is when the result of b is in the same register
+ -- as the destination reg. In this case, we have to save b in a
+ -- new temporary across the computation of a.
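+        -- For example (hypothetical): in 'dst := a - b', if b happened
+        -- to be computed into the register chosen as dst, computing a
+        -- into dst would destroy b; hence the copy into tmp first.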
+ code dst
+ | dst `regClashesWithOp` b_op =
+ b_code `appOL`
+ unitOL (MOV rep b_op (OpReg tmp)) `appOL`
+ a_code dst `snocOL`
+ instr (OpReg tmp) (OpReg dst)
+ | otherwise =
+ b_code `appOL`
+ a_code dst `snocOL`
+ instr b_op (OpReg dst)
+ -- in
+ return (Any rep code)
+
+regClashesWithOp :: Reg -> Operand -> Bool
+reg `regClashesWithOp` OpReg reg2   = reg == reg2
+reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
+reg `regClashesWithOp` _ = False
+
+-----------
+
+trivialUCode rep instr x = do
+ x_code <- getAnyReg x
+ let
+ code dst =
+ x_code dst `snocOL`
+ instr (OpReg dst)
+ return (Any rep code)
+
+-----------
+
+#if i386_TARGET_ARCH
+
+trivialFCode width instr x y = do
+ (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
+ (y_reg, y_code) <- getSomeReg y
+ let
+ size = floatSize width
+ code dst =
+ x_code `appOL`
+ y_code `snocOL`
+ instr size x_reg y_reg dst
+ return (Any size code)
+
+#endif
+
+#if x86_64_TARGET_ARCH
+trivialFCode pk instr x y
+ = genTrivialCode size (instr size) x y
+ where size = floatSize pk
+#endif
+
+trivialUFCode size instr x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code dst =
+ x_code `snocOL`
+ instr x_reg dst
+ -- in
+ return (Any size code)
+
+
+--------------------------------------------------------------------------------
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> GITOF; W64 -> GITOD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ return (Any (floatSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceInt2FP from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (floatSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceInt2FP = panic "X86.coerceInt2FP: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+
+#if i386_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case from of W32 -> GFTOI; W64 -> GDTOI
+ code dst = x_code `snocOL` opc x_reg dst
+ -- ToDo: works for non-II32 reps?
+ -- in
+ return (Any (intSize to) code)
+
+#elif x86_64_TARGET_ARCH
+coerceFP2Int from to x = do
+ (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
+ let
+ opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ
+ code dst = x_code `snocOL` opc x_op dst
+ -- in
+ return (Any (intSize to) code) -- works even if the destination rep is <II32
+
+#else
+coerceFP2Int = panic "X86.coerceFP2Int: not defined"
+
+#endif
+
+
+
+
+--------------------------------------------------------------------------------
+coerceFP2FP :: Width -> CmmExpr -> NatM Register
+
+#if x86_64_TARGET_ARCH
+coerceFP2FP to x = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD
+ code dst = x_code `snocOL` opc x_reg dst
+ -- in
+ return (Any (floatSize to) code)
+
+#else
+coerceFP2FP = panic "X86.coerceFP2FP: not defined"
+
+#endif
+
+
+
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
new file mode 100644
index 0000000000..60e40b9654
--- /dev/null
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -0,0 +1,52 @@
+
+module X86.Cond (
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned
+)
+
+where
+
+data Cond
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+        deriving (Eq)
+
+condUnsigned :: Cond -> Bool
+condUnsigned GU = True
+condUnsigned LU = True
+condUnsigned GEU = True
+condUnsigned LEU = True
+condUnsigned _ = False
+
+
+condToSigned :: Cond -> Cond
+condToSigned GU = GTT
+condToSigned LU = LTT
+condToSigned GEU = GE
+condToSigned LEU = LE
+condToSigned x = x
+
+
+condToUnsigned :: Cond -> Cond
+condToUnsigned GTT = GU
+condToUnsigned LTT = LU
+condToUnsigned GE = GEU
+condToUnsigned LE = LEU
+condToUnsigned x = x
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 0dea1dd66d..b4b6fb5f4b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -12,34 +12,46 @@
module X86.Instr
where
-import BlockId
+import X86.Cond
import X86.Regs
-import RegsBase
+import Instruction
+import Size
+import RegClass
+import Reg
+
+import BlockId
import Cmm
import FastString
+import FastBool
import CLabel
import Panic
-data Cond
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
- deriving (Eq)
+-- Size of an x86 memory address, in bytes.
+--
+archWordSize :: Size
+#if i386_TARGET_ARCH
+archWordSize = II32
+#elif x86_64_TARGET_ARCH
+archWordSize = II64
+#else
+archWordSize = panic "X86.Instr.archWordSize: not defined"
+#endif
+
+-- | Instruction instance for x86 instruction set.
+instance Instruction Instr where
+ regUsageOfInstr = x86_regUsageOfInstr
+ patchRegsOfInstr = x86_patchRegsOfInstr
+ isJumpishInstr = x86_isJumpishInstr
+ jumpDestsOfInstr = x86_jumpDestsOfInstr
+ patchJumpInstr = x86_patchJumpInstr
+ mkSpillInstr = x86_mkSpillInstr
+ mkLoadInstr = x86_mkLoadInstr
+ takeDeltaInstr = x86_takeDeltaInstr
+ isMetaInstr = x86_isMetaInstr
+ mkRegRegMoveInstr = x86_mkRegRegMoveInstr
+ takeRegRegMoveInstr = x86_takeRegRegMoveInstr
+ mkJumpInstr = x86_mkJumpInstr
-- -----------------------------------------------------------------------------
@@ -154,13 +166,6 @@ data Instr
-- benefit of subsequent passes
| DELTA Int
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-
-- Moves.
| MOV Size Operand Operand
| MOVZxL Size Operand Operand -- size is the size of operand 1
@@ -301,7 +306,436 @@ data Operand
-i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
+x86_regUsageOfInstr :: Instr -> RegUsage
+x86_regUsageOfInstr instr
+ = case instr of
+ MOV _ src dst -> usageRW src dst
+ MOVZxL _ src dst -> usageRW src dst
+ MOVSxL _ src dst -> usageRW src dst
+ LEA _ src dst -> usageRW src dst
+ ADD _ src dst -> usageRM src dst
+ ADC _ src dst -> usageRM src dst
+ SUB _ src dst -> usageRM src dst
+ IMUL _ src dst -> usageRM src dst
+ IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx]
+ MUL _ src dst -> usageRM src dst
+ DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
+ IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
+ AND _ src dst -> usageRM src dst
+ OR _ src dst -> usageRM src dst
+
+ XOR _ (OpReg src) (OpReg dst)
+ | src == dst -> mkRU [] [dst]
+
+ XOR _ src dst -> usageRM src dst
+ NOT _ op -> usageM op
+ NEGI _ op -> usageM op
+ SHL _ imm dst -> usageRM imm dst
+ SAR _ imm dst -> usageRM imm dst
+ SHR _ imm dst -> usageRM imm dst
+ BT _ _ src -> mkRUR (use_R src)
+
+ PUSH _ op -> mkRUR (use_R op)
+ POP _ op -> mkRU [] (def_W op)
+ TEST _ src dst -> mkRUR (use_R src ++ use_R dst)
+ CMP _ src dst -> mkRUR (use_R src ++ use_R dst)
+ SETCC _ op -> mkRU [] (def_W op)
+ JXX _ _ -> mkRU [] []
+ JXX_GBL _ _ -> mkRU [] []
+ JMP op -> mkRUR (use_R op)
+ JMP_TBL op _ -> mkRUR (use_R op)
+ CALL (Left _) params -> mkRU params callClobberedRegs
+ CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
+ CLTD _ -> mkRU [eax] [edx]
+ NOP -> mkRU [] []
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> mkRU [src] [dst]
+ GLD _ src dst -> mkRU (use_EA src) [dst]
+ GST _ src dst -> mkRUR (src : use_EA dst)
+
+ GLDZ dst -> mkRU [] [dst]
+ GLD1 dst -> mkRU [] [dst]
+
+ GFTOI src dst -> mkRU [src] [dst]
+ GDTOI src dst -> mkRU [src] [dst]
+
+ GITOF src dst -> mkRU [src] [dst]
+ GITOD src dst -> mkRU [src] [dst]
+
+ GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
+ GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
+
+ GCMP _ src1 src2 -> mkRUR [src1,src2]
+ GABS _ src dst -> mkRU [src] [dst]
+ GNEG _ src dst -> mkRU [src] [dst]
+ GSQRT _ src dst -> mkRU [src] [dst]
+ GSIN _ _ _ src dst -> mkRU [src] [dst]
+ GCOS _ _ _ src dst -> mkRU [src] [dst]
+ GTAN _ _ _ src dst -> mkRU [src] [dst]
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> mkRU [src] [dst]
+ CVTSD2SS src dst -> mkRU [src] [dst]
+ CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
+ CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
+ CVTSI2SS src dst -> mkRU (use_R src) [dst]
+ CVTSI2SD src dst -> mkRU (use_R src) [dst]
+ FDIV _ src dst -> usageRM src dst
+#endif
+
+ FETCHGOT reg -> mkRU [] [reg]
+ FETCHPC reg -> mkRU [] [reg]
+
+ COMMENT _ -> noUsage
+ DELTA _ -> noUsage
+
+ _other -> panic "regUsage: unrecognised instr"
+
+ where
+ -- 2 operand form; first operand Read; second Written
+ usageRW :: Operand -> Operand -> RegUsage
+ usageRW op (OpReg reg) = mkRU (use_R op) [reg]
+ usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+ usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
+
+ -- 2 operand form; first operand Read; second Modified
+ usageRM :: Operand -> Operand -> RegUsage
+ usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
+ usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
+ usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
+
+ -- 1 operand form; operand Modified
+ usageM :: Operand -> RegUsage
+ usageM (OpReg reg) = mkRU [reg] [reg]
+ usageM (OpAddr ea) = mkRUR (use_EA ea)
+ usageM _ = panic "X86.RegInfo.usageM: no match"
+
+ -- Registers defd when an operand is written.
+ def_W (OpReg reg) = [reg]
+ def_W (OpAddr _ ) = []
+ def_W _ = panic "X86.RegInfo.def_W: no match"
+
+ -- Registers used when an operand is read.
+ use_R (OpReg reg) = [reg]
+ use_R (OpImm _) = []
+ use_R (OpAddr ea) = use_EA ea
+
+ -- Registers used to compute an effective address.
+ use_EA (ImmAddr _ _) = []
+ use_EA (AddrBaseIndex base index _) =
+ use_base base $! use_index index
+ where use_base (EABaseReg r) x = r : x
+ use_base _ x = x
+ use_index EAIndexNone = []
+ use_index (EAIndex i _) = [i]
+
+ mkRUR src = src' `seq` RU src' []
+ where src' = filter interesting src
+
+ mkRU src dst = src' `seq` dst' `seq` RU src' dst'
+ where src' = filter interesting src
+ dst' = filter interesting dst
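+-- For example, 'ADD sz (OpReg a) (OpReg b)' goes via usageRM and yields
+-- RU [a,b] [b], after uninteresting (pre-allocated) registers have been
+-- filtered out.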
+
+interesting :: Reg -> Bool
+interesting (VirtualRegI _) = True
+interesting (VirtualRegHi _) = True
+interesting (VirtualRegF _) = True
+interesting (VirtualRegD _) = True
+interesting (RealReg i) = isFastTrue (freeReg i)
+
+
+
+
+x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+x86_patchRegsOfInstr instr env
+ = case instr of
+ MOV sz src dst -> patch2 (MOV sz) src dst
+ MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
+ MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
+ LEA sz src dst -> patch2 (LEA sz) src dst
+ ADD sz src dst -> patch2 (ADD sz) src dst
+ ADC sz src dst -> patch2 (ADC sz) src dst
+ SUB sz src dst -> patch2 (SUB sz) src dst
+ IMUL sz src dst -> patch2 (IMUL sz) src dst
+ IMUL2 sz src -> patch1 (IMUL2 sz) src
+ MUL sz src dst -> patch2 (MUL sz) src dst
+ IDIV sz op -> patch1 (IDIV sz) op
+ DIV sz op -> patch1 (DIV sz) op
+ AND sz src dst -> patch2 (AND sz) src dst
+ OR sz src dst -> patch2 (OR sz) src dst
+ XOR sz src dst -> patch2 (XOR sz) src dst
+ NOT sz op -> patch1 (NOT sz) op
+ NEGI sz op -> patch1 (NEGI sz) op
+ SHL sz imm dst -> patch1 (SHL sz imm) dst
+ SAR sz imm dst -> patch1 (SAR sz imm) dst
+ SHR sz imm dst -> patch1 (SHR sz imm) dst
+ BT sz imm src -> patch1 (BT sz imm) src
+ TEST sz src dst -> patch2 (TEST sz) src dst
+ CMP sz src dst -> patch2 (CMP sz) src dst
+ PUSH sz op -> patch1 (PUSH sz) op
+ POP sz op -> patch1 (POP sz) op
+ SETCC cond op -> patch1 (SETCC cond) op
+ JMP op -> patch1 JMP op
+ JMP_TBL op ids -> patch1 JMP_TBL op $ ids
+
+#if i386_TARGET_ARCH
+ GMOV src dst -> GMOV (env src) (env dst)
+ GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
+ GST sz src dst -> GST sz (env src) (lookupAddr dst)
+
+ GLDZ dst -> GLDZ (env dst)
+ GLD1 dst -> GLD1 (env dst)
+
+ GFTOI src dst -> GFTOI (env src) (env dst)
+ GDTOI src dst -> GDTOI (env src) (env dst)
+
+ GITOF src dst -> GITOF (env src) (env dst)
+ GITOD src dst -> GITOD (env src) (env dst)
+
+ GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
+ GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
+ GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
+ GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
+
+ GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
+ GABS sz src dst -> GABS sz (env src) (env dst)
+ GNEG sz src dst -> GNEG sz (env src) (env dst)
+ GSQRT sz src dst -> GSQRT sz (env src) (env dst)
+ GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
+ GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
+ GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
+#endif
+
+#if x86_64_TARGET_ARCH
+ CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
+ CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
+ CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
+ CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
+ CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
+ CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
+ FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
+#endif
+
+ CALL (Left _) _ -> instr
+ CALL (Right reg) p -> CALL (Right (env reg)) p
+
+ FETCHGOT reg -> FETCHGOT (env reg)
+ FETCHPC reg -> FETCHPC (env reg)
+
+ NOP -> instr
+ COMMENT _ -> instr
+ DELTA _ -> instr
+
+ JXX _ _ -> instr
+ JXX_GBL _ _ -> instr
+ CLTD _ -> instr
+
+ _other -> panic "patchRegs: unrecognised instr"
+
+ where
+ patch1 insn op = insn $! patchOp op
+ patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
+
+ patchOp (OpReg reg) = OpReg $! env reg
+ patchOp (OpImm imm) = OpImm imm
+ patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
+
+ lookupAddr (ImmAddr imm off) = ImmAddr imm off
+ lookupAddr (AddrBaseIndex base index disp)
+ = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
+ where
+ lookupBase EABaseNone = EABaseNone
+ lookupBase EABaseRip = EABaseRip
+ lookupBase (EABaseReg r) = EABaseReg (env r)
+
+ lookupIndex EAIndexNone = EAIndexNone
+ lookupIndex (EAIndex r i) = EAIndex (env r) i
+
+
+--------------------------------------------------------------------------------
+x86_isJumpishInstr
+ :: Instr -> Bool
+
+x86_isJumpishInstr instr
+ = case instr of
+ JMP{} -> True
+ JXX{} -> True
+ JXX_GBL{} -> True
+ JMP_TBL{} -> True
+ CALL{} -> True
+ _ -> False
+
+
+x86_jumpDestsOfInstr
+ :: Instr
+ -> [BlockId]
+
+x86_jumpDestsOfInstr insn
+ = case insn of
+ JXX _ id -> [id]
+ JMP_TBL _ ids -> ids
+ _ -> []
+
+
+x86_patchJumpInstr
+ :: Instr -> (BlockId -> BlockId) -> Instr
+
+x86_patchJumpInstr insn patchF
+ = case insn of
+ JXX cc id -> JXX cc (patchF id)
+ JMP_TBL _ _ -> error "Cannot patch JMP_TBL"
+ _ -> insn
+
+
+
+
+-- -----------------------------------------------------------------------------
+-- | Make a spill instruction.
+x86_mkSpillInstr
+ :: Reg -- register to spill
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+#if i386_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of
+ RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
+
+#elif x86_64_TARGET_ARCH
+x86_mkSpillInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
+ RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
+ _ -> panic "X86.mkSpillInstr: no match"
+ -- ToDo: will it work to always spill as a double?
+ -- does that cause a stall if the data was a float?
+#else
+x86_mkSpillInstr _ _ _
+ = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
+#endif
+
+
+-- | Make a spill reload instruction.
+x86_mkLoadInstr
+ :: Reg -- register to load
+ -> Int -- current stack delta
+ -> Int -- spill slot to use
+ -> Instr
+
+#if i386_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 4
+ in case regClass reg of {
+ RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
+ _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
+#elif x86_64_TARGET_ARCH
+x86_mkLoadInstr reg delta slot
+ = let off = spillSlotToOffset slot
+ in
+ let off_w = (off-delta) `div` 8
+ in case regClass reg of
+ RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
+ _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
+#else
+x86_mkLoadInstr _ _ _
+ = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
+#endif
+
+
+--------------------------------------------------------------------------------
+
+-- | See if this instruction is telling us the current C stack delta
+x86_takeDeltaInstr
+ :: Instr
+ -> Maybe Int
+
+x86_takeDeltaInstr instr
+ = case instr of
+ DELTA i -> Just i
+ _ -> Nothing
+
+
+x86_isMetaInstr
+ :: Instr
+ -> Bool
+
+x86_isMetaInstr instr
+ = case instr of
+ COMMENT{} -> True
+ LDATA{} -> True
+ NEWBLOCK{} -> True
+ DELTA{} -> True
+ _ -> False
+
+
+
+-- | Make a reg-reg move instruction.
+-- Integer registers move with MOV; on i386, floating-point values live
+-- in the x87 register stack, so reg-reg moves between them use GMOV
+-- instead.
+--
+x86_mkRegRegMoveInstr
+ :: Reg
+ -> Reg
+ -> Instr
+
+x86_mkRegRegMoveInstr src dst
+ = case regClass src of
+#if i386_TARGET_ARCH
+ RcInteger -> MOV II32 (OpReg src) (OpReg dst)
+ RcDouble -> GMOV src dst
+ RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#else
+ RcInteger -> MOV II64 (OpReg src) (OpReg dst)
+ RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
+ RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
+#endif
+
+
+-- | Check whether an instruction represents a reg-reg move.
+-- The register allocator attempts to eliminate reg->reg moves whenever it can,
+-- by assigning the src and dest temporaries to the same real register.
+--
+x86_takeRegRegMoveInstr
+ :: Instr
+ -> Maybe (Reg,Reg)
+
+x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
+ = Just (r1,r2)
+
+x86_takeRegRegMoveInstr _ = Nothing
+
+
+-- | Make an unconditional branch instruction.
+x86_mkJumpInstr
+ :: BlockId
+ -> [Instr]
+
+x86_mkJumpInstr id
+ = [JXX ALWAYS id]
+
+
+
+
+
+i386_insert_ffrees
+ :: [GenBasicBlock Instr]
+ -> [GenBasicBlock Instr]
+
i386_insert_ffrees blocks
| or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
= map ffree_before_nonlocal_transfers blocks
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index c0ad496d78..3f181fc056 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -7,12 +7,15 @@
-----------------------------------------------------------------------------
module X86.Ppr (
+ pprNatCmmTop,
+ pprBasicBlock,
+ pprSectionHeader,
+ pprData,
+ pprInstr,
pprUserReg,
pprSize,
pprImm,
- pprSectionHeader,
pprDataItem,
- pprInstr
)
where
@@ -20,24 +23,145 @@ where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-import PprBase
-import RegsBase
import X86.Regs
import X86.Instr
+import X86.Cond
+import Instruction
+import Size
+import Reg
+import PprBase
+
import BlockId
import Cmm
-
-import CLabel ( CLabel, mkAsmTempLabel )
-#if HAVE_SUBSECTIONS_VIA_SYMBOLS
-import CLabel ( mkDeadStripPreventer )
-#endif
-
+import CLabel
import Unique ( pprUnique )
import Pretty
import FastString
import qualified Outputable
-import Outputable (panic)
+import Outputable (panic, Outputable)
+
+import Data.Word
+
+
+
+-- -----------------------------------------------------------------------------
+-- Printing this stuff out
+
+pprNatCmmTop :: NatCmmTop Instr -> Doc
+pprNatCmmTop (CmmData section dats) =
+ pprSectionHeader section $$ vcat (map pprData dats)
+
+ -- special case for split markers:
+pprNatCmmTop (CmmProc [] lbl _ (ListGraph [])) = pprLabel lbl
+
+pprNatCmmTop (CmmProc info lbl _ (ListGraph blocks)) =
+ pprSectionHeader Text $$
+ (if null info then -- blocks guaranteed not null, so label needed
+ pprLabel lbl
+ else
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ <> char ':' $$
+#endif
+ vcat (map pprData info) $$
+ pprLabel (entryLblToInfoLbl lbl)
+ ) $$
+ vcat (map pprBasicBlock blocks)
+ -- above: Even the first block gets a label, because with branch-chain
+ -- elimination, it might be the target of a goto.
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ -- If we are using the .subsections_via_symbols directive
+ -- (available on recent versions of Darwin),
+ -- we have to make sure that there is some kind of reference
+ -- from the entry code to a label on the _top_ of the info table,
+ -- so that the linker will not think it is unreferenced and dead-strip
+ -- it. That's why the label is called a DeadStripPreventer (_dsp).
+ $$ if not (null info)
+ then text "\t.long "
+ <+> pprCLabel_asm (entryLblToInfoLbl lbl)
+ <+> char '-'
+ <+> pprCLabel_asm (mkDeadStripPreventer $ entryLblToInfoLbl lbl)
+ else empty
+#endif
+
+
+pprBasicBlock :: NatBasicBlock Instr -> Doc
+pprBasicBlock (BasicBlock (BlockId id) instrs) =
+ pprLabel (mkAsmTempLabel id) $$
+ vcat (map pprInstr instrs)
+
+
+pprData :: CmmStatic -> Doc
+pprData (CmmAlign bytes) = pprAlign bytes
+pprData (CmmDataLabel lbl) = pprLabel lbl
+pprData (CmmString str) = pprASCII str
+pprData (CmmUninitialised bytes) = ptext (sLit ".skip ") <> int bytes
+pprData (CmmStaticLit lit) = pprDataItem lit
+
+pprGloblDecl :: CLabel -> Doc
+pprGloblDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext IF_ARCH_sparc((sLit ".global "),
+ (sLit ".globl ")) <>
+ pprCLabel_asm lbl
+
+pprTypeAndSizeDecl :: CLabel -> Doc
+#if linux_TARGET_OS
+pprTypeAndSizeDecl lbl
+ | not (externallyVisibleCLabel lbl) = empty
+ | otherwise = ptext (sLit ".type ") <>
+ pprCLabel_asm lbl <> ptext (sLit ", @object")
+#else
+pprTypeAndSizeDecl _
+ = empty
+#endif
+
+pprLabel :: CLabel -> Doc
+pprLabel lbl = pprGloblDecl lbl $$ pprTypeAndSizeDecl lbl $$ (pprCLabel_asm lbl <> char ':')
+
+
+pprASCII :: [Word8] -> Doc
+pprASCII str
+ = vcat (map do1 str) $$ do1 0
+ where
+ do1 :: Word8 -> Doc
+ do1 w = ptext (sLit "\t.byte\t") <> int (fromIntegral w)
+
+pprAlign :: Int -> Doc
+
+
+pprAlign bytes
+ = ptext (sLit ".align ") <> int IF_OS_darwin(pow2, bytes)
+ where
+
+#if darwin_TARGET_OS
+ pow2 = log2 bytes
+
+ log2 :: Int -> Int -- cache the common ones
+ log2 1 = 0
+ log2 2 = 1
+ log2 4 = 2
+ log2 8 = 3
+ log2 n = 1 + log2 (n `quot` 2)
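+        -- e.g. a request for 8-byte alignment is emitted as '.align 8'
+        -- on most targets, but as '.align 3' (a power of two) on Darwin.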
+#endif
+
+-- -----------------------------------------------------------------------------
+-- pprInstr: print an 'Instr'
+
+instance Outputable Instr where
+ ppr instr = Outputable.docToSDoc $ pprInstr instr
+
+
+
+
+
+
+
+
+
+
+
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
pprUserReg :: Reg -> Doc
@@ -49,7 +173,6 @@ pprUserReg = panic "X86.Ppr.pprUserReg: not defined"
#endif
-
pprReg :: Size -> Reg -> Doc
pprReg s r
@@ -228,7 +351,7 @@ pprAddr (AddrBaseIndex base index displacement)
= let
pp_disp = ppr_disp displacement
pp_off p = pp_disp <> char '(' <> p <> char ')'
- pp_reg r = pprReg wordSize r
+ pp_reg r = pprReg archWordSize r
in
case (base, index) of
(EABaseNone, EAIndexNone) -> pp_disp
@@ -384,6 +507,7 @@ pprInstr (NEWBLOCK _)
pprInstr (LDATA _ _)
= panic "PprMach.pprInstr: LDATA"
+{-
pprInstr (SPILL reg slot)
= hcat [
ptext (sLit "\tSPILL"),
@@ -399,6 +523,7 @@ pprInstr (RELOAD slot reg)
ptext (sLit "SLOT") <> parens (int slot),
comma,
pprUserReg reg]
+-}
pprInstr (MOV size src dst)
= pprSizeOpOp (sLit "mov") size src dst
@@ -414,7 +539,7 @@ pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src
-- the remaining zero-extension to 64 bits is automatic, and the 32-bit
-- instruction is shorter.
-pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst
+pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes archWordSize src dst
-- here we do some patching, since the physical registers are only set late
-- in the code generation.
@@ -497,10 +622,10 @@ pprInstr (JXX cond (BlockId id))
pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm)
-pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op)
+pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand archWordSize op)
pprInstr (JMP_TBL op _) = pprInstr (JMP op)
pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm)
-pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg)
+pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg archWordSize reg)
pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op
pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op
@@ -941,9 +1066,9 @@ pprRegReg :: LitString -> Reg -> Reg -> Doc
pprRegReg name reg1 reg2
= hcat [
pprMnemonic_ name,
- pprReg wordSize reg1,
+ pprReg archWordSize reg1,
comma,
- pprReg wordSize reg2
+ pprReg archWordSize reg2
]
@@ -951,9 +1076,9 @@ pprOpReg :: LitString -> Operand -> Reg -> Doc
pprOpReg name op1 reg2
= hcat [
pprMnemonic_ name,
- pprOperand wordSize op1,
+ pprOperand archWordSize op1,
comma,
- pprReg wordSize reg2
+ pprReg archWordSize reg2
]
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs
index 39bc6dec10..58d063bd88 100644
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ b/compiler/nativeGen/X86/RegInfo.hs
@@ -1,26 +1,17 @@
module X86.RegInfo (
- RegUsage(..),
- noUsage,
- regUsage,
- patchRegs,
- jumpDests,
- isJumpish,
- patchJump,
- isRegRegMove,
-
- JumpDest(..),
+ mkVReg,
+
+ JumpDest,
canShortcut,
shortcutJump,
- mkSpillInstr,
- mkLoadInstr,
- mkRegRegMoveInstr,
- mkBranchInstr,
-
spillSlotSize,
maxSpillSlots,
- spillSlotToOffset
+ spillSlotToOffset,
+
+ shortcutStatic,
+ regDotColor
)
where
@@ -29,341 +20,26 @@ where
#include "HsVersions.h"
import X86.Instr
+import X86.Cond
import X86.Regs
-import RegsBase
+import Size
+import Reg
+import Cmm
+import CLabel
import BlockId
import Outputable
import Constants ( rESERVED_C_STACK_BYTES )
-import FastBool
-
-
--- -----------------------------------------------------------------------------
--- RegUsage type
-
--- @regUsage@ returns the sets of src and destination registers used
--- by a particular instruction. Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint. (We wouldn't
--- want them to end up on the free list!) As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such. Just
--- state precisely the regs read and written by that insn. The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
-
-data RegUsage = RU [Reg] [Reg]
-
-noUsage :: RegUsage
-noUsage = RU [] []
-
-
-regUsage :: Instr -> RegUsage
-regUsage instr = case instr of
- MOV _ src dst -> usageRW src dst
- MOVZxL _ src dst -> usageRW src dst
- MOVSxL _ src dst -> usageRW src dst
- LEA _ src dst -> usageRW src dst
- ADD _ src dst -> usageRM src dst
- ADC _ src dst -> usageRM src dst
- SUB _ src dst -> usageRM src dst
- IMUL _ src dst -> usageRM src dst
- IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx]
- MUL _ src dst -> usageRM src dst
- DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
- IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
- AND _ src dst -> usageRM src dst
- OR _ src dst -> usageRM src dst
-
- XOR _ (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [dst]
-
- XOR _ src dst -> usageRM src dst
- NOT _ op -> usageM op
- NEGI _ op -> usageM op
- SHL _ imm dst -> usageRM imm dst
- SAR _ imm dst -> usageRM imm dst
- SHR _ imm dst -> usageRM imm dst
- BT _ _ src -> mkRUR (use_R src)
-
- PUSH _ op -> mkRUR (use_R op)
- POP _ op -> mkRU [] (def_W op)
- TEST _ src dst -> mkRUR (use_R src ++ use_R dst)
- CMP _ src dst -> mkRUR (use_R src ++ use_R dst)
- SETCC _ op -> mkRU [] (def_W op)
- JXX _ _ -> mkRU [] []
- JXX_GBL _ _ -> mkRU [] []
- JMP op -> mkRUR (use_R op)
- JMP_TBL op _ -> mkRUR (use_R op)
- CALL (Left _) params -> mkRU params callClobberedRegs
- CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
- CLTD _ -> mkRU [eax] [edx]
- NOP -> mkRU [] []
-
-#if i386_TARGET_ARCH
- GMOV src dst -> mkRU [src] [dst]
- GLD _ src dst -> mkRU (use_EA src) [dst]
- GST _ src dst -> mkRUR (src : use_EA dst)
-
- GLDZ dst -> mkRU [] [dst]
- GLD1 dst -> mkRU [] [dst]
-
- GFTOI src dst -> mkRU [src] [dst]
- GDTOI src dst -> mkRU [src] [dst]
-
- GITOF src dst -> mkRU [src] [dst]
- GITOD src dst -> mkRU [src] [dst]
-
- GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
- GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
-
- GCMP _ src1 src2 -> mkRUR [src1,src2]
- GABS _ src dst -> mkRU [src] [dst]
- GNEG _ src dst -> mkRU [src] [dst]
- GSQRT _ src dst -> mkRU [src] [dst]
- GSIN _ _ _ src dst -> mkRU [src] [dst]
- GCOS _ _ _ src dst -> mkRU [src] [dst]
- GTAN _ _ _ src dst -> mkRU [src] [dst]
-#endif
-
-#if x86_64_TARGET_ARCH
- CVTSS2SD src dst -> mkRU [src] [dst]
- CVTSD2SS src dst -> mkRU [src] [dst]
- CVTTSS2SIQ src dst -> mkRU (use_R src) [dst]
- CVTTSD2SIQ src dst -> mkRU (use_R src) [dst]
- CVTSI2SS src dst -> mkRU (use_R src) [dst]
- CVTSI2SD src dst -> mkRU (use_R src) [dst]
- FDIV _ src dst -> usageRM src dst
-#endif
-
- FETCHGOT reg -> mkRU [] [reg]
- FETCHPC reg -> mkRU [] [reg]
-
- COMMENT _ -> noUsage
- DELTA _ -> noUsage
- SPILL reg _ -> mkRU [reg] []
- RELOAD _ reg -> mkRU [] [reg]
-
- _other -> panic "regUsage: unrecognised instr"
-
- where
- -- 2 operand form; first operand Read; second Written
- usageRW :: Operand -> Operand -> RegUsage
- usageRW op (OpReg reg) = mkRU (use_R op) [reg]
- usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
- usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
-
- -- 2 operand form; first operand Read; second Modified
- usageRM :: Operand -> Operand -> RegUsage
- usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
- usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
- usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
-
- -- 1 operand form; operand Modified
- usageM :: Operand -> RegUsage
- usageM (OpReg reg) = mkRU [reg] [reg]
- usageM (OpAddr ea) = mkRUR (use_EA ea)
- usageM _ = panic "X86.RegInfo.usageM: no match"
-
- -- Registers defd when an operand is written.
- def_W (OpReg reg) = [reg]
- def_W (OpAddr _ ) = []
- def_W _ = panic "X86.RegInfo.def_W: no match"
-
- -- Registers used when an operand is read.
- use_R (OpReg reg) = [reg]
- use_R (OpImm _) = []
- use_R (OpAddr ea) = use_EA ea
-
- -- Registers used to compute an effective address.
- use_EA (ImmAddr _ _) = []
- use_EA (AddrBaseIndex base index _) =
- use_base base $! use_index index
- where use_base (EABaseReg r) x = r : x
- use_base _ x = x
- use_index EAIndexNone = []
- use_index (EAIndex i _) = [i]
-
- mkRUR src = src' `seq` RU src' []
- where src' = filter interesting src
-
- mkRU src dst = src' `seq` dst' `seq` RU src' dst'
- where src' = filter interesting src
- dst' = filter interesting dst
-
-interesting :: Reg -> Bool
-interesting (VirtualRegI _) = True
-interesting (VirtualRegHi _) = True
-interesting (VirtualRegF _) = True
-interesting (VirtualRegD _) = True
-interesting (RealReg i) = isFastTrue (freeReg i)
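-
--- For instance, ADD is a read/modify instruction, so
--- regUsage (ADD II32 (OpReg r1) (OpReg r2)) yields RU [r1,r2] [r2]
--- (modulo the "interesting" filter, which drops pre-allocated
--- machine registers from both sets).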
-
-
-
-
--- -----------------------------------------------------------------------------
--- 'patchRegs' function
-
--- 'patchRegs' takes an instruction and applies the given mapping to
--- all the register references.
-
-patchRegs :: Instr -> (Reg -> Reg) -> Instr
-patchRegs instr env = case instr of
- MOV sz src dst -> patch2 (MOV sz) src dst
- MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
- MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
- LEA sz src dst -> patch2 (LEA sz) src dst
- ADD sz src dst -> patch2 (ADD sz) src dst
- ADC sz src dst -> patch2 (ADC sz) src dst
- SUB sz src dst -> patch2 (SUB sz) src dst
- IMUL sz src dst -> patch2 (IMUL sz) src dst
- IMUL2 sz src -> patch1 (IMUL2 sz) src
- MUL sz src dst -> patch2 (MUL sz) src dst
- IDIV sz op -> patch1 (IDIV sz) op
- DIV sz op -> patch1 (DIV sz) op
- AND sz src dst -> patch2 (AND sz) src dst
- OR sz src dst -> patch2 (OR sz) src dst
- XOR sz src dst -> patch2 (XOR sz) src dst
- NOT sz op -> patch1 (NOT sz) op
- NEGI sz op -> patch1 (NEGI sz) op
- SHL sz imm dst -> patch1 (SHL sz imm) dst
- SAR sz imm dst -> patch1 (SAR sz imm) dst
- SHR sz imm dst -> patch1 (SHR sz imm) dst
- BT sz imm src -> patch1 (BT sz imm) src
- TEST sz src dst -> patch2 (TEST sz) src dst
- CMP sz src dst -> patch2 (CMP sz) src dst
- PUSH sz op -> patch1 (PUSH sz) op
- POP sz op -> patch1 (POP sz) op
- SETCC cond op -> patch1 (SETCC cond) op
- JMP op -> patch1 JMP op
- JMP_TBL op ids -> patch1 JMP_TBL op $ ids
-
-#if i386_TARGET_ARCH
- GMOV src dst -> GMOV (env src) (env dst)
- GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
- GST sz src dst -> GST sz (env src) (lookupAddr dst)
-
- GLDZ dst -> GLDZ (env dst)
- GLD1 dst -> GLD1 (env dst)
-
- GFTOI src dst -> GFTOI (env src) (env dst)
- GDTOI src dst -> GDTOI (env src) (env dst)
-
- GITOF src dst -> GITOF (env src) (env dst)
- GITOD src dst -> GITOD (env src) (env dst)
-
- GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
- GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
- GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
- GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
-
- GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
- GABS sz src dst -> GABS sz (env src) (env dst)
- GNEG sz src dst -> GNEG sz (env src) (env dst)
- GSQRT sz src dst -> GSQRT sz (env src) (env dst)
- GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
- GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
- GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
-#endif
-
-#if x86_64_TARGET_ARCH
- CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
- CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
- CVTTSS2SIQ src dst -> CVTTSS2SIQ (patchOp src) (env dst)
- CVTTSD2SIQ src dst -> CVTTSD2SIQ (patchOp src) (env dst)
- CVTSI2SS src dst -> CVTSI2SS (patchOp src) (env dst)
- CVTSI2SD src dst -> CVTSI2SD (patchOp src) (env dst)
- FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
-#endif
-
- CALL (Left _) _ -> instr
- CALL (Right reg) p -> CALL (Right (env reg)) p
-
- FETCHGOT reg -> FETCHGOT (env reg)
- FETCHPC reg -> FETCHPC (env reg)
-
- NOP -> instr
- COMMENT _ -> instr
- DELTA _ -> instr
- SPILL reg slot -> SPILL (env reg) slot
- RELOAD slot reg -> RELOAD slot (env reg)
-
- JXX _ _ -> instr
- JXX_GBL _ _ -> instr
- CLTD _ -> instr
-
- _other -> panic "patchRegs: unrecognised instr"
-
- where
- patch1 insn op = insn $! patchOp op
- patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
- patchOp (OpReg reg) = OpReg $! env reg
- patchOp (OpImm imm) = OpImm imm
- patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
-
- lookupAddr (ImmAddr imm off) = ImmAddr imm off
- lookupAddr (AddrBaseIndex base index disp)
- = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
- where
- lookupBase EABaseNone = EABaseNone
- lookupBase EABaseRip = EABaseRip
- lookupBase (EABaseReg r) = EABaseReg (env r)
-
- lookupIndex EAIndexNone = EAIndexNone
- lookupIndex (EAIndex r i) = EAIndex (env r) i
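-
--- For example, patchRegs (MOV II32 (OpReg r1) (OpReg r2)) env
--- rebuilds the instruction as MOV II32 (OpReg (env r1)) (OpReg (env r2)),
--- applying env strictly at every register position.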
-
-
--- -----------------------------------------------------------------------------
--- Determine the possible destinations from the current instruction.
-
--- (we always assume that the next instruction is also a valid destination;
--- if this isn't the case then the jump should be at the end of the basic
--- block).
-
-jumpDests :: Instr -> [BlockId] -> [BlockId]
-jumpDests insn acc
- = case insn of
- JXX _ id -> id : acc
- JMP_TBL _ ids -> ids ++ acc
- _ -> acc
-
-
-isJumpish :: Instr -> Bool
-isJumpish instr
- = case instr of
- JMP{} -> True
- JXX{} -> True
- JXX_GBL{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
--- | Change the destination of this jump instruction
--- Used in joinToTargets in the linear allocator, when emitting fixup code
--- for join points.
-patchJump :: Instr -> BlockId -> BlockId -> Instr
-patchJump insn old new
- = case insn of
- JXX cc id | id == old -> JXX cc new
- JMP_TBL _ _ -> error "Cannot patch JMP_TBL"
- _other -> insn
-
-
--- -----------------------------------------------------------------------------
--- Detecting reg->reg moves
-
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
-
-isRegRegMove :: Instr -> Maybe (Reg,Reg)
-isRegRegMove (MOV _ (OpReg r1) (OpReg r2)) = Just (r1,r2)
-isRegRegMove _ = Nothing
+import Unique
+mkVReg :: Unique -> Size -> Reg
+mkVReg u size
+ | not (isFloatSize size) = VirtualRegI u
+ | otherwise
+ = case size of
+ FF32 -> VirtualRegD u
+ FF64 -> VirtualRegD u
+ _ -> panic "mkVReg"
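+
+-- Note that both FF32 and FF64 become a VirtualRegD: this code does
+-- not distinguish float-width from double-width virtual registers.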
data JumpDest = DestBlockId BlockId | DestImm Imm
@@ -386,92 +62,6 @@ shortcutJump _ other = other
--- -----------------------------------------------------------------------------
--- Generating spill instructions
-
-mkSpillInstr
- :: Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-#if i386_TARGET_ARCH
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 4
- in case regClass reg of
- RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
- _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
-
-#elif x86_64_TARGET_ARCH
-mkSpillInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 8
- in case regClass reg of
- RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
- RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
- RcFloat -> panic "mkSpillInstr/RcFloat"
- -- ToDo: will it work to always spill as a double?
- -- does that cause a stall if the data was a float?
-#else
-mkSpillInstr _ _ _
- = panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
-#endif
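-
--- For example, on i386 with a current stack delta of 0, spilling an
--- integer register to slot s produces
--- MOV II32 (OpReg reg) (OpAddr (spRel (spillSlotToOffset s `div` 4))).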
-
-
-mkLoadInstr
- :: Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-#if i386_TARGET_ARCH
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 4
- in case regClass reg of {
- RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
- _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
-#elif x86_64_TARGET_ARCH
-mkLoadInstr reg delta slot
- = let off = spillSlotToOffset slot
- in
- let off_w = (off-delta) `div` 8
- in case regClass reg of
- RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
- _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
-#else
-mkLoadInstr _ _ _
- = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
-#endif
-
-
-
-mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-mkRegRegMoveInstr src dst
- = case regClass src of
- RcInteger -> MOV wordSize (OpReg src) (OpReg dst)
-#if i386_TARGET_ARCH
- RcDouble -> GMOV src dst
- RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#else
- RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
- RcFloat -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-#endif
-
-
-mkBranchInstr
- :: BlockId
- -> [Instr]
-
-mkBranchInstr id = [JXX ALWAYS id]
-
-
spillSlotSize :: Int
spillSlotSize = IF_ARCH_i386(12, 8)
@@ -489,3 +79,82 @@ spillSlotToOffset slot
= pprPanic "spillSlotToOffset:"
( text "invalid spill location: " <> int slot
$$ text "maxSpillSlots: " <> int maxSpillSlots)
+
+
+-- Here because it knows about JumpDest
+shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
+shortcutStatic fn (CmmStaticLit (CmmLabel lab))
+ | Just uq <- maybeAsmTemp lab
+ = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
+ | Just uq <- maybeAsmTemp lbl1
+ = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
+ -- slightly dodgy, we're ignoring the second label, but this
+ -- works with the way we use CmmLabelDiffOff for jump tables now.
+
+shortcutStatic _ other_static
+ = other_static
+
+shortBlockId
+ :: (BlockId -> Maybe JumpDest)
+ -> BlockId
+ -> CLabel
+
+shortBlockId fn blockid@(BlockId uq) =
+ case fn blockid of
+ Nothing -> mkAsmTempLabel uq
+ Just (DestBlockId blockid') -> shortBlockId fn blockid'
+ Just (DestImm (ImmCLbl lbl)) -> lbl
+ _other -> panic "shortBlockId"
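+
+-- A small worked example (treating uniques as small ints for brevity):
+-- if fn maps block 1 to Just (DestBlockId (BlockId 2)) and block 2 to
+-- Nothing, then shortBlockId fn (BlockId 1) == mkAsmTempLabel 2, so a
+-- jump-table entry naming block 1 ends up pointing straight at block 2.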
+
+
+
+-- reg colors for x86
+#if i386_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just str = lookupUFM regColors reg
+ in text str
+
+regColors
+ = listToUFM
+ $ [ (eax, "#00ff00")
+ , (ebx, "#0000ff")
+ , (ecx, "#00ffff")
+ , (edx, "#0080ff")
+
+ , (fake0, "#ff00ff")
+ , (fake1, "#ff00aa")
+ , (fake2, "#aa00ff")
+ , (fake3, "#aa00aa")
+ , (fake4, "#ff0055")
+ , (fake5, "#5500ff") ]
+
+
+-- reg colors for x86_64
+#elif x86_64_TARGET_ARCH
+regDotColor :: Reg -> SDoc
+regDotColor reg
+ = let Just str = lookupUFM regColors reg
+ in text str
+
+regColors
+ = listToUFM
+ $ [ (rax, "#00ff00"), (eax, "#00ff00")
+ , (rbx, "#0000ff"), (ebx, "#0000ff")
+ , (rcx, "#00ffff"), (ecx, "#00ffff")
+	, (rdx,	"#0080ff"),	(edx,	"#0080ff")
+ , (r8, "#00ff80")
+ , (r9, "#008080")
+ , (r10, "#0040ff")
+ , (r11, "#00ff40")
+ , (r12, "#008040")
+ , (r13, "#004080")
+ , (r14, "#004040")
+ , (r15, "#002080") ]
+
+ ++ zip (map RealReg [16..31]) (repeat "red")
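+
+-- RealRegs 16..31 have no individual colours; presumably these are the
+-- xmm registers on this target, so they are all painted "red".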
+#else
+regDotColor :: Reg -> SDoc
+regDotColor = panic "not defined"
+#endif
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 1f1c7249a7..87564b860c 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -1,15 +1,4 @@
module X86.Regs (
-
- -- sizes
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- wordSize,
- cmmTypeSize,
- sizeToWidth,
- mkVReg,
-
-- immediates
Imm(..),
strImmLit,
@@ -45,7 +34,10 @@ module X86.Regs (
-- horror show
freeReg,
- globalRegMaybe
+ globalRegMaybe,
+
+ get_GlobalReg_reg_or_addr,
+ allocatableRegs
)
where
@@ -60,93 +52,22 @@ where
#include "../includes/MachRegs.h"
-import RegsBase
+import Reg
+import RegClass
+import CgUtils ( get_GlobalReg_addr )
import BlockId
import Cmm
import CLabel ( CLabel )
import Pretty
-import Outputable ( Outputable(..), pprPanic, panic )
+import Outputable ( panic )
import qualified Outputable
-import Unique
import FastBool
#if defined(i386_TARGET_ARCH) || defined(x86_64_TARGET_ARCH)
import Constants
#endif
--- -----------------------------------------------------------------------------
--- Sizes on this architecture
---
--- A Size is usually a combination of width and class
-
--- It looks very like the old MachRep, but it's now of purely local
--- significance, here in the native code generator. You can change it
--- without global consequences.
---
--- A major use is as an opcode qualifier; thus the opcode
--- mov.l a b
--- might be encoded
--- MOV II32 a b
--- where the Size field encodes the ".l" part.
-
--- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
--- here. I've removed them from the x86 version, we'll see what happens --SDM
-
--- ToDo: quite a few occurrences of Size could usefully be replaced by Width
-
-data Size
- = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80
- deriving Eq
-
-intSize, floatSize :: Width -> Size
-intSize W8 = II8
-intSize W16 = II16
-intSize W32 = II32
-intSize W64 = II64
-intSize other = pprPanic "MachInstrs.intSize" (ppr other)
-
-
-floatSize W32 = FF32
-floatSize W64 = FF64
-floatSize W32 = FF32
-
-
-isFloatSize :: Size -> Bool
-isFloatSize FF32 = True
-isFloatSize FF64 = True
-isFloatSize FF80 = True
-isFloatSize _ = False
-
-
-wordSize :: Size
-wordSize = intSize wordWidth
-
-
-cmmTypeSize :: CmmType -> Size
-cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty)
- | otherwise = intSize (typeWidth ty)
-
-
-sizeToWidth :: Size -> Width
-sizeToWidth II8 = W8
-sizeToWidth II16 = W16
-sizeToWidth II32 = W32
-sizeToWidth II64 = W64
-sizeToWidth FF32 = W32
-sizeToWidth FF64 = W64
-sizeToWidth _ = panic "MachInstrs.sizeToWidth"
-
-
-mkVReg :: Unique -> Size -> Reg
-mkVReg u size
- | not (isFloatSize size) = VirtualRegI u
- | otherwise
- = case size of
- FF32 -> VirtualRegD u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-- -----------------------------------------------------------------------------
-- Immediates
@@ -699,4 +620,26 @@ callClobberedRegs = panic "X86.Regs.globalRegMaybe: not defined"
#endif
+-- We map STG registers onto appropriate CmmExprs: either they map to
+-- real machine registers, or they are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
+-- register it is in, on this platform, or a CmmExpr denoting the
+-- address in the register table holding it.
+-- (See also get_GlobalReg_addr in CgUtils.)
+
+get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
+get_GlobalReg_reg_or_addr mid
+ = case globalRegMaybe mid of
+ Just rr -> Left rr
+ Nothing -> Right (get_GlobalReg_addr mid)
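+
+-- A typical use site (a minimal sketch; Sp is one of the GlobalReg
+-- constructors from Cmm):
+--
+--     case get_GlobalReg_reg_or_addr Sp of
+--       Left  reg  -> {- use the machine register directly -}
+--       Right addr -> {- emit a load from addr -}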
+
+
+-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
+-- i.e., these are the regs to which we are prepared to let the
+-- register allocator map VRegs.
+allocatableRegs :: [RegNo]
+allocatableRegs
+ = let isFree i = isFastTrue (freeReg i)
+ in filter isFree allMachRegNos
+
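+
+-- For example, registers pinned to STG globals are not free according
+-- to freeReg, so they never appear in allocatableRegs and the
+-- allocator can never hand them out.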