summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
authorIan Lynagh <ian@well-typed.com>2012-09-20 18:05:01 +0100
committerIan Lynagh <ian@well-typed.com>2012-09-20 18:05:01 +0100
commitb92c76ec5703a216b0d5553e037da6f66932a82e (patch)
treed6276b2196a9e0abd77855a919e7ea710aef2768 /compiler/nativeGen
parent85a8f79f70cb9d94c9fca9e03ae98f596be8a48c (diff)
parenta9109703c5994a0de97236184672095d4605ae7d (diff)
downloadhaskell-b92c76ec5703a216b0d5553e037da6f66932a82e.tar.gz
Merge branch 'master' of darcs.haskell.org:/srv/darcs//ghc
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/AsmCodeGen.lhs70
-rw-r--r--compiler/nativeGen/Instruction.hs13
-rw-r--r--compiler/nativeGen/PPC/Instr.hs2
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Main.hs25
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/StackMap.hs39
-rw-r--r--compiler/nativeGen/RegAlloc/Liveness.hs5
-rw-r--r--compiler/nativeGen/SPARC/Instr.hs2
-rw-r--r--compiler/nativeGen/X86/Instr.hs117
-rw-r--r--compiler/nativeGen/X86/Regs.hs6
9 files changed, 194 insertions, 85 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 8c608f1bf1..870d285390 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -133,16 +133,17 @@ The machine-dependent bits break down as follows:
data NcgImpl statics instr jumpDest = NcgImpl {
cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
- generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr),
+ generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
getJumpDestBlockId :: jumpDest -> Maybe BlockId,
canShortcut :: instr -> Maybe jumpDest,
shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
- maxSpillSlots :: DynFlags -> Int,
- allocatableRegs :: Platform -> [RealReg],
+ maxSpillSlots :: Int,
+ allocatableRegs :: [RealReg],
ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
+ ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
}
@@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms
nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
x86NcgImpl = NcgImpl {
cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
,canShortcut = X86.Instr.canShortcut
,shortcutStatics = X86.Instr.shortcutStatics
,shortcutJump = X86.Instr.shortcutJump
,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
- ,maxSpillSlots = X86.Instr.maxSpillSlots
- ,allocatableRegs = X86.Regs.allocatableRegs
+ ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
+ ,allocatableRegs = X86.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
,ncgExpandTop = id
,ncgMakeFarBranches = id
}
@@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms
ArchPPC ->
nCG' $ NcgImpl {
cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
,canShortcut = PPC.RegInfo.canShortcut
,shortcutStatics = PPC.RegInfo.shortcutStatics
,shortcutJump = PPC.RegInfo.shortcutJump
,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = PPC.Instr.maxSpillSlots
- ,allocatableRegs = PPC.Regs.allocatableRegs
+ ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = PPC.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = id
,ncgMakeFarBranches = makeFarBranches
}
ArchSPARC ->
nCG' $ NcgImpl {
cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
,canShortcut = SPARC.ShortcutJump.canShortcut
,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
,shortcutJump = SPARC.ShortcutJump.shortcutJump
,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = SPARC.Instr.maxSpillSlots
- ,allocatableRegs = \_ -> SPARC.Regs.allocatableRegs
+ ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = SPARC.Regs.allocatableRegs
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
,ncgMakeFarBranches = id
}
@@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms
ArchUnknown ->
panic "nativeCodeGen: No NCG for unknown arch"
+
+--
+-- Allocating more stack space for spilling is currently only
+-- supported for the linear register allocator on x86/x86_64, the rest
+-- default to the panic below. To support allocating extra stack on
+-- more platforms provide a definition of ncgAllocMoreStack.
+--
+noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+noAllocMoreStack amount _
+ = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
+ ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
+ ++ " is a known limitation in the linear allocator.\n"
+ ++ "\n"
+ ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
+ ++ " You can still file a bug report if you like.\n"
+
+
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
=> DynFlags
-> NcgImpl statics instr jumpDest
@@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count
= foldr (\r -> plusUFM_C unionUniqSets
$ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
emptyUFM
- $ allocatableRegs ncgImpl platform
+ $ allocatableRegs ncgImpl
-- do the graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
@@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count
$ Color.regAlloc
dflags
alloc_regs
- (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags])
+ (mkUniqSet [0 .. maxSpillSlots ncgImpl])
withLiveness
-- dump out what happened during register allocation
@@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count
else do
-- do linear register allocation
+ let reg_alloc proc = do
+ (alloced, maybe_more_stack, ra_stats) <-
+ Linear.regAlloc dflags proc
+ case maybe_more_stack of
+ Nothing -> return ( alloced, ra_stats )
+ Just amount ->
+ return ( ncgAllocMoreStack ncgImpl amount alloced
+ , ra_stats )
+
let ((alloced, regAllocStats), usAlloc)
= {-# SCC "RegAlloc" #-}
initUs usLive
$ liftM unzip
- $ mapM (Linear.regAlloc dflags) withLiveness
+ $ mapM reg_alloc withLiveness
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "Registers allocated"
@@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count
---- generate jump tables
let tabled =
{-# SCC "generateJumpTables" #-}
- generateJumpTables dflags ncgImpl kludged
+ generateJumpTables ncgImpl kludged
---- shortcut branches
let shorted =
@@ -711,12 +741,12 @@ makeFarBranches blocks
-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
- :: DynFlags -> NcgImpl statics instr jumpDest
- -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-generateJumpTables dflags ncgImpl xs = concatMap f xs
+ :: NcgImpl statics instr jumpDest
+ -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+generateJumpTables ncgImpl xs = concatMap f xs
where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
f p = [p]
- g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs)
+ g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
-- -----------------------------------------------------------------------------
-- Shortcut branches
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 64ba32c6dc..86f5ae435d 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -163,3 +163,16 @@ class Instruction instr where
-> [instr]
+ -- Subtract an amount from the C stack pointer
+ mkStackAllocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
+
+ -- Add an amount to the C stack pointer
+ mkStackDeallocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 464a88a08b..1f5e809abb 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -64,6 +64,8 @@ instance Instruction Instr where
mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr
takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
mkJumpInstr = ppc_mkJumpInstr
+ mkStackAllocInstr = panic "no ppc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no ppc_mkStackDeallocInstr"
-- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 3f92ed975b..a15bca07e7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -139,22 +139,27 @@ regAlloc
:: (Outputable instr, Instruction instr)
=> DynFlags
-> LiveCmmDecl statics instr
- -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats)
+ -> UniqSM ( NatCmmDecl statics instr
+ , Maybe Int -- number of extra stack slots required,
+ -- beyond maxSpillSlots
+ , Maybe RegAllocStats)
regAlloc _ (CmmData sec d)
= return
( CmmData sec d
+ , Nothing
, Nothing )
regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl [])
= return ( CmmProc info lbl (ListGraph [])
+ , Nothing
, Nothing )
regAlloc dflags (CmmProc static lbl sccs)
| LiveInfo info (Just first_id) (Just block_live) _ <- static
= do
-- do register allocation on each component.
- (final_blocks, stats)
+ (final_blocks, stats, stack_use)
<- linearRegAlloc dflags first_id block_live sccs
-- make sure the block that was first in the input list
@@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs)
let ((first':_), rest')
= partition ((== first_id) . blockId) final_blocks
+ let max_spill_slots = maxSpillSlots dflags
+ extra_stack
+ | stack_use > max_spill_slots
+ = Just (stack_use - max_spill_slots)
+ | otherwise
+ = Nothing
+
return ( CmmProc info lbl (ListGraph (first' : rest'))
+ , extra_stack
, Just stats)
-- bogus. to make non-exhaustive match warning go away.
@@ -184,7 +197,7 @@ linearRegAlloc
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc dflags first_id block_live sccs
= let platform = targetPlatform dflags
@@ -204,14 +217,14 @@ linearRegAlloc'
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc' dflags initFreeRegs first_id block_live sccs
= do us <- getUs
- let (_, _, stats, blocks) =
+ let (_, stack, stats, blocks) =
runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us
$ linearRA_SCCs first_id block_live [] sccs
- return (blocks, stats)
+ return (blocks, stats, getStackUse stack)
linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index b1fc3c169e..69cf411751 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap (
StackSlot,
StackMap(..),
emptyStackMap,
- getStackSlotFor
+ getStackSlotFor,
+ getStackUse
)
where
-import RegAlloc.Linear.FreeRegs
-
import DynFlags
-import Outputable
import UniqFM
import Unique
@@ -40,7 +38,7 @@ type StackSlot = Int
data StackMap
= StackMap
{ -- | The slots that are still available to be allocated.
- stackMapFreeSlots :: [StackSlot]
+ stackMapNextFreeSlot :: !Int
-- | Assignment of vregs to stack slots.
, stackMapAssignment :: UniqFM StackSlot }
@@ -48,7 +46,7 @@ data StackMap
-- | An empty stack map, with all slots available.
emptyStackMap :: DynFlags -> StackMap
-emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
+emptyStackMap _ = StackMap 0 emptyUFM
-- | If this vreg unique already has a stack assignment then return the slot number,
@@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
--
getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-getStackSlotFor (StackMap [] _) _
-
- -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993
- -- SHA1.lhs has also been added to the Crypto library on Hackage,
- -- so we see this all the time.
- --
- -- It would be better to automatically invoke the graph allocator, or do something
- -- else besides panicing, but that's a job for a different day. -- BL 2009/02
- --
- = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n"
- ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
- ++ " is a known limitation in the linear allocator.\n"
- ++ "\n"
- ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
- ++ " You can still file a bug report if you like.\n"
-
-getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
- case lookupUFM reserved reg of
- Just slot -> (fs, slot)
- Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor fs@(StackMap _ reserved) reg
+ | Just slot <- lookupUFM reserved reg = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) reg =
+ (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+
+-- | Return the number of stack slots that were allocated
+getStackUse :: StackMap -> Int
+getStackUse (StackMap freeSlot _) = freeSlot
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 85b8f969eb..608f0a423b 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -136,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkJumpInstr target = map Instr (mkJumpInstr target)
+ mkStackAllocInstr platform amount =
+ Instr (mkStackAllocInstr platform amount)
+
+ mkStackDeallocInstr platform amount =
+ Instr (mkStackDeallocInstr platform amount)
-- | An instruction with liveness information.
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9404badea6..f55c660118 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -108,6 +108,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
mkJumpInstr = sparc_mkJumpInstr
+ mkStackAllocInstr = panic "no sparc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr"
-- | SPARC instruction set.
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7f0e48e769..7bd9b0cc9e 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -11,7 +11,7 @@
module X86.Instr (Instr(..), Operand(..),
getJumpDestBlockId, canShortcut, shortcutStatics,
- shortcutJump, i386_insert_ffrees,
+ shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordSize)
where
@@ -58,6 +58,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = x86_mkRegRegMoveInstr
takeRegRegMoveInstr = x86_takeRegRegMoveInstr
mkJumpInstr = x86_mkJumpInstr
+ mkStackAllocInstr = x86_mkStackAllocInstr
+ mkStackDeallocInstr = x86_mkStackDeallocInstr
-- -----------------------------------------------------------------------------
@@ -620,14 +622,13 @@ x86_mkSpillInstr
-> Instr
x86_mkSpillInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off - delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpReg reg) (OpAddr (spRel dflags off_w))
- RcDouble -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w))
+ (OpReg reg) (OpAddr (spRel dflags off))
+ RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
_ -> panic "X86.mkSpillInstr: no match"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -641,14 +642,13 @@ x86_mkLoadInstr
-> Instr
x86_mkLoadInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off-delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpAddr (spRel dflags off_w)) (OpReg reg)
- RcDouble -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg)
+ (OpAddr (spRel dflags off)) (OpReg reg)
+ RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
_ -> panic "X86.x86_mkLoadInstr"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -666,12 +666,7 @@ maxSpillSlots dflags
-- the C stack pointer.
spillSlotToOffset :: DynFlags -> Int -> Int
spillSlotToOffset dflags slot
- | slot >= 0 && slot < maxSpillSlots dflags
= 64 + spillSlotSize dflags * slot
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags))
--------------------------------------------------------------------------------
@@ -744,8 +739,25 @@ x86_mkJumpInstr id
= [JXX ALWAYS id]
-
-
+x86_mkStackAllocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackAllocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackAllocInstr"
+
+x86_mkStackDeallocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackDeallocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackDeallocInstr"
i386_insert_ffrees
:: [GenBasicBlock Instr]
@@ -753,18 +765,12 @@ i386_insert_ffrees
i386_insert_ffrees blocks
| or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
- = map ffree_before_nonlocal_transfers blocks
-
+ = map insertGFREEs blocks
| otherwise
= blocks
- where
- ffree_before_nonlocal_transfers (BasicBlock id insns)
- = BasicBlock id (foldr p [] insns)
- where p insn r = case insn of
- CALL _ _ -> GFREE : insn : r
- JMP _ _ -> GFREE : insn : r
- JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
- _ -> insn : r
+ where
+ insertGFREEs (BasicBlock id insns)
+ = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)
-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
@@ -796,6 +802,57 @@ is_G_instr instr
_ -> False
+--
+-- Note [extra spill slots]
+--
+-- If the register allocator used more spill slots than we have
+-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
+-- C stack space on entry and exit from this proc. Therefore we
+-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
+-- before every non-local jump.
+--
+-- This became necessary when the new codegen started bundling entire
+-- functions together into one proc, because the register allocator
+-- assigns a different stack slot to each virtual reg within a proc.
+-- To avoid using so many slots we could also:
+--
+-- - split up the proc into connected components before code generator
+--
+-- - rename the virtual regs, so that we re-use vreg names and hence
+-- stack slots for non-overlapping vregs.
+--
+allocMoreStack
+ :: Platform
+ -> Int
+ -> NatCmmDecl statics X86.Instr.Instr
+ -> NatCmmDecl statics X86.Instr.Instr
+
+allocMoreStack _ _ top@(CmmData _ _) = top
+allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) =
+ CmmProc info lbl (ListGraph (map insert_stack_insns code))
+ where
+ alloc = mkStackAllocInstr platform amount
+ dealloc = mkStackDeallocInstr platform amount
+
+ is_entry_point id = id `mapMember` info
+
+ insert_stack_insns (BasicBlock id insns)
+ | is_entry_point id = BasicBlock id (alloc : block')
+ | otherwise = BasicBlock id block'
+ where
+ block' = insertBeforeNonlocalTransfers dealloc insns
+
+
+insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
+insertBeforeNonlocalTransfers insert insns
+ = foldr p [] insns
+ where p insn r = case insn of
+ CALL _ _ -> insert : insn : r
+ JMP _ _ -> insert : insn : r
+ JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
+ _ -> insn : r
+
+
data JumpDest = DestBlockId BlockId | DestImm Imm
getJumpDestBlockId :: JumpDest -> Maybe BlockId
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 4eec96f5e1..6b2fe16855 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -196,13 +196,13 @@ addrModeRegs _ = []
spRel :: DynFlags
- -> Int -- ^ desired stack offset in words, positive or negative
+ -> Int -- ^ desired stack offset in bytes, positive or negative
-> AddrMode
spRel dflags n
| target32Bit (targetPlatform dflags)
- = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n)
| otherwise
- = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n)
-- The register numbers must fit into 32 bits on x86, so that we can
-- use a Word32 to represent the set of free registers in the register