author     Ian Lynagh <ian@well-typed.com>   2012-09-20 18:05:01 +0100
committer  Ian Lynagh <ian@well-typed.com>   2012-09-20 18:05:01 +0100
commit     b92c76ec5703a216b0d5553e037da6f66932a82e (patch)
tree       d6276b2196a9e0abd77855a919e7ea710aef2768
parent     85a8f79f70cb9d94c9fca9e03ae98f596be8a48c (diff)
parent     a9109703c5994a0de97236184672095d4605ae7d (diff)
download   haskell-b92c76ec5703a216b0d5553e037da6f66932a82e.tar.gz
Merge branch 'master' of darcs.haskell.org:/srv/darcs//ghc
-rw-r--r--  compiler/cmm/CmmLayoutStack.hs                    4
-rw-r--r--  compiler/cmm/CmmPipeline.hs                       3
-rw-r--r--  compiler/cmm/CmmProcPoint.hs                     35
-rw-r--r--  compiler/cmm/MkGraph.hs                          44
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs                  4
-rw-r--r--  compiler/nativeGen/AsmCodeGen.lhs                70
-rw-r--r--  compiler/nativeGen/Instruction.hs                13
-rw-r--r--  compiler/nativeGen/PPC/Instr.hs                   2
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Main.hs       25
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/StackMap.hs   39
-rw-r--r--  compiler/nativeGen/RegAlloc/Liveness.hs           5
-rw-r--r--  compiler/nativeGen/SPARC/Instr.hs                 2
-rw-r--r--  compiler/nativeGen/X86/Instr.hs                 117
-rw-r--r--  compiler/nativeGen/X86/Regs.hs                    6
14 files changed, 247 insertions, 122 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 5505b92f5a..b4ca273d76 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -5,6 +5,7 @@ module CmmLayoutStack (
import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+import StgCmmLayout ( entryCode ) -- XXX
import Cmm
import BlockId
@@ -939,7 +940,8 @@ lowerSafeForeignCall dflags block
-- received an exception during the call, then the stack might be
-- different. Hence we continue by jumping to the top stack frame,
-- not by jumping to succ.
- jump = CmmCall { cml_target = CmmLoad (CmmReg spReg) (bWord dflags)
+ jump = CmmCall { cml_target = entryCode dflags $
+ CmmLoad (CmmReg spReg) (bWord dflags)
, cml_cont = Just succ
, cml_args_regs = regs
, cml_args = widthInBytes (wordWidth dflags)
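The entryCode wrapper matters here because the word loaded from Sp is an info-table pointer when tables-next-to-code is disabled; only with tables-next-to-code does it already point at entry code. A standalone sketch of that distinction, with illustrative types rather than GHC's Cmm:

    data Convention = TablesNextToCode | SeparateTables

    -- Stand-in for GHC's entryCode: with tables-next-to-code the return
    -- address on the stack is already entry code; with separate tables it
    -- is an info pointer, and the entry address must be fetched from the
    -- table (modelled here by the caller-supplied 'deref').
    entryAddr :: Convention -> (Int -> Int) -> Int -> Int
    entryAddr TablesNextToCode _     addr = addr
    entryAddr SeparateTables   deref addr = deref addr
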
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 25fda1ca07..5fca9e7164 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -114,7 +114,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
procPointAnalysis proc_points g
dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" pp_map
gs <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
- splitAtProcPoints l call_pps proc_points pp_map (CmmProc h l g)
+ splitAtProcPoints dflags l call_pps proc_points pp_map
+ (CmmProc h l g)
dumps Opt_D_dump_cmmz_split "Post splitting" gs
------------- Populate info tables with stack info -----------------
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index 58f2e54ffa..471faf8b0c 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -11,6 +11,7 @@ where
import Prelude hiding (last, unzip, succ, zip)
+import DynFlags
import BlockId
import CLabel
import Cmm
@@ -26,8 +27,6 @@ import UniqSupply
import Hoopl
-import qualified Data.Map as Map
-
-- Compute a minimal set of proc points for a control-flow graph.
-- Determine a protocol for each proc point (which live variables will
@@ -207,9 +206,9 @@ extendPPSet platform g blocks procPoints =
-- Input invariant: A block should only be reachable from a single ProcPoint.
-- ToDo: use the _ret naming convention that the old code generator
-- used. -- EZY
-splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
+splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
CmmDecl -> UniqSM [CmmDecl]
-splitAtProcPoints entry_label callPPs procPoints procMap
+splitAtProcPoints dflags entry_label callPPs procPoints procMap
(CmmProc (TopInfo {info_tbls = info_tbls})
top_l g@(CmmGraph {g_entry=entry})) =
do -- Build a map from procpoints to the blocks they reach
@@ -234,12 +233,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap
-- * Labels for the info tables of their new procedures (only if
-- the proc point is a callPP)
-- Due to common blockification, we may overestimate the set of procpoints.
- let add_label map pp = Map.insert pp lbls map
+ let add_label map pp = mapInsert pp lbls map
where lbls | pp == entry = (entry_label, Just (toInfoLbl entry_label))
| otherwise = (blockLbl pp, guard (setMember pp callPPs) >>
Just (infoTblLbl pp))
- procLabels = foldl add_label Map.empty
+
+ procLabels :: LabelMap (CLabel, Maybe CLabel)
+ procLabels = foldl add_label mapEmpty
(filter (flip mapMember (toBlockMap g)) (setElems procPoints))
+
-- In each new graph, add blocks jumping off to the new procedures,
-- and replace branches to procpoints with branches to the jump-off blocks
let add_jump_block (env, bs) (pp, l) =
@@ -259,8 +261,17 @@ splitAtProcPoints entry_label callPPs procPoints procMap
CmmCondBranch _ ti fi -> add_if_pp ti (add_if_pp fi rst)
CmmSwitch _ tbl -> foldr add_if_pp rst (catMaybes tbl)
_ -> rst
- add_if_pp id rst = case Map.lookup id procLabels of
- Just (lbl, mb_info_lbl) -> (id, mb_info_lbl `orElse` lbl) : rst
+
+ -- when jumping to a PP that has an info table, if
+ -- tablesNextToCode is off we must jump to the entry
+ -- label instead.
+ jump_label (Just info_lbl) _
+ | tablesNextToCode dflags = info_lbl
+ | otherwise = toEntryLbl info_lbl
+ jump_label Nothing block_lbl = block_lbl
+
+ add_if_pp id rst = case mapLookup id procLabels of
+ Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst
Nothing -> rst
(jumpEnv, jumpBlocks) <-
foldM add_jump_block (mapEmpty, []) needed_jumps
@@ -274,8 +285,10 @@ splitAtProcPoints entry_label callPPs procPoints procMap
let g' = ofBlockMap ppId blockEnv'''
-- pprTrace "g' pre jumps" (ppr g') $ do
return (mapInsert ppId g' newGraphEnv)
+
graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv
- let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of
+
+ let to_proc (bid, g) = case expectJust "pp label" $ mapLookup bid procLabels of
(lbl, Just info_lbl)
| bid == entry
-> CmmProc (TopInfo {info_tbls=info_tbls, stack_info=stack_info})
@@ -295,7 +308,7 @@ splitAtProcPoints entry_label callPPs procPoints procMap
replacePPIds g = {-# SCC "replacePPIds" #-}
mapGraphNodes (id, mapExp repl, mapExp repl) g
where repl e@(CmmLit (CmmBlock bid)) =
- case Map.lookup bid procLabels of
+ case mapLookup bid procLabels of
Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl)
_ -> e
repl e = e
@@ -312,7 +325,7 @@ splitAtProcPoints entry_label callPPs procPoints procMap
return -- pprTrace "procLabels" (ppr procLabels)
-- pprTrace "splitting graphs" (ppr procs)
procs
-splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t]
+splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t]
-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a
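The jump_label helper above encodes a small but easy-to-miss rule. Restated as a self-contained sketch, with strings for labels and the "_entry" suffix standing in for GHC's toEntryLbl:

    jumpLabelSketch :: Bool            -- tablesNextToCode?
                    -> Maybe String    -- info-table label, if any
                    -> String          -- plain block label
                    -> String
    jumpLabelSketch _    Nothing        blockLbl = blockLbl
    jumpLabelSketch tntc (Just infoLbl) _
      | tntc      = infoLbl               -- entry code follows the table
      | otherwise = infoLbl ++ "_entry"   -- jump to the entry label instead
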
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 3233dbed8c..4ba82cd8f8 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -11,7 +11,7 @@ module MkGraph
, mkJumpReturnsTo
, mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
, mkCbranch, mkSwitch
- , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
+ , mkReturn, mkComment, mkCallEntry, mkBranch
, copyInOflow, copyOutOflow
, noExtraStack
, toCall, Transfer(..)
@@ -69,34 +69,38 @@ flattenCmmAGraph id stmts =
CmmGraph { g_entry = id,
g_graph = GMany NothingO body NothingO }
where
- blocks = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry id) emptyBlock) []
- body = foldr addBlock emptyBody blocks
+ body = foldr addBlock emptyBody $ flatten id stmts []
--
- -- flatten: turn a list of CgStmt into a list of Blocks. We know
- -- that any code before the first label is unreachable, so just drop
- -- it.
+ -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
--
-- NB. avoid the quadratic-append trap by passing in the tail of the
-- list. This is important for Very Long Functions (e.g. in T783).
--
- flatten :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
- flatten [] blocks = blocks
+ flatten :: Label -> CmmAGraph -> [Block CmmNode C C] -> [Block CmmNode C C]
+ flatten id g blocks
+ = flatten1 (fromOL g) (blockJoinHead (CmmEntry id) emptyBlock) blocks
- flatten (CgLabel id : stmts) blocks
+ --
+ -- flatten0: we are outside a block at this point: any code before
+ -- the first label is unreachable, so just drop it.
+ --
+ flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
+ flatten0 [] blocks = blocks
+
+ flatten0 (CgLabel id : stmts) blocks
= flatten1 stmts block blocks
where !block = blockJoinHead (CmmEntry id) emptyBlock
- flatten (CgFork fork_id stmts : rest) blocks
- = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry fork_id) emptyBlock) $
- flatten rest blocks
+ flatten0 (CgFork fork_id stmts : rest) blocks
+ = flatten fork_id stmts $ flatten0 rest blocks
- flatten (CgLast _ : stmts) blocks = flatten stmts blocks
- flatten (CgStmt _ : stmts) blocks = flatten stmts blocks
+ flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
+ flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
--
-- flatten1: we have a partial block, collect statements until the
- -- next last node to make a block, then call flatten to get the rest
+ -- next last node to make a block, then call flatten0 to get the rest
-- of the blocks
--
flatten1 :: [CgStmt] -> Block CmmNode C O
@@ -112,7 +116,7 @@ flattenCmmAGraph id stmts =
= blockJoinTail block (CmmBranch (entryLabel block)) : blocks
flatten1 (CgLast stmt : stmts) block blocks
- = block' : flatten stmts blocks
+ = block' : flatten0 stmts blocks
where !block' = blockJoinTail block stmt
flatten1 (CgStmt stmt : stmts) block blocks
@@ -120,8 +124,7 @@ flattenCmmAGraph id stmts =
where !block' = blockSnoc block stmt
flatten1 (CgFork fork_id stmts : rest) block blocks
- = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry fork_id) emptyBlock) $
- flatten1 rest block blocks
+ = flatten fork_id stmts $ flatten1 rest block blocks
-- a label here means that we should start a new block, and the
-- current block should fall through to the new block.
@@ -228,11 +231,6 @@ mkReturn dflags e actuals updfr_off =
lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
toCall e Nothing updfr_off 0
-mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkReturnSimple dflags actuals updfr_off =
- mkReturn dflags e actuals updfr_off
- where e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
-
mkBranch :: BlockId -> CmmAGraph
mkBranch bid = mkLast (CmmBranch bid)
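The comment about the "quadratic-append trap" is the key to this refactoring: flatten, flatten0 and flatten1 all take the already-built tail of the block list and cons onto it, rather than appending lists. The difference, in isolation:

    slowConcat :: [[a]] -> [a]
    slowConcat = foldl (++) []     -- left-nested (++): re-copies prefixes, O(n^2)

    fastConcat :: [[a]] -> [a]
    fastConcat chunks = go chunks []
      where
        go []     rest = rest
        go (c:cs) rest = c ++ go cs rest   -- each chunk copied once, O(n)
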
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index fa80edc96c..75d8d1c38f 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -85,7 +85,9 @@ emitReturn results
; case sequel of
Return _ ->
do { adjustHpBackwards
- ; emit (mkReturnSimple dflags results updfr_off) }
+ ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
+ ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
+ }
AssignTo regs adjust ->
do { when adjust adjustHpBackwards
; emitMultiAssign regs results }
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 8c608f1bf1..870d285390 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -133,16 +133,17 @@ The machine-dependent bits break down as follows:
data NcgImpl statics instr jumpDest = NcgImpl {
cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
- generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr),
+ generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
getJumpDestBlockId :: jumpDest -> Maybe BlockId,
canShortcut :: instr -> Maybe jumpDest,
shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
- maxSpillSlots :: DynFlags -> Int,
- allocatableRegs :: Platform -> [RealReg],
+ maxSpillSlots :: Int,
+ allocatableRegs :: [RealReg],
ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
+ ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
}
@@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms
nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
x86NcgImpl = NcgImpl {
cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
,canShortcut = X86.Instr.canShortcut
,shortcutStatics = X86.Instr.shortcutStatics
,shortcutJump = X86.Instr.shortcutJump
,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
- ,maxSpillSlots = X86.Instr.maxSpillSlots
- ,allocatableRegs = X86.Regs.allocatableRegs
+ ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
+ ,allocatableRegs = X86.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
,ncgExpandTop = id
,ncgMakeFarBranches = id
}
@@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms
ArchPPC ->
nCG' $ NcgImpl {
cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
,canShortcut = PPC.RegInfo.canShortcut
,shortcutStatics = PPC.RegInfo.shortcutStatics
,shortcutJump = PPC.RegInfo.shortcutJump
,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = PPC.Instr.maxSpillSlots
- ,allocatableRegs = PPC.Regs.allocatableRegs
+ ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = PPC.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = id
,ncgMakeFarBranches = makeFarBranches
}
ArchSPARC ->
nCG' $ NcgImpl {
cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
,canShortcut = SPARC.ShortcutJump.canShortcut
,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
,shortcutJump = SPARC.ShortcutJump.shortcutJump
,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = SPARC.Instr.maxSpillSlots
- ,allocatableRegs = \_ -> SPARC.Regs.allocatableRegs
+ ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = SPARC.Regs.allocatableRegs
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
,ncgMakeFarBranches = id
}
@@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms
ArchUnknown ->
panic "nativeCodeGen: No NCG for unknown arch"
+
+--
+-- Allocating more stack space for spilling is currently only
+-- supported for the linear register allocator on x86/x86_64, the rest
+-- default to the panic below. To support allocating extra stack on
+-- more platforms provide a definition of ncgAllocMoreStack.
+--
+noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+noAllocMoreStack amount _
+ = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
+ ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
+ ++ " is a known limitation in the linear allocator.\n"
+ ++ "\n"
+ ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
+ ++ " You can still file a bug report if you like.\n"
+
+
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
=> DynFlags
-> NcgImpl statics instr jumpDest
@@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count
= foldr (\r -> plusUFM_C unionUniqSets
$ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
emptyUFM
- $ allocatableRegs ncgImpl platform
+ $ allocatableRegs ncgImpl
-- do the graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
@@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count
$ Color.regAlloc
dflags
alloc_regs
- (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags])
+ (mkUniqSet [0 .. maxSpillSlots ncgImpl])
withLiveness
-- dump out what happened during register allocation
@@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count
else do
-- do linear register allocation
+ let reg_alloc proc = do
+ (alloced, maybe_more_stack, ra_stats) <-
+ Linear.regAlloc dflags proc
+ case maybe_more_stack of
+ Nothing -> return ( alloced, ra_stats )
+ Just amount ->
+ return ( ncgAllocMoreStack ncgImpl amount alloced
+ , ra_stats )
+
let ((alloced, regAllocStats), usAlloc)
= {-# SCC "RegAlloc" #-}
initUs usLive
$ liftM unzip
- $ mapM (Linear.regAlloc dflags) withLiveness
+ $ mapM reg_alloc withLiveness
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "Registers allocated"
@@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count
---- generate jump tables
let tabled =
{-# SCC "generateJumpTables" #-}
- generateJumpTables dflags ncgImpl kludged
+ generateJumpTables ncgImpl kludged
---- shortcut branches
let shorted =
@@ -711,12 +741,12 @@ makeFarBranches blocks
-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
- :: DynFlags -> NcgImpl statics instr jumpDest
- -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-generateJumpTables dflags ncgImpl xs = concatMap f xs
+ :: NcgImpl statics instr jumpDest
+ -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+generateJumpTables ncgImpl xs = concatMap f xs
where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
f p = [p]
- g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs)
+ g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
-- -----------------------------------------------------------------------------
-- Shortcut branches
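Taken together, the AsmCodeGen changes thread one new piece of information through the pipeline: the linear allocator may report that it needed more spill slots than were pre-reserved, and ncgAllocMoreStack is the per-architecture hook that acts on that report. The shape of the new reg_alloc step, reduced to its essentials (Proc is an illustrative stand-in for NatCmmDecl):

    type Proc = String   -- stand-in for NatCmmDecl statics instr

    allocWithMoreStack
      :: (Int -> Proc -> Proc)        -- ncgAllocMoreStack for this arch
      -> (Proc -> (Proc, Maybe Int))  -- allocator: code plus extra-stack demand
      -> Proc -> Proc
    allocWithMoreStack moreStack alloc proc =
      case alloc proc of
        (alloced, Nothing)     -> alloced
        (alloced, Just amount) -> moreStack amount alloced
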
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 64ba32c6dc..86f5ae435d 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -163,3 +163,16 @@ class Instruction instr where
-> [instr]
+ -- Subtract an amount from the C stack pointer
+ mkStackAllocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
+
+ -- Add an amount to the C stack pointer
+ mkStackDeallocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
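A hypothetical usage sketch for the two new class methods, assuming GHC's Instruction class and Platform type are in scope; as the TODOs note, the Platform argument exists only because x86 and x86_64 share one Instr type:

    -- Build the matching alloc/dealloc pair for a proc that needs
    -- 'amount' extra bytes of C stack (hypothetical helper).
    stackFixup :: Instruction instr => Platform -> Int -> (instr, instr)
    stackFixup platform amount =
      ( mkStackAllocInstr   platform amount   -- at each entry point
      , mkStackDeallocInstr platform amount ) -- before each non-local jump
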
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 464a88a08b..1f5e809abb 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -64,6 +64,8 @@ instance Instruction Instr where
mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr
takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
mkJumpInstr = ppc_mkJumpInstr
+ mkStackAllocInstr = panic "no ppc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no ppc_mkStackDeallocInstr"
-- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 3f92ed975b..a15bca07e7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -139,22 +139,27 @@ regAlloc
:: (Outputable instr, Instruction instr)
=> DynFlags
-> LiveCmmDecl statics instr
- -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats)
+ -> UniqSM ( NatCmmDecl statics instr
+ , Maybe Int -- number of extra stack slots required,
+ -- beyond maxSpillSlots
+ , Maybe RegAllocStats)
regAlloc _ (CmmData sec d)
= return
( CmmData sec d
+ , Nothing
, Nothing )
regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl [])
= return ( CmmProc info lbl (ListGraph [])
+ , Nothing
, Nothing )
regAlloc dflags (CmmProc static lbl sccs)
| LiveInfo info (Just first_id) (Just block_live) _ <- static
= do
-- do register allocation on each component.
- (final_blocks, stats)
+ (final_blocks, stats, stack_use)
<- linearRegAlloc dflags first_id block_live sccs
-- make sure the block that was first in the input list
@@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs)
let ((first':_), rest')
= partition ((== first_id) . blockId) final_blocks
+ let max_spill_slots = maxSpillSlots dflags
+ extra_stack
+ | stack_use > max_spill_slots
+ = Just (stack_use - max_spill_slots)
+ | otherwise
+ = Nothing
+
return ( CmmProc info lbl (ListGraph (first' : rest'))
+ , extra_stack
, Just stats)
-- bogus. to make non-exhaustive match warning go away.
@@ -184,7 +197,7 @@ linearRegAlloc
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc dflags first_id block_live sccs
= let platform = targetPlatform dflags
@@ -204,14 +217,14 @@ linearRegAlloc'
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc' dflags initFreeRegs first_id block_live sccs
= do us <- getUs
- let (_, _, stats, blocks) =
+ let (_, stack, stats, blocks) =
runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us
$ linearRA_SCCs first_id block_live [] sccs
- return (blocks, stats)
+ return (blocks, stats, getStackUse stack)
linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
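The extra_stack arithmetic above, isolated and run on made-up numbers: if maxSpillSlots is 1024 and a proc used 1100 slots, the allocator reports Just 76 and the backend grows the frame accordingly.

    extraStack :: Int -> Int -> Maybe Int
    extraStack stackUse maxSlots
      | stackUse > maxSlots = Just (stackUse - maxSlots)
      | otherwise           = Nothing

    -- extraStack 1100 1024 == Just 76;  extraStack 900 1024 == Nothing
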
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index b1fc3c169e..69cf411751 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap (
StackSlot,
StackMap(..),
emptyStackMap,
- getStackSlotFor
+ getStackSlotFor,
+ getStackUse
)
where
-import RegAlloc.Linear.FreeRegs
-
import DynFlags
-import Outputable
import UniqFM
import Unique
@@ -40,7 +38,7 @@ type StackSlot = Int
data StackMap
= StackMap
{ -- | The slots that are still available to be allocated.
- stackMapFreeSlots :: [StackSlot]
+ stackMapNextFreeSlot :: !Int
-- | Assignment of vregs to stack slots.
, stackMapAssignment :: UniqFM StackSlot }
@@ -48,7 +46,7 @@ data StackMap
-- | An empty stack map, with all slots available.
emptyStackMap :: DynFlags -> StackMap
-emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
+emptyStackMap _ = StackMap 0 emptyUFM
-- | If this vreg unique already has a stack assignment then return the slot number,
@@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
--
getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-getStackSlotFor (StackMap [] _) _
-
- -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993
- -- SHA1.lhs has also been added to the Crypto library on Hackage,
- -- so we see this all the time.
- --
- -- It would be better to automatically invoke the graph allocator, or do something
- -- else besides panicing, but that's a job for a different day. -- BL 2009/02
- --
- = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n"
- ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
- ++ " is a known limitation in the linear allocator.\n"
- ++ "\n"
- ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
- ++ " You can still file a bug report if you like.\n"
-
-getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
- case lookupUFM reserved reg of
- Just slot -> (fs, slot)
- Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor fs@(StackMap _ reserved) reg
+ | Just slot <- lookupUFM reserved reg = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) reg =
+ (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+
+-- | Return the number of stack slots that were allocated
+getStackUse :: StackMap -> Int
+getStackUse (StackMap freeSlot _) = freeSlot
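The StackMap rewrite replaces a finite free-slot list, which could run dry and panic, with a bump counter that can grow without bound; overflow beyond maxSpillSlots is now handled upstream by allocMoreStack. A standalone model, using Data.Map in place of GHC's UniqFM keyed on vreg Uniques:

    import qualified Data.Map as M

    data SlotMap k = SlotMap { nextFree :: !Int, assigned :: M.Map k Int }

    slotFor :: Ord k => SlotMap k -> k -> (SlotMap k, Int)
    slotFor sm@(SlotMap next asg) reg =
      case M.lookup reg asg of
        Just slot -> (sm, slot)                                    -- reuse
        Nothing   -> (SlotMap (next + 1) (M.insert reg next asg), next)

    slotsUsed :: SlotMap k -> Int   -- analogue of getStackUse
    slotsUsed = nextFree
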
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 85b8f969eb..608f0a423b 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -136,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkJumpInstr target = map Instr (mkJumpInstr target)
+ mkStackAllocInstr platform amount =
+ Instr (mkStackAllocInstr platform amount)
+
+ mkStackDeallocInstr platform amount =
+ Instr (mkStackDeallocInstr platform amount)
-- | An instruction with liveness information.
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9404badea6..f55c660118 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -108,6 +108,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
mkJumpInstr = sparc_mkJumpInstr
+ mkStackAllocInstr = panic "no sparc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr"
-- | SPARC instruction set.
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7f0e48e769..7bd9b0cc9e 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -11,7 +11,7 @@
module X86.Instr (Instr(..), Operand(..),
getJumpDestBlockId, canShortcut, shortcutStatics,
- shortcutJump, i386_insert_ffrees,
+ shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordSize)
where
@@ -58,6 +58,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = x86_mkRegRegMoveInstr
takeRegRegMoveInstr = x86_takeRegRegMoveInstr
mkJumpInstr = x86_mkJumpInstr
+ mkStackAllocInstr = x86_mkStackAllocInstr
+ mkStackDeallocInstr = x86_mkStackDeallocInstr
-- -----------------------------------------------------------------------------
@@ -620,14 +622,13 @@ x86_mkSpillInstr
-> Instr
x86_mkSpillInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off - delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpReg reg) (OpAddr (spRel dflags off_w))
- RcDouble -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w))
+ (OpReg reg) (OpAddr (spRel dflags off))
+ RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
_ -> panic "X86.mkSpillInstr: no match"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -641,14 +642,13 @@ x86_mkLoadInstr
-> Instr
x86_mkLoadInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off-delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpAddr (spRel dflags off_w)) (OpReg reg)
- RcDouble -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg)
+ (OpAddr (spRel dflags off)) (OpReg reg)
+ RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
_ -> panic "X86.x86_mkLoadInstr"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -666,12 +666,7 @@ maxSpillSlots dflags
-- the C stack pointer.
spillSlotToOffset :: DynFlags -> Int -> Int
spillSlotToOffset dflags slot
- | slot >= 0 && slot < maxSpillSlots dflags
= 64 + spillSlotSize dflags * slot
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags))
--------------------------------------------------------------------------------
@@ -744,8 +739,25 @@ x86_mkJumpInstr id
= [JXX ALWAYS id]
-
-
+x86_mkStackAllocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackAllocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackAllocInstr"
+
+x86_mkStackDeallocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackDeallocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackDeallocInstr"
i386_insert_ffrees
:: [GenBasicBlock Instr]
@@ -753,18 +765,12 @@ i386_insert_ffrees
i386_insert_ffrees blocks
| or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
- = map ffree_before_nonlocal_transfers blocks
-
+ = map insertGFREEs blocks
| otherwise
= blocks
- where
- ffree_before_nonlocal_transfers (BasicBlock id insns)
- = BasicBlock id (foldr p [] insns)
- where p insn r = case insn of
- CALL _ _ -> GFREE : insn : r
- JMP _ _ -> GFREE : insn : r
- JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
- _ -> insn : r
+ where
+ insertGFREEs (BasicBlock id insns)
+ = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)
-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
@@ -796,6 +802,57 @@ is_G_instr instr
_ -> False
+--
+-- Note [extra spill slots]
+--
+-- If the register allocator used more spill slots than we have
+-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
+-- C stack space on entry and exit from this proc. Therefore we
+-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
+-- before every non-local jump.
+--
+-- This became necessary when the new codegen started bundling entire
+-- functions together into one proc, because the register allocator
+-- assigns a different stack slot to each virtual reg within a proc.
+-- To avoid using so many slots we could also:
+--
+-- - split up the proc into connected components before code generation
+--
+-- - rename the virtual regs, so that we re-use vreg names and hence
+-- stack slots for non-overlapping vregs.
+--
+allocMoreStack
+ :: Platform
+ -> Int
+ -> NatCmmDecl statics X86.Instr.Instr
+ -> NatCmmDecl statics X86.Instr.Instr
+
+allocMoreStack _ _ top@(CmmData _ _) = top
+allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) =
+ CmmProc info lbl (ListGraph (map insert_stack_insns code))
+ where
+ alloc = mkStackAllocInstr platform amount
+ dealloc = mkStackDeallocInstr platform amount
+
+ is_entry_point id = id `mapMember` info
+
+ insert_stack_insns (BasicBlock id insns)
+ | is_entry_point id = BasicBlock id (alloc : block')
+ | otherwise = BasicBlock id block'
+ where
+ block' = insertBeforeNonlocalTransfers dealloc insns
+
+
+insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
+insertBeforeNonlocalTransfers insert insns
+ = foldr p [] insns
+ where p insn r = case insn of
+ CALL _ _ -> insert : insn : r
+ JMP _ _ -> insert : insn : r
+ JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
+ _ -> insn : r
+
+
data JumpDest = DestBlockId BlockId | DestImm Imm
getJumpDestBlockId :: JumpDest -> Maybe BlockId
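Note [extra spill slots] as a toy program: allocate on entry, deallocate before every instruction that leaves the proc, mirroring allocMoreStack and insertBeforeNonlocalTransfers above. A simplified instruction type stands in for x86 Instrs:

    data Ins = Sub Int | Add Int | Call | Jmp | Other String deriving Show

    insertBefore :: Ins -> [Ins] -> [Ins]
    insertBefore x = foldr step []
      where step i r | leaves i  = x : i : r
                     | otherwise = i : r
            leaves Call = True
            leaves Jmp  = True
            leaves _    = False

    fixBlock :: Bool -> Int -> [Ins] -> [Ins]
    fixBlock isEntry n ins
      | isEntry   = Sub n : body              -- "sub $N, %rsp" on entry
      | otherwise = body
      where body = insertBefore (Add n) ins   -- "add $N, %rsp" before exits
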
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 4eec96f5e1..6b2fe16855 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -196,13 +196,13 @@ addrModeRegs _ = []
spRel :: DynFlags
- -> Int -- ^ desired stack offset in words, positive or negative
+ -> Int -- ^ desired stack offset in bytes, positive or negative
-> AddrMode
spRel dflags n
| target32Bit (targetPlatform dflags)
- = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n)
| otherwise
- = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n)
-- The register numbers must fit into 32 bits on x86, so that we can
-- use a Word32 to represent the set of free registers in the register
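
Finally, the spRel change is a unit change, not a behavioural one: offsets from spillSlotToOffset (64 + spillSlotSize dflags * slot) are already byte offsets, so the spill/load code stops dividing by the word size and spRel stops multiplying by it. A worked example, assuming an 8-byte spill slot purely for illustration:

    spillOffsetSketch :: Int -> Int
    spillOffsetSketch slot = 64 + 8 * slot
    -- slot 0 -> byte offset 64; slot 3 -> 88. These byte offsets now feed
    -- spRel directly, with no word-size scaling on either side.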