author     Ian Lynagh <ian@well-typed.com>   2012-09-20 18:05:01 +0100
committer  Ian Lynagh <ian@well-typed.com>   2012-09-20 18:05:01 +0100
commit     b92c76ec5703a216b0d5553e037da6f66932a82e (patch)
tree       d6276b2196a9e0abd77855a919e7ea710aef2768
parent     85a8f79f70cb9d94c9fca9e03ae98f596be8a48c (diff)
parent     a9109703c5994a0de97236184672095d4605ae7d (diff)
download   haskell-b92c76ec5703a216b0d5553e037da6f66932a82e.tar.gz
Merge branch 'master' of darcs.haskell.org:/srv/darcs//ghc
 compiler/cmm/CmmLayoutStack.hs                  |   4
 compiler/cmm/CmmPipeline.hs                     |   3
 compiler/cmm/CmmProcPoint.hs                    |  35
 compiler/cmm/MkGraph.hs                         |  44
 compiler/codeGen/StgCmmLayout.hs                |   4
 compiler/nativeGen/AsmCodeGen.lhs               |  70
 compiler/nativeGen/Instruction.hs               |  13
 compiler/nativeGen/PPC/Instr.hs                 |   2
 compiler/nativeGen/RegAlloc/Linear/Main.hs      |  25
 compiler/nativeGen/RegAlloc/Linear/StackMap.hs  |  39
 compiler/nativeGen/RegAlloc/Liveness.hs         |   5
 compiler/nativeGen/SPARC/Instr.hs               |   2
 compiler/nativeGen/X86/Instr.hs                 | 117
 compiler/nativeGen/X86/Regs.hs                  |   6
 14 files changed, 247 insertions(+), 122 deletions(-)
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 5505b92f5a..b4ca273d76 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -5,6 +5,7 @@ module CmmLayoutStack (
 
 import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
 import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX
+import StgCmmLayout     ( entryCode ) -- XXX
 
 import Cmm
 import BlockId
@@ -939,7 +940,8 @@ lowerSafeForeignCall dflags block
      -- received an exception during the call, then the stack might be
      -- different.  Hence we continue by jumping to the top stack frame,
      -- not by jumping to succ.
-      jump = CmmCall { cml_target    = CmmLoad (CmmReg spReg) (bWord dflags)
+      jump = CmmCall { cml_target    = entryCode dflags $
+                                       CmmLoad (CmmReg spReg) (bWord dflags)
                      , cml_cont      = Just succ
                      , cml_args_regs = regs
                      , cml_args      = widthInBytes (wordWidth dflags)
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 25fda1ca07..5fca9e7164 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -114,7 +114,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
           procPointAnalysis proc_points g
        dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" pp_map
        gs <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
-             splitAtProcPoints l call_pps proc_points pp_map (CmmProc h l g)
+             splitAtProcPoints dflags l call_pps proc_points pp_map
+                               (CmmProc h l g)
        dumps Opt_D_dump_cmmz_split "Post splitting" gs
 
        ------------- Populate info tables with stack info -----------------
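The CmmLayoutStack hunk above routes the return address loaded from Sp through entryCode before jumping to it. Below is a minimal sketch of what that accomplishes, using a toy expression type rather than GHC's CmmExpr; this is an illustration of the idea, not the real StgCmmLayout definition:

    -- The word on the stack is an info-table pointer.  With
    -- tables-next-to-code it is also the entry-code address; without
    -- it, the entry address must be fetched through the info table.
    data Expr = Reg String | Load Expr
      deriving Show

    entryCode :: Bool -> Expr -> Expr
    entryCode tablesNextToCode e
      | tablesNextToCode = e        -- info pointer doubles as code pointer
      | otherwise        = Load e   -- indirect through the info table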
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index 58f2e54ffa..471faf8b0c 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -11,6 +11,7 @@ where
 
 import Prelude hiding (last, unzip, succ, zip)
 
+import DynFlags
 import BlockId
 import CLabel
 import Cmm
@@ -26,8 +27,6 @@ import UniqSupply
 
 import Hoopl
 
-import qualified Data.Map as Map
-
 -- Compute a minimal set of proc points for a control-flow graph.
 
 -- Determine a protocol for each proc point (which live variables will
@@ -207,9 +206,9 @@ extendPPSet platform g blocks procPoints =
 -- Input invariant: A block should only be reachable from a single ProcPoint.
 -- ToDo: use the _ret naming convention that the old code generator
 -- used. -- EZY
-splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
+splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
                      CmmDecl -> UniqSM [CmmDecl]
-splitAtProcPoints entry_label callPPs procPoints procMap
+splitAtProcPoints dflags entry_label callPPs procPoints procMap
                   (CmmProc (TopInfo {info_tbls = info_tbls})
                            top_l g@(CmmGraph {g_entry=entry})) = do
   -- Build a map from procpoints to the blocks they reach
@@ -234,12 +233,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap
   --  * Labels for the info tables of their new procedures (only if
   --    the proc point is a callPP)
   -- Due to common blockification, we may overestimate the set of procpoints.
-  let add_label map pp = Map.insert pp lbls map
+  let add_label map pp = mapInsert pp lbls map
         where lbls | pp == entry = (entry_label, Just (toInfoLbl entry_label))
                    | otherwise   = (blockLbl pp, guard (setMember pp callPPs) >>
                                                  Just (infoTblLbl pp))
-      procLabels = foldl add_label Map.empty
+
+      procLabels :: LabelMap (CLabel, Maybe CLabel)
+      procLabels = foldl add_label mapEmpty
                          (filter (flip mapMember (toBlockMap g))
                                  (setElems procPoints))
+
   -- In each new graph, add blocks jumping off to the new procedures,
   -- and replace branches to procpoints with branches to the jump-off blocks
   let add_jump_block (env, bs) (pp, l) =
@@ -259,8 +261,17 @@ splitAtProcPoints entry_label callPPs procPoints procMap
             CmmCondBranch _ ti fi -> add_if_pp ti (add_if_pp fi rst)
             CmmSwitch _ tbl       -> foldr add_if_pp rst (catMaybes tbl)
             _                     -> rst
-          add_if_pp id rst = case Map.lookup id procLabels of
-                               Just (lbl, mb_info_lbl) -> (id, mb_info_lbl `orElse` lbl) : rst
+
+          -- when jumping to a PP that has an info table, if
+          -- tablesNextToCode is off we must jump to the entry
+          -- label instead.
+          jump_label (Just info_lbl) _
+             | tablesNextToCode dflags = info_lbl
+             | otherwise               = toEntryLbl info_lbl
+          jump_label Nothing block_lbl = block_lbl
+
+          add_if_pp id rst = case mapLookup id procLabels of
+                               Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst
                                Nothing -> rst
       (jumpEnv, jumpBlocks) <-
          foldM add_jump_block (mapEmpty, []) needed_jumps
@@ -274,8 +285,10 @@ splitAtProcPoints entry_label callPPs procPoints procMap
           let g' = ofBlockMap ppId blockEnv'''
           -- pprTrace "g' pre jumps" (ppr g') $ do
           return (mapInsert ppId g' newGraphEnv)
+
   graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv
-  let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of
+
+  let to_proc (bid, g) = case expectJust "pp label" $ mapLookup bid procLabels of
         (lbl, Just info_lbl)
           | bid == entry
           -> CmmProc (TopInfo {info_tbls=info_tbls, stack_info=stack_info})
@@ -295,7 +308,7 @@ splitAtProcPoints entry_label callPPs procPoints procMap
       replacePPIds g = {-# SCC "replacePPIds" #-}
                        mapGraphNodes (id, mapExp repl, mapExp repl) g
         where repl e@(CmmLit (CmmBlock bid)) =
-                case Map.lookup bid procLabels of
+                case mapLookup bid procLabels of
                   Just (_, Just info_lbl)  -> CmmLit (CmmLabel info_lbl)
                   _ -> e
               repl e = e
@@ -312,7 +325,7 @@ splitAtProcPoints entry_label callPPs procPoints procMap
   return -- pprTrace "procLabels" (ppr procLabels)
          -- pprTrace "splitting graphs" (ppr procs)
         procs
-splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t]
+splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t]
 
 -- Only called from CmmProcPoint.splitAtProcPoints. NB. does a
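The new jump_label helper above makes the same tables-next-to-code distinction when branching to a proc point. A standalone sketch with plain strings for labels (hypothetical names, not GHC's CLabel API):

    -- Pick the label to branch to for a proc point, given whether it
    -- has an info table and whether info tables sit next to the code.
    jumpLabel :: Bool          -- tablesNextToCode?
              -> Maybe String  -- info-table label, if the PP has one
              -> String        -- plain block label
              -> String
    jumpLabel tnc (Just info_lbl) _
      | tnc       = info_lbl             -- code sits right after the table
      | otherwise = info_lbl ++ "_entry" -- jump to the separate entry label
    jumpLabel _ Nothing block_lbl = block_lbl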
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 3233dbed8c..4ba82cd8f8 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -11,7 +11,7 @@ module MkGraph
   , mkJumpReturnsTo
   , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
   , mkCbranch, mkSwitch
-  , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
+  , mkReturn, mkComment, mkCallEntry, mkBranch
   , copyInOflow, copyOutOflow
   , noExtraStack
   , toCall, Transfer(..)
@@ -69,34 +69,38 @@ flattenCmmAGraph id stmts =
     CmmGraph { g_entry = id,
                g_graph = GMany NothingO body NothingO }
  where
-  blocks = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry id) emptyBlock) []
-  body = foldr addBlock emptyBody blocks
+  body = foldr addBlock emptyBody $ flatten id stmts []
 
   --
-  -- flatten: turn a list of CgStmt into a list of Blocks.  We know
-  -- that any code before the first label is unreachable, so just drop
-  -- it.
+  -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
   --
   -- NB. avoid the quadratic-append trap by passing in the tail of the
   -- list.  This is important for Very Long Functions (e.g. in T783).
   --
-  flatten :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
-  flatten [] blocks = blocks
+  flatten :: Label -> CmmAGraph -> [Block CmmNode C C] -> [Block CmmNode C C]
+  flatten id g blocks
+      = flatten1 (fromOL g) (blockJoinHead (CmmEntry id) emptyBlock) blocks
 
-  flatten (CgLabel id : stmts) blocks
+  --
+  -- flatten0: we are outside a block at this point: any code before
+  -- the first label is unreachable, so just drop it.
+  --
+  flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
+  flatten0 [] blocks = blocks
+
+  flatten0 (CgLabel id : stmts) blocks
     = flatten1 stmts block blocks
     where !block = blockJoinHead (CmmEntry id) emptyBlock
 
-  flatten (CgFork fork_id stmts : rest) blocks
-    = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry fork_id) emptyBlock) $
-      flatten rest blocks
+  flatten0 (CgFork fork_id stmts : rest) blocks
+    = flatten fork_id stmts $ flatten0 rest blocks
 
-  flatten (CgLast _ : stmts) blocks = flatten stmts blocks
-  flatten (CgStmt _ : stmts) blocks = flatten stmts blocks
+  flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
+  flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
 
   --
   -- flatten1: we have a partial block, collect statements until the
-  -- next last node to make a block, then call flatten to get the rest
+  -- next last node to make a block, then call flatten0 to get the rest
   -- of the blocks
   --
   flatten1 :: [CgStmt] -> Block CmmNode C O
@@ -112,7 +116,7 @@ flattenCmmAGraph id stmts =
     = blockJoinTail block (CmmBranch (entryLabel block)) : blocks
 
   flatten1 (CgLast stmt : stmts) block blocks
-    = block' : flatten stmts blocks
+    = block' : flatten0 stmts blocks
     where !block' = blockJoinTail block stmt
 
   flatten1 (CgStmt stmt : stmts) block blocks
@@ -120,8 +124,7 @@ flattenCmmAGraph id stmts =
     where !block' = blockSnoc block stmt
 
   flatten1 (CgFork fork_id stmts : rest) block blocks
-    = flatten1 (fromOL stmts) (blockJoinHead (CmmEntry fork_id) emptyBlock) $
-      flatten1 rest block blocks
+    = flatten fork_id stmts $ flatten1 rest block blocks
 
   -- a label here means that we should start a new block, and the
   -- current block should fall through to the new block.
@@ -228,11 +231,6 @@ mkReturn dflags e actuals updfr_off =
   lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
     toCall e Nothing updfr_off 0
 
-mkReturnSimple  :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkReturnSimple dflags actuals updfr_off =
-  mkReturn dflags e actuals updfr_off
-  where e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
-
 mkBranch        :: BlockId -> CmmAGraph
 mkBranch bid     = mkLast (CmmBranch bid)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index fa80edc96c..75d8d1c38f 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -85,7 +85,9 @@ emitReturn results
       ; case sequel of
           Return _ ->
             do { adjustHpBackwards
-               ; emit (mkReturnSimple dflags results updfr_off) }
+               ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
+               ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
+               }
           AssignTo regs adjust ->
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
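The flatten0/flatten1 pair above is a small two-state machine: flatten0 runs between blocks and drops unreachable code until it sees a label; flatten1 accumulates statements into the open block until a control-transfer node closes it. A self-contained toy version over strings (not GHC's Block/CmmNode types; a simplification in that an unterminated trailing block is just closed, where the real code branches back to its own entry). It threads the block-list tail through, the same trick the comment above uses to avoid the quadratic-append trap:

    data Stmt = Label String | Mid String | Last String

    flatten0 :: [Stmt] -> [[String]] -> [[String]]
    flatten0 []               blocks = blocks
    flatten0 (Label l : rest) blocks = flatten1 rest [l] blocks
    flatten0 (_       : rest) blocks = flatten0 rest blocks  -- unreachable; drop

    flatten1 :: [Stmt] -> [String] -> [[String]] -> [[String]]
    flatten1 []                acc blocks = reverse acc : blocks
    flatten1 (Last s  : rest)  acc blocks = reverse (s : acc) : flatten0 rest blocks
    flatten1 (Mid s   : rest)  acc blocks = flatten1 rest (s : acc) blocks
    flatten1 (Label l : rest)  acc blocks =
      -- a label mid-block: close the current block with a fall-through
      reverse (("goto " ++ l) : acc) : flatten1 rest [l] blocks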
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 8c608f1bf1..870d285390 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -133,16 +133,17 @@ The machine-dependent bits break down as follows:
 data NcgImpl statics instr jumpDest = NcgImpl {
     cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
-    generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr),
+    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
     getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
     canShortcut               :: instr -> Maybe jumpDest,
     shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
     shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
     pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
-    maxSpillSlots             :: DynFlags -> Int,
-    allocatableRegs           :: Platform -> [RealReg],
+    maxSpillSlots             :: Int,
+    allocatableRegs           :: [RealReg],
     ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
     ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
+    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
     ncgMakeFarBranches        :: [NatBasicBlock instr] -> [NatBasicBlock instr]
     }
@@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms
       nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
       x86NcgImpl = NcgImpl {
                         cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
-                       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
+                       ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
                        ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
                        ,canShortcut               = X86.Instr.canShortcut
                        ,shortcutStatics           = X86.Instr.shortcutStatics
                        ,shortcutJump              = X86.Instr.shortcutJump
                        ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
-                       ,maxSpillSlots             = X86.Instr.maxSpillSlots
-                       ,allocatableRegs           = X86.Regs.allocatableRegs
+                       ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
+                       ,allocatableRegs           = X86.Regs.allocatableRegs platform
                        ,ncg_x86fp_kludge          = id
+                       ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
                        ,ncgExpandTop              = id
                        ,ncgMakeFarBranches        = id
                    }
@@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms
                  ArchPPC ->
                      nCG' $ NcgImpl {
                          cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
-                         ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
+                         ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
                          ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
                          ,canShortcut               = PPC.RegInfo.canShortcut
                          ,shortcutStatics           = PPC.RegInfo.shortcutStatics
                          ,shortcutJump              = PPC.RegInfo.shortcutJump
                          ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
-                         ,maxSpillSlots             = PPC.Instr.maxSpillSlots
-                         ,allocatableRegs           = PPC.Regs.allocatableRegs
+                         ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
+                         ,allocatableRegs           = PPC.Regs.allocatableRegs platform
                          ,ncg_x86fp_kludge          = id
+                         ,ncgAllocMoreStack         = noAllocMoreStack
                          ,ncgExpandTop              = id
                          ,ncgMakeFarBranches        = makeFarBranches
                      }
                  ArchSPARC ->
                      nCG' $ NcgImpl {
                          cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
-                         ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
+                         ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
                          ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
                          ,canShortcut               = SPARC.ShortcutJump.canShortcut
                          ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
                          ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
                          ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
-                         ,maxSpillSlots             = SPARC.Instr.maxSpillSlots
-                         ,allocatableRegs           = \_ -> SPARC.Regs.allocatableRegs
+                         ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
+                         ,allocatableRegs           = SPARC.Regs.allocatableRegs
                          ,ncg_x86fp_kludge          = id
+                         ,ncgAllocMoreStack         = noAllocMoreStack
                          ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
                          ,ncgMakeFarBranches        = id
                      }
@@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms
                  ArchUnknown ->
                      panic "nativeCodeGen: No NCG for unknown arch"
 
+
+--
+-- Allocating more stack space for spilling is currently only
+-- supported for the linear register allocator on x86/x86_64, the rest
+-- default to the panic below.  To support allocating extra stack on
+-- more platforms provide a definition of ncgAllocMoreStack.
+--
+noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+noAllocMoreStack amount _
+  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
+          ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
+          ++  "   is a known limitation in the linear allocator.\n"
+          ++  "\n"
+          ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
+          ++  "   You can still file a bug report if you like.\n"
+
+
 nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
                => DynFlags
                -> NcgImpl statics instr jumpDest
@@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count
                 = foldr (\r -> plusUFM_C unionUniqSets
                                         $ unitUFM (targetClassOfRealReg platform r)
                                                   (unitUniqSet r))
                         emptyUFM
-                        $ allocatableRegs ncgImpl platform
+                        $ allocatableRegs ncgImpl
 
         -- do the graph coloring register allocation
         let ((alloced, regAllocStats), usAlloc)
@@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count
                 $ Color.regAlloc
                         dflags
                         alloc_regs
-                        (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags])
+                        (mkUniqSet [0 .. maxSpillSlots ncgImpl])
                         withLiveness
 
         -- dump out what happened during register allocation
@@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count
          else do
                -- do linear register allocation
+               let reg_alloc proc = do
+                       (alloced, maybe_more_stack, ra_stats) <-
+                               Linear.regAlloc dflags proc
+                       case maybe_more_stack of
+                         Nothing -> return ( alloced, ra_stats )
+                         Just amount ->
+                             return ( ncgAllocMoreStack ncgImpl amount alloced
+                                    , ra_stats )
+
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
-                         $ mapM (Linear.regAlloc dflags) withLiveness
+                         $ mapM reg_alloc withLiveness
 
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
@@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count
         ---- generate jump tables
         let tabled      =
                 {-# SCC "generateJumpTables" #-}
-                generateJumpTables dflags ncgImpl kludged
+                generateJumpTables ncgImpl kludged
 
         ---- shortcut branches
         let shorted     =
@@ -711,12 +741,12 @@ makeFarBranches blocks
 -- Analyzes all native code and generates data sections for all jump
 -- table instructions.
 generateJumpTables
-        :: DynFlags -> NcgImpl statics instr jumpDest
-        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-generateJumpTables dflags ncgImpl xs = concatMap f xs
+        :: NcgImpl statics instr jumpDest
+        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+generateJumpTables ncgImpl xs = concatMap f xs
     where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
           f p = [p]
-          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs)
+          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
 
 -- -----------------------------------------------------------------------------
 -- Shortcut branches
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 64ba32c6dc..86f5ae435d 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -163,3 +163,16 @@ class Instruction instr where
                 -> [instr]
 
+        -- Subtract an amount from the C stack pointer
+        mkStackAllocInstr
+                :: Platform  -- TODO: remove (needed by x86/x86_64
+                             -- because they share an Instr type)
+                -> Int
+                -> instr
+
+        -- Add an amount to the C stack pointer
+        mkStackDeallocInstr
+                :: Platform  -- TODO: remove (needed by x86/x86_64
+                             -- because they share an Instr type)
+                -> Int
+                -> instr
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 464a88a08b..1f5e809abb 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -64,6 +64,8 @@ instance Instruction Instr where
         mkRegRegMoveInstr _     = ppc_mkRegRegMoveInstr
         takeRegRegMoveInstr     = ppc_takeRegRegMoveInstr
         mkJumpInstr             = ppc_mkJumpInstr
+        mkStackAllocInstr       = panic "no ppc_mkStackAllocInstr"
+        mkStackDeallocInstr     = panic "no ppc_mkStackDeallocInstr"
 
 
 -- -----------------------------------------------------------------------------
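The shape of the new linear-allocator path above: the allocator now also reports how many spill slots beyond the pre-allocated area a proc needed, and the backend-specific ncgAllocMoreStack hook rewrites the proc (or panics, via noAllocMoreStack). A toy sketch of that plumbing, with hypothetical types; the real driver is the reg_alloc wrapper in cmmNativeGen above:

    type Proc = String

    newtype Ncg = Ncg { ncgAllocMoreStack :: Int -> Proc -> Proc }

    -- Pass the proc through untouched if the reserved stack sufficed,
    -- otherwise let the backend grow the C stack for this proc.
    runAlloc :: Ncg -> (Proc, Maybe Int) -> Proc
    runAlloc _   (proc, Nothing)   = proc
    runAlloc ncg (proc, Just need) = ncgAllocMoreStack ncg need proc

    -- Backends without support supply a hook that fails, mirroring
    -- noAllocMoreStack above.
    noMoreStack :: Int -> Proc -> Proc
    noMoreStack amount _ =
      error ("Register allocator: out of stack slots (need "
             ++ show amount ++ ")")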
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 3f92ed975b..a15bca07e7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -139,22 +139,27 @@ regAlloc
         :: (Outputable instr, Instruction instr)
         => DynFlags
         -> LiveCmmDecl statics instr
-        -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats)
+        -> UniqSM ( NatCmmDecl statics instr
+                  , Maybe Int  -- number of extra stack slots required,
+                               -- beyond maxSpillSlots
+                  , Maybe RegAllocStats)
 
 regAlloc _ (CmmData sec d)
         = return
                 ( CmmData sec d
+                , Nothing
                 , Nothing )
 
 regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl [])
         = return ( CmmProc info lbl (ListGraph [])
+                 , Nothing
                 , Nothing )
 
 regAlloc dflags (CmmProc static lbl sccs)
         | LiveInfo info (Just first_id) (Just block_live) _ <- static
         = do
                 -- do register allocation on each component.
-                (final_blocks, stats)
+                (final_blocks, stats, stack_use)
                         <- linearRegAlloc dflags first_id block_live sccs
 
                 -- make sure the block that was first in the input list
@@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs)
                 let ((first':_), rest')
                         = partition ((== first_id) . blockId) final_blocks
 
+                let max_spill_slots = maxSpillSlots dflags
+                    extra_stack
+                        | stack_use > max_spill_slots
+                        = Just (stack_use - max_spill_slots)
+                        | otherwise
+                        = Nothing
+
                 return  ( CmmProc info lbl (ListGraph (first' : rest'))
+                        , extra_stack
                         , Just stats)
 
 -- bogus. to make non-exhaustive match warning go away.
@@ -184,7 +197,7 @@ linearRegAlloc
         -> BlockId                      -- ^ the first block
         -> BlockMap RegSet              -- ^ live regs on entry to each basic block
         -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
-        -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+        -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
 
 linearRegAlloc dflags first_id block_live sccs
  = let platform = targetPlatform dflags
@@ -204,14 +217,14 @@ linearRegAlloc'
         -> BlockId                      -- ^ the first block
         -> BlockMap RegSet              -- ^ live regs on entry to each basic block
         -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
-        -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+        -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
 
 linearRegAlloc' dflags initFreeRegs first_id block_live sccs
  = do   us      <- getUs
-        let (_, _, stats, blocks) =
+        let (_, stack, stats, blocks) =
                 runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us
                         $ linearRA_SCCs first_id block_live [] sccs
-        return (blocks, stats)
+        return  (blocks, stats, getStackUse stack)
 
 linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index b1fc3c169e..69cf411751 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap (
         StackSlot,
         StackMap(..),
         emptyStackMap,
-        getStackSlotFor
+        getStackSlotFor,
+        getStackUse
 )
 where
 
-import RegAlloc.Linear.FreeRegs
-
 import DynFlags
-import Outputable
 import UniqFM
 import Unique
 
@@ -40,7 +38,7 @@ type StackSlot = Int
 data StackMap
         = StackMap
         { -- | The slots that are still available to be allocated.
-          stackMapFreeSlots     :: [StackSlot]
+          stackMapNextFreeSlot  :: !Int
 
         -- | Assignment of vregs to stack slots.
         , stackMapAssignment    :: UniqFM StackSlot }
 
@@ -48,7 +46,7 @@ data StackMap
 -- | An empty stack map, with all slots available.
 emptyStackMap :: DynFlags -> StackMap
-emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
+emptyStackMap _ = StackMap 0 emptyUFM
 
 
 -- | If this vreg unique already has a stack assignment then return the slot number,
@@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
 --
 getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
 
-getStackSlotFor (StackMap [] _) _
-
-        -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993
-        -- SHA1.lhs has also been added to the Crypto library on Hackage,
-        -- so we see this all the time.
-        --
-        -- It would be better to automatically invoke the graph allocator, or do something
-        -- else besides panicing, but that's a job for a different day.  -- BL 2009/02
-        --
-        = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n"
-        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
-        ++  "   is a known limitation in the linear allocator.\n"
-        ++  "\n"
-        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
-        ++  "   You can still file a bug report if you like.\n"
-
-getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
-    case lookupUFM reserved reg of
-        Just slot  -> (fs, slot)
-        Nothing    -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor fs@(StackMap _ reserved) reg
+  | Just slot <- lookupUFM reserved reg  = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) reg =
+    (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+
+-- | Return the number of stack slots that were allocated
+getStackUse :: StackMap -> Int
+getStackUse (StackMap freeSlot _) = freeSlot
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 85b8f969eb..608f0a423b 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -136,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where
         mkJumpInstr target      = map Instr (mkJumpInstr target)
 
+        mkStackAllocInstr platform amount =
+                Instr (mkStackAllocInstr platform amount)
+
+        mkStackDeallocInstr platform amount =
+                Instr (mkStackDeallocInstr platform amount)
 
 
 -- | An instruction with liveness information.
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9404badea6..f55c660118 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -108,6 +108,8 @@ instance Instruction Instr where
         mkRegRegMoveInstr       = sparc_mkRegRegMoveInstr
         takeRegRegMoveInstr     = sparc_takeRegRegMoveInstr
         mkJumpInstr             = sparc_mkJumpInstr
+        mkStackAllocInstr       = panic "no sparc_mkStackAllocInstr"
+        mkStackDeallocInstr     = panic "no sparc_mkStackDeallocInstr"
 
 
 -- | SPARC instruction set.
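The StackMap rework above replaces a finite free list, which could run dry and panic, with a plain high-water counter; exhausting the pre-allocated slots is now detected after the fact by comparing getStackUse against maxSpillSlots. A runnable miniature of the same structure, with Data.Map standing in for GHC's UniqFM:

    import qualified Data.Map as M

    data StackMap = StackMap { nextFreeSlot :: !Int
                             , assignment   :: M.Map String Int }

    emptyStackMap :: StackMap
    emptyStackMap = StackMap 0 M.empty

    -- Reuse an existing assignment, or hand out the next slot and
    -- bump the counter.
    getStackSlotFor :: StackMap -> String -> (StackMap, Int)
    getStackSlotFor sm@(StackMap next assig) reg =
      case M.lookup reg assig of
        Just slot -> (sm, slot)
        Nothing   -> (StackMap (next + 1) (M.insert reg next assig), next)

    -- The high-water mark the caller compares against maxSpillSlots.
    getStackUse :: StackMap -> Int
    getStackUse = nextFreeSlot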
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7f0e48e769..7bd9b0cc9e 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -11,7 +11,7 @@ module X86.Instr (Instr(..), Operand(..),
                   getJumpDestBlockId, canShortcut, shortcutStatics,
-                  shortcutJump, i386_insert_ffrees,
+                  shortcutJump, i386_insert_ffrees, allocMoreStack,
                   maxSpillSlots, archWordSize)
 where
@@ -58,6 +58,8 @@ instance Instruction Instr where
         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
         mkJumpInstr             = x86_mkJumpInstr
+        mkStackAllocInstr       = x86_mkStackAllocInstr
+        mkStackDeallocInstr     = x86_mkStackDeallocInstr
 
 
 -- -----------------------------------------------------------------------------
@@ -620,14 +622,13 @@ x86_mkSpillInstr
         -> Instr
 
 x86_mkSpillInstr dflags reg delta slot
-  = let off     = spillSlotToOffset dflags slot
+  = let off     = spillSlotToOffset dflags slot - delta
     in
-    let off_w = (off - delta) `div` (if is32Bit then 4 else 8)
-    in case targetClassOfReg platform reg of
+    case targetClassOfReg platform reg of
            RcInteger   -> MOV (archWordSize is32Bit)
-                              (OpReg reg) (OpAddr (spRel dflags off_w))
-           RcDouble    -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -}
-           RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w))
+                              (OpReg reg) (OpAddr (spRel dflags off))
+           RcDouble    -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
+           RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
           _           -> panic "X86.mkSpillInstr: no match"
     where platform = targetPlatform dflags
           is32Bit = target32Bit platform
@@ -641,14 +642,13 @@ x86_mkLoadInstr
         -> Instr
 
 x86_mkLoadInstr dflags reg delta slot
-  = let off     = spillSlotToOffset dflags slot
+  = let off     = spillSlotToOffset dflags slot - delta
     in
-        let off_w = (off-delta) `div` (if is32Bit then 4 else 8)
-        in case targetClassOfReg platform reg of
+        case targetClassOfReg platform reg of
               RcInteger -> MOV (archWordSize is32Bit)
-                               (OpAddr (spRel dflags off_w)) (OpReg reg)
-              RcDouble  -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -}
-              RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg)
+                               (OpAddr (spRel dflags off)) (OpReg reg)
+              RcDouble  -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
+              RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
               _           -> panic "X86.x86_mkLoadInstr"
     where platform = targetPlatform dflags
           is32Bit = target32Bit platform
@@ -666,12 +666,7 @@ maxSpillSlots dflags
 -- the C stack pointer.
 spillSlotToOffset :: DynFlags -> Int -> Int
 spillSlotToOffset dflags slot
-   | slot >= 0  &&  slot < maxSpillSlots dflags
    = 64 + spillSlotSize dflags * slot
-   | otherwise
-   = pprPanic "spillSlotToOffset:"
-              (   text "invalid spill location: " <> int slot
-              $$  text "maxSpillSlots: "          <> int (maxSpillSlots dflags))
 
 --------------------------------------------------------------------------------
@@ -744,8 +739,25 @@
 x86_mkJumpInstr id
         = [JXX ALWAYS id]
 
-
-
+x86_mkStackAllocInstr
+        :: Platform
+        -> Int
+        -> Instr
+x86_mkStackAllocInstr platform amount
+  = case platformArch platform of
+      ArchX86    -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+      ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+      _ -> panic "x86_mkStackAllocInstr"
+
+x86_mkStackDeallocInstr
+        :: Platform
+        -> Int
+        -> Instr
+x86_mkStackDeallocInstr platform amount
+  = case platformArch platform of
+      ArchX86    -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
+      ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+      _ -> panic "x86_mkStackDeallocInstr"
 
 i386_insert_ffrees
         :: [GenBasicBlock Instr]
@@ -753,18 +765,12 @@ i386_insert_ffrees
 i386_insert_ffrees blocks
    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
-   = map ffree_before_nonlocal_transfers blocks
-
+   = map insertGFREEs blocks
    | otherwise
    = blocks
-  where
-    ffree_before_nonlocal_transfers (BasicBlock id insns)
-     = BasicBlock id (foldr p [] insns)
-     where p insn r = case insn of
-                        CALL _ _    -> GFREE : insn : r
-                        JMP _ _     -> GFREE : insn : r
-                        JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
-                        _           -> insn : r
+  where
+    insertGFREEs (BasicBlock id insns)
+     = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)
 
 -- if you ever add a new FP insn to the fake x86 FP insn set,
 -- you must update this too
@@ -796,6 +802,57 @@ is_G_instr instr
         _ -> False
 
 
+--
+-- Note [extra spill slots]
+--
+-- If the register allocator used more spill slots than we have
+-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
+-- C stack space on entry and exit from this proc.  Therefore we
+-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
+-- before every non-local jump.
+--
+-- This became necessary when the new codegen started bundling entire
+-- functions together into one proc, because the register allocator
+-- assigns a different stack slot to each virtual reg within a proc.
+-- To avoid using so many slots we could also:
+--
+--   - split up the proc into connected components before code generator
+--
+--   - rename the virtual regs, so that we re-use vreg names and hence
+--     stack slots for non-overlapping vregs.
+--
+allocMoreStack
+  :: Platform
+  -> Int
+  -> NatCmmDecl statics X86.Instr.Instr
+  -> NatCmmDecl statics X86.Instr.Instr
+
+allocMoreStack _ _ top@(CmmData _ _) = top
+allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) =
+        CmmProc info lbl (ListGraph (map insert_stack_insns code))
+  where
+    alloc   = mkStackAllocInstr   platform amount
+    dealloc = mkStackDeallocInstr platform amount
+
+    is_entry_point id = id `mapMember` info
+
+    insert_stack_insns (BasicBlock id insns)
+       | is_entry_point id  = BasicBlock id (alloc : block')
+       | otherwise          = BasicBlock id block'
+       where
+         block' = insertBeforeNonlocalTransfers dealloc insns
+
+
+insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
+insertBeforeNonlocalTransfers insert insns
+     = foldr p [] insns
+     where p insn r = case insn of
+                        CALL _ _    -> insert : insn : r
+                        JMP _ _     -> insert : insn : r
+                        JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
+                        _           -> insn : r
+
+
 data JumpDest = DestBlockId BlockId | DestImm Imm
 
 getJumpDestBlockId :: JumpDest -> Maybe BlockId
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 4eec96f5e1..6b2fe16855 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -196,13 +196,13 @@ addrModeRegs _ = []
 
 spRel :: DynFlags
-      -> Int -- ^ desired stack offset in words, positive or negative
+      -> Int -- ^ desired stack offset in bytes, positive or negative
       -> AddrMode
 
 spRel dflags n
  | target32Bit (targetPlatform dflags)
-    = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+    = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n)
  | otherwise
-    = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+    = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n)
 
 -- The register numbers must fit into 32 bits on x86, so that we can
 -- use a Word32 to represent the set of free registers in the register
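To make Note [extra spill slots] concrete: a toy rendering of the entry/exit rewrite over a miniature instruction type (not the real X86 Instr; allocMoreStack above is the actual implementation). Since spRel now takes a byte offset, the sub/add amounts are plain byte counts:

    data Insn = Sub Int | Add Int | Call String | Jmp String | Op String
      deriving Show

    -- Place an instruction before every non-local transfer, as
    -- insertBeforeNonlocalTransfers does above.
    insertBeforeNonlocalXfers :: Insn -> [Insn] -> [Insn]
    insertBeforeNonlocalXfers ins = foldr p []
      where p i r = case i of
                      Call _ -> ins : i : r
                      Jmp _  -> ins : i : r
                      _      -> i : r

    -- Grow the C stack in entry blocks, shrink it before every escape.
    allocMoreStackToy :: Int -> Bool -> [Insn] -> [Insn]
    allocMoreStackToy amount isEntry insns
      | isEntry   = Sub amount : body   -- "sub $N, %rsp" at the entry point
      | otherwise = body
      where body = insertBeforeNonlocalXfers (Add amount) insns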