diff options
author | Ian Lynagh <ian@well-typed.com> | 2012-09-20 18:05:01 +0100 |
---|---|---|
committer | Ian Lynagh <ian@well-typed.com> | 2012-09-20 18:05:01 +0100 |
commit | b92c76ec5703a216b0d5553e037da6f66932a82e (patch) | |
tree | d6276b2196a9e0abd77855a919e7ea710aef2768 /compiler/nativeGen | |
parent | 85a8f79f70cb9d94c9fca9e03ae98f596be8a48c (diff) | |
parent | a9109703c5994a0de97236184672095d4605ae7d (diff) | |
download | haskell-b92c76ec5703a216b0d5553e037da6f66932a82e.tar.gz |
Merge branch 'master' of darcs.haskell.org:/srv/darcs//ghc
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/AsmCodeGen.lhs | 70 | ||||
-rw-r--r-- | compiler/nativeGen/Instruction.hs | 13 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Linear/Main.hs | 25 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Linear/StackMap.hs | 39 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Liveness.hs | 5 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Instr.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 117 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Regs.hs | 6 |
9 files changed, 194 insertions, 85 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 8c608f1bf1..870d285390 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -133,16 +133,17 @@ The machine-dependent bits break down as follows: data NcgImpl statics instr jumpDest = NcgImpl { cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr], - generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr), + generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr), getJumpDestBlockId :: jumpDest -> Maybe BlockId, canShortcut :: instr -> Maybe jumpDest, shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics, shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr, pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc, - maxSpillSlots :: DynFlags -> Int, - allocatableRegs :: Platform -> [RealReg], + maxSpillSlots :: Int, + allocatableRegs :: [RealReg], ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], + ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr, ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr] } @@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms x86NcgImpl = NcgImpl { cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId ,canShortcut = X86.Instr.canShortcut ,shortcutStatics = X86.Instr.shortcutStatics ,shortcutJump = X86.Instr.shortcutJump ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl - ,maxSpillSlots = X86.Instr.maxSpillSlots - ,allocatableRegs = X86.Regs.allocatableRegs + ,maxSpillSlots = X86.Instr.maxSpillSlots dflags + ,allocatableRegs = X86.Regs.allocatableRegs platform ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform ,ncgExpandTop = id ,ncgMakeFarBranches = id } @@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms ArchPPC -> nCG' $ NcgImpl { cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId ,canShortcut = PPC.RegInfo.canShortcut ,shortcutStatics = PPC.RegInfo.shortcutStatics ,shortcutJump = PPC.RegInfo.shortcutJump ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl - ,maxSpillSlots = PPC.Instr.maxSpillSlots - ,allocatableRegs = PPC.Regs.allocatableRegs + ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags + ,allocatableRegs = PPC.Regs.allocatableRegs platform ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = noAllocMoreStack ,ncgExpandTop = id ,ncgMakeFarBranches = makeFarBranches } ArchSPARC -> nCG' $ NcgImpl { cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId ,canShortcut = SPARC.ShortcutJump.canShortcut ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics ,shortcutJump = SPARC.ShortcutJump.shortcutJump ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl - ,maxSpillSlots = SPARC.Instr.maxSpillSlots - ,allocatableRegs = \_ -> SPARC.Regs.allocatableRegs + ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags + ,allocatableRegs = SPARC.Regs.allocatableRegs ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = noAllocMoreStack ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop ,ncgMakeFarBranches = id } @@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" + +-- +-- Allocating more stack space for spilling is currently only +-- supported for the linear register allocator on x86/x86_64, the rest +-- default to the panic below. To support allocating extra stack on +-- more platforms provide a definition of ncgAllocMoreStack. +-- +noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr +noAllocMoreStack amount _ + = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n" + ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" + ++ " is a known limitation in the linear allocator.\n" + ++ "\n" + ++ " Try enabling the graph colouring allocator with -fregs-graph instead." + ++ " You can still file a bug report if you like.\n" + + nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr) => DynFlags -> NcgImpl statics instr jumpDest @@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count = foldr (\r -> plusUFM_C unionUniqSets $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r)) emptyUFM - $ allocatableRegs ncgImpl platform + $ allocatableRegs ncgImpl -- do the graph coloring register allocation let ((alloced, regAllocStats), usAlloc) @@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count $ Color.regAlloc dflags alloc_regs - (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags]) + (mkUniqSet [0 .. maxSpillSlots ncgImpl]) withLiveness -- dump out what happened during register allocation @@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count else do -- do linear register allocation + let reg_alloc proc = do + (alloced, maybe_more_stack, ra_stats) <- + Linear.regAlloc dflags proc + case maybe_more_stack of + Nothing -> return ( alloced, ra_stats ) + Just amount -> + return ( ncgAllocMoreStack ncgImpl amount alloced + , ra_stats ) + let ((alloced, regAllocStats), usAlloc) = {-# SCC "RegAlloc" #-} initUs usLive $ liftM unzip - $ mapM (Linear.regAlloc dflags) withLiveness + $ mapM reg_alloc withLiveness dumpIfSet_dyn dflags Opt_D_dump_asm_regalloc "Registers allocated" @@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count ---- generate jump tables let tabled = {-# SCC "generateJumpTables" #-} - generateJumpTables dflags ncgImpl kludged + generateJumpTables ncgImpl kludged ---- shortcut branches let shorted = @@ -711,12 +741,12 @@ makeFarBranches blocks -- Analyzes all native code and generates data sections for all jump -- table instructions. generateJumpTables - :: DynFlags -> NcgImpl statics instr jumpDest - -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] -generateJumpTables dflags ncgImpl xs = concatMap f xs + :: NcgImpl statics instr jumpDest + -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] +generateJumpTables ncgImpl xs = concatMap f xs where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs f p = [p] - g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs) + g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs) -- ----------------------------------------------------------------------------- -- Shortcut branches diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs index 64ba32c6dc..86f5ae435d 100644 --- a/compiler/nativeGen/Instruction.hs +++ b/compiler/nativeGen/Instruction.hs @@ -163,3 +163,16 @@ class Instruction instr where -> [instr] + -- Subtract an amount from the C stack pointer + mkStackAllocInstr + :: Platform -- TODO: remove (needed by x86/x86_64 + -- because they share an Instr type) + -> Int + -> instr + + -- Add an amount to the C stack pointer + mkStackDeallocInstr + :: Platform -- TODO: remove (needed by x86/x86_64 + -- because they share an Instr type) + -> Int + -> instr diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 464a88a08b..1f5e809abb 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -64,6 +64,8 @@ instance Instruction Instr where mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr takeRegRegMoveInstr = ppc_takeRegRegMoveInstr mkJumpInstr = ppc_mkJumpInstr + mkStackAllocInstr = panic "no ppc_mkStackAllocInstr" + mkStackDeallocInstr = panic "no ppc_mkStackDeallocInstr" -- ----------------------------------------------------------------------------- diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index 3f92ed975b..a15bca07e7 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -139,22 +139,27 @@ regAlloc :: (Outputable instr, Instruction instr) => DynFlags -> LiveCmmDecl statics instr - -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats) + -> UniqSM ( NatCmmDecl statics instr + , Maybe Int -- number of extra stack slots required, + -- beyond maxSpillSlots + , Maybe RegAllocStats) regAlloc _ (CmmData sec d) = return ( CmmData sec d + , Nothing , Nothing ) regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl []) = return ( CmmProc info lbl (ListGraph []) + , Nothing , Nothing ) regAlloc dflags (CmmProc static lbl sccs) | LiveInfo info (Just first_id) (Just block_live) _ <- static = do -- do register allocation on each component. - (final_blocks, stats) + (final_blocks, stats, stack_use) <- linearRegAlloc dflags first_id block_live sccs -- make sure the block that was first in the input list @@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs) let ((first':_), rest') = partition ((== first_id) . blockId) final_blocks + let max_spill_slots = maxSpillSlots dflags + extra_stack + | stack_use > max_spill_slots + = Just (stack_use - max_spill_slots) + | otherwise + = Nothing + return ( CmmProc info lbl (ListGraph (first' : rest')) + , extra_stack , Just stats) -- bogus. to make non-exhaustive match warning go away. @@ -184,7 +197,7 @@ linearRegAlloc -> BlockId -- ^ the first block -> BlockMap RegSet -- ^ live regs on entry to each basic block -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats) + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) linearRegAlloc dflags first_id block_live sccs = let platform = targetPlatform dflags @@ -204,14 +217,14 @@ linearRegAlloc' -> BlockId -- ^ the first block -> BlockMap RegSet -- ^ live regs on entry to each basic block -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats) + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) linearRegAlloc' dflags initFreeRegs first_id block_live sccs = do us <- getUs - let (_, _, stats, blocks) = + let (_, stack, stats, blocks) = runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us $ linearRA_SCCs first_id block_live [] sccs - return (blocks, stats) + return (blocks, stats, getStackUse stack) linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr) diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs index b1fc3c169e..69cf411751 100644 --- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs +++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs @@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap ( StackSlot, StackMap(..), emptyStackMap, - getStackSlotFor + getStackSlotFor, + getStackUse ) where -import RegAlloc.Linear.FreeRegs - import DynFlags -import Outputable import UniqFM import Unique @@ -40,7 +38,7 @@ type StackSlot = Int data StackMap = StackMap { -- | The slots that are still available to be allocated. - stackMapFreeSlots :: [StackSlot] + stackMapNextFreeSlot :: !Int -- | Assignment of vregs to stack slots. , stackMapAssignment :: UniqFM StackSlot } @@ -48,7 +46,7 @@ data StackMap -- | An empty stack map, with all slots available. emptyStackMap :: DynFlags -> StackMap -emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM +emptyStackMap _ = StackMap 0 emptyUFM -- | If this vreg unique already has a stack assignment then return the slot number, @@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM -- getStackSlotFor :: StackMap -> Unique -> (StackMap, Int) -getStackSlotFor (StackMap [] _) _ - - -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993 - -- SHA1.lhs has also been added to the Crypto library on Hackage, - -- so we see this all the time. - -- - -- It would be better to automatically invoke the graph allocator, or do something - -- else besides panicing, but that's a job for a different day. -- BL 2009/02 - -- - = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n" - ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" - ++ " is a known limitation in the linear allocator.\n" - ++ "\n" - ++ " Try enabling the graph colouring allocator with -fregs-graph instead." - ++ " You can still file a bug report if you like.\n" - -getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg = - case lookupUFM reserved reg of - Just slot -> (fs, slot) - Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot) +getStackSlotFor fs@(StackMap _ reserved) reg + | Just slot <- lookupUFM reserved reg = (fs, slot) + +getStackSlotFor (StackMap freeSlot reserved) reg = + (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot) + +-- | Return the number of stack slots that were allocated +getStackUse :: StackMap -> Int +getStackUse (StackMap freeSlot _) = freeSlot diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs index 85b8f969eb..608f0a423b 100644 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ b/compiler/nativeGen/RegAlloc/Liveness.hs @@ -136,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where mkJumpInstr target = map Instr (mkJumpInstr target) + mkStackAllocInstr platform amount = + Instr (mkStackAllocInstr platform amount) + + mkStackDeallocInstr platform amount = + Instr (mkStackDeallocInstr platform amount) -- | An instruction with liveness information. diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 9404badea6..f55c660118 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -108,6 +108,8 @@ instance Instruction Instr where mkRegRegMoveInstr = sparc_mkRegRegMoveInstr takeRegRegMoveInstr = sparc_takeRegRegMoveInstr mkJumpInstr = sparc_mkJumpInstr + mkStackAllocInstr = panic "no sparc_mkStackAllocInstr" + mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr" -- | SPARC instruction set. diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 7f0e48e769..7bd9b0cc9e 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -11,7 +11,7 @@ module X86.Instr (Instr(..), Operand(..), getJumpDestBlockId, canShortcut, shortcutStatics, - shortcutJump, i386_insert_ffrees, + shortcutJump, i386_insert_ffrees, allocMoreStack, maxSpillSlots, archWordSize) where @@ -58,6 +58,8 @@ instance Instruction Instr where mkRegRegMoveInstr = x86_mkRegRegMoveInstr takeRegRegMoveInstr = x86_takeRegRegMoveInstr mkJumpInstr = x86_mkJumpInstr + mkStackAllocInstr = x86_mkStackAllocInstr + mkStackDeallocInstr = x86_mkStackDeallocInstr -- ----------------------------------------------------------------------------- @@ -620,14 +622,13 @@ x86_mkSpillInstr -> Instr x86_mkSpillInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot + = let off = spillSlotToOffset dflags slot - delta in - let off_w = (off - delta) `div` (if is32Bit then 4 else 8) - in case targetClassOfReg platform reg of + case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) - (OpReg reg) (OpAddr (spRel dflags off_w)) - RcDouble -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w)) + (OpReg reg) (OpAddr (spRel dflags off)) + RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -} + RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off)) _ -> panic "X86.mkSpillInstr: no match" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -641,14 +642,13 @@ x86_mkLoadInstr -> Instr x86_mkLoadInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot + = let off = spillSlotToOffset dflags slot - delta in - let off_w = (off-delta) `div` (if is32Bit then 4 else 8) - in case targetClassOfReg platform reg of + case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) - (OpAddr (spRel dflags off_w)) (OpReg reg) - RcDouble -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg) + (OpAddr (spRel dflags off)) (OpReg reg) + RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -} + RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg) _ -> panic "X86.x86_mkLoadInstr" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -666,12 +666,7 @@ maxSpillSlots dflags -- the C stack pointer. spillSlotToOffset :: DynFlags -> Int -> Int spillSlotToOffset dflags slot - | slot >= 0 && slot < maxSpillSlots dflags = 64 + spillSlotSize dflags * slot - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags)) -------------------------------------------------------------------------------- @@ -744,8 +739,25 @@ x86_mkJumpInstr id = [JXX ALWAYS id] - - +x86_mkStackAllocInstr + :: Platform + -> Int + -> Instr +x86_mkStackAllocInstr platform amount + = case platformArch platform of + ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp) + ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) + _ -> panic "x86_mkStackAllocInstr" + +x86_mkStackDeallocInstr + :: Platform + -> Int + -> Instr +x86_mkStackDeallocInstr platform amount + = case platformArch platform of + ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp) + ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp) + _ -> panic "x86_mkStackDeallocInstr" i386_insert_ffrees :: [GenBasicBlock Instr] @@ -753,18 +765,12 @@ i386_insert_ffrees i386_insert_ffrees blocks | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]) - = map ffree_before_nonlocal_transfers blocks - + = map insertGFREEs blocks | otherwise = blocks - where - ffree_before_nonlocal_transfers (BasicBlock id insns) - = BasicBlock id (foldr p [] insns) - where p insn r = case insn of - CALL _ _ -> GFREE : insn : r - JMP _ _ -> GFREE : insn : r - JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL" - _ -> insn : r + where + insertGFREEs (BasicBlock id insns) + = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns) -- if you ever add a new FP insn to the fake x86 FP insn set, -- you must update this too @@ -796,6 +802,57 @@ is_G_instr instr _ -> False +-- +-- Note [extra spill slots] +-- +-- If the register allocator used more spill slots than we have +-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more +-- C stack space on entry and exit from this proc. Therefore we +-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp" +-- before every non-local jump. +-- +-- This became necessary when the new codegen started bundling entire +-- functions together into one proc, because the register allocator +-- assigns a different stack slot to each virtual reg within a proc. +-- To avoid using so many slots we could also: +-- +-- - split up the proc into connected components before code generator +-- +-- - rename the virtual regs, so that we re-use vreg names and hence +-- stack slots for non-overlapping vregs. +-- +allocMoreStack + :: Platform + -> Int + -> NatCmmDecl statics X86.Instr.Instr + -> NatCmmDecl statics X86.Instr.Instr + +allocMoreStack _ _ top@(CmmData _ _) = top +allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) = + CmmProc info lbl (ListGraph (map insert_stack_insns code)) + where + alloc = mkStackAllocInstr platform amount + dealloc = mkStackDeallocInstr platform amount + + is_entry_point id = id `mapMember` info + + insert_stack_insns (BasicBlock id insns) + | is_entry_point id = BasicBlock id (alloc : block') + | otherwise = BasicBlock id block' + where + block' = insertBeforeNonlocalTransfers dealloc insns + + +insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr] +insertBeforeNonlocalTransfers insert insns + = foldr p [] insns + where p insn r = case insn of + CALL _ _ -> insert : insn : r + JMP _ _ -> insert : insn : r + JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL" + _ -> insn : r + + data JumpDest = DestBlockId BlockId | DestImm Imm getJumpDestBlockId :: JumpDest -> Maybe BlockId diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 4eec96f5e1..6b2fe16855 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -196,13 +196,13 @@ addrModeRegs _ = [] spRel :: DynFlags - -> Int -- ^ desired stack offset in words, positive or negative + -> Int -- ^ desired stack offset in bytes, positive or negative -> AddrMode spRel dflags n | target32Bit (targetPlatform dflags) - = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags)) + = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n) | otherwise - = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags)) + = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n) -- The register numbers must fit into 32 bits on x86, so that we can -- use a Word32 to represent the set of free registers in the register |