summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/nativeGen/Instruction.hs10
-rw-r--r--compiler/nativeGen/PPC/Instr.hs16
-rw-r--r--compiler/nativeGen/RegAlloc/Liveness.hs4
-rw-r--r--compiler/nativeGen/X86/Instr.hs100
4 files changed, 103 insertions, 27 deletions
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 63b5b0df7e..0bd99fbee8 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -191,14 +191,12 @@ class Instruction instr where
-- Subtract an amount from the C stack pointer
mkStackAllocInstr
- :: Platform -- TODO: remove (needed by x86/x86_64
- -- because they share an Instr type)
+ :: Platform
-> Int
- -> instr
+ -> [instr]
-- Add an amount to the C stack pointer
mkStackDeallocInstr
- :: Platform -- TODO: remove (needed by x86/x86_64
- -- because they share an Instr type)
+ :: Platform
-> Int
- -> instr
+ -> [instr]
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index d21e7f8176..8eb5e8fa8d 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -77,19 +77,19 @@ instance Instruction Instr where
mkStackDeallocInstr = ppc_mkStackDeallocInstr
-ppc_mkStackAllocInstr :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr platform amount
= ppc_mkStackAllocInstr' platform (-amount)
-ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
+ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
ppc_mkStackDeallocInstr platform amount
= ppc_mkStackAllocInstr' platform amount
-ppc_mkStackAllocInstr' :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr' platform amount
= case platformArch platform of
- ArchPPC -> UPDATE_SP II32 (ImmInt amount)
- ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount)
+ ArchPPC -> [UPDATE_SP II32 (ImmInt amount)]
+ ArchPPC_64 _ -> [UPDATE_SP II64 (ImmInt amount)]
_ -> panic $ "ppc_mkStackAllocInstr' "
++ show (platformArch platform)
@@ -126,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, BCC ALWAYS new_blockid Nothing]
+ = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
, BasicBlock new_blockid block'
]
| otherwise
@@ -139,8 +139,8 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
-- "labeled-goto" we use JMP, and for "computed-goto" we
-- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
= case insn of
- JMP _ -> dealloc : insn : r
- BCTR [] Nothing -> dealloc : insn : r
+ JMP _ -> dealloc ++ (insn : r)
+ BCTR [] Nothing -> dealloc ++ (insn : r)
BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
BCC cond b p -> BCC cond (retarget b) p : r
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index 2a2990f6ce..9d93564317 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -147,10 +147,10 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkJumpInstr target = map Instr (mkJumpInstr target)
mkStackAllocInstr platform amount =
- Instr (mkStackAllocInstr platform amount)
+ Instr <$> mkStackAllocInstr platform amount
mkStackDeallocInstr platform amount =
- Instr (mkStackDeallocInstr platform amount)
+ Instr <$> mkStackDeallocInstr platform amount
-- | An instruction with liveness information.
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index ee3e64cf24..c7000c9f4b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -858,25 +858,104 @@ x86_mkJumpInstr
x86_mkJumpInstr id
= [JXX ALWAYS id]
+-- Note [Windows stack layout]
+-- | On most OSes the kernel will place a guard page after the current stack
+-- page. If you allocate larger than a page worth you may jump over this
+-- guard page. Not only is this a security issue, but on certain OSes such
+-- as Windows a new page won't be allocated if you don't hit the guard. This
+-- will cause a segfault or access fault.
+--
+-- This function defines if the current allocation amount requires a probe.
+-- On Windows (for now) we emit a call to _chkstk for this. For other OSes
+-- this is not yet implemented.
+-- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
+-- The Windows stack looks like this:
+--
+-- +-------------------+
+-- | SP |
+-- +-------------------+
+-- | |
+-- | GUARD PAGE |
+-- | |
+-- +-------------------+
+-- | |
+-- | |
+-- | UNMAPPED |
+-- | |
+-- | |
+-- +-------------------+
+--
+-- In essense each allocation larger than a page size needs to be chunked and
+-- a probe emitted after each page allocation. You have to hit the guard
+-- page so the kernel can map in the next page, otherwise you'll segfault.
+--
+needs_probe_call :: Platform -> Int -> Bool
+needs_probe_call platform amount
+ = case platformOS platform of
+ OSMinGW32 -> case platformArch platform of
+ ArchX86 -> amount > (4 * 1024)
+ ArchX86_64 -> amount > (8 * 1024)
+ _ -> False
+ _ -> False
x86_mkStackAllocInstr
:: Platform
-> Int
- -> Instr
+ -> [Instr]
x86_mkStackAllocInstr platform amount
- = case platformArch platform of
- ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
- ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
- _ -> panic "x86_mkStackAllocInstr"
+ = case platformOS platform of
+ OSMinGW32 ->
+ -- These will clobber AX but this should be ok because
+ --
+ -- 1. It is the first thing we do when entering the closure and AX is
+ -- a caller saved registers on Windows both on x86_64 and x86.
+ --
+ -- 2. The closures are only entered via a call or longjmp in which case
+ -- there are no expectations for volatile registers.
+ --
+ -- 3. When the target is a local branch point it is re-targeted
+ -- after the dealloc, preserving #2. See note [extra spill slots].
+ --
+ -- We emit a call because the stack probes are quite involved and
+ -- would bloat code size a lot. GHC doesn't really have an -Os.
+ -- __chkstk is guaranteed to leave all nonvolatile registers and AX
+ -- untouched. It's part of the standard prologue code for any Windows
+ -- function dropping the stack more than a page.
+ -- See Note [Windows stack layout]
+ case platformArch platform of
+ ArchX86 | needs_probe_call platform amount ->
+ [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
+ , CALL (Left $ strImmLit "___chkstk_ms") [eax]
+ , SUB II32 (OpReg eax) (OpReg esp)
+ ]
+ | otherwise ->
+ [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+ , TEST II32 (OpReg esp) (OpReg esp)
+ ]
+ ArchX86_64 | needs_probe_call platform amount ->
+ [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
+ , CALL (Left $ strImmLit "__chkstk_ms") [rax]
+ , SUB II64 (OpReg rax) (OpReg rsp)
+ ]
+ | otherwise ->
+ [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ , TEST II64 (OpReg rsp) (OpReg rsp)
+ ]
+ _ -> panic "x86_mkStackAllocInstr"
+ _ ->
+ case platformArch platform of
+ ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
+ ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
+ _ -> panic "x86_mkStackAllocInstr"
x86_mkStackDeallocInstr
:: Platform
-> Int
- -> Instr
+ -> [Instr]
x86_mkStackDeallocInstr platform amount
= case platformArch platform of
- ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
- ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
+ ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
_ -> panic "x86_mkStackDeallocInstr"
i386_insert_ffrees
@@ -996,7 +1075,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, JXX ALWAYS new_blockid]
+ = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
, BasicBlock new_blockid block' ]
| otherwise
= [ BasicBlock id block' ]
@@ -1004,7 +1083,7 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
block' = foldr insert_dealloc [] insns
insert_dealloc insn r = case insn of
- JMP _ _ -> dealloc : insn : r
+ JMP _ _ -> dealloc ++ (insn : r)
JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
_other -> x86_patchJumpInstr insn retarget : r
where retarget b = fromMaybe b (mapLookup b new_blockmap)
@@ -1013,7 +1092,6 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
-- in
return (CmmProc info lbl live (ListGraph new_code))
-
data JumpDest = DestBlockId BlockId | DestImm Imm
getJumpDestBlockId :: JumpDest -> Maybe BlockId