diff options
author | Peter Wortmann <scpmw@leeds.ac.uk> | 2014-12-10 12:00:49 +0100 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2014-12-16 18:34:08 -0600 |
commit | edd6d676847b94648c18b7f3790852ab4043759d (patch) | |
tree | 2e45124f65b0b8872dbc5a9d871c26a103b35e65 | |
parent | cc481ec8657e0b91e2f8f9a9eeb3f9ee030635ae (diff) | |
download | haskell-edd6d676847b94648c18b7f3790852ab4043759d.tar.gz |
Generate DWARF unwind information
This tells debuggers such as GDB how to "unwind" a program state,
which allows them to walk the stack up.
Notes:
* The code is quite general, perhaps unnecessarily so. Unless we get
more unwind information, only the first case of pprSetUnwind will
get used - and pprUnwindExpr and pprUndefUnwind will never be
called. It just so happens that this is a point where we can get a
lot of features cheaply, even if we don't use them.
* When determining what location to show for a return address, most
debuggers check the map for "rip-1", assuming that's where the
"call" instruction is. For tables-next-to-code, that happens to
always be the end of an info table. We therefore cheat a bit here by
shifting .debug_frame information so it covers the end of the info
table, as well as generating a .loc directive for the info table
data.
Debuggers will still show the wrong label for the return address,
though. Haven't found a way around that one yet.
(From Phabricator D396)
-rw-r--r-- | compiler/nativeGen/Dwarf.hs | 45 | ||||
-rw-r--r-- | compiler/nativeGen/Dwarf/Constants.hs | 64 | ||||
-rw-r--r-- | compiler/nativeGen/Dwarf/Types.hs | 236 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 6 |
4 files changed, 347 insertions, 4 deletions
diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs index 9420424080..4f9bdb64af 100644 --- a/compiler/nativeGen/Dwarf.hs +++ b/compiler/nativeGen/Dwarf.hs @@ -3,6 +3,7 @@ module Dwarf ( ) where import CLabel +import CmmExpr ( GlobalReg(..) ) import Config ( cProjectName, cProjectVersion ) import CoreSyn ( Tickish(..) ) import Debug @@ -18,6 +19,9 @@ import Dwarf.Constants import Dwarf.Types import Data.Maybe +import Data.List ( sortBy ) +import Data.Ord ( comparing ) +import qualified Data.Map as Map import System.FilePath import System.Directory ( getCurrentDirectory ) @@ -62,7 +66,13 @@ dwarfGen df modLoc us blocks = do let lineSct = dwarfLineSection $$ ptext dwarfLineLabel <> colon - return (infoSct $$ abbrevSct $$ lineSct, us') + -- .debug_frame section: Information about the layout of the GHC stack + let (framesU, us'') = takeUniqFromSupply us' + frameSct = dwarfFrameSection $$ + ptext dwarfFrameLabel <> colon $$ + pprDwarfFrame (debugFrame framesU procs) + + return (infoSct $$ abbrevSct $$ lineSct $$ frameSct, us'') -- | Header for a compilation unit, establishing global format -- parameters @@ -118,3 +128,36 @@ blockToDwarf blk dws , dwLabel = dblCLabel blk , dwMarker = mkAsmTempLabel (dblLabel blk) } + +-- | Generates the data for the debug frame section, which encodes the +-- desired stack unwind behaviour for the debugger +debugFrame :: Unique -> [DebugBlock] -> DwarfFrame +debugFrame u procs + = DwarfFrame { dwCieLabel = mkAsmTempLabel u + , dwCieInit = initUws + , dwCieProcs = map (procToFrame initUws) procs + } + where initUws = Map.fromList [(Sp, UwReg Sp 0)] + +-- | Generates unwind information for a procedure debug block +procToFrame :: UnwindTable -> DebugBlock -> DwarfFrameProc +procToFrame initUws blk + = DwarfFrameProc { dwFdeProc = dblCLabel blk + , dwFdeHasInfo = dblHasInfoTbl blk + , dwFdeBlocks = map (uncurry blockToFrame) blockUws + } + where blockUws :: [(DebugBlock, UnwindTable)] + blockUws = map snd $ sortBy (comparing fst) $ flatten initUws blk + flatten uws0 b@DebugBlock{ dblPosition=pos, dblUnwind=uws, + dblBlocks=blocks } + | Just p <- pos = (p, (b, uws')):nested + | otherwise = nested -- block was optimized out + where uws' = uws `Map.union` uws0 + nested = concatMap (flatten uws') blocks + +blockToFrame :: DebugBlock -> UnwindTable -> DwarfFrameBlock +blockToFrame blk uws + = DwarfFrameBlock { dwFdeBlock = mkAsmTempLabel $ dblLabel blk + , dwFdeBlkHasInfo = dblHasInfoTbl blk + , dwFdeUnwind = uws + } diff --git a/compiler/nativeGen/Dwarf/Constants.hs b/compiler/nativeGen/Dwarf/Constants.hs index b6a688d657..a5bbeac477 100644 --- a/compiler/nativeGen/Dwarf/Constants.hs +++ b/compiler/nativeGen/Dwarf/Constants.hs @@ -7,6 +7,9 @@ import FastString import Platform import Outputable +import Reg +import X86.Regs + import Data.Word -- | Language ID used for Haskell. @@ -126,7 +129,66 @@ dwarfSection name = sdocWithPlatform $ \plat -> ".section .debug_" ++ name ++ ",\"\",@progbits" -- | Dwarf section labels -dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel :: LitString +dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: LitString dwarfInfoLabel = sLit ".Lsection_info" dwarfAbbrevLabel = sLit ".Lsection_abbrev" dwarfLineLabel = sLit ".Lsection_line" +dwarfFrameLabel = sLit ".Lsection_frame" + +-- | Mapping of registers to DWARF register numbers +dwarfRegNo :: Platform -> Reg -> Word8 +dwarfRegNo p r = case platformArch p of + ArchX86 + | r == eax -> 0 + | r == ecx -> 1 -- yes, no typo + | r == edx -> 2 + | r == ebx -> 3 + | r == esp -> 4 + | r == ebp -> 5 + | r == esi -> 6 + | r == edi -> 7 + ArchX86_64 + | r == rax -> 0 + | r == rdx -> 1 -- this neither. The order GCC allocates registers in? + | r == rcx -> 2 + | r == rbx -> 3 + | r == rsi -> 4 + | r == rdi -> 5 + | r == rbp -> 6 + | r == rsp -> 7 + | r == r8 -> 8 + | r == r9 -> 9 + | r == r10 -> 10 + | r == r11 -> 11 + | r == r12 -> 12 + | r == r13 -> 13 + | r == r14 -> 14 + | r == r15 -> 15 + | r == xmm0 -> 17 + | r == xmm1 -> 18 + | r == xmm2 -> 19 + | r == xmm3 -> 20 + | r == xmm4 -> 21 + | r == xmm5 -> 22 + | r == xmm6 -> 23 + | r == xmm7 -> 24 + | r == xmm8 -> 25 + | r == xmm9 -> 26 + | r == xmm10 -> 27 + | r == xmm11 -> 28 + | r == xmm12 -> 29 + | r == xmm13 -> 30 + | r == xmm14 -> 31 + | r == xmm15 -> 32 + _other -> error "dwarfRegNo: Unsupported platform or unknown register!" + +-- | Virtual register number to use for return address. +dwarfReturnRegNo :: Platform -> Word8 +dwarfReturnRegNo p + -- We "overwrite" IP with our pseudo register - that makes sense, as + -- when using this mechanism gdb already knows the IP anyway. Clang + -- does this too, so it must be safe. + = case platformArch p of + ArchX86 -> 8 -- eip + ArchX86_64 -> 16 -- rip + _other -> error "dwarfReturnRegNo: Unsupported platform!" diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs index 1d564f30c0..96fea0ab90 100644 --- a/compiler/nativeGen/Dwarf/Types.hs +++ b/compiler/nativeGen/Dwarf/Types.hs @@ -1,26 +1,40 @@ module Dwarf.Types - ( DwarfInfo(..) + ( -- * Dwarf information + DwarfInfo(..) , pprDwarfInfo , pprAbbrevDecls + -- * Dwarf frame + , DwarfFrame(..), DwarfFrameProc(..), DwarfFrameBlock(..) + , pprDwarfFrame + -- * Utilities , pprByte - , pprWord + , pprData4' , pprDwWord + , pprWord , pprLEBWord , pprLEBInt + , wordAlign ) where +import Debug import CLabel +import CmmExpr ( GlobalReg(..) ) import FastString import Outputable import Platform +import Reg import Dwarf.Constants import Data.Bits +import Data.List ( mapAccumL ) +import qualified Data.Map as Map import Data.Word import Data.Char +import CodeGen.Platform + -- | Individual dwarf records. Each one will be encoded as an entry in -- the .debug_info section. data DwarfInfo @@ -74,6 +88,7 @@ pprAbbrevDecls haveDebugLine = , (dW_AT_external, dW_FORM_flag) , (dW_AT_low_pc, dW_FORM_addr) , (dW_AT_high_pc, dW_FORM_addr) + , (dW_AT_frame_base, dW_FORM_block1) ] $$ mkAbbrev DwAbbrBlock dW_TAG_lexical_block dW_CHILDREN_yes [ (dW_AT_name, dW_FORM_string) @@ -107,6 +122,8 @@ pprDwarfInfoOpen _ (DwarfSubprogram _ name label) = sdocWithDynFlags $ \df -> $$ pprFlag (externallyVisibleCLabel label) $$ pprWord (ppr label) $$ pprWord (ppr $ mkAsmTempEndLabel label) + $$ pprByte 1 + $$ pprByte dW_OP_call_frame_cfa pprDwarfInfoOpen _ (DwarfBlock _ label marker) = sdocWithDynFlags $ \df -> pprAbbrev DwAbbrBlock $$ pprString (renderWithStyle df (ppr label) (mkCodeStyle CStyle)) @@ -117,6 +134,221 @@ pprDwarfInfoOpen _ (DwarfBlock _ label marker) = sdocWithDynFlags $ \df -> pprDwarfInfoClose :: SDoc pprDwarfInfoClose = pprAbbrev DwAbbrNull +-- | Information about unwind instructions for a procedure. This +-- corresponds to a "Common Information Entry" (CIE) in DWARF. +data DwarfFrame + = DwarfFrame + { dwCieLabel :: CLabel + , dwCieInit :: UnwindTable + , dwCieProcs :: [DwarfFrameProc] + } + +-- | Unwind instructions for an individual procedure. Corresponds to a +-- "Frame Description Entry" (FDE) in DWARF. +data DwarfFrameProc + = DwarfFrameProc + { dwFdeProc :: CLabel + , dwFdeHasInfo :: Bool + , dwFdeBlocks :: [DwarfFrameBlock] + -- ^ List of blocks. Order must match asm! + } + +-- | Unwind instructions for a block. Will become part of the +-- containing FDE. +data DwarfFrameBlock + = DwarfFrameBlock + { dwFdeBlock :: CLabel + , dwFdeBlkHasInfo :: Bool + , dwFdeUnwind :: UnwindTable + } + +-- | Header for the .debug_frame section. Here we emit the "Common +-- Information Entry" record that etablishes general call frame +-- parameters and the default stack layout. +pprDwarfFrame :: DwarfFrame -> SDoc +pprDwarfFrame DwarfFrame{dwCieLabel=cieLabel,dwCieInit=cieInit,dwCieProcs=procs} + = sdocWithPlatform $ \plat -> + let cieStartLabel= mkAsmTempDerivedLabel cieLabel (fsLit "_start") + cieEndLabel = mkAsmTempEndLabel cieLabel + length = ppr cieEndLabel <> char '-' <> ppr cieStartLabel + spReg = dwarfGlobalRegNo plat Sp + retReg = dwarfReturnRegNo plat + wordSize = platformWordSize plat + pprInit (g, uw) = pprSetUnwind plat g (Nothing, uw) + in vcat [ ppr cieLabel <> colon + , pprData4' length -- Length of CIE + , ppr cieStartLabel <> colon + , pprData4' (ptext (sLit "-1")) + -- Common Information Entry marker (-1 = 0xf..f) + , pprByte 3 -- CIE version (we require DWARF 3) + , pprByte 0 -- Augmentation (none) + , pprByte 1 -- Code offset multiplicator + , pprByte (128-fromIntegral wordSize) + -- Data offset multiplicator + -- (stacks grow down => "-w" in signed LEB128) + , pprByte retReg -- virtual register holding return address + ] $$ + -- Initial unwind table + vcat (map pprInit $ Map.toList cieInit) $$ + vcat [ -- RET = *CFA + pprByte (dW_CFA_offset+retReg) + , pprByte 0 + + -- Sp' = CFA + -- (we need to set this manually as our Sp register is + -- often not the architecture's default stack register) + , pprByte dW_CFA_val_offset + , pprLEBWord (fromIntegral spReg) + , pprLEBWord 0 + ] $$ + wordAlign $$ + ppr cieEndLabel <> colon $$ + -- Procedure unwind tables + vcat (map (pprFrameProc cieLabel cieInit) procs) + +-- | Writes a "Frame Description Entry" for a procedure. This consists +-- mainly of referencing the CIE and writing state machine +-- instructions to describe how the frame base (CFA) changes. +pprFrameProc :: CLabel -> UnwindTable -> DwarfFrameProc -> SDoc +pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks) + = let fdeLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde") + fdeEndLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde_end") + procEnd = mkAsmTempEndLabel procLbl + ifInfo str = if hasInfo then text str else empty + -- see [Note: Info Offset] + in vcat [ pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel) + , ppr fdeLabel <> colon + , pprData4' (ppr frameLbl <> char '-' <> + ptext dwarfFrameLabel) -- Reference to CIE + , pprWord (ppr procLbl <> ifInfo "-1") -- Code pointer + , pprWord (ppr procEnd <> char '-' <> + ppr procLbl <> ifInfo "+1") -- Block byte length + ] $$ + vcat (snd $ mapAccumL pprFrameBlock initUw blocks) $$ + wordAlign $$ + ppr fdeEndLabel <> colon + +-- | Generates unwind information for a block. We only generate +-- instructions where unwind information actually changes. This small +-- optimisations saves a lot of space, as subsequent blocks often have +-- the same unwind information. +pprFrameBlock :: UnwindTable -> DwarfFrameBlock -> (UnwindTable, SDoc) +pprFrameBlock oldUws (DwarfFrameBlock blockLbl hasInfo uws) + | uws == oldUws + = (oldUws, empty) + | otherwise + = (,) uws $ sdocWithPlatform $ \plat -> + let lbl = ppr blockLbl <> if hasInfo then text "-1" else empty + -- see [Note: Info Offset] + isChanged g v | old == Just v = Nothing + | otherwise = Just (old, v) + where old = Map.lookup g oldUws + changed = Map.toList $ Map.mapMaybeWithKey isChanged uws + died = Map.toList $ Map.difference oldUws uws + in pprByte dW_CFA_set_loc $$ pprWord lbl $$ + vcat (map (uncurry $ pprSetUnwind plat) changed) $$ + vcat (map (pprUndefUnwind plat . fst) died) + +-- [Note: Info Offset] +-- +-- GDB was pretty much written with C-like programs in mind, and as a +-- result they assume that once you have a return address, it is a +-- good idea to look at (PC-1) to unwind further - as that's where the +-- "call" instruction is supposed to be. +-- +-- Now on one hand, code generated by GHC looks nothing like what GDB +-- expects, and in fact going up from a return pointer is guaranteed +-- to land us inside an info table! On the other hand, that actually +-- gives us some wiggle room, as we expect IP to never *actually* end +-- up inside the info table, so we can "cheat" by putting whatever GDB +-- expects to see there. This is probably pretty safe, as GDB cannot +-- assume (PC-1) to be a valid code pointer in the first place - and I +-- have seen no code trying to correct this. +-- +-- Note that this will not prevent GDB from failing to look-up the +-- correct function name for the frame, as that uses the symbol table, +-- which we can not manipulate as easily. + +-- | Get DWARF register ID for a given GlobalReg +dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8 +dwarfGlobalRegNo p = maybe 0 (dwarfRegNo p . RegReal) . globalRegMaybe p + +-- | Generate code for setting the unwind information for a register, +-- optimized using its known old value in the table. Note that "Sp" is +-- special: We see it as synonym for the CFA. +pprSetUnwind :: Platform -> GlobalReg -> (Maybe UnwindExpr, UnwindExpr) -> SDoc +pprSetUnwind _ Sp (Just (UwReg s _), UwReg s' o') | s == s' + = if o' >= 0 + then pprByte dW_CFA_def_cfa_offset $$ pprLEBWord (fromIntegral o') + else pprByte dW_CFA_def_cfa_offset_sf $$ pprLEBInt o' +pprSetUnwind plat Sp (_, UwReg s' o') + = if o' >= 0 + then pprByte dW_CFA_def_cfa $$ + pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat s') $$ + pprLEBWord (fromIntegral o') + else pprByte dW_CFA_def_cfa_sf $$ + pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat s') $$ + pprLEBInt o' +pprSetUnwind _ Sp (_, uw) + = pprByte dW_CFA_def_cfa_expression $$ pprUnwindExpr False uw +pprSetUnwind plat g (_, UwDeref (UwReg Sp o)) + | o < 0 && ((-o) `mod` platformWordSize plat) == 0 -- expected case + = pprByte (dW_CFA_offset + dwarfGlobalRegNo plat g) $$ + pprLEBWord (fromIntegral ((-o) `div` platformWordSize plat)) + | otherwise + = pprByte dW_CFA_offset_extended_sf $$ + pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$ + pprLEBInt o +pprSetUnwind plat g (_, UwDeref uw) + = pprByte dW_CFA_expression $$ + pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$ + pprUnwindExpr True uw +pprSetUnwind plat g (_, uw) + = pprByte dW_CFA_val_expression $$ + pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g)) $$ + pprUnwindExpr True uw + +-- | Generates a DWARF expression for the given unwind expression. If +-- @spIsCFA@ is true, we see @Sp@ as the frame base CFA where it gets +-- mentioned. +pprUnwindExpr :: Bool -> UnwindExpr -> SDoc +pprUnwindExpr spIsCFA expr + = sdocWithPlatform $ \plat -> + let ppr (UwConst i) + | i >= 0 && i < 32 = pprByte (dW_OP_lit0 + fromIntegral i) + | otherwise = pprByte dW_OP_consts $$ pprLEBInt i -- lazy... + ppr (UwReg Sp i) | spIsCFA + = if i == 0 + then pprByte dW_OP_call_frame_cfa + else ppr (UwPlus (UwReg Sp 0) (UwConst i)) + ppr (UwReg g i) = pprByte (dW_OP_breg0+dwarfGlobalRegNo plat g) $$ + pprLEBInt i + ppr (UwDeref u) = ppr u $$ pprByte dW_OP_deref + ppr (UwPlus u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_plus + ppr (UwMinus u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_minus + ppr (UwTimes u1 u2) = ppr u1 $$ ppr u2 $$ pprByte dW_OP_mul + in ptext (sLit "\t.byte 1f-.-1") $$ + ppr expr $$ + ptext (sLit "1:") + +-- | Generate code for re-setting the unwind information for a +-- register to "undefined" +pprUndefUnwind :: Platform -> GlobalReg -> SDoc +pprUndefUnwind _ Sp = panic "pprUndefUnwind Sp" -- should never happen +pprUndefUnwind plat g = pprByte dW_CFA_undefined $$ + pprLEBWord (fromIntegral $ dwarfGlobalRegNo plat g) + + +-- | Align assembly at (machine) word boundary +wordAlign :: SDoc +wordAlign = sdocWithPlatform $ \plat -> + ptext (sLit "\t.align ") <> case platformOS plat of + OSDarwin -> case platformWordSize plat of + 8 -> text "3" + 4 -> text "2" + _other -> error "wordAlign: Unsupported word size!" + _other -> ppr (platformWordSize plat) + -- | Assembly for a single byte of constant DWARF data pprByte :: Word8 -> SDoc pprByte x = ptext (sLit "\t.byte ") <> ppr (fromIntegral x :: Word) diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 982f79a561..7022e59647 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -114,8 +114,14 @@ pprBasicBlock info_env (BasicBlock blockid instrs) Nothing -> empty Just (Statics info_lbl info) -> pprSectionHeader Text $$ + infoTableLoc $$ vcat (map pprData info) $$ pprLabel info_lbl + -- Make sure the info table has the right .loc for the block + -- coming right after it. See [Note: Info Offset] + infoTableLoc = case instrs of + (l@LOCATION{} : _) -> pprInstr l + _other -> empty pprDatas :: (Alignment, CmmStatics) -> SDoc pprDatas (align, (Statics lbl dats)) |