Diffstat (limited to 'compiler/GHC/CmmToAsm.hs')
-rw-r--r-- compiler/GHC/CmmToAsm.hs | 1236
 1 file changed, 1236 insertions(+), 0 deletions(-)
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
new file mode 100644
index 0000000000..8dc9b61198
--- /dev/null
+++ b/compiler/GHC/CmmToAsm.hs
@@ -0,0 +1,1236 @@
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 1993-2004
+--
+-- This is the top-level module in the native code generator.
+--
+-- -----------------------------------------------------------------------------
+
+{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, PatternSynonyms,
+ DeriveFunctor #-}
+
+#if !defined(GHC_LOADED_INTO_GHCI)
+{-# LANGUAGE UnboxedTuples #-}
+#endif
+
+{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
+
+module GHC.CmmToAsm (
+ -- * Module entry point
+ nativeCodeGen
+
+ -- * Test-only exports: see trac #12744
+ -- used by testGraphNoSpills, which needs to access
+ -- the register allocator intermediate data structures
+ -- cmmNativeGen emits
+ , cmmNativeGen
+ , NcgImpl(..)
+ , x86NcgImpl
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import qualified GHC.CmmToAsm.X86.CodeGen as X86.CodeGen
+import qualified GHC.CmmToAsm.X86.Regs as X86.Regs
+import qualified GHC.CmmToAsm.X86.Instr as X86.Instr
+import qualified GHC.CmmToAsm.X86.Ppr as X86.Ppr
+
+import qualified GHC.CmmToAsm.SPARC.CodeGen as SPARC.CodeGen
+import qualified GHC.CmmToAsm.SPARC.Regs as SPARC.Regs
+import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr
+import qualified GHC.CmmToAsm.SPARC.Ppr as SPARC.Ppr
+import qualified GHC.CmmToAsm.SPARC.ShortcutJump as SPARC.ShortcutJump
+import qualified GHC.CmmToAsm.SPARC.CodeGen.Expand as SPARC.CodeGen.Expand
+
+import qualified GHC.CmmToAsm.PPC.CodeGen as PPC.CodeGen
+import qualified GHC.CmmToAsm.PPC.Regs as PPC.Regs
+import qualified GHC.CmmToAsm.PPC.RegInfo as PPC.RegInfo
+import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
+import qualified GHC.CmmToAsm.PPC.Ppr as PPC.Ppr
+
+import GHC.CmmToAsm.Reg.Liveness
+import qualified GHC.CmmToAsm.Reg.Linear as Linear
+
+import qualified GraphColor as Color
+import qualified GHC.CmmToAsm.Reg.Graph as Color
+import qualified GHC.CmmToAsm.Reg.Graph.Stats as Color
+import qualified GHC.CmmToAsm.Reg.Graph.TrivColorable as Color
+
+import AsmUtils
+import GHC.CmmToAsm.Reg.Target
+import GHC.Platform
+import GHC.CmmToAsm.BlockLayout as BlockLayout
+import Config
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.PIC
+import GHC.Platform.Reg
+import GHC.CmmToAsm.Monad
+import GHC.CmmToAsm.CFG
+import GHC.CmmToAsm.Dwarf
+import GHC.Cmm.DebugBlock
+
+import GHC.Cmm.BlockId
+import GHC.StgToCmm.CgUtils ( fixStgRegisters )
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Opt ( cmmMachOpFold )
+import GHC.Cmm.Ppr
+import GHC.Cmm.CLabel
+
+import UniqFM
+import UniqSupply
+import GHC.Driver.Session
+import Util
+
+import BasicTypes ( Alignment )
+import qualified Pretty
+import BufWrite
+import Outputable
+import FastString
+import UniqSet
+import ErrUtils
+import Module
+import Stream (Stream)
+import qualified Stream
+
+-- DEBUGGING ONLY
+--import OrdList
+
+import Data.List
+import Data.Maybe
+import Data.Ord ( comparing )
+import Control.Exception
+import Control.Monad
+import System.IO
+
+{-
+The native-code generator has machine-independent and
+machine-dependent modules.
+
+This module ("AsmCodeGen") is the top-level machine-independent
+module. Before entering machine-dependent land, we do some
+machine-independent optimisations (defined below) on the
+'CmmStmts's.
+
+We convert to the machine-specific 'Instr' datatype with
+'cmmCodeGen', assuming an infinite supply of registers. We then use
+a machine-independent register allocator ('regAlloc') to rejoin
+reality. Obviously, 'regAlloc' has machine-specific helper
+functions (see about "RegAllocInfo" below).
+
+Finally, we order the basic blocks of the function so as to minimise
+the number of jumps between blocks, by utilising fallthrough wherever
+possible.
+
+The machine-dependent bits break down as follows:
+
+ * ["MachRegs"] Everything about the target platform's machine
+ registers (and immediate operands, and addresses, which tend to
+ intermingle/interact with registers).
+
+ * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
+ have a module of its own), plus a miscellany of other things
+ (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
+
+ * ["MachCodeGen"] is where 'Cmm' stuff turns into
+ machine instructions.
+
+ * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
+ an 'SDoc').
+
+ * ["RegAllocInfo"] In the register allocator, we manipulate
+ 'MRegsState's, which are 'BitSet's, one bit per machine register.
+ When we want to say something about a specific machine register
+ (e.g., ``it gets clobbered by this instruction''), we set/unset
+ its bit. Obviously, we do this 'BitSet' thing for efficiency
+ reasons.
+
+ The 'RegAllocInfo' module collects together the machine-specific
+ info needed to do register allocation.
+
+ * ["RegisterAlloc"] The (machine-independent) register allocator.
+-}
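+
+-- As a rough sketch (illustrative only; signatures elided), cmmNativeGen
+-- below pushes each top-level Cmm chunk through these worker functions,
+-- in this order:
+--
+-- >   fixStgRegisters   -- rewrite assignments to fixed global regs
+-- >   cmmToCmm          -- generic Cmm optimisations (folding, PIC refs)
+-- >   genMachCode       -- instruction selection, infinite virtual regs
+-- >   regAlloc          -- linear or graph-colouring register allocation
+-- >   sequenceTop       -- block layout to exploit fallthrough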
+
+--------------------
+nativeCodeGen :: forall a . DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
+ -> Stream IO RawCmmGroup a
+ -> IO a
+nativeCodeGen dflags this_mod modLoc h us cmms
+ = let platform = targetPlatform dflags
+ nCG' :: ( Outputable statics, Outputable instr
+ , Outputable jumpDest, Instruction instr)
+ => NcgImpl statics instr jumpDest -> IO a
+ nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
+ in case platformArch platform of
+ ArchX86 -> nCG' (x86NcgImpl dflags)
+ ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
+ ArchPPC -> nCG' (ppcNcgImpl dflags)
+ ArchS390X -> panic "nativeCodeGen: No NCG for S390X"
+ ArchSPARC -> nCG' (sparcNcgImpl dflags)
+ ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
+ ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
+ ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
+ ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
+ ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
+ ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
+ ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
+ ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
+ ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
+
+x86NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
+ X86.Instr.Instr X86.Instr.JumpDest
+x86NcgImpl dflags
+ = (x86_64NcgImpl dflags)
+
+x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
+ X86.Instr.Instr X86.Instr.JumpDest
+x86_64NcgImpl dflags
+ = NcgImpl {
+ cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
+ ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
+ ,canShortcut = X86.Instr.canShortcut
+ ,shortcutStatics = X86.Instr.shortcutStatics
+ ,shortcutJump = X86.Instr.shortcutJump
+ ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
+ ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
+ ,allocatableRegs = X86.Regs.allocatableRegs platform
+ ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
+ ,ncgExpandTop = id
+ ,ncgMakeFarBranches = const id
+ ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints
+ ,invertCondBranches = X86.CodeGen.invertCondBranches
+ }
+ where platform = targetPlatform dflags
+
+ppcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
+ppcNcgImpl dflags
+ = NcgImpl {
+ cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
+ ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
+ ,canShortcut = PPC.RegInfo.canShortcut
+ ,shortcutStatics = PPC.RegInfo.shortcutStatics
+ ,shortcutJump = PPC.RegInfo.shortcutJump
+ ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
+ ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = PPC.Regs.allocatableRegs platform
+ ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
+ ,ncgExpandTop = id
+ ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
+ ,extractUnwindPoints = const []
+ ,invertCondBranches = \_ _ -> id
+ }
+ where platform = targetPlatform dflags
+
+sparcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
+sparcNcgImpl dflags
+ = NcgImpl {
+ cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
+ ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
+ ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
+ ,canShortcut = SPARC.ShortcutJump.canShortcut
+ ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
+ ,shortcutJump = SPARC.ShortcutJump.shortcutJump
+ ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
+ ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = SPARC.Regs.allocatableRegs
+ ,ncgAllocMoreStack = noAllocMoreStack
+ ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
+ ,ncgMakeFarBranches = const id
+ ,extractUnwindPoints = const []
+ ,invertCondBranches = \_ _ -> id
+ }
+
+--
+-- Allocating more stack space for spilling is currently only
+-- supported for the linear register allocator on x86/x86_64; the rest
+-- default to the panic below. To support allocating extra stack on
+-- more platforms provide a definition of ncgAllocMoreStack.
+--
+noAllocMoreStack :: Int -> NatCmmDecl statics instr
+ -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
+noAllocMoreStack amount _
+ = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
+ ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
+ ++ " is a known limitation in the linear allocator.\n"
+ ++ "\n"
+ ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
+ ++ " You can still file a bug report if you like.\n"
+
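+-- For comparison, the x86 backends above supply a real implementation
+-- instead of this panic:
+--
+-- > ncgAllocMoreStack = X86.Instr.allocMoreStack platform
+--
+-- which grows the frame by the requested number of slots and returns the
+-- (from, to) block pairs for the stack-update blocks it inserts, so that
+-- the CFG can be kept in sync (see cmmNativeGen below).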
+
+-- | Data accumulated during code generation. Mostly about statistics,
+-- but also collects debug data for DWARF generation.
+data NativeGenAcc statics instr
+ = NGS { ngs_imports :: ![[CLabel]]
+ , ngs_natives :: ![[NatCmmDecl statics instr]]
+ -- ^ Native code generated, for statistics. This might
+ -- hold a lot of data, so it is important to clear this
+ -- field as early as possible if it isn't actually
+ -- required.
+ , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
+ , ngs_linearStats :: ![[Linear.RegAllocStats]]
+ , ngs_labels :: ![Label]
+ , ngs_debug :: ![DebugBlock]
+ , ngs_dwarfFiles :: !DwarfFiles
+ , ngs_unwinds :: !(LabelMap [UnwindPoint])
+ -- ^ see Note [Unwinding information in the NCG]
+ -- and Note [What is this unwinding business?] in Debug.
+ }
+
+{-
+Note [Unwinding information in the NCG]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unwind information is a type of metadata which allows a debugging tool
+to reconstruct the values of machine registers at the time a procedure was
+entered. For the most part, the production of unwind information is handled by
+the Cmm stage, where it is represented by CmmUnwind nodes.
+
+Unfortunately, the Cmm stage doesn't know everything necessary to produce
+accurate unwinding information. For instance, the x86-64 calling convention
+requires that the stack pointer be aligned to 16 bytes, which in turn means that
+GHC must sometimes add padding to $sp prior to performing a foreign call. When
+this happens unwind information must be updated accordingly.
+For this reason, we make the NCG backends responsible for producing
+unwinding tables (with the extractUnwindPoints function in NcgImpl).
+
+We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
+field of NativeGenAcc. This is a label map which contains an entry for each
+procedure, containing a list of unwinding points (e.g. a label and an associated
+unwinding table).
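+
+For instance (notation informal, values hypothetical), a procedure that
+pads $sp by 8 bytes around a foreign call might contribute an entry like
+
+  proc_lbl |-> [ UnwindPoint after_pad_lbl  {Sp = Sp + 8}
+               , UnwindPoint after_call_lbl {Sp = Sp} ]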
+
+See also Note [What is this unwinding business?] in Debug.
+-}
+
+nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest,
+ Instruction instr)
+ => DynFlags
+ -> Module -> ModLocation
+ -> NcgImpl statics instr jumpDest
+ -> Handle
+ -> UniqSupply
+ -> Stream IO RawCmmGroup a
+ -> IO a
+nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
+ = do
+ -- BufHandle is a performance hack. We could hide it inside
+ -- Pretty if it weren't for the fact that we do lots of little
+ -- printDocs here (in order to do codegen in constant space).
+ bufh <- newBufHandle h
+ let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
+ (ngs, us', a) <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
+ cmms ngs0
+ _ <- finishNativeGen dflags modLoc bufh us' ngs
+ return a
+
+finishNativeGen :: Instruction instr
+ => DynFlags
+ -> ModLocation
+ -> BufHandle
+ -> UniqSupply
+ -> NativeGenAcc statics instr
+ -> IO UniqSupply
+finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
+ = withTimingSilent dflags (text "NCG") (`seq` ()) $ do
+ -- Write debug data and finish
+ let emitDw = debugLevel dflags > 0
+ us' <- if not emitDw then return us else do
+ (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
+ emitNativeCode dflags bufh dwarf
+ return us'
+ bFlush bufh
+
+ -- dump global NCG stats for graph coloring allocator
+ let stats = concat (ngs_colorStats ngs)
+ unless (null stats) $ do
+
+ -- build the global register conflict graph
+ let graphGlobal
+ = foldl' Color.union Color.initGraph
+ $ [ Color.raGraph stat
+ | stat@Color.RegAllocStatsStart{} <- stats]
+
+ dump_stats (Color.pprStats stats graphGlobal)
+
+ let platform = targetPlatform dflags
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_conflicts "Register conflict graph"
+ FormatText
+ $ Color.dotGraph
+ (targetRegDotColor platform)
+ (Color.trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ $ graphGlobal
+
+
+ -- dump global NCG stats for linear allocator
+ let linearStats = concat (ngs_linearStats ngs)
+ unless (null linearStats) $
+ dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
+
+ -- write out the imports
+ printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
+ $ makeImportsDoc dflags (concat (ngs_imports ngs))
+ return us'
+ where
+ dump_stats = dumpAction dflags (mkDumpStyle dflags alwaysQualify)
+ (dumpOptionsFromFlag Opt_D_dump_asm_stats) "NCG stats"
+ FormatText
+
+cmmNativeGenStream :: (Outputable statics, Outputable instr
+ ,Outputable jumpDest, Instruction instr)
+ => DynFlags
+ -> Module -> ModLocation
+ -> NcgImpl statics instr jumpDest
+ -> BufHandle
+ -> UniqSupply
+ -> Stream IO RawCmmGroup a
+ -> NativeGenAcc statics instr
+ -> IO (NativeGenAcc statics instr, UniqSupply, a)
+
+cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
+ = do r <- Stream.runStream cmm_stream
+ case r of
+ Left a ->
+ return (ngs { ngs_imports = reverse $ ngs_imports ngs
+ , ngs_natives = reverse $ ngs_natives ngs
+ , ngs_colorStats = reverse $ ngs_colorStats ngs
+ , ngs_linearStats = reverse $ ngs_linearStats ngs
+ },
+ us,
+ a)
+ Right (cmms, cmm_stream') -> do
+ (us', ngs'') <-
+ withTimingSilent
+ dflags
+ ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do
+ -- Generate debug information
+ let debugFlag = debugLevel dflags > 0
+ !ndbgs | debugFlag = cmmDebugGen modLoc cmms
+ | otherwise = []
+ dbgMap = debugToMap ndbgs
+
+ -- Generate native code
+ (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
+ dbgMap us cmms ngs 0
+
+ -- Link native code information into debug blocks
+ -- See Note [What is this unwinding business?] in Debug.
+ let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
+ unless (null ldbgs) $
+ dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos" FormatText
+ (vcat $ map ppr ldbgs)
+
+ -- Accumulate debug information for emission in finishNativeGen.
+ let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }
+ return (us', ngs'')
+
+ cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
+ cmm_stream' ngs''
+
+ where ncglabel = text "NCG"
+
+-- | Do native code generation on all these cmms.
+--
+cmmNativeGens :: forall statics instr jumpDest.
+ (Outputable statics, Outputable instr
+ ,Outputable jumpDest, Instruction instr)
+ => DynFlags
+ -> Module -> ModLocation
+ -> NcgImpl statics instr jumpDest
+ -> BufHandle
+ -> LabelMap DebugBlock
+ -> UniqSupply
+ -> [RawCmmDecl]
+ -> NativeGenAcc statics instr
+ -> Int
+ -> IO (NativeGenAcc statics instr, UniqSupply)
+
+cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
+ where
+ go :: UniqSupply -> [RawCmmDecl]
+ -> NativeGenAcc statics instr -> Int
+ -> IO (NativeGenAcc statics instr, UniqSupply)
+
+ go us [] ngs !_ =
+ return (ngs, us)
+
+ go us (cmm : cmms) ngs count = do
+ let fileIds = ngs_dwarfFiles ngs
+ (us', fileIds', native, imports, colorStats, linearStats, unwinds)
+ <- {-# SCC "cmmNativeGen" #-}
+ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
+ cmm count
+
+ -- Generate .file directives for every new file that has been
+ -- used. Note that it is important that we generate these in
+ -- ascending order, as Clang 3.6's assembler complains otherwise.
+ let newFileIds = sortBy (comparing snd) $
+ nonDetEltsUFM $ fileIds' `minusUFM` fileIds
+ -- See Note [Unique Determinism and code generation]
+ pprDecl (f,n) = text "\t.file " <> ppr n <+>
+ pprFilePathString (unpackFS f)
+
+ emitNativeCode dflags h $ vcat $
+ map pprDecl newFileIds ++
+ map (pprNatCmmDecl ncgImpl) native
+
+ -- force evaluation of all this stuff to avoid space leaks
+ {-# SCC "seqString" #-} evaluate $ seqList (showSDoc dflags $ vcat $ map ppr imports) ()
+
+ let !labels' = if debugLevel dflags > 0
+ then cmmDebugLabels isMetaInstr native else []
+ !natives' = if dopt Opt_D_dump_asm_stats dflags
+ then native : ngs_natives ngs else []
+
+ mCon = maybe id (:)
+ ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
+ , ngs_natives = natives'
+ , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
+ , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
+ , ngs_labels = ngs_labels ngs ++ labels'
+ , ngs_dwarfFiles = fileIds'
+ , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds
+ }
+ go us' cmms ngs' (count + 1)
+
+
+emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
+emitNativeCode dflags h sdoc = do
+
+ {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
+ (mkCodeStyle AsmStyle) sdoc
+
+ -- dump native code
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm "Asm code" FormatASM
+ sdoc
+
+-- | Complete native code generation phase for a single top-level chunk of Cmm,
+-- dumping the output of each stage along the way.
+-- Also produces the data for the global conflict graph and NCG stats dumps.
+cmmNativeGen
+ :: forall statics instr jumpDest. (Instruction instr,
+ Outputable statics, Outputable instr, Outputable jumpDest)
+ => DynFlags
+ -> Module -> ModLocation
+ -> NcgImpl statics instr jumpDest
+ -> UniqSupply
+ -> DwarfFiles
+ -> LabelMap DebugBlock
+ -> RawCmmDecl -- ^ the cmm to generate code for
+ -> Int -- ^ sequence number of this top thing
+ -> IO ( UniqSupply
+ , DwarfFiles
+ , [NatCmmDecl statics instr] -- native code
+ , [CLabel] -- things imported by this cmm
+ , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
+ , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators
+ , LabelMap [UnwindPoint] -- unwinding information for blocks
+ )
+
+cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
+ = do
+ let platform = targetPlatform dflags
+
+ let proc_name = case cmm of
+ (CmmProc _ entry_label _ _) -> ppr entry_label
+ _ -> text "DataChunk"
+
+ -- rewrite assignments to global regs
+ let fixed_cmm =
+ {-# SCC "fixStgRegisters" #-}
+ fixStgRegisters dflags cmm
+
+ -- cmm to cmm optimisations
+ let (opt_cmm, imports) =
+ {-# SCC "cmmToCmm" #-}
+ cmmToCmm dflags this_mod fixed_cmm
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM
+ (pprCmmGroup [opt_cmm])
+
+ let cmmCfg = {-# SCC "getCFG" #-}
+ getCfgProc (cfgWeightInfo dflags) opt_cmm
+
+ -- generate native code from cmm
+ let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
+ {-# SCC "genMachCode" #-}
+ initUs us $ genMachCode dflags this_mod modLoc
+ (cmmTopCodeGen ncgImpl)
+ fileIds dbgMap opt_cmm cmmCfg
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_native "Native code" FormatASM
+ (vcat $ map (pprNatCmmDecl ncgImpl) native)
+
+ maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name
+
+ -- Tag instructions with register liveness information;
+ -- this also drops dead code. We don't keep the CFG in sync on
+ -- some backends, so we don't use it there.
+ let livenessCfg = if (backendMaintainsCfg dflags)
+ then Just nativeCfgWeights
+ else Nothing
+ let (withLiveness, usLive) =
+ {-# SCC "regLiveness" #-}
+ initUs usGen
+ $ mapM (cmmTopLiveness livenessCfg platform) native
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_liveness "Liveness annotations added"
+ FormatCMM
+ (vcat $ map ppr withLiveness)
+
+ -- allocate registers
+ (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
+ if ( gopt Opt_RegsGraph dflags
+ || gopt Opt_RegsIterative dflags )
+ then do
+ -- the regs usable for allocation
+ let (alloc_regs :: UniqFM (UniqSet RealReg))
+ = foldr (\r -> plusUFM_C unionUniqSets
+ $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
+ emptyUFM
+ $ allocatableRegs ncgImpl
+
+ -- do the graph coloring register allocation
+ let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
+ = {-# SCC "RegAlloc-color" #-}
+ initUs usLive
+ $ Color.regAlloc
+ dflags
+ alloc_regs
+ (mkUniqSet [0 .. maxSpillSlots ncgImpl])
+ (maxSpillSlots ncgImpl)
+ withLiveness
+ livenessCfg
+
+ let ((alloced', stack_updt_blks), usAlloc')
+ = initUs usAlloc $
+ case maybe_more_stack of
+ Nothing -> return (alloced, [])
+ Just amount -> do
+ (alloced',stack_updt_blks) <- unzip <$>
+ (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
+ return (alloced', concat stack_updt_blks )
+
+
+ -- dump out what happened during register allocation
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc "Registers allocated"
+ FormatCMM
+ (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc_stages "Build/spill stages"
+ FormatText
+ (vcat $ map (\(stage, stats)
+ -> text "# --------------------------"
+ $$ text "# cmm " <> int count <> text " Stage " <> int stage
+ $$ ppr stats)
+ $ zip [0..] regAllocStats)
+
+ let mPprStats =
+ if dopt Opt_D_dump_asm_stats dflags
+ then Just regAllocStats else Nothing
+
+ -- force evaluation of the Maybe to avoid space leak
+ mPprStats `seq` return ()
+
+ return ( alloced', usAlloc'
+ , mPprStats
+ , Nothing
+ , [], stack_updt_blks)
+
+ else do
+ -- do linear register allocation
+ let reg_alloc proc = do
+ (alloced, maybe_more_stack, ra_stats) <-
+ Linear.regAlloc dflags proc
+ case maybe_more_stack of
+ Nothing -> return ( alloced, ra_stats, [] )
+ Just amount -> do
+ (alloced',stack_updt_blks) <-
+ ncgAllocMoreStack ncgImpl amount alloced
+ return (alloced', ra_stats, stack_updt_blks )
+
+ let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
+ = {-# SCC "RegAlloc-linear" #-}
+ initUs usLive
+ $ liftM unzip3
+ $ mapM reg_alloc withLiveness
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_regalloc "Registers allocated"
+ FormatCMM
+ (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
+
+ let mPprStats =
+ if dopt Opt_D_dump_asm_stats dflags
+ then Just (catMaybes regAllocStats) else Nothing
+
+ -- force evaluation of the Maybe to avoid space leak
+ mPprStats `seq` return ()
+
+ return ( alloced, usAlloc
+ , Nothing
+ , mPprStats, (catMaybes regAllocStats)
+ , concat stack_updt_blks )
+
+ -- Fixup blocks the register allocator inserted (from, regMoves, to)
+ let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
+ cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)
+
+ let cfgWithFixupBlks =
+ (\cfg -> addNodesBetween cfg cfgRegAllocUpdates) <$> livenessCfg
+
+ -- Insert stack update blocks
+ let postRegCFG =
+ pure (foldl' (\m (from,to) -> addImmediateSuccessor from to m ))
+ <*> cfgWithFixupBlks
+ <*> pure stack_updt_blks
+
+ ---- generate jump tables
+ let tabled =
+ {-# SCC "generateJumpTables" #-}
+ generateJumpTables ncgImpl alloced
+
+ when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
+ Opt_D_dump_cfg_weights "CFG Update information"
+ FormatText
+ ( text "stack:" <+> ppr stack_updt_blks $$
+ text "linearAlloc:" <+> ppr cfgRegAllocUpdates )
+
+ ---- shortcut branches
+ let (shorted, postShortCFG) =
+ {-# SCC "shortcutBranches" #-}
+ shortcutBranches dflags ncgImpl tabled postRegCFG
+
+ let optimizedCFG :: Maybe CFG
+ optimizedCFG =
+ optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
+
+ maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
+
+ --TODO: Partially check validity of the cfg.
+ let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
+ getBlks _ = []
+
+ when ( backendMaintainsCfg dflags &&
+ (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
+ let blocks = concatMap getBlks shorted
+ let labels = setFromList $ fmap blockId blocks :: LabelSet
+ let cfg = fromJust optimizedCFG
+ return $! seq (sanityCheckCfg cfg labels $
+ text "cfg not in lockstep") ()
+
+ ---- sequence blocks
+ let sequenced :: [NatCmmDecl statics instr]
+ sequenced =
+ checkLayout shorted $
+ {-# SCC "sequenceBlocks" #-}
+ map (BlockLayout.sequenceTop
+ dflags
+ ncgImpl optimizedCFG)
+ shorted
+
+ let branchOpt :: [NatCmmDecl statics instr]
+ branchOpt =
+ {-# SCC "invertCondBranches" #-}
+ map invert sequenced
+ where
+ invertConds :: LabelMap RawCmmStatics -> [NatBasicBlock instr]
+ -> [NatBasicBlock instr]
+ invertConds = invertCondBranches ncgImpl optimizedCFG
+ invert top@CmmData {} = top
+ invert (CmmProc info lbl live (ListGraph blocks)) =
+ CmmProc info lbl live (ListGraph $ invertConds info blocks)
+
+ ---- expansion of SPARC synthetic instrs
+ let expanded =
+ {-# SCC "sparc_expand" #-}
+ ncgExpandTop ncgImpl branchOpt
+ --ncgExpandTop ncgImpl sequenced
+
+ dumpIfSet_dyn dflags
+ Opt_D_dump_asm_expanded "Synthetic instructions expanded"
+ FormatCMM
+ (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
+
+ -- generate unwinding information from cmm
+ let unwinds :: BlockMap [UnwindPoint]
+ unwinds =
+ {-# SCC "unwindingInfo" #-}
+ foldl' addUnwind mapEmpty expanded
+ where
+ addUnwind acc proc =
+ acc `mapUnion` computeUnwinding dflags ncgImpl proc
+
+ return ( usAlloc
+ , fileIds'
+ , expanded
+ , lastMinuteImports ++ imports
+ , ppr_raStatsColor
+ , ppr_raStatsLinear
+ , unwinds )
+
+maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO ()
+maybeDumpCfg _dflags Nothing _ _ = return ()
+maybeDumpCfg dflags (Just cfg) msg proc_name
+ | null cfg = return ()
+ | otherwise
+ = dumpIfSet_dyn
+ dflags Opt_D_dump_cfg_weights msg
+ FormatText
+ (proc_name <> char ':' $$ pprEdgeWeights cfg)
+
+-- | Make sure all blocks we want the layout algorithm to place have been placed.
+checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+ -> [NatCmmDecl statics instr]
+checkLayout procsUnsequenced procsSequenced =
+ ASSERT2(setNull diff,
+ ppr "Block sequencing dropped blocks:" <> ppr diff)
+ procsSequenced
+ where
+ blocks1 = foldl' (setUnion) setEmpty $
+ map getBlockIds procsUnsequenced :: LabelSet
+ blocks2 = foldl' (setUnion) setEmpty $
+ map getBlockIds procsSequenced
+ diff = setDifference blocks1 blocks2
+
+ getBlockIds (CmmData _ _) = setEmpty
+ getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
+ setFromList $ map blockId blocks
+
+-- | Compute unwinding tables for the blocks of a procedure
+computeUnwinding :: Instruction instr
+ => DynFlags -> NcgImpl statics instr jumpDest
+ -> NatCmmDecl statics instr
+ -- ^ the native code generated for the procedure
+ -> LabelMap [UnwindPoint]
+ -- ^ unwinding tables for all points of all blocks of the
+ -- procedure
+computeUnwinding dflags _ _
+ | debugLevel dflags == 0 = mapEmpty
+computeUnwinding _ _ (CmmData _ _) = mapEmpty
+computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
+ -- In general we would need to push unwinding information down the
+ -- block-level call-graph to ensure that we fully account for all
+ -- relevant register writes within a procedure.
+ --
+ -- However, the only unwinding information that we care about in GHC is for
+ -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind
+ -- information at the beginning of every block means that there is no need
+ -- to perform this sort of push-down.
+ mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
+ | BasicBlock blk_lbl instrs <- blks ]
+
+-- | Build a doc for all the imports.
+--
+makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
+makeImportsDoc dflags imports
+ = dyld_stubs imports
+ $$
+ -- On recent versions of Darwin, the linker supports
+ -- dead-stripping of code and data on a per-symbol basis.
+ -- There's a hack to make this work in PprMach.pprNatCmmDecl.
+ (if platformHasSubsectionsViaSymbols platform
+ then text ".subsections_via_symbols"
+ else Outputable.empty)
+ $$
+ -- On recent GNU ELF systems one can mark an object file
+ -- as not requiring an executable stack. If all objects
+ -- linked into a program have this note then the program
+ -- will not use an executable stack, which is good for
+ -- security. GHC generated code does not need an executable
+ -- stack so add the note in:
+ (if platformHasGnuNonexecStack platform
+ then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
+ else Outputable.empty)
+ $$
+ -- And just because every other compiler does, let's stick in
+ -- an identifier directive: .ident "GHC x.y.z"
+ (if platformHasIdentDirective platform
+ then let compilerIdent = text "GHC" <+> text cProjectVersion
+ in text ".ident" <+> doubleQuotes compilerIdent
+ else Outputable.empty)
+
+ where
+ platform = targetPlatform dflags
+ arch = platformArch platform
+ os = platformOS platform
+
+ -- Generate "symbol stubs" for all external symbols that might
+ -- come from a dynamic library.
+ dyld_stubs :: [CLabel] -> SDoc
+{- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
+ map head $ group $ sort imps-}
+ -- (Hack) sometimes two Labels pretty-print the same, but have
+ -- different uniques; so we compare their text versions...
+ dyld_stubs imps
+ | needImportedSymbols dflags arch os
+ = vcat $
+ (pprGotDeclaration dflags arch os :) $
+ map ( pprImportedSymbol dflags platform . fst . head) $
+ groupBy (\(_,a) (_,b) -> a == b) $
+ sortBy (\(_,a) (_,b) -> compare a b) $
+ map doPpr $
+ imps
+ | otherwise
+ = Outputable.empty
+
+ doPpr lbl = (lbl, renderWithStyle
+ (initSDocContext dflags astyle)
+ (pprCLabel dflags lbl))
+ astyle = mkCodeStyle AsmStyle
+
+-- -----------------------------------------------------------------------------
+-- Generate jump tables
+
+-- Analyzes all native code and generates data sections for all jump
+-- table instructions.
+generateJumpTables
+ :: NcgImpl statics instr jumpDest
+ -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+generateJumpTables ncgImpl xs = concatMap f xs
+ where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
+ f p = [p]
+ g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
+
+-- -----------------------------------------------------------------------------
+-- Shortcut branches
+
+shortcutBranches
+ :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
+ -> NcgImpl statics instr jumpDest
+ -> [NatCmmDecl statics instr]
+ -> Maybe CFG
+ -> ([NatCmmDecl statics instr],Maybe CFG)
+
+shortcutBranches dflags ncgImpl tops weights
+ | gopt Opt_AsmShortcutting dflags
+ = ( map (apply_mapping ncgImpl mapping) tops'
+ , shortcutWeightMap mappingBid <$!> weights )
+ | otherwise
+ = (tops, weights)
+ where
+ (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
+ mapping = mapUnions mappings :: LabelMap jumpDest
+ mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
+
+build_mapping :: forall instr t d statics jumpDest.
+ NcgImpl statics instr jumpDest
+ -> GenCmmDecl d (LabelMap t) (ListGraph instr)
+ -> (GenCmmDecl d (LabelMap t) (ListGraph instr)
+ ,LabelMap jumpDest)
+build_mapping _ top@(CmmData _ _) = (top, mapEmpty)
+build_mapping _ (CmmProc info lbl live (ListGraph []))
+ = (CmmProc info lbl live (ListGraph []), mapEmpty)
+build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
+ = (CmmProc info lbl live (ListGraph (head:others)), mapping)
+ -- drop the shorted blocks, but don't ever drop the first one,
+ -- because it is pointed to by a global label.
+ where
+ -- find all the blocks that just consist of a jump that can be
+ -- shorted.
+ -- Don't completely eliminate loops here -- that can leave a dangling jump!
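+ -- (E.g. a block A consisting only of "jmp A": shorting it would add
+ -- the mapping A -> A and drop A, leaving other jumps to A dangling.)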
+ shortcut_blocks :: [(BlockId, jumpDest)]
+ (_, shortcut_blocks, others) =
+ foldl' split (setEmpty :: LabelSet, [], []) blocks
+ split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
+ | Just jd <- canShortcut ncgImpl insn
+ , Just dest <- getJumpDestBlockId ncgImpl jd
+ , not (has_info id)
+ , (setMember dest s) || dest == id -- loop checks
+ = (s, shortcut_blocks, b : others)
+ split (s, shortcut_blocks, others) (BasicBlock id [insn])
+ | Just dest <- canShortcut ncgImpl insn
+ , not (has_info id)
+ = (setInsert id s, (id,dest) : shortcut_blocks, others)
+ split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
+
+ -- do not eliminate blocks that have an info table
+ has_info l = mapMember l info
+
+ -- build a mapping from BlockId to JumpDest for shorting branches
+ mapping = mapFromList shortcut_blocks
+
+apply_mapping :: NcgImpl statics instr jumpDest
+ -> LabelMap jumpDest
+ -> GenCmmDecl statics h (ListGraph instr)
+ -> GenCmmDecl statics h (ListGraph instr)
+apply_mapping ncgImpl ufm (CmmData sec statics)
+ = CmmData sec (shortcutStatics ncgImpl (\bid -> mapLookup bid ufm) statics)
+apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
+ = CmmProc info lbl live (ListGraph $ map short_bb blocks)
+ where
+ short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
+ short_insn i = shortcutJump ncgImpl (\bid -> mapLookup bid ufm) i
+ -- shortcutJump should apply the mapping repeatedly,
+ -- just in case we can short multiple branches.
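+ -- E.g. (schematically) with mapping {A -> B, B -> C}, rewriting
+ -- "jmp A" must produce "jmp C", not stop at "jmp B".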
+
+-- -----------------------------------------------------------------------------
+-- Instruction selection
+
+-- Native code instruction selection for a chunk of Cmm code. For
+-- this part of the computation, we switch from the UniqSM monad to
+-- the NatM monad. The latter carries not only a Unique, but also an
+-- Int denoting the current C stack pointer offset in the generated
+-- code; this is needed for creating correct spill offsets on
+-- architectures which don't offer, or for which it would be
+-- prohibitively expensive to employ, a frame pointer register (viz. x86).
+
+-- The offset is measured in bytes, and indicates the difference
+-- between the current (simulated) C stack-ptr and the value it was at
+-- the beginning of the block. For stacks which grow down, this value
+-- should be either zero or negative.
+
+-- Along with the stack pointer offset, we also carry along a LabelMap of
+-- DebugBlocks, which we read to generate .location directives.
+--
+-- Switching between the two monads whilst carrying along the same
+-- Unique supply breaks abstraction. Is that bad?
+
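+-- A worked example of the delta bookkeeping (illustrative, x86-64 flavour):
+-- a one-word PUSH moves the simulated C stack pointer down, taking the
+-- delta from 0 to -8; the matching pop (or ADD of 8 to the stack pointer)
+-- restores it to 0. genMachCode below insists that the final delta is 0
+-- and panics otherwise.
+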
+genMachCode
+ :: DynFlags
+ -> Module -> ModLocation
+ -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
+ -> DwarfFiles
+ -> LabelMap DebugBlock
+ -> RawCmmDecl
+ -> CFG
+ -> UniqSM
+ ( [NatCmmDecl statics instr]
+ , [CLabel]
+ , DwarfFiles
+ , CFG
+ )
+
+genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top cmm_cfg
+ = do { initial_us <- getUniqueSupplyM
+ ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
+ modLoc fileIds dbgMap cmm_cfg
+ (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
+ final_delta = natm_delta final_st
+ final_imports = natm_imports final_st
+ final_cfg = natm_cfg final_st
+ ; if final_delta == 0
+ then return (new_tops, final_imports
+ , natm_fileid final_st, final_cfg)
+ else pprPanic "genMachCode: nonzero final delta" (int final_delta)
+ }
+
+-- -----------------------------------------------------------------------------
+-- Generic Cmm optimiser
+
+{-
+Here we do:
+
+ (a) Constant folding
+ (b) Position independent code and dynamic linking
+       (i)  introduce the appropriate indirections
+            and position independent refs
+       (ii) compile a list of imported symbols
+ (c) Some arch-specific optimizations
+
+(a) will be moving to the new Hoopl pipeline; however, (b) and
+(c) are only needed by the native backend and will continue to live
+here.
+
+Ideas for other things we could do (put these in Hoopl please!):
+
+ - shortcut jumps-to-jumps
+ - simple CSE: if an expr is assigned to a temp, then replace later occs of
+ that expr with the temp, until the expr is no longer valid (can push through
+ temp assignments, and certain assigns to mem...)
+-}
+
+cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
+cmmToCmm _ _ top@(CmmData _ _) = (top, [])
+cmmToCmm dflags this_mod (CmmProc info lbl live graph)
+ = runCmmOpt dflags this_mod $
+ do blocks' <- mapM cmmBlockConFold (toBlockList graph)
+ return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
+
+-- Avoids using unboxed tuples when loading into GHCi
+#if !defined(GHC_LOADED_INTO_GHCI)
+
+type OptMResult a = (# a, [CLabel] #)
+
+pattern OptMResult :: a -> b -> (# a, b #)
+pattern OptMResult x y = (# x, y #)
+{-# COMPLETE OptMResult #-}
+#else
+
+data OptMResult a = OptMResult !a ![CLabel] deriving (Functor)
+#endif
+
+newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> OptMResult a)
+ deriving (Functor)
+
+instance Applicative CmmOptM where
+ pure x = CmmOptM $ \_ _ imports -> OptMResult x imports
+ (<*>) = ap
+
+instance Monad CmmOptM where
+ (CmmOptM f) >>= g =
+ CmmOptM $ \dflags this_mod imports0 ->
+ case f dflags this_mod imports0 of
+ OptMResult x imports1 ->
+ case g x of
+ CmmOptM g' -> g' dflags this_mod imports1
+
+instance CmmMakeDynamicReferenceM CmmOptM where
+ addImport = addImportCmmOpt
+ getThisModule = CmmOptM $ \_ this_mod imports -> OptMResult this_mod imports
+
+addImportCmmOpt :: CLabel -> CmmOptM ()
+addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> OptMResult () (lbl:imports)
+
+instance HasDynFlags CmmOptM where
+ getDynFlags = CmmOptM $ \dflags _ imports -> OptMResult dflags imports
+
+runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
+runCmmOpt dflags this_mod (CmmOptM f) =
+ case f dflags this_mod [] of
+ OptMResult result imports -> (result, imports)
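+
+-- A minimal usage sketch (hypothetical label @lbl@): an action that records
+-- one import returns its result alongside the accumulated imports:
+--
+-- > runCmmOpt dflags this_mod (addImportCmmOpt lbl >> return 42)
+-- >     == (42, [lbl])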
+
+cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
+cmmBlockConFold block = do
+ let (entry, middle, last) = blockSplit block
+ stmts = blockToList middle
+ stmts' <- mapM cmmStmtConFold stmts
+ last' <- cmmStmtConFold last
+ return $ blockJoin entry (blockFromList stmts') last'
+
+-- This does three optimizations, but they're very quick to check, so we don't
+-- bother turning them off even when the Hoopl code is active. Since
+-- this is on the old Cmm representation, we can't reuse the code either:
+-- * reg = reg --> nop
+-- * if 0 then jump --> nop
+-- * if 1 then jump --> jump
+-- We might be tempted to skip this step entirely if not Opt_PIC, but
+-- there is some PowerPC code for the non-PIC case, which would also
+-- have to be separated.
+cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
+cmmStmtConFold stmt
+ = case stmt of
+ CmmAssign reg src
+ -> do src' <- cmmExprConFold DataReference src
+ return $ case src' of
+ CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
+ new_src -> CmmAssign reg new_src
+
+ CmmStore addr src
+ -> do addr' <- cmmExprConFold DataReference addr
+ src' <- cmmExprConFold DataReference src
+ return $ CmmStore addr' src'
+
+ CmmCall { cml_target = addr }
+ -> do addr' <- cmmExprConFold JumpReference addr
+ return $ stmt { cml_target = addr' }
+
+ CmmUnsafeForeignCall target regs args
+ -> do target' <- case target of
+ ForeignTarget e conv -> do
+ e' <- cmmExprConFold CallReference e
+ return $ ForeignTarget e' conv
+ PrimTarget _ ->
+ return target
+ args' <- mapM (cmmExprConFold DataReference) args
+ return $ CmmUnsafeForeignCall target' regs args'
+
+ CmmCondBranch test true false likely
+ -> do test' <- cmmExprConFold DataReference test
+ return $ case test' of
+ CmmLit (CmmInt 0 _) -> CmmBranch false
+ CmmLit (CmmInt _ _) -> CmmBranch true
+ _other -> CmmCondBranch test' true false likely
+
+ CmmSwitch expr ids
+ -> do expr' <- cmmExprConFold DataReference expr
+ return $ CmmSwitch expr' ids
+
+ other
+ -> return other
+
+cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
+cmmExprConFold referenceKind expr = do
+ dflags <- getDynFlags
+
+ -- With -O1 and greater, the cmmSink pass does constant-folding, so
+ -- we don't need to do it again here.
+ let expr' = if optLevel dflags >= 1
+ then expr
+ else cmmExprCon dflags expr
+
+ cmmExprNative referenceKind expr'
+
+cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
+cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
+cmmExprCon dflags (CmmMachOp mop args)
+ = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
+cmmExprCon _ other = other
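+
+-- For instance (schematic), cmmExprCon lets cmmMachOpFold collapse
+--
+-- > CmmMachOp (MO_Add W64) [CmmLit (CmmInt 1 W64), CmmLit (CmmInt 2 W64)]
+--
+-- into the single literal @CmmLit (CmmInt 3 W64)@.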
+
+-- handles both PIC and non-PIC cases... a very strange mixture
+-- of things to do.
+cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
+cmmExprNative referenceKind expr = do
+ dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ arch = platformArch platform
+ case expr of
+ CmmLoad addr rep
+ -> do addr' <- cmmExprNative DataReference addr
+ return $ CmmLoad addr' rep
+
+ CmmMachOp mop args
+ -> do args' <- mapM (cmmExprNative DataReference) args
+ return $ CmmMachOp mop args'
+
+ CmmLit (CmmBlock id)
+ -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
+ -- we must convert block Ids to CLabels here, because we
+ -- might have to do the PIC transformation. Hence we must
+ -- not modify BlockIds beyond this point.
+
+ CmmLit (CmmLabel lbl)
+ -> do
+ cmmMakeDynamicReference dflags referenceKind lbl
+ CmmLit (CmmLabelOff lbl off)
+ -> do
+ dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
+ -- need to optimize here, since it's late
+ return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
+ dynRef,
+ (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
+ ]
+
+ -- On powerpc (non-PIC), it's easier to jump directly to a label than
+ -- to use the register table, so we replace these registers
+ -- with the corresponding labels:
+ CmmReg (CmmGlobal EagerBlackholeInfo)
+ | arch == ArchPPC && not (positionIndependent dflags)
+ -> cmmExprNative referenceKind $
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
+ CmmReg (CmmGlobal GCEnter1)
+ | arch == ArchPPC && not (positionIndependent dflags)
+ -> cmmExprNative referenceKind $
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
+ CmmReg (CmmGlobal GCFun)
+ | arch == ArchPPC && not (positionIndependent dflags)
+ -> cmmExprNative referenceKind $
+ CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
+
+ other
+ -> return other