Diffstat (limited to 'compiler/GHC/Cmm')
-rw-r--r--  compiler/GHC/Cmm/BlockId.hs                 46
-rw-r--r--  compiler/GHC/Cmm/BlockId.hs-boot             8
-rw-r--r--  compiler/GHC/Cmm/CLabel.hs                1571
-rw-r--r--  compiler/GHC/Cmm/CallConv.hs               212
-rw-r--r--  compiler/GHC/Cmm/CommonBlockElim.hs        320
-rw-r--r--  compiler/GHC/Cmm/ContFlowOpt.hs            451
-rw-r--r--  compiler/GHC/Cmm/Dataflow.hs               441
-rw-r--r--  compiler/GHC/Cmm/Dataflow/Block.hs         329
-rw-r--r--  compiler/GHC/Cmm/Dataflow/Collections.hs   177
-rw-r--r--  compiler/GHC/Cmm/Dataflow/Graph.hs         186
-rw-r--r--  compiler/GHC/Cmm/Dataflow/Label.hs         142
-rw-r--r--  compiler/GHC/Cmm/DebugBlock.hs             546
-rw-r--r--  compiler/GHC/Cmm/Expr.hs                   619
-rw-r--r--  compiler/GHC/Cmm/Graph.hs                  484
-rw-r--r--  compiler/GHC/Cmm/Info.hs                   593
-rw-r--r--  compiler/GHC/Cmm/Info/Build.hs             892
-rw-r--r--  compiler/GHC/Cmm/LayoutStack.hs           1236
-rw-r--r--  compiler/GHC/Cmm/Lexer.x                   368
-rw-r--r--  compiler/GHC/Cmm/Lint.hs                   261
-rw-r--r--  compiler/GHC/Cmm/Liveness.hs                93
-rw-r--r--  compiler/GHC/Cmm/MachOp.hs                 664
-rw-r--r--  compiler/GHC/Cmm/Monad.hs                   59
-rw-r--r--  compiler/GHC/Cmm/Node.hs                   724
-rw-r--r--  compiler/GHC/Cmm/Opt.hs                    423
-rw-r--r--  compiler/GHC/Cmm/Parser.y                 1442
-rw-r--r--  compiler/GHC/Cmm/Pipeline.hs               367
-rw-r--r--  compiler/GHC/Cmm/Ppr.hs                    309
-rw-r--r--  compiler/GHC/Cmm/Ppr/Decl.hs               169
-rw-r--r--  compiler/GHC/Cmm/Ppr/Expr.hs               286
-rw-r--r--  compiler/GHC/Cmm/ProcPoint.hs              496
-rw-r--r--  compiler/GHC/Cmm/Sink.hs                   854
-rw-r--r--  compiler/GHC/Cmm/Switch.hs                 502
-rw-r--r--  compiler/GHC/Cmm/Switch/Implement.hs       116
-rw-r--r--  compiler/GHC/Cmm/Type.hs                   432
-rw-r--r--  compiler/GHC/Cmm/Utils.hs                  607
-rw-r--r--  compiler/GHC/Cmm/cmm-notes                 184
36 files changed, 16609 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/BlockId.hs b/compiler/GHC/Cmm/BlockId.hs
new file mode 100644
index 0000000000..f7f369551b
--- /dev/null
+++ b/compiler/GHC/Cmm/BlockId.hs
@@ -0,0 +1,46 @@
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+{- BlockId module should probably go away completely, being superseded by Label -}
+module GHC.Cmm.BlockId
+ ( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet
+ , newBlockId
+ , blockLbl, infoTblLbl
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.CLabel
+import IdInfo
+import Name
+import Unique
+import UniqSupply
+
+import GHC.Cmm.Dataflow.Label (Label, mkHooplLabel)
+
+----------------------------------------------------------------
+--- Block Ids, their environments, and their sets
+
+{- Note [Unique BlockId]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Although a 'BlockId' is a local label, for reasons of implementation,
+'BlockId's must be unique within an entire compilation unit. The reason
+is that each local label is mapped to an assembly-language label, and in
+most assembly languages a label is visible throughout the entire
+compilation unit in which it appears.
+-}
+
+type BlockId = Label
+
+mkBlockId :: Unique -> BlockId
+mkBlockId unique = mkHooplLabel $ getKey unique
+
+newBlockId :: MonadUnique m => m BlockId
+newBlockId = mkBlockId <$> getUniqueM
+
+blockLbl :: BlockId -> CLabel
+blockLbl label = mkLocalBlockLabel (getUnique label)
+
+infoTblLbl :: BlockId -> CLabel
+infoTblLbl label
+ = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs
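+
+-- A usage sketch (illustrative; 'freshBlockAndLabel' is a hypothetical
+-- name): in any 'MonadUnique' context the helpers above compose as
+--
+-- > freshBlockAndLabel :: MonadUnique m => m (BlockId, CLabel)
+-- > freshBlockAndLabel = do
+-- >   bid <- newBlockId -- draw a fresh unique for the block
+-- >   return (bid, blockLbl bid) -- and its assembly-level label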
diff --git a/compiler/GHC/Cmm/BlockId.hs-boot b/compiler/GHC/Cmm/BlockId.hs-boot
new file mode 100644
index 0000000000..76fd6180a9
--- /dev/null
+++ b/compiler/GHC/Cmm/BlockId.hs-boot
@@ -0,0 +1,8 @@
+module GHC.Cmm.BlockId (BlockId, mkBlockId) where
+
+import GHC.Cmm.Dataflow.Label (Label)
+import Unique (Unique)
+
+type BlockId = Label
+
+mkBlockId :: Unique -> BlockId
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
new file mode 100644
index 0000000000..e84278bf65
--- /dev/null
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -0,0 +1,1571 @@
+-----------------------------------------------------------------------------
+--
+-- Object-file symbols (called CLabel for histerical raisins).
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+{-# LANGUAGE CPP #-}
+
+module GHC.Cmm.CLabel (
+ CLabel, -- abstract type
+ ForeignLabelSource(..),
+ pprDebugCLabel,
+
+ mkClosureLabel,
+ mkSRTLabel,
+ mkInfoTableLabel,
+ mkEntryLabel,
+ mkRednCountsLabel,
+ mkConInfoTableLabel,
+ mkApEntryLabel,
+ mkApInfoTableLabel,
+ mkClosureTableLabel,
+ mkBytesLabel,
+
+ mkLocalBlockLabel,
+ mkLocalClosureLabel,
+ mkLocalInfoTableLabel,
+ mkLocalClosureTableLabel,
+
+ mkBlockInfoTableLabel,
+
+ mkBitmapLabel,
+ mkStringLitLabel,
+
+ mkAsmTempLabel,
+ mkAsmTempDerivedLabel,
+ mkAsmTempEndLabel,
+ mkAsmTempDieLabel,
+
+ mkDirty_MUT_VAR_Label,
+ mkNonmovingWriteBarrierEnabledLabel,
+ mkUpdInfoLabel,
+ mkBHUpdInfoLabel,
+ mkIndStaticInfoLabel,
+ mkMainCapabilityLabel,
+ mkMAP_FROZEN_CLEAN_infoLabel,
+ mkMAP_FROZEN_DIRTY_infoLabel,
+ mkMAP_DIRTY_infoLabel,
+ mkSMAP_FROZEN_CLEAN_infoLabel,
+ mkSMAP_FROZEN_DIRTY_infoLabel,
+ mkSMAP_DIRTY_infoLabel,
+ mkBadAlignmentLabel,
+ mkArrWords_infoLabel,
+ mkSRTInfoLabel,
+
+ mkTopTickyCtrLabel,
+ mkCAFBlackHoleInfoTableLabel,
+ mkRtsPrimOpLabel,
+ mkRtsSlowFastTickyCtrLabel,
+
+ mkSelectorInfoLabel,
+ mkSelectorEntryLabel,
+
+ mkCmmInfoLabel,
+ mkCmmEntryLabel,
+ mkCmmRetInfoLabel,
+ mkCmmRetLabel,
+ mkCmmCodeLabel,
+ mkCmmDataLabel,
+ mkCmmClosureLabel,
+
+ mkRtsApFastLabel,
+
+ mkPrimCallLabel,
+
+ mkForeignLabel,
+ addLabelSize,
+
+ foreignLabelStdcallInfo,
+ isBytesLabel,
+ isForeignLabel,
+ isSomeRODataLabel,
+ isStaticClosureLabel,
+ mkCCLabel, mkCCSLabel,
+
+ DynamicLinkerLabelInfo(..),
+ mkDynamicLinkerLabel,
+ dynamicLinkerLabelInfo,
+
+ mkPicBaseLabel,
+ mkDeadStripPreventer,
+
+ mkHpcTicksLabel,
+
+ -- * Predicates
+ hasCAF,
+ needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel,
+ isMathFun,
+ isCFunctionLabel, isGcPtrLabel, labelDynamic,
+ isLocalCLabel, mayRedirectTo,
+
+ -- * Conversions
+ toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName,
+
+ pprCLabel,
+ isInfoTableLabel,
+ isConInfoTableLabel
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import IdInfo
+import BasicTypes
+import {-# SOURCE #-} GHC.Cmm.BlockId (BlockId, mkBlockId)
+import Packages
+import Module
+import Name
+import Unique
+import PrimOp
+import CostCentre
+import Outputable
+import FastString
+import DynFlags
+import GHC.Platform
+import UniqSet
+import Util
+import PprCore ( {- instances -} )
+
+-- -----------------------------------------------------------------------------
+-- The CLabel type
+
+{- |
+ 'CLabel' is an abstract type that supports the following operations:
+
+ - Pretty printing
+
+ - In a C file, does it need to be declared before use? (i.e. is it
+ guaranteed to be already in scope in the places we need to refer to it?)
+
+ - If it needs to be declared, what type (code or data) should it be
+ declared to have?
+
+ - Is it visible outside this object file or not?
+
+ - Is it "dynamic" (see details below)
+
+ - Eq and Ord, so that we can make sets of CLabels (currently only
+ used in outputting C as far as I can tell, to avoid generating
+ more than one declaration for any given label).
+
+ - Converting an info table label into an entry label.
+
+ CLabel usage is a bit messy in GHC as they are used in a number of different
+ contexts:
+
+ - By the C-- AST to identify labels
+
+ - By the unregisterised C code generator ("PprC") for naming functions (hence
+ the name 'CLabel')
+
+ - By the native and LLVM code generators to identify labels
+
+ For extra fun, each of these uses a slightly different subset of constructors
+ (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and
+ LLVM backends).
+
+ In general, we use 'IdLabel' to represent Haskell things early in the
+ pipeline. However, later optimization passes will often represent blocks they
+ create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the
+ label.
+-}
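+
+-- A small usage sketch (illustrative; 'name' and 'caf' stand for a
+-- 'Name' and its 'CafInfo' obtained elsewhere):
+--
+-- > let info = mkInfoTableLabel name caf -- rendered as "<name>_info"
+-- > let entry = toEntryLbl info -- rendered as "<name>_entry"
+-- > toInfoLbl entry == info -- round trip for plain info/entry labels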
+
+data CLabel
+ = -- | A label related to the definition of a particular Id or Con in a .hs file.
+ IdLabel
+ Name
+ CafInfo
+ IdLabelInfo -- encodes the suffix of the label
+
+ -- | A label from a .cmm file that is not associated with a .hs level Id.
+ | CmmLabel
+ UnitId -- what package the label belongs to.
+ FastString -- identifier giving the prefix of the label
+ CmmLabelInfo -- encodes the suffix of the label
+
+ -- | A label with a baked-in \/ algorithmically generated name that definitely
+ -- comes from the RTS. The code for it must compile into libHSrts.a \/ libHSrts.so.
+ -- If it doesn't have an algorithmically generated name then use a CmmLabel
+ -- instead and give it an appropriate UnitId argument.
+ | RtsLabel
+ RtsLabelInfo
+
+ -- | A label associated with a block. These aren't visible outside of the
+ -- compilation unit in which they are defined. These are generally used to
+ -- name blocks produced by Cmm-to-Cmm passes and the native code generator,
+ -- where we don't have a 'Name' to associate the label to and therefore can't
+ -- use 'IdLabel'.
+ | LocalBlockLabel
+ {-# UNPACK #-} !Unique
+
+ -- | A 'C' (or otherwise foreign) label.
+ --
+ | ForeignLabel
+ FastString -- name of the imported label.
+
+ (Maybe Int) -- possible '@n' suffix for stdcall functions
+ -- When generating C, the '@n' suffix is omitted, but when
+ -- generating assembler we must add it to the label.
+
+ ForeignLabelSource -- what package the foreign label is in.
+
+ FunctionOrData
+
+ -- | Local temporary label used for native (or LLVM) code generation; must not
+ -- appear outside of these contexts. Use primarily for debug information
+ | AsmTempLabel
+ {-# UNPACK #-} !Unique
+
+ -- | A label \"derived\" from another 'CLabel' by the addition of a suffix.
+ -- Must not occur outside of the NCG or LLVM code generators.
+ | AsmTempDerivedLabel
+ CLabel
+ FastString -- suffix
+
+ | StringLitLabel
+ {-# UNPACK #-} !Unique
+
+ | CC_Label CostCentre
+ | CCS_Label CostCentreStack
+
+
+ -- | These labels are generated and used inside the NCG only.
+ -- They are special variants of a label used for dynamic linking
+ -- see module PositionIndependentCode for details.
+ | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel
+
+ -- | This label is generated and used inside the NCG only.
+ -- It is used as a base for PIC calculations on some platforms.
+ -- It takes the form of a local numeric assembler label '1'; and
+ -- is pretty-printed as 1b, referring to the previous definition
+ -- of 1: in the assembler source file.
+ | PicBaseLabel
+
+ -- | A label before an info table to prevent excessive dead-stripping on Darwin
+ | DeadStripPreventer CLabel
+
+
+ -- | Per-module table of tick locations
+ | HpcTicksLabel Module
+
+ -- | Static reference table
+ | SRTLabel
+ {-# UNPACK #-} !Unique
+
+ -- | A bitmap (function or case return)
+ | LargeBitmapLabel
+ {-# UNPACK #-} !Unique
+
+ deriving Eq
+
+-- This is laborious, but necessary. We can't derive Ord because
+-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the
+-- implementation. See Note [No Ord for Unique]
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+instance Ord CLabel where
+ compare (IdLabel a1 b1 c1) (IdLabel a2 b2 c2) =
+ compare a1 a2 `thenCmp`
+ compare b1 b2 `thenCmp`
+ compare c1 c2
+ compare (CmmLabel a1 b1 c1) (CmmLabel a2 b2 c2) =
+ compare a1 a2 `thenCmp`
+ compare b1 b2 `thenCmp`
+ compare c1 c2
+ compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2
+ compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2
+ compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) =
+ compare a1 a2 `thenCmp`
+ compare b1 b2 `thenCmp`
+ compare c1 c2 `thenCmp`
+ compare d1 d2
+ compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2
+ compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) =
+ compare a1 a2 `thenCmp`
+ compare b1 b2
+ compare (StringLitLabel u1) (StringLitLabel u2) =
+ nonDetCmpUnique u1 u2
+ compare (CC_Label a1) (CC_Label a2) =
+ compare a1 a2
+ compare (CCS_Label a1) (CCS_Label a2) =
+ compare a1 a2
+ compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) =
+ compare a1 a2 `thenCmp`
+ compare b1 b2
+ compare PicBaseLabel PicBaseLabel = EQ
+ compare (DeadStripPreventer a1) (DeadStripPreventer a2) =
+ compare a1 a2
+ compare (HpcTicksLabel a1) (HpcTicksLabel a2) =
+ compare a1 a2
+ compare (SRTLabel u1) (SRTLabel u2) =
+ nonDetCmpUnique u1 u2
+ compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) =
+ nonDetCmpUnique u1 u2
+ compare IdLabel{} _ = LT
+ compare _ IdLabel{} = GT
+ compare CmmLabel{} _ = LT
+ compare _ CmmLabel{} = GT
+ compare RtsLabel{} _ = LT
+ compare _ RtsLabel{} = GT
+ compare LocalBlockLabel{} _ = LT
+ compare _ LocalBlockLabel{} = GT
+ compare ForeignLabel{} _ = LT
+ compare _ ForeignLabel{} = GT
+ compare AsmTempLabel{} _ = LT
+ compare _ AsmTempLabel{} = GT
+ compare AsmTempDerivedLabel{} _ = LT
+ compare _ AsmTempDerivedLabel{} = GT
+ compare StringLitLabel{} _ = LT
+ compare _ StringLitLabel{} = GT
+ compare CC_Label{} _ = LT
+ compare _ CC_Label{} = GT
+ compare CCS_Label{} _ = LT
+ compare _ CCS_Label{} = GT
+ compare DynamicLinkerLabel{} _ = LT
+ compare _ DynamicLinkerLabel{} = GT
+ compare PicBaseLabel{} _ = LT
+ compare _ PicBaseLabel{} = GT
+ compare DeadStripPreventer{} _ = LT
+ compare _ DeadStripPreventer{} = GT
+ compare HpcTicksLabel{} _ = LT
+ compare _ HpcTicksLabel{} = GT
+ compare SRTLabel{} _ = LT
+ compare _ SRTLabel{} = GT
+
+-- | Record where a foreign label is stored.
+data ForeignLabelSource
+
+ -- | Label is in a named package
+ = ForeignLabelInPackage UnitId
+
+ -- | Label is in some external, system package that doesn't also
+ -- contain compiled Haskell code, and is not associated with any .hi files.
+ -- We don't have to worry about Haskell code being inlined from
+ -- external packages. It is safe to treat the RTS package as "external".
+ | ForeignLabelInExternalPackage
+
+ -- | Label is in the package currently being compiled.
+ -- This is only used for creating hacky tmp labels during code generation.
+ -- Don't use it in any code that might be inlined across a package boundary
+ -- (ie, core code) else the information will be wrong relative to the
+ -- destination module.
+ | ForeignLabelInThisPackage
+
+ deriving (Eq, Ord)
+
+
+-- | For debugging problems with the CLabel representation.
+-- We can't make a Show instance for CLabel because lots of its components don't have instances.
+-- The regular Outputable instance only shows the label name, and not its other info.
+--
+pprDebugCLabel :: CLabel -> SDoc
+pprDebugCLabel lbl
+ = case lbl of
+ IdLabel _ _ info -> ppr lbl <> (parens $ text "IdLabel"
+ <> whenPprDebug (text ":" <> text (show info)))
+ CmmLabel pkg _name _info
+ -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg)
+
+ RtsLabel{} -> ppr lbl <> (parens $ text "RtsLabel")
+
+ ForeignLabel _name mSuffix src funOrData
+ -> ppr lbl <> (parens $ text "ForeignLabel"
+ <+> ppr mSuffix
+ <+> ppr src
+ <+> ppr funOrData)
+
+ _ -> ppr lbl <> (parens $ text "other CLabel")
+
+
+data IdLabelInfo
+ = Closure -- ^ Label for closure
+ | InfoTable -- ^ Info tables for closures; always read-only
+ | Entry -- ^ Entry point
+ | Slow -- ^ Slow entry point
+
+ | LocalInfoTable -- ^ Like InfoTable but not externally visible
+ | LocalEntry -- ^ Like Entry but not externally visible
+
+ | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id
+
+ | ConEntry -- ^ Constructor entry point
+ | ConInfoTable -- ^ Corresponding info table
+
+ | ClosureTable -- ^ Table of closures for Enum tycons
+
+ | Bytes -- ^ Content of a string literal. See
+ -- Note [Bytes label].
+ | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block
+ -- instead of a closure entry-point.
+ -- See Note [Proc-point local block entry-point].
+
+ deriving (Eq, Ord, Show)
+
+
+data RtsLabelInfo
+ = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- ^ Selector thunks
+ | RtsSelectorEntry Bool{-updatable-} Int{-offset-}
+
+ | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks
+ | RtsApEntry Bool{-updatable-} Int{-arity-}
+
+ | RtsPrimOp PrimOp
+ | RtsApFast FastString -- ^ _fast versions of generic apply
+ | RtsSlowFastTickyCtr String
+
+ deriving (Eq, Ord)
+ -- NOTE: Eq on PtrString compares the pointer only, so this isn't
+ -- a real equality.
+
+
+-- | What type of Cmm label we're dealing with.
+-- Determines the suffix appended to the name when a CLabel.CmmLabel
+-- is pretty printed.
+data CmmLabelInfo
+ = CmmInfo -- ^ misc rts info tables, suffix _info
+ | CmmEntry -- ^ misc rts entry points, suffix _entry
+ | CmmRetInfo -- ^ misc rts ret info tables, suffix _info
+ | CmmRet -- ^ misc rts return points, suffix _ret
+ | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure
+ | CmmCode -- ^ misc rts code
+ | CmmClosure -- ^ closures eg CHARLIKE_closure
+ | CmmPrimCall -- ^ a prim call to some hand written Cmm code
+ deriving (Eq, Ord)
+
+data DynamicLinkerLabelInfo
+ = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt
+ | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo
+ | GotSymbolPtr -- ELF: foo@got
+ | GotSymbolOffset -- ELF: foo@gotoff
+
+ deriving (Eq, Ord)
+
+
+-- -----------------------------------------------------------------------------
+-- Constructing CLabels
+-- -----------------------------------------------------------------------------
+
+-- Constructing IdLabels
+-- These are always local:
+
+mkSRTLabel :: Unique -> CLabel
+mkSRTLabel u = SRTLabel u
+
+mkRednCountsLabel :: Name -> CLabel
+mkRednCountsLabel name =
+ IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE]
+
+-- These have local & (possibly) external variants:
+mkLocalClosureLabel :: Name -> CafInfo -> CLabel
+mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel
+mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel
+mkLocalClosureLabel name c = IdLabel name c Closure
+mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable
+mkLocalClosureTableLabel name c = IdLabel name c ClosureTable
+
+mkClosureLabel :: Name -> CafInfo -> CLabel
+mkInfoTableLabel :: Name -> CafInfo -> CLabel
+mkEntryLabel :: Name -> CafInfo -> CLabel
+mkClosureTableLabel :: Name -> CafInfo -> CLabel
+mkConInfoTableLabel :: Name -> CafInfo -> CLabel
+mkBytesLabel :: Name -> CLabel
+mkClosureLabel name c = IdLabel name c Closure
+mkInfoTableLabel name c = IdLabel name c InfoTable
+mkEntryLabel name c = IdLabel name c Entry
+mkClosureTableLabel name c = IdLabel name c ClosureTable
+mkConInfoTableLabel name c = IdLabel name c ConInfoTable
+mkBytesLabel name = IdLabel name NoCafRefs Bytes
+
+mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel
+mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
+ -- See Note [Proc-point local block entry-point].
+
+-- Constructing Cmm Labels
+mkDirty_MUT_VAR_Label,
+ mkNonmovingWriteBarrierEnabledLabel,
+ mkUpdInfoLabel,
+ mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
+ mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel,
+ mkMAP_DIRTY_infoLabel,
+ mkArrWords_infoLabel,
+ mkTopTickyCtrLabel,
+ mkCAFBlackHoleInfoTableLabel,
+ mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
+ mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel
+mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
+mkNonmovingWriteBarrierEnabledLabel
+ = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData
+mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo
+mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo
+mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo
+mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData
+mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
+mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
+mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo
+mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData
+mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo
+mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo
+mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
+mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
+mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo
+mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry
+
+mkSRTInfoLabel :: Int -> CLabel
+mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo
+ where
+ lbl =
+ case n of
+ 1 -> fsLit "stg_SRT_1"
+ 2 -> fsLit "stg_SRT_2"
+ 3 -> fsLit "stg_SRT_3"
+ 4 -> fsLit "stg_SRT_4"
+ 5 -> fsLit "stg_SRT_5"
+ 6 -> fsLit "stg_SRT_6"
+ 7 -> fsLit "stg_SRT_7"
+ 8 -> fsLit "stg_SRT_8"
+ 9 -> fsLit "stg_SRT_9"
+ 10 -> fsLit "stg_SRT_10"
+ 11 -> fsLit "stg_SRT_11"
+ 12 -> fsLit "stg_SRT_12"
+ 13 -> fsLit "stg_SRT_13"
+ 14 -> fsLit "stg_SRT_14"
+ 15 -> fsLit "stg_SRT_15"
+ 16 -> fsLit "stg_SRT_16"
+ _ -> panic "mkSRTInfoLabel"
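+
+-- A rendering sketch (illustrative; the "_info" suffix comes from
+-- 'pprCLbl' further below):
+--
+-- > mkSRTInfoLabel 3 -- printed as "stg_SRT_3_info"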
+
+-----
+mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel,
+ mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel
+ :: UnitId -> FastString -> CLabel
+
+mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo
+mkCmmEntryLabel pkg str = CmmLabel pkg str CmmEntry
+mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo
+mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet
+mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode
+mkCmmDataLabel pkg str = CmmLabel pkg str CmmData
+mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure
+
+mkLocalBlockLabel :: Unique -> CLabel
+mkLocalBlockLabel u = LocalBlockLabel u
+
+-- Constructing RtsLabels
+mkRtsPrimOpLabel :: PrimOp -> CLabel
+mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop)
+
+mkSelectorInfoLabel :: Bool -> Int -> CLabel
+mkSelectorEntryLabel :: Bool -> Int -> CLabel
+mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off)
+mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off)
+
+mkApInfoTableLabel :: Bool -> Int -> CLabel
+mkApEntryLabel :: Bool -> Int -> CLabel
+mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd off)
+mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off)
+
+
+-- A call to some primitive hand written Cmm code
+mkPrimCallLabel :: PrimCall -> CLabel
+mkPrimCallLabel (PrimCall str pkg)
+ = CmmLabel pkg str CmmPrimCall
+
+
+-- Constructing ForeignLabels
+
+-- | Make a foreign label
+mkForeignLabel
+ :: FastString -- name
+ -> Maybe Int -- size prefix
+ -> ForeignLabelSource -- what package it's in
+ -> FunctionOrData
+ -> CLabel
+
+mkForeignLabel = ForeignLabel
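+
+-- A usage sketch (illustrative): a plain C function from a system
+-- library, with no stdcall size suffix, would be constructed as
+--
+-- > mkForeignLabel (fsLit "memcpy") Nothing ForeignLabelInExternalPackage IsFunction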
+
+
+-- | Update the label size field in a ForeignLabel
+addLabelSize :: CLabel -> Int -> CLabel
+addLabelSize (ForeignLabel str _ src fod) sz
+ = ForeignLabel str (Just sz) src fod
+addLabelSize label _
+ = label
+
+-- | Whether label is a top-level string literal
+isBytesLabel :: CLabel -> Bool
+isBytesLabel (IdLabel _ _ Bytes) = True
+isBytesLabel _lbl = False
+
+-- | Whether label is a non-haskell label (defined in C code)
+isForeignLabel :: CLabel -> Bool
+isForeignLabel (ForeignLabel _ _ _ _) = True
+isForeignLabel _lbl = False
+
+-- | Whether label is a static closure label (can come from haskell or cmm)
+isStaticClosureLabel :: CLabel -> Bool
+-- Closure defined in haskell (.hs)
+isStaticClosureLabel (IdLabel _ _ Closure) = True
+-- Closure defined in cmm
+isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True
+isStaticClosureLabel _lbl = False
+
+-- | Whether label is a .rodata label
+isSomeRODataLabel :: CLabel -> Bool
+-- info table defined in haskell (.hs)
+isSomeRODataLabel (IdLabel _ _ ClosureTable) = True
+isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True
+isSomeRODataLabel (IdLabel _ _ InfoTable) = True
+isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True
+isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True
+-- info table defined in cmm (.cmm)
+isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True
+isSomeRODataLabel _lbl = False
+
+-- | Whether the label points to some kind of info table
+isInfoTableLabel :: CLabel -> Bool
+isInfoTableLabel (IdLabel _ _ InfoTable) = True
+isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True
+isInfoTableLabel (IdLabel _ _ ConInfoTable) = True
+isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True
+isInfoTableLabel _ = False
+
+-- | Whether the label points to a constructor info table
+isConInfoTableLabel :: CLabel -> Bool
+isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True
+isConInfoTableLabel _ = False
+
+-- | Get the label size field from a ForeignLabel
+foreignLabelStdcallInfo :: CLabel -> Maybe Int
+foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info
+foreignLabelStdcallInfo _lbl = Nothing
+
+
+-- Constructing Large*Labels
+mkBitmapLabel :: Unique -> CLabel
+mkBitmapLabel uniq = LargeBitmapLabel uniq
+
+-- Constructing Cost Center Labels
+mkCCLabel :: CostCentre -> CLabel
+mkCCSLabel :: CostCentreStack -> CLabel
+mkCCLabel cc = CC_Label cc
+mkCCSLabel ccs = CCS_Label ccs
+
+mkRtsApFastLabel :: FastString -> CLabel
+mkRtsApFastLabel str = RtsLabel (RtsApFast str)
+
+mkRtsSlowFastTickyCtrLabel :: String -> CLabel
+mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
+
+
+-- Constructing Code Coverage Labels
+mkHpcTicksLabel :: Module -> CLabel
+mkHpcTicksLabel = HpcTicksLabel
+
+
+-- Constructing labels used for dynamic linking
+mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel
+mkDynamicLinkerLabel = DynamicLinkerLabel
+
+dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel)
+dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl)
+dynamicLinkerLabelInfo _ = Nothing
+
+mkPicBaseLabel :: CLabel
+mkPicBaseLabel = PicBaseLabel
+
+
+-- Constructing miscellaneous other labels
+mkDeadStripPreventer :: CLabel -> CLabel
+mkDeadStripPreventer lbl = DeadStripPreventer lbl
+
+mkStringLitLabel :: Unique -> CLabel
+mkStringLitLabel = StringLitLabel
+
+mkAsmTempLabel :: Uniquable a => a -> CLabel
+mkAsmTempLabel a = AsmTempLabel (getUnique a)
+
+mkAsmTempDerivedLabel :: CLabel -> FastString -> CLabel
+mkAsmTempDerivedLabel = AsmTempDerivedLabel
+
+mkAsmTempEndLabel :: CLabel -> CLabel
+mkAsmTempEndLabel l = mkAsmTempDerivedLabel l (fsLit "_end")
+
+-- | Construct a label for a DWARF Debug Information Entity (DIE)
+-- describing another symbol.
+mkAsmTempDieLabel :: CLabel -> CLabel
+mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die")
+
+-- -----------------------------------------------------------------------------
+-- Convert between different kinds of label
+
+toClosureLbl :: CLabel -> CLabel
+toClosureLbl (IdLabel n c _) = IdLabel n c Closure
+toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure
+toClosureLbl l = pprPanic "toClosureLbl" (ppr l)
+
+toSlowEntryLbl :: CLabel -> CLabel
+toSlowEntryLbl (IdLabel n _ BlockInfoTable)
+ = pprPanic "toSlowEntryLbl" (ppr n)
+toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow
+toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l)
+
+toEntryLbl :: CLabel -> CLabel
+toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry
+toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry
+toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n)
+ -- See Note [Proc-point local block entry-point].
+toEntryLbl (IdLabel n c _) = IdLabel n c Entry
+toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry
+toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet
+toEntryLbl l = pprPanic "toEntryLbl" (ppr l)
+
+toInfoLbl :: CLabel -> CLabel
+toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable
+toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable
+toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable
+toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo
+toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo
+toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l)
+
+hasHaskellName :: CLabel -> Maybe Name
+hasHaskellName (IdLabel n _ _) = Just n
+hasHaskellName _ = Nothing
+
+-- -----------------------------------------------------------------------------
+-- Does a CLabel's referent itself refer to a CAF?
+hasCAF :: CLabel -> Bool
+hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE]
+hasCAF (IdLabel _ MayHaveCafRefs _) = True
+hasCAF _ = False
+
+-- Note [ticky for LNE]
+-- ~~~~~~~~~~~~~~~~~~~~~
+
+-- Until 14 Feb 2013, every ticky counter was associated with a
+-- closure. Thus, ticky labels used IdLabel. It is odd that
+-- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label
+-- a reason to add the name to the CAFEnv (and thus eventually the SRT),
+-- but it was harmless because the ticky was only used if the closure
+-- was also.
+--
+-- Since we now have ticky counters for LNEs, it is no longer the case
+-- that every ticky counter has an actual closure. So I changed the
+-- generation of ticky counters' CLabels to not result in their
+-- associated id ending up in the SRT.
+--
+-- NB IdLabel is still appropriate for ticky ids (as opposed to
+-- CmmLabel) because the LNE's counter is still related to an .hs Id,
+-- that Id just isn't for a proper closure.
+
+-- -----------------------------------------------------------------------------
+-- Does a CLabel need declaring before use or not?
+--
+-- See wiki:commentary/compiler/backends/ppr-c#prototypes
+
+needsCDecl :: CLabel -> Bool
+ -- False <=> it's pre-declared; don't bother
+ -- don't bother declaring Bitmap labels, we always make sure
+ -- they are defined before use.
+needsCDecl (SRTLabel _) = True
+needsCDecl (LargeBitmapLabel _) = False
+needsCDecl (IdLabel _ _ _) = True
+needsCDecl (LocalBlockLabel _) = True
+
+needsCDecl (StringLitLabel _) = False
+needsCDecl (AsmTempLabel _) = False
+needsCDecl (AsmTempDerivedLabel _ _) = False
+needsCDecl (RtsLabel _) = False
+
+needsCDecl (CmmLabel pkgId _ _)
+ -- Prototypes for labels defined in the runtime system are imported
+ -- into HC files via includes/Stg.h.
+ | pkgId == rtsUnitId = False
+
+ -- For other labels we inline one into the HC file directly.
+ | otherwise = True
+
+needsCDecl l@(ForeignLabel{}) = not (isMathFun l)
+needsCDecl (CC_Label _) = True
+needsCDecl (CCS_Label _) = True
+needsCDecl (HpcTicksLabel _) = True
+needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel"
+needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel"
+needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer"
+
+-- | If a label is a local block label then return just its 'BlockId', otherwise
+-- 'Nothing'.
+maybeLocalBlockLabel :: CLabel -> Maybe BlockId
+maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq
+maybeLocalBlockLabel _ = Nothing
+
+
+-- | Check whether a label corresponds to a C function that has
+-- a prototype in a system header somewhere, or is built-in
+-- to the C compiler. For these labels we avoid generating our
+-- own C prototypes.
+isMathFun :: CLabel -> Bool
+isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs
+isMathFun _ = False
+
+math_funs :: UniqSet FastString
+math_funs = mkUniqSet [
+ -- _ISOC99_SOURCE
+ (fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"),
+ (fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"),
+ (fsLit "asin"), (fsLit "asinf"), (fsLit "asinl"),
+ (fsLit "asinh"), (fsLit "asinhf"), (fsLit "asinhl"),
+ (fsLit "atan"), (fsLit "atanf"), (fsLit "atanl"),
+ (fsLit "atan2"), (fsLit "atan2f"), (fsLit "atan2l"),
+ (fsLit "atanh"), (fsLit "atanhf"), (fsLit "atanhl"),
+ (fsLit "cbrt"), (fsLit "cbrtf"), (fsLit "cbrtl"),
+ (fsLit "ceil"), (fsLit "ceilf"), (fsLit "ceill"),
+ (fsLit "copysign"), (fsLit "copysignf"), (fsLit "copysignl"),
+ (fsLit "cos"), (fsLit "cosf"), (fsLit "cosl"),
+ (fsLit "cosh"), (fsLit "coshf"), (fsLit "coshl"),
+ (fsLit "erf"), (fsLit "erff"), (fsLit "erfl"),
+ (fsLit "erfc"), (fsLit "erfcf"), (fsLit "erfcl"),
+ (fsLit "exp"), (fsLit "expf"), (fsLit "expl"),
+ (fsLit "exp2"), (fsLit "exp2f"), (fsLit "exp2l"),
+ (fsLit "expm1"), (fsLit "expm1f"), (fsLit "expm1l"),
+ (fsLit "fabs"), (fsLit "fabsf"), (fsLit "fabsl"),
+ (fsLit "fdim"), (fsLit "fdimf"), (fsLit "fdiml"),
+ (fsLit "floor"), (fsLit "floorf"), (fsLit "floorl"),
+ (fsLit "fma"), (fsLit "fmaf"), (fsLit "fmal"),
+ (fsLit "fmax"), (fsLit "fmaxf"), (fsLit "fmaxl"),
+ (fsLit "fmin"), (fsLit "fminf"), (fsLit "fminl"),
+ (fsLit "fmod"), (fsLit "fmodf"), (fsLit "fmodl"),
+ (fsLit "frexp"), (fsLit "frexpf"), (fsLit "frexpl"),
+ (fsLit "hypot"), (fsLit "hypotf"), (fsLit "hypotl"),
+ (fsLit "ilogb"), (fsLit "ilogbf"), (fsLit "ilogbl"),
+ (fsLit "ldexp"), (fsLit "ldexpf"), (fsLit "ldexpl"),
+ (fsLit "lgamma"), (fsLit "lgammaf"), (fsLit "lgammal"),
+ (fsLit "llrint"), (fsLit "llrintf"), (fsLit "llrintl"),
+ (fsLit "llround"), (fsLit "llroundf"), (fsLit "llroundl"),
+ (fsLit "log"), (fsLit "logf"), (fsLit "logl"),
+ (fsLit "log10l"), (fsLit "log10"), (fsLit "log10f"),
+ (fsLit "log1pl"), (fsLit "log1p"), (fsLit "log1pf"),
+ (fsLit "log2"), (fsLit "log2f"), (fsLit "log2l"),
+ (fsLit "logb"), (fsLit "logbf"), (fsLit "logbl"),
+ (fsLit "lrint"), (fsLit "lrintf"), (fsLit "lrintl"),
+ (fsLit "lround"), (fsLit "lroundf"), (fsLit "lroundl"),
+ (fsLit "modf"), (fsLit "modff"), (fsLit "modfl"),
+ (fsLit "nan"), (fsLit "nanf"), (fsLit "nanl"),
+ (fsLit "nearbyint"), (fsLit "nearbyintf"), (fsLit "nearbyintl"),
+ (fsLit "nextafter"), (fsLit "nextafterf"), (fsLit "nextafterl"),
+ (fsLit "nexttoward"), (fsLit "nexttowardf"), (fsLit "nexttowardl"),
+ (fsLit "pow"), (fsLit "powf"), (fsLit "powl"),
+ (fsLit "remainder"), (fsLit "remainderf"), (fsLit "remainderl"),
+ (fsLit "remquo"), (fsLit "remquof"), (fsLit "remquol"),
+ (fsLit "rint"), (fsLit "rintf"), (fsLit "rintl"),
+ (fsLit "round"), (fsLit "roundf"), (fsLit "roundl"),
+ (fsLit "scalbln"), (fsLit "scalblnf"), (fsLit "scalblnl"),
+ (fsLit "scalbn"), (fsLit "scalbnf"), (fsLit "scalbnl"),
+ (fsLit "sin"), (fsLit "sinf"), (fsLit "sinl"),
+ (fsLit "sinh"), (fsLit "sinhf"), (fsLit "sinhl"),
+ (fsLit "sqrt"), (fsLit "sqrtf"), (fsLit "sqrtl"),
+ (fsLit "tan"), (fsLit "tanf"), (fsLit "tanl"),
+ (fsLit "tanh"), (fsLit "tanhf"), (fsLit "tanhl"),
+ (fsLit "tgamma"), (fsLit "tgammaf"), (fsLit "tgammal"),
+ (fsLit "trunc"), (fsLit "truncf"), (fsLit "truncl"),
+ -- ISO C 99 also defines these function-like macros in math.h:
+ -- fpclassify, isfinite, isinf, isnormal, signbit, isgreater,
+ -- isgreaterequal, isless, islessequal, islessgreater, isunordered
+
+ -- additional symbols from _BSD_SOURCE
+ (fsLit "drem"), (fsLit "dremf"), (fsLit "dreml"),
+ (fsLit "finite"), (fsLit "finitef"), (fsLit "finitel"),
+ (fsLit "gamma"), (fsLit "gammaf"), (fsLit "gammal"),
+ (fsLit "isinf"), (fsLit "isinff"), (fsLit "isinfl"),
+ (fsLit "isnan"), (fsLit "isnanf"), (fsLit "isnanl"),
+ (fsLit "j0"), (fsLit "j0f"), (fsLit "j0l"),
+ (fsLit "j1"), (fsLit "j1f"), (fsLit "j1l"),
+ (fsLit "jn"), (fsLit "jnf"), (fsLit "jnl"),
+ (fsLit "lgamma_r"), (fsLit "lgammaf_r"), (fsLit "lgammal_r"),
+ (fsLit "scalb"), (fsLit "scalbf"), (fsLit "scalbl"),
+ (fsLit "significand"), (fsLit "significandf"), (fsLit "significandl"),
+ (fsLit "y0"), (fsLit "y0f"), (fsLit "y0l"),
+ (fsLit "y1"), (fsLit "y1f"), (fsLit "y1l"),
+ (fsLit "yn"), (fsLit "ynf"), (fsLit "ynl"),
+
+ -- These functions are described in IEEE Std 754-2008 -
+ -- Standard for Floating-Point Arithmetic and ISO/IEC TS 18661
+ (fsLit "nextup"), (fsLit "nextupf"), (fsLit "nextupl"),
+ (fsLit "nextdown"), (fsLit "nextdownf"), (fsLit "nextdownl")
+ ]
+
+-- -----------------------------------------------------------------------------
+-- | Is a CLabel visible outside this object file or not?
+-- From the point of view of the code generator, a name is
+-- externally visible if it has to be declared as exported
+-- in the .o file's symbol table; that is, made non-static.
+externallyVisibleCLabel :: CLabel -> Bool -- not C "static"
+externallyVisibleCLabel (StringLitLabel _) = False
+externallyVisibleCLabel (AsmTempLabel _) = False
+externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False
+externallyVisibleCLabel (RtsLabel _) = True
+externallyVisibleCLabel (LocalBlockLabel _) = False
+externallyVisibleCLabel (CmmLabel _ _ _) = True
+externallyVisibleCLabel (ForeignLabel{}) = True
+externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info
+externallyVisibleCLabel (CC_Label _) = True
+externallyVisibleCLabel (CCS_Label _) = True
+externallyVisibleCLabel (DynamicLinkerLabel _ _) = False
+externallyVisibleCLabel (HpcTicksLabel _) = True
+externallyVisibleCLabel (LargeBitmapLabel _) = False
+externallyVisibleCLabel (SRTLabel _) = False
+externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel"
+externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer"
+
+externallyVisibleIdLabel :: IdLabelInfo -> Bool
+externallyVisibleIdLabel LocalInfoTable = False
+externallyVisibleIdLabel LocalEntry = False
+externallyVisibleIdLabel BlockInfoTable = False
+externallyVisibleIdLabel _ = True
+
+-- -----------------------------------------------------------------------------
+-- Finding the "type" of a CLabel
+
+-- For generating correct types in label declarations:
+
+data CLabelType
+ = CodeLabel -- Address of some executable instructions
+ | DataLabel -- Address of data, not a GC ptr
+ | GcPtrLabel -- Address of a (presumably static) GC object
+
+isCFunctionLabel :: CLabel -> Bool
+isCFunctionLabel lbl = case labelType lbl of
+ CodeLabel -> True
+ _other -> False
+
+isGcPtrLabel :: CLabel -> Bool
+isGcPtrLabel lbl = case labelType lbl of
+ GcPtrLabel -> True
+ _other -> False
+
+
+-- | Work out the general type of data at the address of this label
+-- whether it be code, data, or static GC object.
+labelType :: CLabel -> CLabelType
+labelType (IdLabel _ _ info) = idInfoLabelType info
+labelType (CmmLabel _ _ CmmData) = DataLabel
+labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel
+labelType (CmmLabel _ _ CmmCode) = CodeLabel
+labelType (CmmLabel _ _ CmmInfo) = DataLabel
+labelType (CmmLabel _ _ CmmEntry) = CodeLabel
+labelType (CmmLabel _ _ CmmPrimCall) = CodeLabel
+labelType (CmmLabel _ _ CmmRetInfo) = DataLabel
+labelType (CmmLabel _ _ CmmRet) = CodeLabel
+labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel
+labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel
+labelType (RtsLabel (RtsApFast _)) = CodeLabel
+labelType (RtsLabel _) = DataLabel
+labelType (LocalBlockLabel _) = CodeLabel
+labelType (SRTLabel _) = DataLabel
+labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel
+labelType (ForeignLabel _ _ _ IsData) = DataLabel
+labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)"
+labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)"
+labelType (StringLitLabel _) = DataLabel
+labelType (CC_Label _) = DataLabel
+labelType (CCS_Label _) = DataLabel
+labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right?
+labelType PicBaseLabel = DataLabel
+labelType (DeadStripPreventer _) = DataLabel
+labelType (HpcTicksLabel _) = DataLabel
+labelType (LargeBitmapLabel _) = DataLabel
+
+idInfoLabelType :: IdLabelInfo -> CLabelType
+idInfoLabelType info =
+ case info of
+ InfoTable -> DataLabel
+ LocalInfoTable -> DataLabel
+ BlockInfoTable -> DataLabel
+ Closure -> GcPtrLabel
+ ConInfoTable -> DataLabel
+ ClosureTable -> DataLabel
+ RednCounts -> DataLabel
+ Bytes -> DataLabel
+ _ -> CodeLabel
+
+
+-- -----------------------------------------------------------------------------
+
+-- | Is a 'CLabel' defined in the current module being compiled?
+--
+-- Sometimes we can optimise references within a compilation unit in ways that
+-- we couldn't for inter-module references. This provides a conservative
+-- estimate of whether a 'CLabel' lives in the current module.
+isLocalCLabel :: Module -> CLabel -> Bool
+isLocalCLabel this_mod lbl =
+ case lbl of
+ IdLabel name _ _
+ | isInternalName name -> True
+ | otherwise -> nameModule name == this_mod
+ LocalBlockLabel _ -> True
+ _ -> False
+
+-- -----------------------------------------------------------------------------
+
+-- | Does a 'CLabel' need dynamic linkage?
+--
+-- When referring to data in code, we need to know whether
+-- that data resides in a DLL or not. [Win32 only.]
+-- @labelDynamic@ returns @True@ if the label is located
+-- in a DLL, be it a data reference or not.
+labelDynamic :: DynFlags -> Module -> CLabel -> Bool
+labelDynamic dflags this_mod lbl =
+ case lbl of
+ -- is the RTS in a DLL or not?
+ RtsLabel _ ->
+ externalDynamicRefs && (this_pkg /= rtsUnitId)
+
+ IdLabel n _ _ ->
+ isDllName dflags this_mod n
+
+ -- When compiling in the "dyn" way, each package is to be linked into
+ -- its own shared library.
+ CmmLabel pkg _ _
+ | os == OSMinGW32 ->
+ externalDynamicRefs && (this_pkg /= pkg)
+ | otherwise ->
+ gopt Opt_ExternalDynamicRefs dflags
+
+ LocalBlockLabel _ -> False
+
+ ForeignLabel _ _ source _ ->
+ if os == OSMinGW32
+ then case source of
+ -- Foreign label is in some un-named foreign package (or DLL).
+ ForeignLabelInExternalPackage -> True
+
+ -- Foreign label is linked into the same package as the
+ -- source file currently being compiled.
+ ForeignLabelInThisPackage -> False
+
+ -- Foreign label is in some named package.
+ -- When compiling in the "dyn" way, each package is to be
+ -- linked into its own DLL.
+ ForeignLabelInPackage pkgId ->
+ externalDynamicRefs && (this_pkg /= pkgId)
+
+ else -- On Mac OS X and on ELF platforms, false positives are OK,
+ -- so we claim that all foreign imports come from dynamic
+ -- libraries
+ True
+
+ CC_Label cc ->
+ externalDynamicRefs && not (ccFromThisModule cc this_mod)
+
+ -- CCS_Label always contains a CostCentre defined in the current module
+ CCS_Label _ -> False
+
+ HpcTicksLabel m ->
+ externalDynamicRefs && this_mod /= m
+
+ -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves.
+ _ -> False
+ where
+ externalDynamicRefs = gopt Opt_ExternalDynamicRefs dflags
+ os = platformOS (targetPlatform dflags)
+ this_pkg = moduleUnitId this_mod
+
+
+-----------------------------------------------------------------------------
+-- Printing out CLabels.
+
+{-
+Convention:
+
+ <name>_<type>
+
+where <name> is <Module>_<name> for external names and <unique> for
+internal names. <type> is one of the following:
+
+ info Info table
+ srt Static reference table
+ entry Entry code (function, closure)
+ slow Slow entry code (if any)
+ ret Direct return address
+ vtbl Vector table
+ <n>_alt Case alternative (tag n)
+ dflt Default case alternative
+ btm Large bitmap vector
+ closure Static closure
+ con_entry Dynamic Constructor entry code
+ con_info Dynamic Constructor info table
+ static_entry Static Constructor entry code
+ static_info Static Constructor info table
+ sel_info Selector info table
+ sel_entry Selector entry code
+ cc Cost centre
+ ccs Cost centre stack
+
+Many of these distinctions are only for documentation reasons. For
+example, _ret is only distinguished from _entry to make it easy to
+tell whether a code fragment is a return point or a closure/function
+entry.
+
+Note [Closure and info labels]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For a function 'foo', we have:
+ foo_info : Points to the info table describing foo's closure
+ (and entry code for foo with tables next to code)
+ foo_closure : Static (no-free-var) closure only:
+ points to the statically-allocated closure
+
+For a data constructor (such as Just or Nothing), we have:
+ Just_con_info: Info table for the data constructor itself
+ the first word of a heap-allocated Just
+ Just_info: Info table for the *worker function*, an
+ ordinary Haskell function of arity 1 that
+ allocates a (Just x) box:
+ Just = \x -> Just x
+ Just_closure: The closure for this worker
+
+ Nothing_closure: a statically allocated closure for Nothing
+ Nothing_static_info: info table for Nothing_closure
+
+All these must be exported symbols, EXCEPT Just_info. We don't need to
+export this because in other modules we either have
+ * A reference to 'Just'; use Just_closure
+ * A saturated call 'Just x'; allocate using Just_con_info
+Not exporting these Just_info labels reduces the number of symbols
+somewhat.
+
+Note [Bytes label]
+~~~~~~~~~~~~~~~~~~
+For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which
+points to a static data block containing the content of the literal.
+
+Note [Proc-point local block entry-point]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A label for a proc-point local block entry-point has no "_entry" suffix. With
+`infoTblLbl` we derive an info table label from a proc-point block ID. If
+we convert such an info table label into an entry label we must produce
+the label without an "_entry" suffix. So an info table label records
+the fact that it was derived from a block ID in `IdLabelInfo` as
+`BlockInfoTable`.
+
+The info table label and the local block label are both local labels
+and are not externally visible.
+-}
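+
+-- A few concrete renderings, derived from 'pprCLbl' below (the label
+-- names are illustrative):
+--
+-- > pprCLbl (CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo) -- "stg_upd_frame_info"
+-- > pprCLbl (CmmLabel rtsUnitId (fsLit "stg_gc") CmmEntry) -- "stg_gc_entry"
+-- > pprCLbl (RtsLabel (RtsApFast (fsLit "stg_ap_p"))) -- "stg_ap_p_fast"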
+
+instance Outputable CLabel where
+ ppr c = sdocWithDynFlags $ \dynFlags -> pprCLabel dynFlags c
+
+pprCLabel :: DynFlags -> CLabel -> SDoc
+
+pprCLabel _ (LocalBlockLabel u)
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u
+
+pprCLabel dynFlags (AsmTempLabel u)
+ | not (platformUnregisterised $ targetPlatform dynFlags)
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u
+
+pprCLabel dynFlags (AsmTempDerivedLabel l suf)
+ | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags
+ = ptext (asmTempLabelPrefix $ targetPlatform dynFlags)
+ <> case l of AsmTempLabel u -> pprUniqueAlways u
+ LocalBlockLabel u -> pprUniqueAlways u
+ _other -> pprCLabel dynFlags l
+ <> ftext suf
+
+pprCLabel dynFlags (DynamicLinkerLabel info lbl)
+ | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags
+ = pprDynamicLinkerAsmLabel (targetPlatform dynFlags) info lbl
+
+pprCLabel dynFlags PicBaseLabel
+ | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags
+ = text "1b"
+
+pprCLabel dynFlags (DeadStripPreventer lbl)
+ | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags
+ =
+ {-
+ `lbl` can be a temporary label, but we need to ensure that the dsp
+ label stays in the final binary, so we prepend a non-temporary prefix
+ ("dsp_") and an optional `_` (underscore), because this is how
+ non-temporary symbols are marked on some platforms (Darwin)
+ -}
+ maybe_underscore dynFlags $ text "dsp_"
+ <> pprCLabel dynFlags lbl <> text "_dsp"
+
+pprCLabel dynFlags (StringLitLabel u)
+ | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags
+ = pprUniqueAlways u <> ptext (sLit "_str")
+
+pprCLabel dynFlags lbl
+ = getPprStyle $ \ sty ->
+ if platformMisc_ghcWithNativeCodeGen (platformMisc dynFlags) && asmStyle sty
+ then maybe_underscore dynFlags $ pprAsmCLbl (targetPlatform dynFlags) lbl
+ else pprCLbl lbl
+
+maybe_underscore :: DynFlags -> SDoc -> SDoc
+maybe_underscore dynFlags doc =
+ if platformMisc_leadingUnderscore $ platformMisc dynFlags
+ then pp_cSEP <> doc
+ else doc
+
+pprAsmCLbl :: Platform -> CLabel -> SDoc
+pprAsmCLbl platform (ForeignLabel fs (Just sz) _ _)
+ | platformOS platform == OSMinGW32
+ -- In asm mode, we need to put the suffix on a stdcall ForeignLabel.
+ -- (The C compiler does this itself).
+ = ftext fs <> char '@' <> int sz
+pprAsmCLbl _ lbl
+ = pprCLbl lbl
+
+pprCLbl :: CLabel -> SDoc
+pprCLbl (StringLitLabel u)
+ = pprUniqueAlways u <> text "_str"
+
+pprCLbl (SRTLabel u)
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt"
+
+pprCLbl (LargeBitmapLabel u) =
+ tempLabelPrefixOrUnderscore
+ <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm"
+-- Some bitmaps for tuple constructors have a numeric tag (e.g. '7');
+-- until that gets resolved we just force them to start
+-- with a letter so the label will be legal assembly code.
+
+
+pprCLbl (CmmLabel _ str CmmCode) = ftext str
+pprCLbl (CmmLabel _ str CmmData) = ftext str
+pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str
+
+pprCLbl (LocalBlockLabel u) =
+ tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u
+
+pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast"
+
+pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset))
+ = sdocWithDynFlags $ \dflags ->
+ ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags)
+ hcat [text "stg_sel_", text (show offset),
+ ptext (if upd_reqd
+ then (sLit "_upd_info")
+ else (sLit "_noupd_info"))
+ ]
+
+pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset))
+ = sdocWithDynFlags $ \dflags ->
+ ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags)
+ hcat [text "stg_sel_", text (show offset),
+ ptext (if upd_reqd
+ then (sLit "_upd_entry")
+ else (sLit "_noupd_entry"))
+ ]
+
+pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity))
+ = sdocWithDynFlags $ \dflags ->
+ ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags)
+ hcat [text "stg_ap_", text (show arity),
+ ptext (if upd_reqd
+ then (sLit "_upd_info")
+ else (sLit "_noupd_info"))
+ ]
+
+pprCLbl (RtsLabel (RtsApEntry upd_reqd arity))
+ = sdocWithDynFlags $ \dflags ->
+ ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags)
+ hcat [text "stg_ap_", text (show arity),
+ ptext (if upd_reqd
+ then (sLit "_upd_entry")
+ else (sLit "_noupd_entry"))
+ ]
+
+pprCLbl (CmmLabel _ fs CmmInfo)
+ = ftext fs <> text "_info"
+
+pprCLbl (CmmLabel _ fs CmmEntry)
+ = ftext fs <> text "_entry"
+
+pprCLbl (CmmLabel _ fs CmmRetInfo)
+ = ftext fs <> text "_info"
+
+pprCLbl (CmmLabel _ fs CmmRet)
+ = ftext fs <> text "_ret"
+
+pprCLbl (CmmLabel _ fs CmmClosure)
+ = ftext fs <> text "_closure"
+
+pprCLbl (RtsLabel (RtsPrimOp primop))
+ = text "stg_" <> ppr primop
+
+pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat))
+ = text "SLOW_CALL_fast_" <> text pat <> ptext (sLit "_ctr")
+
+pprCLbl (ForeignLabel str _ _ _)
+ = ftext str
+
+pprCLbl (IdLabel name _cafs flavor) =
+ internalNamePrefix name <> ppr name <> ppIdFlavor flavor
+
+pprCLbl (CC_Label cc) = ppr cc
+pprCLbl (CCS_Label ccs) = ppr ccs
+
+pprCLbl (HpcTicksLabel mod)
+ = text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc")
+
+pprCLbl (AsmTempLabel {}) = panic "pprCLbl AsmTempLabel"
+pprCLbl (AsmTempDerivedLabel {})= panic "pprCLbl AsmTempDerivedLabel"
+pprCLbl (DynamicLinkerLabel {}) = panic "pprCLbl DynamicLinkerLabel"
+pprCLbl (PicBaseLabel {}) = panic "pprCLbl PicBaseLabel"
+pprCLbl (DeadStripPreventer {}) = panic "pprCLbl DeadStripPreventer"
+
+ppIdFlavor :: IdLabelInfo -> SDoc
+ppIdFlavor x = pp_cSEP <> text
+ (case x of
+ Closure -> "closure"
+ InfoTable -> "info"
+ LocalInfoTable -> "info"
+ Entry -> "entry"
+ LocalEntry -> "entry"
+ Slow -> "slow"
+ RednCounts -> "ct"
+ ConEntry -> "con_entry"
+ ConInfoTable -> "con_info"
+ ClosureTable -> "closure_tbl"
+ Bytes -> "bytes"
+ BlockInfoTable -> "info"
+ )
+
+
+pp_cSEP :: SDoc
+pp_cSEP = char '_'
+
+
+instance Outputable ForeignLabelSource where
+ ppr fs
+ = case fs of
+ ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId
+ ForeignLabelInThisPackage -> parens $ text "this package"
+ ForeignLabelInExternalPackage -> parens $ text "external package"
+
+internalNamePrefix :: Name -> SDoc
+internalNamePrefix name = getPprStyle $ \ sty ->
+ if asmStyle sty && isRandomGenerated then
+ sdocWithPlatform $ \platform ->
+ ptext (asmTempLabelPrefix platform)
+ else
+ empty
+ where
+ isRandomGenerated = not $ isExternalName name
+
+tempLabelPrefixOrUnderscore :: SDoc
+tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform ->
+ getPprStyle $ \ sty ->
+ if asmStyle sty then
+ ptext (asmTempLabelPrefix platform)
+ else
+ char '_'
+
+-- -----------------------------------------------------------------------------
+-- Machine-dependent knowledge about labels.
+
+asmTempLabelPrefix :: Platform -> PtrString -- for formatting labels
+asmTempLabelPrefix platform = case platformOS platform of
+ OSDarwin -> sLit "L"
+ OSAIX -> sLit "__L" -- follow IBM XL C's convention
+ _ -> sLit ".L"
+
+pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc
+pprDynamicLinkerAsmLabel platform dllInfo lbl =
+ case platformOS platform of
+ OSDarwin
+ | platformArch platform == ArchX86_64 ->
+ case dllInfo of
+ CodeStub -> char 'L' <> ppr lbl <> text "$stub"
+ SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
+ GotSymbolPtr -> ppr lbl <> text "@GOTPCREL"
+ GotSymbolOffset -> ppr lbl
+ | otherwise ->
+ case dllInfo of
+ CodeStub -> char 'L' <> ppr lbl <> text "$stub"
+ SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ OSAIX ->
+ case dllInfo of
+ SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ _ | osElfTarget (platformOS platform) -> elfLabel
+
+ OSMinGW32 ->
+ case dllInfo of
+ SymbolPtr -> text "__imp_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ _ -> panic "pprDynamicLinkerAsmLabel"
+ where
+ elfLabel
+ | platformArch platform == ArchPPC
+ = case dllInfo of
+ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
+ ppr lbl <> text "+32768@plt"
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ | platformArch platform == ArchX86_64
+ = case dllInfo of
+ CodeStub -> ppr lbl <> text "@plt"
+ GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
+ GotSymbolOffset -> ppr lbl
+ SymbolPtr -> text ".LC_" <> ppr lbl
+
+ | platformArch platform == ArchPPC_64 ELF_V1
+ || platformArch platform == ArchPPC_64 ELF_V2
+ = case dllInfo of
+ GotSymbolPtr -> text ".LC_" <> ppr lbl
+ <> text "@toc"
+ GotSymbolOffset -> ppr lbl
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ | otherwise
+ = case dllInfo of
+ CodeStub -> ppr lbl <> text "@plt"
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ GotSymbolPtr -> ppr lbl <> text "@got"
+ GotSymbolOffset -> ppr lbl <> text "@gotoff"
+
+-- Figure out whether `symbol` may serve as an alias
+-- to `target` within one compilation unit.
+--
+-- This is true if any of these holds:
+-- * `target` is a module-internal haskell name.
+-- * `target` is an exported name, but comes from the same
+-- module as `symbol`
+--
+-- These are sufficient conditions for establishing e.g. a
+-- GNU assembly alias ('.equiv' directive). Sadly, there is
+-- no such thing as an alias to an imported symbol (cf.
+-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/).
+-- See Note [emit-time elimination of static indirections].
+--
+-- Precondition is that both labels represent the
+-- same semantic value.
+
+mayRedirectTo :: CLabel -> CLabel -> Bool
+mayRedirectTo symbol target
+ | Just nam <- haskellName
+ , staticClosureLabel
+ , isExternalName nam
+ , Just mod <- nameModule_maybe nam
+ , Just anam <- hasHaskellName symbol
+ , Just amod <- nameModule_maybe anam
+ = amod == mod
+
+ | Just nam <- haskellName
+ , staticClosureLabel
+ , isInternalName nam
+ = True
+
+ | otherwise = False
+ where staticClosureLabel = isStaticClosureLabel target
+ haskellName = hasHaskellName target
+
+
+{-
+Note [emit-time elimination of static indirections]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As described in #15155, certain static values are representationally
+equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers).
+
+ newtype A = A Int
+ {-# NOINLINE a #-}
+ a = A 42
+
+a1_rYB :: Int
+[GblId, Caf=NoCafRefs, Unf=OtherCon []]
+a1_rYB = GHC.Types.I# 42#
+
+a [InlPrag=NOINLINE] :: A
+[GblId, Unf=OtherCon []]
+a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A)
+
+Formerly we created static indirections for these (IND_STATIC), which
+consist of a statically allocated forwarding closure that contains
+the (possibly tagged) indirectee. (See CMM/assembly below.)
+This approach is suboptimal for two reasons:
+ (a) they occupy extra space,
+ (b) they need to be entered in order to obtain the indirectee,
+ thus they cannot be tagged.
+
+Fortunately there is a common case where static indirections can be
+eliminated while emitting assembly (native or LLVM), viz. when the
+indirectee is in the same module (object file) as the symbol that
+points to it. In this case an assembly-level identification can
+be created ('.equiv' directive), and as such the same object will
+be assigned two names in the symbol table. Any of the identified
+symbols can be referenced by a tagged pointer.
+
+Currently the 'mayRedirectTo' predicate gives
+a clue whether a label can be equated with another, already
+emitted, label (which can in turn be an alias). The general mechanism
+is that we identify data (IND_STATIC closures) that are amenable
+to aliasing while pretty-printing the assembly output, and emit the
+'.equiv' directive instead of static data in such a case.
+
+Here is a sketch how the output is massaged:
+
+ Consider
+newtype A = A Int
+{-# NOINLINE a #-}
+a = A 42 -- I# 42# is the indirectee
+ -- 'a' is exported
+
+ results in STG
+
+a1_rXq :: GHC.Types.Int
+[GblId, Caf=NoCafRefs, Unf=OtherCon []] =
+ CCS_DONT_CARE GHC.Types.I#! [42#];
+
+T15155.a [InlPrag=NOINLINE] :: T15155.A
+[GblId, Unf=OtherCon []] =
+ CAF_ccs \ u [] a1_rXq;
+
+ and CMM
+
+[section ""data" . a1_rXq_closure" {
+ a1_rXq_closure:
+ const GHC.Types.I#_con_info;
+ const 42;
+ }]
+
+[section ""data" . T15155.a_closure" {
+ T15155.a_closure:
+ const stg_IND_STATIC_info;
+ const a1_rXq_closure+1;
+ const 0;
+ const 0;
+ }]
+
+The emitted assembly is
+
+#### INDIRECTEE
+a1_rXq_closure: -- module local haskell value
+ .quad GHC.Types.I#_con_info -- an Int
+ .quad 42
+
+#### BEFORE
+.globl T15155.a_closure -- exported newtype wrapped value
+T15155.a_closure:
+ .quad stg_IND_STATIC_info -- the closure info
+ .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag)
+ .quad 0
+ .quad 0
+
+#### AFTER
+.globl T15155.a_closure -- exported newtype wrapped value
+.equiv a1_rXq_closure,T15155.a_closure -- both are shared
+
+The transformation is performed because
+ T15155.a_closure `mayRedirectTo` a1_rXq_closure+1
+returns True.
+-}
diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs
new file mode 100644
index 0000000000..9200daec57
--- /dev/null
+++ b/compiler/GHC/Cmm/CallConv.hs
@@ -0,0 +1,212 @@
+module GHC.Cmm.CallConv (
+ ParamLocation(..),
+ assignArgumentsPos,
+ assignStack,
+ realArgRegsCover
+) where
+
+import GhcPrelude
+
+import GHC.Cmm.Expr
+import GHC.Runtime.Layout
+import GHC.Cmm (Convention(..))
+import GHC.Cmm.Ppr () -- For Outputable instances
+
+import DynFlags
+import GHC.Platform
+import Outputable
+
+-- Calculate the 'GlobalReg' or stack locations for function call
+-- parameters as used by the Cmm calling convention.
+
+data ParamLocation
+ = RegisterParam GlobalReg
+ | StackParam ByteOff
+
+instance Outputable ParamLocation where
+ ppr (RegisterParam g) = ppr g
+ ppr (StackParam p) = ppr p
+
+-- |
+-- Given a list of arguments and a function that tells their types,
+-- return a list showing where each argument is passed.
+--
+assignArgumentsPos :: DynFlags
+ -> ByteOff -- stack offset to start with
+ -> Convention
+ -> (a -> CmmType) -- how to get a type from an arg
+ -> [a] -- args
+ -> (
+ ByteOff -- bytes of stack args
+ , [(a, ParamLocation)] -- args and locations
+ )
+
+assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
+ where
+ regs = case (reps, conv) of
+ (_, NativeNodeCall) -> getRegsWithNode dflags
+ (_, NativeDirectCall) -> getRegsWithoutNode dflags
+ ([_], NativeReturn) -> allRegs dflags
+ (_, NativeReturn) -> getRegsWithNode dflags
+ -- GC calling convention *must* put values in registers
+ (_, GC) -> allRegs dflags
+ (_, Slow) -> nodeOnly
+ -- The calling conventions first assign arguments to registers,
+ -- then switch to the stack when we first run out of registers
+ -- (even if there are still available registers for args of a
+ -- different type). When returning an unboxed tuple, we also
+ -- separate the stack arguments by pointerhood.
+ (reg_assts, stk_args) = assign_regs [] reps regs
+ (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args
+ assignments = reg_assts ++ stk_assts
+
+ assign_regs assts [] _ = (assts, [])
+ assign_regs assts (r:rs) regs | isVecType ty = vec
+ | isFloatType ty = float
+ | otherwise = int
+ where vec = case (w, regs) of
+ (W128, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W256, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+ (W512, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
+ _ -> (assts, (r:rs))
+ float = case (w, regs) of
+ (W32, (vs, fs, ds, ls, s:ss))
+ | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
+ (W32, (vs, f:fs, ds, ls, ss))
+ | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss))
+ (W64, (vs, fs, ds, ls, s:ss))
+ | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
+ (W64, (vs, fs, d:ds, ls, ss))
+ | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
+ _ -> (assts, (r:rs))
+ int = case (w, regs) of
+ (W128, _) -> panic "W128 unsupported register type"
+ (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags)
+ -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss))
+ (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
+ -> k (RegisterParam l, (vs, fs, ds, ls, ss))
+ _ -> (assts, (r:rs))
+ k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
+ ty = arg_ty r
+ w = typeWidth ty
+ gcp | isGcPtrType ty = VGcPtr
+ | otherwise = VNonGcPtr
+ passFloatInXmm = passFloatArgsInXmm dflags
+
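+-- A sketch of the result shape (illustrative only; the actual registers
+-- depend on the platform and convention): passing a GC pointer p and a
+-- word-sized value w when only one vanilla register is still free might
+-- yield
+--
+--   assignArgumentsPos dflags 0 NativeDirectCall (cmmExprType dflags) [p, w]
+--     == (8, [ (p, RegisterParam (VanillaReg 2 VGcPtr))
+--            , (w, StackParam 8) ])
+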
+passFloatArgsInXmm :: DynFlags -> Bool
+passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
+ ArchX86_64 -> True
+ ArchX86 -> False
+ _ -> False
+
+-- We used to spill vector registers to the stack since the LLVM backend didn't
+-- support vector registers in its calling convention. However, this has now
+-- been fixed. This function remains only as a convenient way to re-enable
+-- spilling when debugging code generation.
+passVectorInReg :: Width -> DynFlags -> Bool
+passVectorInReg _ _ = True
+
+assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
+ -> (
+ ByteOff -- bytes of stack args
+ , [(a, ParamLocation)] -- args and locations
+ )
+assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args)
+ where
+ assign_stk offset assts [] = (offset, assts)
+ assign_stk offset assts (r:rs)
+ = assign_stk off' ((r, StackParam off') : assts) rs
+ where w = typeWidth (arg_ty r)
+ off' = offset + size
+          -- Stack arguments always take a whole number of words; unlike
+          -- constructor fields, we never pack them.
+ size = roundUpToWords dflags (widthInBytes w)
+
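+-- For instance (a sketch, assuming a 64-bit target so each word is 8
+-- bytes, and a hypothetical typeOf returning word-sized types), three
+-- arguments starting at offset 0 give
+--
+--   assignStack dflags 0 typeOf [a, b, c]
+--     == (24, [ (a, StackParam 24)
+--             , (b, StackParam 16)
+--             , (c, StackParam 8) ])
+--
+-- i.e. the last argument lands closest to the starting offset.
+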
+-----------------------------------------------------------------------------
+-- Local information about the registers available
+
+type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs.
+ , [GlobalReg] -- floats
+ , [GlobalReg] -- doubles
+ , [GlobalReg] -- longs (int64 and word64)
+ , [Int] -- XMM (floats and doubles)
+ )
+
+-- Vanilla registers can contain pointers, Ints, Chars.
+-- Floats and doubles have separate register supplies.
+--
+-- We take these register supplies from the *real* registers, i.e. those
+-- that are guaranteed to map to machine registers.
+
+getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs
+getRegsWithoutNode dflags =
+ ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags)
+ , realFloatRegs dflags
+ , realDoubleRegs dflags
+ , realLongRegs dflags
+ , realXmmRegNos dflags)
+
+-- getRegsWithNode uses R1/node even if it isn't a register
+getRegsWithNode dflags =
+ ( if null (realVanillaRegs dflags)
+ then [VanillaReg 1]
+ else realVanillaRegs dflags
+ , realFloatRegs dflags
+ , realDoubleRegs dflags
+ , realLongRegs dflags
+ , realXmmRegNos dflags)
+
+allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg]
+allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
+allXmmRegs :: DynFlags -> [Int]
+
+allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags)
+allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags)
+allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags)
+allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags)
+allXmmRegs dflags = regList (mAX_XMM_REG dflags)
+
+realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg]
+realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
+realXmmRegNos :: DynFlags -> [Int]
+
+realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags)
+realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags)
+realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags)
+realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags)
+
+realXmmRegNos dflags
+ | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags)
+ | otherwise = []
+
+regList :: Int -> [Int]
+regList n = [1 .. n]
+
+allRegs :: DynFlags -> AvailRegs
+allRegs dflags = (allVanillaRegs dflags,
+ allFloatRegs dflags,
+ allDoubleRegs dflags,
+ allLongRegs dflags,
+ allXmmRegs dflags)
+
+nodeOnly :: AvailRegs
+nodeOnly = ([VanillaReg 1], [], [], [], [])
+
+-- This returns the set of global registers that *cover* the machine registers
+-- used for argument passing. On platforms where registers can overlap---right
+-- now just x86-64, where Float and Double registers overlap---passing this set
+-- of registers is guaranteed to preserve the contents of all live registers. We
+-- only use this functionality in hand-written C-- code in the RTS.
+realArgRegsCover :: DynFlags -> [GlobalReg]
+realArgRegsCover dflags
+ | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
+ | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realFloatRegs dflags ++
+ realDoubleRegs dflags ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs
new file mode 100644
index 0000000000..86ea0e94e2
--- /dev/null
+++ b/compiler/GHC/Cmm/CommonBlockElim.hs
@@ -0,0 +1,320 @@
+{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-}
+
+module GHC.Cmm.CommonBlockElim
+ ( elimCommonBlocks
+ )
+where
+
+
+import GhcPrelude hiding (iterate, succ, unzip, zip)
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch (eqSwitchTargetWith)
+import GHC.Cmm.ContFlowOpt
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Collections
+import Data.Bits
+import Data.Maybe (mapMaybe)
+import qualified Data.List as List
+import Data.Word
+import qualified Data.Map as M
+import Outputable
+import qualified TrieMap as TM
+import UniqFM
+import Unique
+import Control.Arrow (first, second)
+
+-- -----------------------------------------------------------------------------
+-- Eliminate common blocks
+
+-- If two blocks are identical except for the label on the first node,
+-- then we can eliminate one of the blocks. To ensure that the semantics
+-- of the program are preserved, we have to rewrite each predecessor of the
+-- eliminated block to proceed with the block we keep.
+
+-- The algorithm iterates over the blocks in the graph,
+-- checking whether it has seen another block that is equal modulo labels.
+-- If so, then it adds an entry in a map indicating that the new block
+-- is made redundant by the old block.
+-- Otherwise, it is added to the useful blocks.
+
+-- To avoid comparing every block with every other block repeatedly, we group
+-- them by
+-- * a hash of the block, ignoring labels (explained below)
+-- * the list of outgoing labels
+-- The hash is invariant under relabeling, so we only ever compare within
+-- the same group of blocks.
+--
+-- The list of outgoing labels is updated as we merge blocks (that is why they
+-- are not included in the hash, which we want to calculate only once).
+--
+-- All in all, two blocks should never be compared if they have different
+-- hashes, and at most once otherwise. Previously, we were slower, and people
+-- rightfully complained: #10397
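+--
+-- For example (illustrative Cmm), given
+--
+--      L1: x = 0; goto L3;
+--      L2: x = 0; goto L3;
+--
+-- the bodies of L1 and L2 are equal modulo labels, so L2 is eliminated
+-- and every predecessor of L2 is rewritten to jump to L1 instead.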
+
+-- TODO: Use optimization fuel
+elimCommonBlocks :: CmmGraph -> CmmGraph
+elimCommonBlocks g = replaceLabels env $ copyTicks env g
+ where
+ env = iterate mapEmpty blocks_with_key
+ -- The order of blocks doesn't matter here. While we could use
+ -- revPostorder which drops unreachable blocks this is done in
+ -- ContFlowOpt already which runs before this pass. So we use
+ -- toBlockList since it is faster.
+ groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]]
+ blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups]
+
+-- Invariant: The blocks in the list are pairwise distinct
+-- (so avoid comparing them again)
+type DistinctBlocks = [CmmBlock]
+type Key = [Label]
+type Subst = LabelMap BlockId
+
+-- The outer list groups by hash. We retain this grouping throughout.
+iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst
+iterate subst blocks
+ | mapNull new_substs = subst
+ | otherwise = iterate subst' updated_blocks
+ where
+ grouped_blocks :: [[(Key, [DistinctBlocks])]]
+ grouped_blocks = map groupByLabel blocks
+
+ merged_blocks :: [[(Key, DistinctBlocks)]]
+ (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks
+ where
+ go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db))
+ where
+ (new_subst2, db) = mergeBlockList subst dbs
+
+ subst' = subst `mapUnion` new_substs
+ updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks
+
+-- Combine two lists of blocks.
+-- While they are internally distinct they can still share common blocks.
+mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks)
+mergeBlocks subst existing new = go new
+ where
+ go [] = (mapEmpty, existing)
+ go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of
+ -- This block is a duplicate. Drop it, and add it to the substitution
+ Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs
+ -- This block is not a duplicate, keep it.
+ Nothing -> second (b:) $ go bs
+
+mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks)
+mergeBlockList _ [] = pprPanic "mergeBlockList" empty
+mergeBlockList subst (b:bs) = go mapEmpty b bs
+ where
+ go !new_subst1 b [] = (new_subst1, b)
+ go !new_subst1 b1 (b2:bs) = go new_subst b bs
+ where
+ (new_subst2, b) = mergeBlocks subst b1 b2
+ new_subst = new_subst1 `mapUnion` new_subst2
+
+
+-- -----------------------------------------------------------------------------
+-- Hashing and equality on blocks
+
+-- Below here is mostly boilerplate: hashing blocks ignoring labels,
+-- and comparing blocks modulo a label mapping.
+
+-- To speed up comparisons, we hash each basic block modulo jump labels.
+-- The hashing is a bit arbitrary (the numbers are completely arbitrary),
+-- but it should be fast and good enough.
+
+-- We want to get as many small buckets as possible, as comparing blocks is
+-- expensive. So include as much as possible in the hash. Ideally everything
+-- that is compared with (==) in eqBlockBodyWith.
+
+type HashCode = Int
+
+hash_block :: CmmBlock -> HashCode
+hash_block block =
+ fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32))
+ -- UniqFM doesn't like negative Ints
+ where hash_fst _ h = h
+ hash_mid m h = hash_node m + h `shiftL` 1
+ hash_lst m h = hash_node m + h `shiftL` 1
+
+ hash_node :: CmmNode O x -> Word32
+ hash_node n | dont_care n = 0 -- don't care
+ hash_node (CmmAssign r e) = hash_reg r + hash_e e
+ hash_node (CmmStore e e') = hash_e e + hash_e e'
+ hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as
+ hash_node (CmmBranch _) = 23 -- NB. ignore the label
+ hash_node (CmmCondBranch p _ _ _) = hash_e p
+ hash_node (CmmCall e _ _ _ _ _) = hash_e e
+ hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t
+ hash_node (CmmSwitch e _) = hash_e e
+ hash_node _ = error "hash_node: unknown Cmm node!"
+
+ hash_reg :: CmmReg -> Word32
+ hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397
+ hash_reg (CmmGlobal _) = 19
+
+ hash_e :: CmmExpr -> Word32
+ hash_e (CmmLit l) = hash_lit l
+ hash_e (CmmLoad e _) = 67 + hash_e e
+ hash_e (CmmReg r) = hash_reg r
+ hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check
+ hash_e (CmmRegOff r i) = hash_reg r + cvt i
+ hash_e (CmmStackSlot _ _) = 13
+
+ hash_lit :: CmmLit -> Word32
+ hash_lit (CmmInt i _) = fromInteger i
+ hash_lit (CmmFloat r _) = truncate r
+ hash_lit (CmmVec ls) = hash_list hash_lit ls
+ hash_lit (CmmLabel _) = 119 -- ugh
+ hash_lit (CmmLabelOff _ i) = cvt $ 199 + i
+ hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i
+ hash_lit (CmmBlock _) = 191 -- ugh
+ hash_lit (CmmHighStackMark) = cvt 313
+
+ hash_tgt (ForeignTarget e _) = hash_e e
+ hash_tgt (PrimTarget _) = 31 -- lots of these
+
+ hash_list f = foldl' (\z x -> f x + z) (0::Word32)
+
+ cvt = fromInteger . toInteger
+
+ hash_unique :: Uniquable a => a -> Word32
+ hash_unique = cvt . getKey . getUnique
+
+-- | Ignore these node types for equality
+dont_care :: CmmNode O x -> Bool
+dont_care CmmComment {} = True
+dont_care CmmTick {} = True
+dont_care CmmUnwind {} = True
+dont_care _other = False
+
+-- Utilities: equality and substitution on the graph.
+
+-- Given a map ``subst'' from BlockId -> BlockId, we define equality.
+eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool
+eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid'
+lookupBid :: LabelMap BlockId -> BlockId -> BlockId
+lookupBid subst bid = case mapLookup bid subst of
+ Just bid -> lookupBid subst bid
+ Nothing -> bid
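+
+-- For example (illustrative), with subst = {L1 -> L2, L2 -> L3},
+-- lookupBid subst L1 follows the chain and returns L3.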
+
+-- Middle nodes and expressions can contain BlockIds, in particular in
+-- CmmStackSlot and CmmBlock, so we have to use a special equality for
+-- these.
+--
+eqMiddleWith :: (BlockId -> BlockId -> Bool)
+ -> CmmNode O O -> CmmNode O O -> Bool
+eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2)
+ = r1 == r2 && eqExprWith eqBid e1 e2
+eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
+ = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
+eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
+ (CmmUnsafeForeignCall t2 r2 a2)
+ = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2
+eqMiddleWith _ _ _ = False
+
+eqExprWith :: (BlockId -> BlockId -> Bool)
+ -> CmmExpr -> CmmExpr -> Bool
+eqExprWith eqBid = eq
+ where
+ CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2
+ CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2
+ CmmReg r1 `eq` CmmReg r2 = r1==r2
+ CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2
+ CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2
+ CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2
+ _e1 `eq` _e2 = False
+
+ xs `eqs` ys = eqListWith eq xs ys
+
+ eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2
+ eqLit l1 l2 = l1 == l2
+
+ eqArea Old Old = True
+ eqArea (Young id1) (Young id2) = eqBid id1 id2
+ eqArea _ _ = False
+
+-- Equality on the body of a block, modulo a function mapping block
+-- IDs to block IDs.
+eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool
+eqBlockBodyWith eqBid block block'
+ {-
+ | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True
+ | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False
+ -}
+ = equal
+ where (_,m,l) = blockSplit block
+ nodes = filter (not . dont_care) (blockToList m)
+ (_,m',l') = blockSplit block'
+ nodes' = filter (not . dont_care) (blockToList m')
+
+ equal = eqListWith (eqMiddleWith eqBid) nodes nodes' &&
+ eqLastWith eqBid l l'
+
+
+eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool
+eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2
+eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) =
+ c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2
+eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) =
+ t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2
+eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) =
+ e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2
+eqLastWith _ _ _ = False
+
+eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool
+eqMaybeWith eltEq (Just e) (Just e') = eltEq e e'
+eqMaybeWith _ Nothing Nothing = True
+eqMaybeWith _ _ _ = False
+
+eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool
+eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs
+eqListWith _ [] [] = True
+eqListWith _ _ _ = False
+
+-- | Given a block map, ensure that all "target" blocks are covered by
+-- the same ticks as the respective "source" blocks. This not only
+-- means copying ticks, but also adjusting tick scopes where
+-- necessary.
+copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph
+copyTicks env g
+ | mapNull env = g
+ | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap
+ where -- Reverse block merge map
+ blockMap = toBlockMap g
+ revEnv = mapFoldlWithKey insertRev M.empty env
+ insertRev m k x = M.insertWith (const (k:)) x [k] m
+ -- Copy ticks and scopes into the given block
+ copyTo block = case M.lookup (entryLabel block) revEnv of
+ Nothing -> block
+ Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls
+ copy from to =
+ let ticks = blockTicks from
+ CmmEntry _ scp0 = firstNode from
+ (CmmEntry lbl scp1, code) = blockSplitHead to
+ in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead`
+ foldr blockCons code (map CmmTick ticks)
+
+-- Group by [Label]
+-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap.
+groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])]
+groupByLabel =
+ go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks]))
+ where
+ go !m [] = TM.foldTM (:) m []
+ go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries
+ where --k' = map (getKey . getUnique) k
+ adjust Nothing = Just (k,[v])
+ adjust (Just (_,vs)) = Just (k,v:vs)
+
+groupByInt :: (a -> Int) -> [a] -> [[a]]
+groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs
+ -- See Note [Unique Determinism and code generation]
+ where
+ go m x = alterUFM addEntry m (f x)
+ where
+ addEntry xs = Just $! maybe [x] (x:) xs
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
new file mode 100644
index 0000000000..7765972d02
--- /dev/null
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -0,0 +1,451 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+module GHC.Cmm.ContFlowOpt
+ ( cmmCfgOpts
+ , cmmCfgOptsProc
+ , removeUnreachableBlocksProc
+ , replaceLabels
+ )
+where
+
+import GhcPrelude hiding (succ, unzip, zip)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
+import Maybes
+import Panic
+import Util
+
+import Control.Monad
+
+
+-- Note [What is shortcutting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this Cmm code:
+--
+-- L1: ...
+-- goto L2;
+-- L2: goto L3;
+-- L3: ...
+--
+-- Here L2 is an empty block and contains only an unconditional branch
+-- to L3. In this situation any block that jumps to L2 can jump
+-- directly to L3:
+--
+-- L1: ...
+-- goto L3;
+-- L2: goto L3;
+-- L3: ...
+--
+-- In this situation we say that we shortcut L2 to L3. One of the
+-- consequences of shortcutting is that some blocks of code may become
+-- unreachable (in the example above this is true for L2).
+
+
+-- Note [Control-flow optimisations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- This optimisation does three things:
+--
+-- - If a block finishes in an unconditional branch to another block
+-- and that is the only jump to that block we concatenate the
+-- destination block at the end of the current one.
+--
+-- - If a block finishes in a call whose continuation block is a
+-- goto, then we can shortcut the destination, making the
+-- continuation block the destination of the goto - but see Note
+-- [Shortcut call returns].
+--
+-- - For any block that is not a call we try to shortcut the
+-- destination(s). Additionally, if a block ends with a
+-- conditional branch we try to invert the condition.
+--
+-- Blocks are processed using postorder DFS traversal. A side effect
+-- of determining traversal order with a graph search is elimination
+-- of any blocks that are unreachable.
+--
+-- Transformations are improved by working from the end of the graph
+-- towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
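+--
+-- For example (illustrative), if L1 is the only predecessor of L2, the
+-- first transformation concatenates the two blocks:
+--
+--      L1: ...                         L1: ...
+--          goto L2;          ==>           x = 0;
+--      L2: x = 0;                          goto L3;
+--          goto L3;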
+
+
+-- Note [Shortcut call returns]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
+-- we go, and also a mapping from BlockId to BlockId, representing
+-- continuation labels that we have renamed. This latter mapping is
+-- important because we might shortcut a CmmCall continuation. For
+-- example:
+--
+-- Sp[0] = L
+-- call g returns to L
+-- L: goto M
+-- M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L):
+--
+-- Sp[0] = M
+-- call g returns to M
+-- M: ...
+--
+-- So we keep track of which labels we have renamed and apply the mapping
+-- at the end with replaceLabels.
+
+
+-- Note [Shortcut call returns and proc-points]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this code that you might get from a recursive
+-- let-no-escape:
+--
+-- goto L1
+-- L1:
+-- if (Hp > HpLim) then L2 else L3
+-- L2:
+-- call stg_gc_noregs returns to L4
+-- L4:
+-- goto L1
+-- L3:
+-- ...
+-- goto L1
+--
+-- Then the control-flow optimiser shortcuts L4. But that turns L1
+-- into the call-return proc point, and every iteration of the loop
+-- has to shuffle variables to and from the stack. So we must *not*
+-- shortcut L4.
+--
+-- Moreover not shortcutting call returns is probably fine. If L4 can
+-- concat with its branch target then it will still do so. And we
+-- save some compile time because we don't have to traverse all the
+-- code in replaceLabels.
+--
+-- However, we probably do want to do this if we are splitting proc
+-- points, because L1 will be a proc-point anyway, so merging it with
+-- L4 reduces the number of proc points. Unfortunately recursive
+-- let-no-escapes won't generate very good code with proc-point
+-- splitting on - we should probably compile them to explicitly use
+-- the native calling convention instead.
+
+cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
+cmmCfgOpts split g = fst (blockConcat split g)
+
+cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
+cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
+ where (g', env) = blockConcat split g
+ info' = info{ info_tbls = new_info_tbls }
+ new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
+
+ -- If we changed any labels, then we have to update the info tables
+ -- too, except for the top-level info table because that might be
+ -- referred to by other procs.
+ upd_info (k,info)
+ | Just k' <- mapLookup k env
+ = (k', if k' == g_entry g'
+ then info
+ else info{ cit_lbl = infoTblLbl k' })
+ | otherwise
+ = (k,info)
+cmmCfgOptsProc _ top = top
+
+
+blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
+blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
+ = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
+ where
+ -- We might be able to shortcut the entry BlockId itself.
+ -- Remember to update the shortcut_map, since we also have to
+ -- update the info_tbls mapping now.
+ (new_entry, shortcut_map')
+ | Just entry_blk <- mapLookup entry_id new_blocks
+ , Just dest <- canShortcut entry_blk
+ = (dest, mapInsert entry_id dest shortcut_map)
+ | otherwise
+ = (entry_id, shortcut_map)
+
+    -- blocks are sorted in reverse postorder, but we want to go from the exit
+    -- towards the beginning, so we use foldr below.
+ blocks = revPostorder g
+ blockmap = foldl' (flip addBlock) emptyBody blocks
+
+ -- Accumulator contains three components:
+ -- * map of blocks in a graph
+ -- * map of shortcut labels. See Note [Shortcut call returns]
+ -- * map containing number of predecessors for each block. We discard
+ -- it after we process all blocks.
+ (new_blocks, shortcut_map, _) =
+ foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
+
+    -- Map of predecessors for the initial graph. We increase the number of
+    -- predecessors for the entry block by one to denote that it is the
+    -- target of a jump, even if no block in the current graph jumps
+    -- to it.
+ initialBackEdges = incPreds entry_id (predMap blocks)
+
+ maybe_concat :: CmmBlock
+ -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+ -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+ maybe_concat block (!blocks, !shortcut_map, !backEdges)
+ -- If:
+ -- (1) current block ends with unconditional branch to b' and
+ -- (2) it has exactly one predecessor (namely, current block)
+ --
+ -- Then:
+ -- (1) append b' block at the end of current block
+ -- (2) remove b' from the map of blocks
+ -- (3) remove information about b' from predecessors map
+ --
+ -- Since we know that the block has only one predecessor we call
+ -- mapDelete directly instead of calling decPreds.
+ --
+ -- Note that we always maintain an up-to-date list of predecessors, so
+ -- we can ignore the contents of shortcut_map
+ | CmmBranch b' <- last
+ , hasOnePredecessor b'
+ , Just blk' <- mapLookup b' blocks
+ = let bid' = entryLabel blk'
+ in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
+ , shortcut_map
+ , mapDelete b' backEdges )
+
+ -- If:
+ -- (1) we are splitting proc points (see Note
+ -- [Shortcut call returns and proc-points]) and
+ -- (2) current block is a CmmCall or CmmForeignCall with
+ -- continuation b' and
+ -- (3) we can shortcut that continuation to dest
+ -- Then:
+      --      (1) we change the continuation to point to dest
+ -- (2) create mapping from b' to dest
+ -- (3) increase number of predecessors of dest by 1
+ -- (4) decrease number of predecessors of b' by 1
+ --
+ -- Later we will use replaceLabels to substitute all occurrences of b'
+ -- with dest.
+ | splitting_procs
+ , Just b' <- callContinuation_maybe last
+ , Just blk' <- mapLookup b' blocks
+ , Just dest <- canShortcut blk'
+ = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
+ , mapInsert b' dest shortcut_map
+ , decPreds b' $ incPreds dest backEdges )
+
+ -- If:
+ -- (1) a block does not end with a call
+ -- Then:
+      --     (1) if it ends with a conditional branch, attempt to
+      --         invert the condition
+      --     (2) attempt to shortcut all destination blocks
+      --     (3) if the new successors of the block differ from the old
+      --         ones, update the map of predecessors accordingly
+ --
+      -- A special case of this is a block that ends with an
+      -- unconditional jump to a block that can be shortcut.
+ | Nothing <- callContinuation_maybe last
+ = let oldSuccs = successors last
+ newSuccs = successors rewrite_last
+ in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
+ , shortcut_map
+ , if oldSuccs == newSuccs
+ then backEdges
+ else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
+
+ -- Otherwise don't do anything
+ | otherwise
+ = ( blocks, shortcut_map, backEdges )
+ where
+ (head, last) = blockSplitTail block
+ bid = entryLabel block
+
+ -- Changes continuation of a call to a specified label
+ update_cont dest =
+ case last of
+ CmmCall{} -> last { cml_cont = Just dest }
+ CmmForeignCall{} -> last { succ = dest }
+ _ -> panic "Can't shortcut continuation."
+
+ -- Attempts to shortcut successors of last node
+ shortcut_last = mapSuccessors shortcut last
+ where
+ shortcut l =
+ case mapLookup l blocks of
+ Just b | Just dest <- canShortcut b -> dest
+ _otherwise -> l
+
+ rewrite_last
+ -- Sometimes we can get rid of the conditional completely.
+ | CmmCondBranch _cond t f _l <- shortcut_last
+ , t == f
+ = CmmBranch t
+
+ -- See Note [Invert Cmm conditionals]
+ | CmmCondBranch cond t f l <- shortcut_last
+ , hasOnePredecessor t -- inverting will make t a fallthrough
+ , likelyTrue l || (numPreds f > 1)
+ , Just cond' <- maybeInvertCmmExpr cond
+ = CmmCondBranch cond' f t (invertLikeliness l)
+
+ -- If all jump destinations of a switch go to the
+ -- same target eliminate the switch.
+ | CmmSwitch _expr targets <- shortcut_last
+ , (t:ts) <- switchTargetsToList targets
+ , all (== t) ts
+ = CmmBranch t
+
+ | otherwise
+ = shortcut_last
+
+ likelyTrue (Just True) = True
+ likelyTrue _ = False
+
+ invertLikeliness :: Maybe Bool -> Maybe Bool
+ invertLikeliness = fmap not
+
+ -- Number of predecessors for a block
+ numPreds bid = mapLookup bid backEdges `orElse` 0
+
+ hasOnePredecessor b = numPreds b == 1
+
+{-
+ Note [Invert Cmm conditionals]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The native code generator always produces jumps to the true branch.
+ Falling through to the false branch is however faster. So we try to
+ arrange for that to happen.
+ This means we invert the condition if:
+ * The likely path will become a fallthrough.
+    * We can't guarantee a fallthrough for the false branch, but we can
+      for the true branch.
+
+ In some cases it's faster to avoid inverting when the false branch is likely.
+  However, determining when that is the case is neither easy nor cheap, so
+  for now we always invert, as this produces smaller binaries and code that
+  is, on average, equally fast (measured on an i7-6700K).
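+
+  For example (illustrative), when the true branch is likely and would
+  become a fallthrough,
+
+      if (x > 0) goto Ltrue; else goto Lfalse;
+
+  is rewritten to
+
+      if (x <= 0) goto Lfalse; else goto Ltrue;
+
+  so the jump now targets the unlikely block and the likely block Ltrue
+  is reached by falling through.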
+
+ TODO:
+ There is also the edge case when both branches have multiple predecessors.
+ In this case we could assume that we will end up with a jump for BOTH
+ branches. In this case it might be best to put the likely path in the true
+  branch, especially if there are large numbers of predecessors, as this
+  saves us the jump that is not taken. However, I haven't tested this, and
+  as of early 2018 we almost never generate Cmm where this would apply.
+-}
+
+-- Functions for incrementing and decrementing the number of predecessors. If
+-- decrementing would set the predecessor count to 0, we remove the entry from
+-- the map.
+-- Invariant: if a block has no predecessors it should be dropped from the
+-- graph because it is unreachable. maybe_concat is constructed to maintain
+-- that invariant, but calling replaceLabels may introduce unreachable blocks.
+-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
+-- blocks.
+incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
+incPreds bid edges = mapInsertWith (+) bid 1 edges
+decPreds bid edges = case mapLookup bid edges of
+ Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
+ Just _ -> mapDelete bid edges
+ _ -> edges
+
+
+-- Checks if a block consists only of "goto dest". If it does, then we
+-- return Just dest. See Note [What is shortcutting].
+canShortcut :: CmmBlock -> Maybe BlockId
+canShortcut block
+ | (_, middle, CmmBranch dest) <- blockSplit block
+ , all dont_care $ blockToList middle
+ = Just dest
+ | otherwise
+ = Nothing
+ where dont_care CmmComment{} = True
+ dont_care CmmTick{} = True
+ dont_care _other = False
+
+-- Concatenates two blocks. The first one is assumed to be open on exit, the
+-- second is assumed to be closed on entry (i.e. it has a label attached to
+-- it, which the splice function removes by calling snd on the result of
+-- blockSplitHead).
+splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
+splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
+ where (CmmEntry lbl sc0, code0) = blockSplitHead head
+ (CmmEntry _ sc1, code1) = blockSplitHead rest
+ entry = CmmEntry lbl (combineTickScopes sc0 sc1)
+
+-- If node is a call with continuation call return Just label of that
+-- continuation. Otherwise return Nothing.
+callContinuation_maybe :: CmmNode O C -> Maybe BlockId
+callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
+callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
+callContinuation_maybe _ = Nothing
+
+
+-- Map over the CmmGraph, replacing each label with its mapping in the
+-- supplied LabelMap.
+replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
+replaceLabels env g
+ | mapNull env = g
+ | otherwise = replace_eid $ mapGraphNodes1 txnode g
+ where
+ replace_eid g = g {g_entry = lookup (g_entry g)}
+ lookup id = mapLookup id env `orElse` id
+
+ txnode :: CmmNode e x -> CmmNode e x
+ txnode (CmmBranch bid) = CmmBranch (lookup bid)
+ txnode (CmmCondBranch p t f l) =
+ mkCmmCondBranch (exp p) (lookup t) (lookup f) l
+ txnode (CmmSwitch e ids) =
+ CmmSwitch (exp e) (mapSwitchTargets lookup ids)
+ txnode (CmmCall t k rg a res r) =
+ CmmCall (exp t) (liftM lookup k) rg a res r
+ txnode fc@CmmForeignCall{} =
+ fc{ args = map exp (args fc), succ = lookup (succ fc) }
+ txnode other = mapExpDeep exp other
+
+ exp :: CmmExpr -> CmmExpr
+ exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
+ exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
+ exp e = e
+
+mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
+mkCmmCondBranch p t f l =
+ if t == f then CmmBranch t else CmmCondBranch p t f l
+
+-- Build a map from a block to its set of predecessors.
+predMap :: [CmmBlock] -> LabelMap Int
+predMap blocks = foldr add_preds mapEmpty blocks
+ where
+ add_preds block env = foldr add env (successors block)
+ where add lbl env = mapInsertWith (+) lbl 1 env
+
+-- Removing unreachable blocks
+removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
+removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
+ | used_blocks `lengthLessThan` mapSize (toBlockMap g)
+ = CmmProc info' lbl live g'
+ | otherwise
+ = proc
+ where
+ g' = ofBlockList (g_entry g) used_blocks
+ info' = info { info_tbls = keep_used (info_tbls info) }
+     -- Remove any info_tbls for unreachable blocks
+
+ keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
+ keep_used bs = mapFoldlWithKey keep mapEmpty bs
+
+ keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
+ keep env l i | l `setMember` used_lbls = mapInsert l i env
+ | otherwise = env
+
+ used_blocks :: [CmmBlock]
+ used_blocks = revPostorder g
+
+ used_lbls :: LabelSet
+ used_lbls = setFromList $ map entryLabel used_blocks
diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs
new file mode 100644
index 0000000000..fcabb1df0f
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow.hs
@@ -0,0 +1,441 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+
+--
+-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones,
+-- and Norman Ramsey
+--
+-- Modifications copyright (c) The University of Glasgow 2012
+--
+-- This module is a specialised and optimised version of
+-- Compiler.Hoopl.Dataflow in the hoopl package. In particular it is
+-- specialised to the UniqSM monad.
+--
+
+module GHC.Cmm.Dataflow
+ ( C, O, Block
+ , lastNode, entryLabel
+ , foldNodesBwdOO
+ , foldRewriteNodesBwdOO
+ , DataflowLattice(..), OldFact(..), NewFact(..), JoinedFact(..)
+ , TransferFun, RewriteFun
+ , Fact, FactBase
+ , getFact, mkFactBase
+ , analyzeCmmFwd, analyzeCmmBwd
+ , rewriteCmmBwd
+ , changedIf
+ , joinOutFacts
+ , joinFacts
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm
+import UniqSupply
+
+import Data.Array
+import Data.Maybe
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+
+type family Fact (x :: Extensibility) f :: *
+type instance Fact C f = FactBase f
+type instance Fact O f = f
+
+newtype OldFact a = OldFact a
+
+newtype NewFact a = NewFact a
+
+-- | The result of joining OldFact and NewFact.
+data JoinedFact a
+ = Changed !a -- ^ Result is different than OldFact.
+ | NotChanged !a -- ^ Result is the same as OldFact.
+
+getJoined :: JoinedFact a -> a
+getJoined (Changed a) = a
+getJoined (NotChanged a) = a
+
+changedIf :: Bool -> a -> JoinedFact a
+changedIf True = Changed
+changedIf False = NotChanged
+
+type JoinFun a = OldFact a -> NewFact a -> JoinedFact a
+
+data DataflowLattice a = DataflowLattice
+ { fact_bot :: a
+ , fact_join :: JoinFun a
+ }
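+
+-- For example (a sketch, not part of this module), a reachability
+-- lattice over label sets could be written as
+--
+--   reachedLattice :: DataflowLattice LabelSet
+--   reachedLattice = DataflowLattice
+--     { fact_bot  = setEmpty
+--     , fact_join = \(OldFact old) (NewFact new) ->
+--         let joined = setUnion new old
+--         in changedIf (setSize joined > setSize old) joined
+--     }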
+
+data Direction = Fwd | Bwd
+
+type TransferFun f = CmmBlock -> FactBase f -> FactBase f
+
+-- | Function for rewriting and analysis combined. To be used with
+-- @rewriteCmm@.
+--
+-- Currently set to work with @UniqSM@ monad, but we could probably abstract
+-- that away (if we do that, we might want to specialize the fixpoint algorithms
+-- to the particular monads through SPECIALIZE).
+type RewriteFun f = CmmBlock -> FactBase f -> UniqSM (CmmBlock, FactBase f)
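+
+-- A typical use of the entry points below (a sketch; liveLattice and
+-- liveTransfer are hypothetical) is
+--
+--   analyzeCmmBwd liveLattice liveTransfer graph mapEmpty
+--
+-- which iterates to a fixed point and returns the facts for every
+-- reachable block.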
+
+analyzeCmmBwd, analyzeCmmFwd
+ :: DataflowLattice f
+ -> TransferFun f
+ -> CmmGraph
+ -> FactBase f
+ -> FactBase f
+analyzeCmmBwd = analyzeCmm Bwd
+analyzeCmmFwd = analyzeCmm Fwd
+
+analyzeCmm
+ :: Direction
+ -> DataflowLattice f
+ -> TransferFun f
+ -> CmmGraph
+ -> FactBase f
+ -> FactBase f
+analyzeCmm dir lattice transfer cmmGraph initFact =
+ {-# SCC analyzeCmm #-}
+ let entry = g_entry cmmGraph
+ hooplGraph = g_graph cmmGraph
+ blockMap =
+ case hooplGraph of
+ GMany NothingO bm NothingO -> bm
+ in fixpointAnalysis dir lattice transfer entry blockMap initFact
+
+-- Fixpoint algorithm.
+fixpointAnalysis
+ :: forall f.
+ Direction
+ -> DataflowLattice f
+ -> TransferFun f
+ -> Label
+ -> LabelMap CmmBlock
+ -> FactBase f
+ -> FactBase f
+fixpointAnalysis direction lattice do_block entry blockmap = loop start
+ where
+ -- Sorting the blocks helps to minimize the number of times we need to
+ -- process blocks. For instance, for forward analysis we want to look at
+ -- blocks in reverse postorder. Also, see comments for sortBlocks.
+ blocks = sortBlocks direction entry blockmap
+ num_blocks = length blocks
+ block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
+ start = {-# SCC "start" #-} IntSet.fromDistinctAscList
+ [0 .. num_blocks - 1]
+ dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+ join = fact_join lattice
+
+ loop
+ :: IntHeap -- ^ Worklist, i.e., blocks to process
+ -> FactBase f -- ^ Current result (increases monotonically)
+ -> FactBase f
+ loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo =
+ let block = block_arr ! index
+ out_facts = {-# SCC "do_block" #-} do_block block fbase1
+ -- For each of the outgoing edges, we join it with the current
+ -- information in fbase1 and (if something changed) we update it
+ -- and add the affected blocks to the worklist.
+ (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-}
+ mapFoldlWithKey
+ (updateFact join dep_blocks) (todo1, fbase1) out_facts
+ in loop todo2 fbase2
+ loop _ !fbase1 = fbase1
+
+rewriteCmmBwd
+ :: DataflowLattice f
+ -> RewriteFun f
+ -> CmmGraph
+ -> FactBase f
+ -> UniqSM (CmmGraph, FactBase f)
+rewriteCmmBwd = rewriteCmm Bwd
+
+rewriteCmm
+ :: Direction
+ -> DataflowLattice f
+ -> RewriteFun f
+ -> CmmGraph
+ -> FactBase f
+ -> UniqSM (CmmGraph, FactBase f)
+rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do
+ let entry = g_entry cmmGraph
+ hooplGraph = g_graph cmmGraph
+ blockMap1 =
+ case hooplGraph of
+ GMany NothingO bm NothingO -> bm
+ (blockMap2, facts) <-
+ fixpointRewrite dir lattice rwFun entry blockMap1 initFact
+ return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts)
+
+fixpointRewrite
+ :: forall f.
+ Direction
+ -> DataflowLattice f
+ -> RewriteFun f
+ -> Label
+ -> LabelMap CmmBlock
+ -> FactBase f
+ -> UniqSM (LabelMap CmmBlock, FactBase f)
+fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap
+ where
+ -- Sorting the blocks helps to minimize the number of times we need to
+ -- process blocks. For instance, for forward analysis we want to look at
+ -- blocks in reverse postorder. Also, see comments for sortBlocks.
+ blocks = sortBlocks dir entry blockmap
+ num_blocks = length blocks
+ block_arr = {-# SCC "block_arr_rewrite" #-}
+ listArray (0, num_blocks - 1) blocks
+ start = {-# SCC "start_rewrite" #-}
+ IntSet.fromDistinctAscList [0 .. num_blocks - 1]
+ dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks
+ join = fact_join lattice
+
+ loop
+ :: IntHeap -- ^ Worklist, i.e., blocks to process
+ -> LabelMap CmmBlock -- ^ Rewritten blocks.
+ -> FactBase f -- ^ Current facts.
+ -> UniqSM (LabelMap CmmBlock, FactBase f)
+ loop todo !blocks1 !fbase1
+ | Just (index, todo1) <- IntSet.minView todo = do
+ -- Note that we use the *original* block here. This is important.
+ -- We're optimistically rewriting blocks even before reaching the fixed
+ -- point, which means that the rewrite might be incorrect. So if the
+ -- facts change, we need to rewrite the original block again (taking
+ -- into account the new facts).
+ let block = block_arr ! index
+ (new_block, out_facts) <- {-# SCC "do_block_rewrite" #-}
+ do_block block fbase1
+ let blocks2 = mapInsert (entryLabel new_block) new_block blocks1
+ (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-}
+ mapFoldlWithKey
+ (updateFact join dep_blocks) (todo1, fbase1) out_facts
+ loop todo2 blocks2 fbase2
+ loop _ !blocks1 !fbase1 = return (blocks1, fbase1)
+
+
+{-
+Note [Unreachable blocks]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A block that is not in the domain of tfb_fbase is "currently unreachable".
+A currently-unreachable block is not even analyzed. Reason: consider
+constant prop and this graph, with entry point L1:
+ L1: x:=3; goto L4
+ L2: x:=4; goto L4
+ L4: if x>3 goto L2 else goto L5
+Here L2 is actually unreachable, but if we process it with bottom input fact,
+we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
+
+* If a currently-unreachable block is not analyzed, then its rewritten
+ graph will not be accumulated in tfb_rg. And that is good:
+ unreachable blocks simply do not appear in the output.
+
+* Note that clients must be careful to provide a fact (even if bottom)
+ for each entry point. Otherwise useful blocks may be garbage collected.
+
+* Note that updateFact must set the change-flag if a label goes from
+ not-in-fbase to in-fbase, even if its fact is bottom. In effect the
+ real fact lattice is
+ UNR
+ bottom
+ the points above bottom
+
+* Even if the fact is going from UNR to bottom, we still call the
+ client's fact_join function because it might give the client
+ some useful debugging information.
+
+* All of this only applies for *forward* fixpoints. For the backward
+ case we must treat every block as reachable; it might finish with a
+ 'return', and therefore have no successors, for example.
+-}
+
+
+-----------------------------------------------------------------------------
+-- Pieces that are shared by fixpointAnalysis and fixpointRewrite
+-----------------------------------------------------------------------------
+
+-- | Sort the blocks into the right order for analysis. This means reverse
+-- postorder for a forward analysis. For the backward one, we simply reverse
+-- that (see Note [Backward vs forward analysis]).
+sortBlocks
+ :: NonLocal n
+ => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C]
+sortBlocks direction entry blockmap =
+ case direction of
+ Fwd -> fwd
+ Bwd -> reverse fwd
+ where
+ fwd = revPostorderFrom blockmap entry
+
+-- Note [Backward vs forward analysis]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- The forward and backward cases are not dual. In the forward case, the entry
+-- points are known, and one simply traverses the body blocks from those points.
+-- In the backward case, something is known about the exit points, but a
+-- backward analysis must also include reachable blocks that don't reach the
+-- exit, as in a procedure that loops forever and has side effects.
+-- For instance, let E be the entry and X the exit blocks (arrows indicate
+-- control flow)
+-- E -> X
+-- E -> B
+-- B -> C
+-- C -> B
+-- We do need to include B and C even though they're unreachable in the
+-- *reverse* graph (that we could use for backward analysis):
+-- E <- X
+-- E <- B
+-- B <- C
+-- C <- B
+-- So when sorting the blocks for the backward analysis, we simply take the
+-- reverse of what is used for the forward one.
+
+
+-- | Construct a mapping from a @Label@ to the block indexes that should be
+-- re-analyzed if the facts at that @Label@ change.
+--
+-- Note that we're considering here the entry point of the block, so if the
+-- facts change at the entry:
+-- * for a backward analysis we need to re-analyze all the predecessors, but
+-- * for a forward analysis, we only need to re-analyze the current block
+-- (and that will in turn propagate facts into its successors).
+mkDepBlocks :: Direction -> [CmmBlock] -> LabelMap IntSet
+mkDepBlocks Fwd blocks = go blocks 0 mapEmpty
+ where
+ go [] !_ !dep_map = dep_map
+ go (b:bs) !n !dep_map =
+ go bs (n + 1) $ mapInsert (entryLabel b) (IntSet.singleton n) dep_map
+mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
+ where
+ go [] !_ !dep_map = dep_map
+ go (b:bs) !n !dep_map =
+ let insert m l = mapInsertWith IntSet.union l (IntSet.singleton n) m
+ in go bs (n + 1) $ foldl' insert dep_map (successors b)
+
+-- | After some new facts have been generated by analysing a block, we
+-- fold this function over them to generate (a) a list of block
+-- indices to (re-)analyse, and (b) the new FactBase.
+updateFact
+ :: JoinFun f
+ -> LabelMap IntSet
+ -> (IntHeap, FactBase f)
+ -> Label
+ -> f -- out fact
+ -> (IntHeap, FactBase f)
+updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
+ = case lookupFact lbl fbase of
+ Nothing ->
+ -- Note [No old fact]
+ let !z = mapInsert lbl new_fact fbase in (changed, z)
+ Just old_fact ->
+ case fact_join (OldFact old_fact) (NewFact new_fact) of
+ (NotChanged _) -> (todo, fbase)
+ (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z)
+ where
+ changed = todo `IntSet.union`
+ mapFindWithDefault IntSet.empty lbl dep_blocks
+
+{-
+Note [No old fact]
+~~~~~~~~~~~~~~~~~~
+
+We know that the new_fact is >= _|_, so we don't need to join. However,
+if the new fact is also _|_, and we have already analysed its block,
+we don't need to record a change. So there's a tradeoff here. It turns
+out that always recording a change is faster.
+-}
+
+----------------------------------------------------------------
+-- Utilities
+----------------------------------------------------------------
+
+-- Fact lookup: the fact `orelse` bottom
+getFact :: DataflowLattice f -> Label -> FactBase f -> f
+getFact lat l fb = case lookupFact l fb of Just f -> f
+ Nothing -> fact_bot lat
+
+-- | Returns the result of joining the facts from all the successors of the
+-- provided node or block.
+joinOutFacts :: (NonLocal n) => DataflowLattice f -> n e C -> FactBase f -> f
+joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts
+ where
+ join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new)
+ facts =
+ [ fromJust fact
+ | s <- successors nonLocal
+ , let fact = lookupFact s fact_base
+ , isJust fact
+ ]
+
+joinFacts :: DataflowLattice f -> [f] -> f
+joinFacts lattice facts = foldl' join (fact_bot lattice) facts
+ where
+ join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new)
+
+-- | Returns the joined facts for each label.
+mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f
+mkFactBase lattice = foldl' add mapEmpty
+ where
+ join = fact_join lattice
+
+ add result (l, f1) =
+ let !newFact =
+ case mapLookup l result of
+ Nothing -> f1
+ Just f2 -> getJoined $ join (OldFact f1) (NewFact f2)
+ in mapInsert l newFact result
+
+-- | Folds backward over all nodes of an open-open block.
+-- Strict in the accumulator.
+foldNodesBwdOO :: (CmmNode O O -> f -> f) -> Block CmmNode O O -> f -> f
+foldNodesBwdOO funOO = go
+ where
+ go (BCat b1 b2) f = go b1 $! go b2 f
+ go (BSnoc h n) f = go h $! funOO n f
+ go (BCons n t) f = funOO n $! go t f
+ go (BMiddle n) f = funOO n f
+ go BNil f = f
+{-# INLINABLE foldNodesBwdOO #-}
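+
+-- For example (a sketch): counting the nodes of an open-open block can
+-- be written as
+--
+--   countNodes :: Block CmmNode O O -> Int
+--   countNodes b = foldNodesBwdOO (\_node n -> n + 1) b 0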
+
+-- | Folds backward over all the nodes of an open-open block and allows
+-- rewriting them. The accumulator is both the block of nodes and @f@ (usually
+-- dataflow facts).
+-- Strict in both accumulated parts.
+foldRewriteNodesBwdOO
+ :: forall f.
+ (CmmNode O O -> f -> UniqSM (Block CmmNode O O, f))
+ -> Block CmmNode O O
+ -> f
+ -> UniqSM (Block CmmNode O O, f)
+foldRewriteNodesBwdOO rewriteOO initBlock initFacts = go initBlock initFacts
+ where
+ go (BCons node1 block1) !fact1 = (rewriteOO node1 `comp` go block1) fact1
+ go (BSnoc block1 node1) !fact1 = (go block1 `comp` rewriteOO node1) fact1
+ go (BCat blockA1 blockB1) !fact1 = (go blockA1 `comp` go blockB1) fact1
+ go (BMiddle node) !fact1 = rewriteOO node fact1
+ go BNil !fact = return (BNil, fact)
+
+ comp rew1 rew2 = \f1 -> do
+ (b, f2) <- rew2 f1
+ (a, !f3) <- rew1 f2
+ let !c = joinBlocksOO a b
+ return (c, f3)
+ {-# INLINE comp #-}
+{-# INLINABLE foldRewriteNodesBwdOO #-}
+
+joinBlocksOO :: Block n O O -> Block n O O -> Block n O O
+joinBlocksOO BNil b = b
+joinBlocksOO b BNil = b
+joinBlocksOO (BMiddle n) b = blockCons n b
+joinBlocksOO b (BMiddle n) = blockSnoc b n
+joinBlocksOO b1 b2 = BCat b1 b2
+
+type IntHeap = IntSet
diff --git a/compiler/GHC/Cmm/Dataflow/Block.hs b/compiler/GHC/Cmm/Dataflow/Block.hs
new file mode 100644
index 0000000000..d2e52a8904
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow/Block.hs
@@ -0,0 +1,329 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE PolyKinds #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeFamilies #-}
+module GHC.Cmm.Dataflow.Block
+ ( Extensibility (..)
+ , O
+ , C
+ , MaybeO(..)
+ , IndexedCO
+ , Block(..)
+ , blockAppend
+ , blockCons
+ , blockFromList
+ , blockJoin
+ , blockJoinHead
+ , blockJoinTail
+ , blockSnoc
+ , blockSplit
+ , blockSplitHead
+ , blockSplitTail
+ , blockToList
+ , emptyBlock
+ , firstNode
+ , foldBlockNodesB
+ , foldBlockNodesB3
+ , foldBlockNodesF
+ , isEmptyBlock
+ , lastNode
+ , mapBlock
+ , mapBlock'
+ , mapBlock3'
+ , replaceFirstNode
+ , replaceLastNode
+ ) where
+
+import GhcPrelude
+
+-- -----------------------------------------------------------------------------
+-- Shapes: Open and Closed
+
+-- | Used at the type level to indicate "open" vs "closed" structure.
+data Extensibility
+ -- | An "open" structure with a unique, unnamed control-flow edge flowing in
+ -- or out. "Fallthrough" and concatenation are permitted at an open point.
+ = Open
+ -- | A "closed" structure which supports control transfer only through the use
+ -- of named labels---no "fallthrough" is permitted. The number of control-flow
+ -- edges is unconstrained.
+ | Closed
+
+type O = 'Open
+type C = 'Closed
+
+-- | Either type indexed by closed/open using type families
+type family IndexedCO (ex :: Extensibility) (a :: k) (b :: k) :: k
+type instance IndexedCO C a _b = a
+type instance IndexedCO O _a b = b
+
+-- | Maybe type indexed by open/closed
+data MaybeO ex t where
+ JustO :: t -> MaybeO O t
+ NothingO :: MaybeO C t
+
+-- | Maybe type indexed by closed/open
+data MaybeC ex t where
+ JustC :: t -> MaybeC C t
+ NothingC :: MaybeC O t
+
+deriving instance Functor (MaybeO ex)
+deriving instance Functor (MaybeC ex)
+
+-- -----------------------------------------------------------------------------
+-- The Block type
+
+-- | A sequence of nodes. May be any of four shapes (O/O, O/C, C/O, C/C).
+-- Open at the entry means single entry, mutatis mutandis for exit.
+-- A closed/closed block is a /basic/ block and can't be extended further.
+-- Clients should avoid manipulating blocks and should stick to either nodes
+-- or graphs.
+data Block n e x where
+ BlockCO :: n C O -> Block n O O -> Block n C O
+ BlockCC :: n C O -> Block n O O -> n O C -> Block n C C
+ BlockOC :: Block n O O -> n O C -> Block n O C
+
+ BNil :: Block n O O
+ BMiddle :: n O O -> Block n O O
+ BCat :: Block n O O -> Block n O O -> Block n O O
+ BSnoc :: Block n O O -> n O O -> Block n O O
+ BCons :: n O O -> Block n O O -> Block n O O
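+
+-- For example (illustrative, instantiating n to Cmm nodes), the basic
+-- block
+--
+--      L1: x = 0; goto L2;
+--
+-- is closed on both ends and is represented as
+--
+--      BlockCC <entry node for L1> (BMiddle <x = 0>) <goto L2>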
+
+
+-- -----------------------------------------------------------------------------
+-- Simple operations on Blocks
+
+-- Predicates
+
+isEmptyBlock :: Block n e x -> Bool
+isEmptyBlock BNil = True
+isEmptyBlock (BCat l r) = isEmptyBlock l && isEmptyBlock r
+isEmptyBlock _ = False
+
+
+-- Building
+
+emptyBlock :: Block n O O
+emptyBlock = BNil
+
+blockCons :: n O O -> Block n O x -> Block n O x
+blockCons n b = case b of
+ BlockOC b l -> (BlockOC $! (n `blockCons` b)) l
+ BNil{} -> BMiddle n
+ BMiddle{} -> n `BCons` b
+ BCat{} -> n `BCons` b
+ BSnoc{} -> n `BCons` b
+ BCons{} -> n `BCons` b
+
+blockSnoc :: Block n e O -> n O O -> Block n e O
+blockSnoc b n = case b of
+ BlockCO f b -> BlockCO f $! (b `blockSnoc` n)
+ BNil{} -> BMiddle n
+ BMiddle{} -> b `BSnoc` n
+ BCat{} -> b `BSnoc` n
+ BSnoc{} -> b `BSnoc` n
+ BCons{} -> b `BSnoc` n
+
+blockJoinHead :: n C O -> Block n O x -> Block n C x
+blockJoinHead f (BlockOC b l) = BlockCC f b l
+blockJoinHead f b = BlockCO f BNil `cat` b
+
+blockJoinTail :: Block n e O -> n O C -> Block n e C
+blockJoinTail (BlockCO f b) t = BlockCC f b t
+blockJoinTail b t = b `cat` BlockOC BNil t
+
+blockJoin :: n C O -> Block n O O -> n O C -> Block n C C
+blockJoin f b t = BlockCC f b t
+
+blockAppend :: Block n e O -> Block n O x -> Block n e x
+blockAppend = cat
+
+
+-- Taking apart
+
+firstNode :: Block n C x -> n C O
+firstNode (BlockCO n _) = n
+firstNode (BlockCC n _ _) = n
+
+lastNode :: Block n x C -> n O C
+lastNode (BlockOC _ n) = n
+lastNode (BlockCC _ _ n) = n
+
+blockSplitHead :: Block n C x -> (n C O, Block n O x)
+blockSplitHead (BlockCO n b) = (n, b)
+blockSplitHead (BlockCC n b t) = (n, BlockOC b t)
+
+blockSplitTail :: Block n e C -> (Block n e O, n O C)
+blockSplitTail (BlockOC b n) = (b, n)
+blockSplitTail (BlockCC f b t) = (BlockCO f b, t)
+
+-- | Split a closed block into its entry node, open middle block, and
+-- exit node.
+blockSplit :: Block n C C -> (n C O, Block n O O, n O C)
+blockSplit (BlockCC f b t) = (f, b, t)
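+
+-- A property sketch: 'blockSplit' is the inverse of 'blockJoin', that is,
+-- blockSplit (blockJoin f b t) == (f, b, t).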
+
+blockToList :: Block n O O -> [n O O]
+blockToList b = go b []
+ where go :: Block n O O -> [n O O] -> [n O O]
+ go BNil r = r
+ go (BMiddle n) r = n : r
+ go (BCat b1 b2) r = go b1 $! go b2 r
+ go (BSnoc b1 n) r = go b1 (n:r)
+ go (BCons n b1) r = n : go b1 r
+
+blockFromList :: [n O O] -> Block n O O
+blockFromList = foldr BCons BNil
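+
+-- An illustrative sketch (hypothetical helper, not part of this module):
+-- an O/O block is interconvertible with a list of middle nodes, so a map
+-- over the node list is a map over the open/open block.
+mapMiddles :: (n O O -> n' O O) -> Block n O O -> Block n' O O
+mapMiddles f = blockFromList . map f . blockToList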
+
+-- Modifying
+
+replaceFirstNode :: Block n C x -> n C O -> Block n C x
+replaceFirstNode (BlockCO _ b) f = BlockCO f b
+replaceFirstNode (BlockCC _ b n) f = BlockCC f b n
+
+replaceLastNode :: Block n x C -> n O C -> Block n x C
+replaceLastNode (BlockOC b _) n = BlockOC b n
+replaceLastNode (BlockCC l b _) n = BlockCC l b n
+
+-- -----------------------------------------------------------------------------
+-- General concatenation
+
+cat :: Block n e O -> Block n O x -> Block n e x
+cat x y = case x of
+ BNil -> y
+
+ BlockCO l b1 -> case y of
+ BlockOC b2 n -> (BlockCC l $! (b1 `cat` b2)) n
+ BNil -> x
+ BMiddle _ -> BlockCO l $! (b1 `cat` y)
+ BCat{} -> BlockCO l $! (b1 `cat` y)
+ BSnoc{} -> BlockCO l $! (b1 `cat` y)
+ BCons{} -> BlockCO l $! (b1 `cat` y)
+
+ BMiddle n -> case y of
+ BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2
+ BNil -> x
+ BMiddle{} -> BCons n y
+ BCat{} -> BCons n y
+ BSnoc{} -> BCons n y
+ BCons{} -> BCons n y
+
+ BCat{} -> case y of
+ BlockOC b3 n2 -> (BlockOC $! (x `cat` b3)) n2
+ BNil -> x
+ BMiddle n -> BSnoc x n
+ BCat{} -> BCat x y
+ BSnoc{} -> BCat x y
+ BCons{} -> BCat x y
+
+ BSnoc{} -> case y of
+ BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2
+ BNil -> x
+ BMiddle n -> BSnoc x n
+ BCat{} -> BCat x y
+ BSnoc{} -> BCat x y
+ BCons{} -> BCat x y
+
+
+ BCons{} -> case y of
+ BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2
+ BNil -> x
+ BMiddle n -> BSnoc x n
+ BCat{} -> BCat x y
+ BSnoc{} -> BCat x y
+ BCons{} -> BCat x y
+
+
+-- -----------------------------------------------------------------------------
+-- Mapping
+
+-- | Map a function over the nodes of a 'Block'
+mapBlock :: (forall e x. n e x -> n' e x) -> Block n e x -> Block n' e x
+mapBlock f (BlockCO n b ) = BlockCO (f n) (mapBlock f b)
+mapBlock f (BlockOC b n) = BlockOC (mapBlock f b) (f n)
+mapBlock f (BlockCC n b m) = BlockCC (f n) (mapBlock f b) (f m)
+mapBlock _ BNil = BNil
+mapBlock f (BMiddle n) = BMiddle (f n)
+mapBlock f (BCat b1 b2) = BCat (mapBlock f b1) (mapBlock f b2)
+mapBlock f (BSnoc b n) = BSnoc (mapBlock f b) (f n)
+mapBlock f (BCons n b) = BCons (f n) (mapBlock f b)
+
+-- | A strict 'mapBlock'
+mapBlock' :: (forall e x. n e x -> n' e x) -> (Block n e x -> Block n' e x)
+mapBlock' f = mapBlock3' (f, f, f)
+
+-- | Map over a block, with different functions to apply to first nodes,
+-- middle nodes and last nodes respectively. The map is strict.
+--
+mapBlock3' :: forall n n' e x .
+ ( n C O -> n' C O
+ , n O O -> n' O O,
+ n O C -> n' O C)
+ -> Block n e x -> Block n' e x
+mapBlock3' (f, m, l) b = go b
+ where go :: forall e x . Block n e x -> Block n' e x
+ go (BlockOC b y) = (BlockOC $! go b) $! l y
+ go (BlockCO x b) = (BlockCO $! f x) $! (go b)
+ go (BlockCC x b y) = ((BlockCC $! f x) $! go b) $! (l y)
+ go BNil = BNil
+ go (BMiddle n) = BMiddle $! m n
+ go (BCat x y) = (BCat $! go x) $! (go y)
+ go (BSnoc x n) = (BSnoc $! go x) $! (m n)
+ go (BCons n x) = (BCons $! m n) $! (go x)
+
+-- -----------------------------------------------------------------------------
+-- Folding
+
+
+-- | Fold a function over every node in a block, forward or backward.
+-- The fold function must be polymorphic in the shape of the nodes.
+foldBlockNodesF3 :: forall n a b c .
+ ( n C O -> a -> b
+ , n O O -> b -> b
+ , n O C -> b -> c)
+ -> (forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b)
+foldBlockNodesF :: forall n a .
+ (forall e x . n e x -> a -> a)
+ -> (forall e x . Block n e x -> IndexedCO e a a -> IndexedCO x a a)
+foldBlockNodesB3 :: forall n a b c .
+ ( n C O -> b -> c
+ , n O O -> b -> b
+ , n O C -> a -> b)
+ -> (forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b)
+foldBlockNodesB :: forall n a .
+ (forall e x . n e x -> a -> a)
+ -> (forall e x . Block n e x -> IndexedCO x a a -> IndexedCO e a a)
+
+foldBlockNodesF3 (ff, fm, fl) = block
+ where block :: forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b
+ block (BlockCO f b ) = ff f `cat` block b
+ block (BlockCC f b l) = ff f `cat` block b `cat` fl l
+ block (BlockOC b l) = block b `cat` fl l
+ block BNil = id
+ block (BMiddle node) = fm node
+ block (b1 `BCat` b2) = block b1 `cat` block b2
+ block (b1 `BSnoc` n) = block b1 `cat` fm n
+ block (n `BCons` b2) = fm n `cat` block b2
+ cat :: forall a b c. (a -> b) -> (b -> c) -> a -> c
+ cat f f' = f' . f
+
+foldBlockNodesF f = foldBlockNodesF3 (f, f, f)
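+
+-- A minimal usage sketch (hypothetical, not part of this module): counting
+-- the nodes of a block with a shape-polymorphic fold. 'IndexedCO' reduces
+-- to 'Int' at every concrete shape, so @countNodes blk 0@ is the number of
+-- nodes in @blk@.
+countNodes :: Block n e x -> IndexedCO e Int Int -> IndexedCO x Int Int
+countNodes = foldBlockNodesF (\_ n -> n + 1)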
+
+foldBlockNodesB3 (ff, fm, fl) = block
+ where block :: forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b
+ block (BlockCO f b ) = ff f `cat` block b
+ block (BlockCC f b l) = ff f `cat` block b `cat` fl l
+ block (BlockOC b l) = block b `cat` fl l
+ block BNil = id
+ block (BMiddle node) = fm node
+ block (b1 `BCat` b2) = block b1 `cat` block b2
+ block (b1 `BSnoc` n) = block b1 `cat` fm n
+ block (n `BCons` b2) = fm n `cat` block b2
+ cat :: forall a b c. (b -> c) -> (a -> b) -> a -> c
+ cat f f' = f . f'
+
+foldBlockNodesB f = foldBlockNodesB3 (f, f, f)
+
diff --git a/compiler/GHC/Cmm/Dataflow/Collections.hs b/compiler/GHC/Cmm/Dataflow/Collections.hs
new file mode 100644
index 0000000000..f131f17cc1
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow/Collections.hs
@@ -0,0 +1,177 @@
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+module GHC.Cmm.Dataflow.Collections
+ ( IsSet(..)
+ , setInsertList, setDeleteList, setUnions
+ , IsMap(..)
+ , mapInsertList, mapDeleteList, mapUnions
+ , UniqueMap, UniqueSet
+ ) where
+
+import GhcPrelude
+
+import qualified Data.IntMap.Strict as M
+import qualified Data.IntSet as S
+
+import Data.List (foldl1')
+
+class IsSet set where
+ type ElemOf set
+
+ setNull :: set -> Bool
+ setSize :: set -> Int
+ setMember :: ElemOf set -> set -> Bool
+
+ setEmpty :: set
+ setSingleton :: ElemOf set -> set
+ setInsert :: ElemOf set -> set -> set
+ setDelete :: ElemOf set -> set -> set
+
+ setUnion :: set -> set -> set
+ setDifference :: set -> set -> set
+ setIntersection :: set -> set -> set
+ setIsSubsetOf :: set -> set -> Bool
+ setFilter :: (ElemOf set -> Bool) -> set -> set
+
+ setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b
+ setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b
+
+ setElems :: set -> [ElemOf set]
+ setFromList :: [ElemOf set] -> set
+
+-- Helper functions for IsSet class
+setInsertList :: IsSet set => [ElemOf set] -> set -> set
+setInsertList keys set = foldl' (flip setInsert) set keys
+
+setDeleteList :: IsSet set => [ElemOf set] -> set -> set
+setDeleteList keys set = foldl' (flip setDelete) set keys
+
+setUnions :: IsSet set => [set] -> set
+setUnions [] = setEmpty
+setUnions sets = foldl1' setUnion sets
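+
+-- A small usage sketch (not part of the original module), using the
+-- 'UniqueSet' instance defined below:
+exampleSet :: UniqueSet
+exampleSet = setInsertList [3, 1, 2] setEmpty
+-- setElems exampleSet == [1, 2, 3]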
+
+
+class IsMap map where
+ type KeyOf map
+
+ mapNull :: map a -> Bool
+ mapSize :: map a -> Int
+ mapMember :: KeyOf map -> map a -> Bool
+ mapLookup :: KeyOf map -> map a -> Maybe a
+ mapFindWithDefault :: a -> KeyOf map -> map a -> a
+
+ mapEmpty :: map a
+ mapSingleton :: KeyOf map -> a -> map a
+ mapInsert :: KeyOf map -> a -> map a -> map a
+ mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a
+ mapDelete :: KeyOf map -> map a -> map a
+ mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a
+ mapAdjust :: (a -> a) -> KeyOf map -> map a -> map a
+
+ mapUnion :: map a -> map a -> map a
+ mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a
+ mapDifference :: map a -> map a -> map a
+ mapIntersection :: map a -> map a -> map a
+ mapIsSubmapOf :: Eq a => map a -> map a -> Bool
+
+ mapMap :: (a -> b) -> map a -> map b
+ mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b
+ mapFoldl :: (b -> a -> b) -> b -> map a -> b
+ mapFoldr :: (a -> b -> b) -> b -> map a -> b
+ mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b
+ mapFoldMapWithKey :: Monoid m => (KeyOf map -> a -> m) -> map a -> m
+ mapFilter :: (a -> Bool) -> map a -> map a
+ mapFilterWithKey :: (KeyOf map -> a -> Bool) -> map a -> map a
+
+
+ mapElems :: map a -> [a]
+ mapKeys :: map a -> [KeyOf map]
+ mapToList :: map a -> [(KeyOf map, a)]
+ mapFromList :: [(KeyOf map, a)] -> map a
+ mapFromListWith :: (a -> a -> a) -> [(KeyOf map,a)] -> map a
+
+-- Helper functions for IsMap class
+mapInsertList :: IsMap map => [(KeyOf map, a)] -> map a -> map a
+mapInsertList assocs map = foldl' (flip (uncurry mapInsert)) map assocs
+
+mapDeleteList :: IsMap map => [KeyOf map] -> map a -> map a
+mapDeleteList keys map = foldl' (flip mapDelete) map keys
+
+mapUnions :: IsMap map => [map a] -> map a
+mapUnions [] = mapEmpty
+mapUnions maps = foldl1' mapUnion maps
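+
+-- And likewise for maps (an illustrative sketch, not part of the original
+-- module), using the 'UniqueMap' instance defined below:
+exampleSquares :: UniqueMap Int
+exampleSquares = mapInsertList [ (k, k * k) | k <- [1 .. 4] ] mapEmpty
+-- mapLookup 3 exampleSquares == Just 9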
+
+-----------------------------------------------------------------------------
+-- Basic instances
+-----------------------------------------------------------------------------
+
+newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show, Semigroup, Monoid)
+
+instance IsSet UniqueSet where
+ type ElemOf UniqueSet = Int
+
+ setNull (US s) = S.null s
+ setSize (US s) = S.size s
+ setMember k (US s) = S.member k s
+
+ setEmpty = US S.empty
+ setSingleton k = US (S.singleton k)
+ setInsert k (US s) = US (S.insert k s)
+ setDelete k (US s) = US (S.delete k s)
+
+ setUnion (US x) (US y) = US (S.union x y)
+ setDifference (US x) (US y) = US (S.difference x y)
+ setIntersection (US x) (US y) = US (S.intersection x y)
+ setIsSubsetOf (US x) (US y) = S.isSubsetOf x y
+ setFilter f (US s) = US (S.filter f s)
+
+ setFoldl k z (US s) = S.foldl' k z s
+ setFoldr k z (US s) = S.foldr k z s
+
+ setElems (US s) = S.elems s
+ setFromList ks = US (S.fromList ks)
+
+newtype UniqueMap v = UM (M.IntMap v)
+ deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
+
+instance IsMap UniqueMap where
+ type KeyOf UniqueMap = Int
+
+ mapNull (UM m) = M.null m
+ mapSize (UM m) = M.size m
+ mapMember k (UM m) = M.member k m
+ mapLookup k (UM m) = M.lookup k m
+ mapFindWithDefault def k (UM m) = M.findWithDefault def k m
+
+ mapEmpty = UM M.empty
+ mapSingleton k v = UM (M.singleton k v)
+ mapInsert k v (UM m) = UM (M.insert k v m)
+ mapInsertWith f k v (UM m) = UM (M.insertWith f k v m)
+ mapDelete k (UM m) = UM (M.delete k m)
+ mapAlter f k (UM m) = UM (M.alter f k m)
+ mapAdjust f k (UM m) = UM (M.adjust f k m)
+
+ mapUnion (UM x) (UM y) = UM (M.union x y)
+ mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y)
+ mapDifference (UM x) (UM y) = UM (M.difference x y)
+ mapIntersection (UM x) (UM y) = UM (M.intersection x y)
+ mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y
+
+ mapMap f (UM m) = UM (M.map f m)
+ mapMapWithKey f (UM m) = UM (M.mapWithKey f m)
+ mapFoldl k z (UM m) = M.foldl' k z m
+ mapFoldr k z (UM m) = M.foldr k z m
+ mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m
+ mapFoldMapWithKey f (UM m) = M.foldMapWithKey f m
+ mapFilter f (UM m) = UM (M.filter f m)
+ mapFilterWithKey f (UM m) = UM (M.filterWithKey f m)
+
+ mapElems (UM m) = M.elems m
+ mapKeys (UM m) = M.keys m
+ mapToList (UM m) = M.toList m
+ mapFromList assocs = UM (M.fromList assocs)
+ mapFromListWith f assocs = UM (M.fromListWith f assocs)
diff --git a/compiler/GHC/Cmm/Dataflow/Graph.hs b/compiler/GHC/Cmm/Dataflow/Graph.hs
new file mode 100644
index 0000000000..3f361de0fb
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow/Graph.hs
@@ -0,0 +1,186 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+module GHC.Cmm.Dataflow.Graph
+ ( Body
+ , Graph
+ , Graph'(..)
+ , NonLocal(..)
+ , addBlock
+ , bodyList
+ , emptyBody
+ , labelsDefined
+ , mapGraph
+ , mapGraphBlocks
+ , revPostorderFrom
+ ) where
+
+
+import GhcPrelude
+import Util
+
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+
+-- | A (possibly empty) collection of closed/closed blocks
+type Body n = LabelMap (Block n C C)
+
+-- | @Body@ abstracted over @block@
+type Body' block (n :: Extensibility -> Extensibility -> *) = LabelMap (block n C C)
+
+-------------------------------
+-- | Gives access to the anchor points for
+-- nonlocal edges as well as the edges themselves
+class NonLocal thing where
+ entryLabel :: thing C x -> Label -- ^ The label of a first node or block
+ successors :: thing e C -> [Label] -- ^ Gives control-flow successors
+
+instance NonLocal n => NonLocal (Block n) where
+ entryLabel (BlockCO f _) = entryLabel f
+ entryLabel (BlockCC f _ _) = entryLabel f
+
+ successors (BlockOC _ n) = successors n
+ successors (BlockCC _ _ n) = successors n
+
+
+emptyBody :: Body' block n
+emptyBody = mapEmpty
+
+bodyList :: Body' block n -> [(Label,block n C C)]
+bodyList body = mapToList body
+
+addBlock
+ :: (NonLocal block, HasDebugCallStack)
+ => block C C -> LabelMap (block C C) -> LabelMap (block C C)
+addBlock block body = mapAlter add lbl body
+ where
+ lbl = entryLabel block
+ add Nothing = Just block
+ add _ = error $ "duplicate label " ++ show lbl ++ " in graph"
+
+
+-- ---------------------------------------------------------------------------
+-- Graph
+
+-- | A control-flow graph, which may take any of four shapes (O/O,
+-- O/C, C/O, C/C). A graph open at the entry has a single,
+-- distinguished, anonymous entry point; if a graph is closed at the
+-- entry, its entry point(s) are supplied by a context.
+type Graph = Graph' Block
+
+-- | @Graph'@ is abstracted over the block type, so that we can build
+-- graphs of annotated blocks, for example (the dataflow engine in
+-- GHC.Cmm.Dataflow needs this).
+data Graph' block (n :: Extensibility -> Extensibility -> *) e x where
+ GNil :: Graph' block n O O
+ GUnit :: block n O O -> Graph' block n O O
+ GMany :: MaybeO e (block n O C)
+ -> Body' block n
+ -> MaybeO x (block n C O)
+ -> Graph' block n e x
+
+
+-- -----------------------------------------------------------------------------
+-- Mapping over graphs
+
+-- | Maps over all nodes in a graph.
+mapGraph :: (forall e x. n e x -> n' e x) -> Graph n e x -> Graph n' e x
+mapGraph f = mapGraphBlocks (mapBlock f)
+
+-- | Function 'mapGraphBlocks' enables a change of representation of blocks,
+-- nodes, or both. It lifts a polymorphic block transform into a polymorphic
+-- graph transform. When the block representation stabilizes, a similar
+-- function should be provided for blocks.
+mapGraphBlocks :: forall block n block' n' e x .
+ (forall e x . block n e x -> block' n' e x)
+ -> (Graph' block n e x -> Graph' block' n' e x)
+
+mapGraphBlocks f = map
+ where map :: Graph' block n e x -> Graph' block' n' e x
+ map GNil = GNil
+ map (GUnit b) = GUnit (f b)
+ map (GMany e b x) = GMany (fmap f e) (mapMap f b) (fmap f x)
+
+-- -----------------------------------------------------------------------------
+-- Extracting Labels from graphs
+
+labelsDefined :: forall block n e x . NonLocal (block n) => Graph' block n e x
+ -> LabelSet
+labelsDefined GNil = setEmpty
+labelsDefined (GUnit{}) = setEmpty
+labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body
+ where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet
+ addEntry labels label _ = setInsert label labels
+ exitLabel :: MaybeO x (block n C O) -> LabelSet
+ exitLabel NothingO = setEmpty
+ exitLabel (JustO b) = setSingleton (entryLabel b)
+
+
+----------------------------------------------------------------
+
+-- | Returns a list of blocks reachable from the provided Labels in the reverse
+-- postorder.
+--
+-- This is the most important traversal over this data structure. It drops
+-- unreachable code and puts blocks in an order that is good for solving forward
+-- dataflow problems quickly. The reverse order is good for solving backward
+-- dataflow problems quickly. The forward order is also reasonably good for
+-- emitting instructions, except that it will not usually exploit Forrest
+-- Baskett's trick of eliminating the unconditional branch from a loop. For
+-- that you would need a more serious analysis, probably based on dominators, to
+-- identify loop headers.
+--
+-- For forward analyses we want reverse postorder visitation, consider:
+-- @
+-- A -> [B,C]
+-- B -> D
+-- C -> D
+-- @
+-- Postorder: [D, C, B, A] (or [D, B, C, A])
+-- Reverse postorder: [A, B, C, D] (or [A, C, B, D])
+-- This matters for, e.g., forward analysis, because we want to analyze *both*
+-- B and C before we analyze D.
+revPostorderFrom
+ :: forall block. (NonLocal block)
+ => LabelMap (block C C) -> Label -> [block C C]
+revPostorderFrom graph start = go start_worklist setEmpty []
+ where
+ start_worklist = lookup_for_descend start Nil
+
+ -- To compute the postorder we need to "visit" a block (mark as done)
+ -- *after* visiting all its successors. So we need to know whether we
+ -- already processed all successors of each block (and @NonLocal@ allows
+ -- arbitrary many successors). So we use an explicit stack with an extra bit
+ -- arbitrarily many successors). So we use an explicit stack with an extra bit
+ -- * @ConsTodo@ means to explore the block if it wasn't visited before
+ -- * @ConsMark@ means that all successors were already done and we can add
+ -- the block to the result.
+ --
+ -- NOTE: We add blocks to the result list in postorder, but we *prepend*
+ -- them (i.e., we use @(:)@), which means that the final list is in reverse
+ -- postorder.
+ go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C]
+ go Nil !_ !result = result
+ go (ConsMark block rest) !wip_or_done !result =
+ go rest wip_or_done (block : result)
+ go (ConsTodo block rest) !wip_or_done !result
+ | entryLabel block `setMember` wip_or_done = go rest wip_or_done result
+ | otherwise =
+ let new_worklist =
+ foldr lookup_for_descend
+ (ConsMark block rest)
+ (successors block)
+ in go new_worklist (setInsert (entryLabel block) wip_or_done) result
+
+ lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C)
+ lookup_for_descend label wl
+ | Just b <- mapLookup label graph = ConsTodo b wl
+ | otherwise =
+ error $ "Label that doesn't have a block?! " ++ show label
+
+data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil
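+
+-- A minimal sketch (hypothetical test type, not part of this module)
+-- illustrating 'addBlock' and 'revPostorderFrom' on the diamond graph from
+-- the comment above.
+data TestBlock (e :: Extensibility) (x :: Extensibility)
+  = TB { tbLabel :: Label, tbSuccs :: [Label] }
+
+instance NonLocal TestBlock where
+  entryLabel = tbLabel
+  successors = tbSuccs
+
+diamond :: LabelMap (TestBlock C C)
+diamond = foldr addBlock mapEmpty
+  [ TB a [b, c], TB b [d], TB c [d], TB d [] ]
+  where [a, b, c, d] = map mkHooplLabel [1 .. 4]
+
+-- map tbLabel (revPostorderFrom diamond (mkHooplLabel 1)) visits the entry
+-- first, then both branches, then the join point, giving one of the two
+-- reverse postorders, e.g. [L1, L3, L2, L4].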
diff --git a/compiler/GHC/Cmm/Dataflow/Label.hs b/compiler/GHC/Cmm/Dataflow/Label.hs
new file mode 100644
index 0000000000..c571cedb48
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow/Label.hs
@@ -0,0 +1,142 @@
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+module GHC.Cmm.Dataflow.Label
+ ( Label
+ , LabelMap
+ , LabelSet
+ , FactBase
+ , lookupFact
+ , mkHooplLabel
+ ) where
+
+import GhcPrelude
+
+import Outputable
+
+-- TODO: This should really just use GHC's Unique and Uniq{Set,FM}
+import GHC.Cmm.Dataflow.Collections
+
+import Unique (Uniquable(..))
+import TrieMap
+
+
+-----------------------------------------------------------------------------
+-- Label
+-----------------------------------------------------------------------------
+
+newtype Label = Label { lblToUnique :: Int }
+ deriving (Eq, Ord)
+
+mkHooplLabel :: Int -> Label
+mkHooplLabel = Label
+
+instance Show Label where
+ show (Label n) = "L" ++ show n
+
+instance Uniquable Label where
+ getUnique label = getUnique (lblToUnique label)
+
+instance Outputable Label where
+ ppr label = ppr (getUnique label)
+
+-----------------------------------------------------------------------------
+-- LabelSet
+
+newtype LabelSet = LS UniqueSet deriving (Eq, Ord, Show, Monoid, Semigroup)
+
+instance IsSet LabelSet where
+ type ElemOf LabelSet = Label
+
+ setNull (LS s) = setNull s
+ setSize (LS s) = setSize s
+ setMember (Label k) (LS s) = setMember k s
+
+ setEmpty = LS setEmpty
+ setSingleton (Label k) = LS (setSingleton k)
+ setInsert (Label k) (LS s) = LS (setInsert k s)
+ setDelete (Label k) (LS s) = LS (setDelete k s)
+
+ setUnion (LS x) (LS y) = LS (setUnion x y)
+ setDifference (LS x) (LS y) = LS (setDifference x y)
+ setIntersection (LS x) (LS y) = LS (setIntersection x y)
+ setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y
+ setFilter f (LS s) = LS (setFilter (f . mkHooplLabel) s)
+ setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s
+ setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s
+
+ setElems (LS s) = map mkHooplLabel (setElems s)
+ setFromList ks = LS (setFromList (map lblToUnique ks))
+
+-----------------------------------------------------------------------------
+-- LabelMap
+
+newtype LabelMap v = LM (UniqueMap v)
+ deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
+
+instance IsMap LabelMap where
+ type KeyOf LabelMap = Label
+
+ mapNull (LM m) = mapNull m
+ mapSize (LM m) = mapSize m
+ mapMember (Label k) (LM m) = mapMember k m
+ mapLookup (Label k) (LM m) = mapLookup k m
+ mapFindWithDefault def (Label k) (LM m) = mapFindWithDefault def k m
+
+ mapEmpty = LM mapEmpty
+ mapSingleton (Label k) v = LM (mapSingleton k v)
+ mapInsert (Label k) v (LM m) = LM (mapInsert k v m)
+ mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m)
+ mapDelete (Label k) (LM m) = LM (mapDelete k m)
+ mapAlter f (Label k) (LM m) = LM (mapAlter f k m)
+ mapAdjust f (Label k) (LM m) = LM (mapAdjust f k m)
+
+ mapUnion (LM x) (LM y) = LM (mapUnion x y)
+ mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y)
+ mapDifference (LM x) (LM y) = LM (mapDifference x y)
+ mapIntersection (LM x) (LM y) = LM (mapIntersection x y)
+ mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y
+
+ mapMap f (LM m) = LM (mapMap f m)
+ mapMapWithKey f (LM m) = LM (mapMapWithKey (f . mkHooplLabel) m)
+ mapFoldl k z (LM m) = mapFoldl k z m
+ mapFoldr k z (LM m) = mapFoldr k z m
+ mapFoldlWithKey k z (LM m) =
+ mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m
+ mapFoldMapWithKey f (LM m) = mapFoldMapWithKey (\k v -> f (mkHooplLabel k) v) m
+ mapFilter f (LM m) = LM (mapFilter f m)
+ mapFilterWithKey f (LM m) = LM (mapFilterWithKey (f . mkHooplLabel) m)
+
+ mapElems (LM m) = mapElems m
+ mapKeys (LM m) = map mkHooplLabel (mapKeys m)
+ mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m]
+ mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs])
+ mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs])
+
+-----------------------------------------------------------------------------
+-- Instances
+
+instance Outputable LabelSet where
+ ppr = ppr . setElems
+
+instance Outputable a => Outputable (LabelMap a) where
+ ppr = ppr . mapToList
+
+instance TrieMap LabelMap where
+ type Key LabelMap = Label
+ emptyTM = mapEmpty
+ lookupTM k m = mapLookup k m
+ alterTM k f m = mapAlter f k m
+ foldTM k m z = mapFoldr k z m
+ mapTM f m = mapMap f m
+
+-----------------------------------------------------------------------------
+-- FactBase
+
+type FactBase f = LabelMap f
+
+lookupFact :: Label -> FactBase f -> Maybe f
+lookupFact = mapLookup
diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs
new file mode 100644
index 0000000000..70fc08ee94
--- /dev/null
+++ b/compiler/GHC/Cmm/DebugBlock.hs
@@ -0,0 +1,546 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE MultiWayIf #-}
+
+-----------------------------------------------------------------------------
+--
+-- Debugging data
+--
+-- Association of debug data on the Cmm level, with methods to encode it in
+-- event log format for later inclusion in profiling event logs.
+--
+-----------------------------------------------------------------------------
+
+module GHC.Cmm.DebugBlock (
+
+ DebugBlock(..),
+ cmmDebugGen,
+ cmmDebugLabels,
+ cmmDebugLink,
+ debugToMap,
+
+ -- * Unwinding information
+ UnwindTable, UnwindPoint(..),
+ UnwindExpr(..), toUnwindExpr
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Cmm.Utils
+import CoreSyn
+import FastString ( nilFS, mkFastString )
+import Module
+import Outputable
+import GHC.Cmm.Ppr.Expr ( pprExpr )
+import SrcLoc
+import Util ( seqList )
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+
+import Data.Maybe
+import Data.List ( minimumBy, nubBy )
+import Data.Ord ( comparing )
+import qualified Data.Map as Map
+import Data.Either ( partitionEithers )
+
+-- | Debug information about a block of code. Ticks scope over nested
+-- blocks.
+data DebugBlock =
+ DebugBlock
+ { dblProcedure :: !Label -- ^ Entry label of containing proc
+ , dblLabel :: !Label -- ^ Hoopl label
+ , dblCLabel :: !CLabel -- ^ Output label
+ , dblHasInfoTbl :: !Bool -- ^ Has an info table?
+ , dblParent :: !(Maybe DebugBlock)
+ -- ^ The parent of this proc. See Note [Splitting DebugBlocks]
+ , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block
+ , dblSourceTick :: !(Maybe CmmTickish) -- ^ Best source tick covering block
+ , dblPosition :: !(Maybe Int) -- ^ Output position relative to
+ -- other blocks. @Nothing@ means
+ -- the block was optimized out
+ , dblUnwind :: [UnwindPoint]
+ , dblBlocks :: ![DebugBlock] -- ^ Nested blocks
+ }
+
+instance Outputable DebugBlock where
+ ppr blk = (if | dblProcedure blk == dblLabel blk
+ -> text "proc"
+ | dblHasInfoTbl blk
+ -> text "pp-blk"
+ | otherwise
+ -> text "blk") <+>
+ ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+>
+ (maybe empty ppr (dblSourceTick blk)) <+>
+ (maybe (text "removed") ((text "pos " <>) . ppr)
+ (dblPosition blk)) <+>
+ (ppr (dblUnwind blk)) $+$
+ (if null (dblBlocks blk) then empty else nest 4 (ppr (dblBlocks blk)))
+
+-- | Intermediate data structure holding debug-relevant context information
+-- about a block.
+type BlockContext = (CmmBlock, RawCmmDecl)
+
+-- | Extract debug data from a group of procedures. We will prefer
+-- source notes that come from the given module (presumably the module
+-- that we are currently compiling).
+cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock]
+cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
+ where
+ blockCtxs :: Map.Map CmmTickScope [BlockContext]
+ blockCtxs = blockContexts decls
+
+ -- Analyse tick scope structure: Each one is either a top-level
+ -- tick scope, or the child of another.
+ (topScopes, childScopes)
+ = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
+ findP tsc GlobalScope = Left tsc -- top scope
+ findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc)
+ | otherwise = findP tsc scp'
+ where -- Note that we only follow the left parent of
+ -- combined scopes. This loses ticks, which we
+ -- recover by copying them below.
+ scp' | SubScope _ scp' <- scp = scp'
+ | CombinedScope scp' _ <- scp = scp'
+ | otherwise = panic "findP impossible"
+
+ scopeMap = foldr (uncurry insertMulti) Map.empty childScopes
+
+ -- This allows us to recover ticks that we lost by flattening
+ -- the graph. Basically, if the parent is A but the child is
+ -- CBA, we know that there is no BA, because it would have taken
+ -- priority - but there might be a B scope, with ticks that
+ -- would not be associated with our child anymore. Note however
+ -- that there might be other children (DB), which we have to
+ -- filter out.
+ --
+ -- We expect this to be called rarely, which is why we are not
+ -- trying too hard to be efficient here. In many cases we won't
+ -- have to construct blockCtxs in the first place.
+ ticksToCopy :: CmmTickScope -> [CmmTickish]
+ ticksToCopy (CombinedScope scp s) = go s
+ where go s | scp `isTickSubScope` s = [] -- done
+ | SubScope _ s' <- s = ticks ++ go s'
+ | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2
+ | otherwise = panic "ticksToCopy impossible"
+ where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs
+ ticksToCopy _ = []
+ bCtxsTicks = concatMap (blockTicks . fst)
+
+ -- Finding the "best" source tick is somewhat arbitrary -- we
+ -- select the first source span, while preferring source ticks
+ -- from the same source file. Furthermore, dumps take priority
+ -- (if we generated one, we probably want debug information to
+ -- refer to it).
+ bestSrcTick = minimumBy (comparing rangeRating)
+ rangeRating (SourceNote span _)
+ | srcSpanFile span == thisFile = 1
+ | otherwise = 2 :: Int
+ rangeRating note = pprPanic "rangeRating" (ppr note)
+ thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
+
+ -- Returns block tree for this scope as well as all nested
+ -- scopes. Note that if there are multiple blocks in the (exact)
+ -- same scope we elect one as the "branch" node and add the rest
+ -- as children.
+ blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock
+ blocksForScope cstick scope = mkBlock True (head bctxs)
+ where bctxs = fromJust $ Map.lookup scope blockCtxs
+ nested = fromMaybe [] $ Map.lookup scope scopeMap
+ childs = map (mkBlock False) (tail bctxs) ++
+ map (blocksForScope stick) nested
+
+ mkBlock :: Bool -> BlockContext -> DebugBlock
+ mkBlock top (block, prc)
+ = DebugBlock { dblProcedure = g_entry graph
+ , dblLabel = label
+ , dblCLabel = case info of
+ Just (Statics infoLbl _) -> infoLbl
+ Nothing
+ | g_entry graph == label -> entryLbl
+ | otherwise -> blockLbl label
+ , dblHasInfoTbl = isJust info
+ , dblParent = Nothing
+ , dblTicks = ticks
+ , dblPosition = Nothing -- see cmmDebugLink
+ , dblSourceTick = stick
+ , dblBlocks = blocks
+ , dblUnwind = []
+ }
+ where (CmmProc infos entryLbl _ graph) = prc
+ label = entryLabel block
+ info = mapLookup label infos
+ blocks | top = seqList childs childs
+ | otherwise = []
+
+ -- A source tick scopes over all nested blocks. However
+ -- their source ticks might take priority.
+ isSourceTick SourceNote {} = True
+ isSourceTick _ = False
+ -- Collect ticks from all blocks inside the tick scope.
+ -- We attempt to filter out duplicates while we're at it.
+ ticks = nubBy (flip tickishContains) $
+ bCtxsTicks bctxs ++ ticksToCopy scope
+ stick = case filter isSourceTick ticks of
+ [] -> cstick
+ sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick)
+
+-- | Build a map of blocks sorted by their tick scopes
+--
+-- This involves a pre-order traversal, as we want blocks in rough
+-- control flow order (so ticks have a chance to be sorted in the
+-- right order).
+blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext]
+blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls
+ where walkProc :: RawCmmDecl
+ -> Map.Map CmmTickScope [BlockContext]
+ -> Map.Map CmmTickScope [BlockContext]
+ walkProc CmmData{} m = m
+ walkProc prc@(CmmProc _ _ _ graph) m
+ | mapNull blocks = m
+ | otherwise = snd $ walkBlock prc entry (emptyLbls, m)
+ where blocks = toBlockMap graph
+ entry = [mapFind (g_entry graph) blocks]
+ emptyLbls = setEmpty :: LabelSet
+
+ walkBlock :: RawCmmDecl -> [Block CmmNode C C]
+ -> (LabelSet, Map.Map CmmTickScope [BlockContext])
+ -> (LabelSet, Map.Map CmmTickScope [BlockContext])
+ walkBlock _ [] c = c
+ walkBlock prc (block:blocks) (visited, m)
+ | lbl `setMember` visited
+ = walkBlock prc blocks (visited, m)
+ | otherwise
+ = walkBlock prc blocks $
+ walkBlock prc succs
+ (lbl `setInsert` visited,
+ insertMulti scope (block, prc) m)
+ where CmmEntry lbl scope = firstNode block
+ (CmmProc _ _ _ graph) = prc
+ succs = map (flip mapFind (toBlockMap graph))
+ (successors (lastNode block))
+ mapFind = mapFindWithDefault (error "contextTree: block not found!")
+
+insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a]
+insertMulti k v = Map.insertWith (const (v:)) k [v]
+
+cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label]
+cmmDebugLabels isMeta nats = seqList lbls lbls
+ where -- Find order in which procedures will be generated by the
+ -- back-end (that actually matters for DWARF generation).
+ --
+ -- Note that we might encounter blocks that are missing or only
+ -- consist of meta instructions -- we will declare them missing,
+ -- which will skip debug data generation without messing up the
+ -- block hierarchy.
+ lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats
+ getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs
+ getBlocks _other = []
+ allMeta (BasicBlock _ instrs) = all isMeta instrs
+
+-- | Sets position and unwind table fields in the debug block tree according to
+-- native generated code.
+cmmDebugLink :: [Label] -> LabelMap [UnwindPoint]
+ -> [DebugBlock] -> [DebugBlock]
+cmmDebugLink labels unwindPts blocks = map link blocks
+ where blockPos :: LabelMap Int
+ blockPos = mapFromList $ flip zip [0..] labels
+ link block = block { dblPosition = mapLookup (dblLabel block) blockPos
+ , dblBlocks = map link (dblBlocks block)
+ , dblUnwind = fromMaybe mempty
+ $ mapLookup (dblLabel block) unwindPts
+ }
+
+-- | Converts debug blocks into a label map for easier lookups
+debugToMap :: [DebugBlock] -> LabelMap DebugBlock
+debugToMap = mapUnions . map go
+ where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b)
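+
+-- For example (a usage sketch): after 'cmmDebugGen' has produced the nested
+-- DebugBlock tree, @mapLookup lbl (debugToMap dbs)@ finds the entry for
+-- @lbl@ no matter how deeply it was nested.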
+
+{-
+Note [What is this unwinding business?]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unwinding tables are a variety of debugging information used by debugging tools
+to reconstruct the execution history of a program at runtime. These tables
+consist of sets of "instructions", one set for every instruction in the program,
+which describe how to reconstruct the state of the machine at the point where
+the current procedure was called. For instance, consider the following annotated
+pseudo-code,
+
+ a_fun:
+ add rsp, 8 -- unwind: rsp = rsp - 8
+ mov rax, 1 -- unwind: rax = unknown
+ call another_block
+ sub rsp, 8 -- unwind: rsp = rsp
+
+We see that attached to each instruction there is an "unwind" annotation, which
+provides a relationship between each updated register and its value at the
+time of entry to a_fun. This is the sort of information that allows gdb to give
+you a stack backtrace given the execution state of your program. This
+unwinding information is captured in various ways by various debug information
+formats; in the case of DWARF (the only format supported by GHC) it is known as
+Call Frame Information (CFI) and can be found in the .debug_frame section of
+your object files.
+
+Currently we only bother to produce unwinding information for registers which
+are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp
+(which is the STG stack pointer) and $rsp (the C stack pointer).
+
+Let's consider how GHC would annotate a typical C-- procedure, as it comes
+from the STG-to-Cmm code generator, with unwinding information,
+
+ entry()
+ { c2fe:
+ v :: P64 = R2;
+ if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
+ c2ff:
+ R2 = v :: P64;
+ R1 = test_closure;
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
+ c2fg:
+ I64[Sp - 8] = c2dD;
+ R1 = v :: P64;
+ Sp = Sp - 8; // Sp updated here
+ if (R1 & 7 != 0) goto c2dD; else goto c2dE;
+ c2dE:
+ call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
+ c2dD:
+ w :: P64 = R1;
+ Hp = Hp + 48;
+ if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
+ ...
+ },
+
+Let's consider how this procedure will be decorated with unwind information
+(largely by GHC.Cmm.LayoutStack). Naturally, when we enter the procedure `entry` the
+value of Sp is no different from what it was at its call site. Therefore we will
+add an `unwind` statement saying this at the beginning of its unwind-annotated
+code,
+
+ entry()
+ { c2fe:
+ unwind Sp = Just Sp + 0;
+ v :: P64 = R2;
+ if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
+
+After c2fe we may pass to either c2ff or c2fg; let's first consider the
+former. In this case there is nothing in particular that we need to do other
+than reiterate what we already know about Sp,
+
+ c2ff:
+ unwind Sp = Just Sp + 0;
+ R2 = v :: P64;
+ R1 = test_closure;
+ call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
+
+In contrast, c2fg updates Sp midway through its body. To ensure that unwinding
+can happen correctly after this point we must include an unwind statement there,
+in addition to the usual beginning-of-block statement,
+
+ c2fg:
+ unwind Sp = Just Sp + 0;
+ I64[Sp - 8] = c2dD;
+ R1 = v :: P64;
+ Sp = Sp - 8;
+ unwind Sp = Just Sp + 8;
+ if (R1 & 7 != 0) goto c2dD; else goto c2dE;
+
+The remaining blocks are simple,
+
+ c2dE:
+ unwind Sp = Just Sp + 8;
+ call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
+ c2dD:
+ unwind Sp = Just Sp + 8;
+ w :: P64 = R1;
+ Hp = Hp + 48;
+ if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
+ ...
+ },
+
+
+The flow of unwinding information through the compiler is a bit convoluted:
+
+ * C-- begins life in StgToCmm without any unwind information. This is because we
+ haven't actually done any register assignment or stack layout yet, so there
+ is no need for unwind information.
+
+ * GHC.Cmm.LayoutStack figures out how to layout each procedure's stack, and produces
+ appropriate unwinding nodes for each adjustment of the STG Sp register.
+
+ * The unwind nodes are carried through the sinking pass. Currently this is
+ guaranteed not to invalidate unwind information since it won't touch stores
+ to Sp, but this will need revisiting if CmmSink gets smarter in the future.
+
+ * Eventually we make it to the native code generator backend which can then
+ preserve the unwind nodes in its machine-specific instructions. In so doing
+ the backend can also modify or add unwinding information; this is necessary,
+ for instance, on x86-64, where $rsp may need to be adjusted during calls
+ to native foreign code to satisfy the native calling convention.
+
+ * The NCG then retrieves the final unwinding table for each block from the
+ backend with extractUnwindPoints.
+
+ * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen
+
+ * These DebugBlocks are then converted to, e.g., DWARF unwinding tables
+ (by the Dwarf module) and emitted in the final object.
+
+See also:
+ Note [Unwinding information in the NCG] in AsmCodeGen,
+ Note [Unwind pseudo-instruction in Cmm],
+ Note [Debugging DWARF unwinding info].
+
+
+Note [Debugging DWARF unwinding info]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For debugging generated unwinding info I've found it most useful to dump the
+disassembled binary with objdump -D and dump the debug info with
+readelf --debug-dump=frames-interp.
+
+You should get something like this:
+
+ 0000000000000010 <stg_catch_frame_info>:
+ 10: 48 83 c5 18 add $0x18,%rbp
+ 14: ff 65 00 jmpq *0x0(%rbp)
+
+and:
+
+ Contents of the .debug_frame section:
+
+ 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16
+ LOC CFA rbp rsp ra
+ 0000000000000000 rbp+0 v+0 s c+0
+
+ 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017
+ LOC CFA rbp rsp ra
+ 000000000000000f rbp+0 v+0 s c+0
+ 000000000000000f rbp+24 v+0 s c+0
+
+To read it, http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in
+Appendix 5 (page 101 of the pdf) and more details in the relevant section.
+
+The key thing to keep in mind is that the value at LOC is the value from
+*before* the instruction at LOC executes. In other words it answers the
+question: if my $rip is at LOC, how do I get the relevant values given the
+values obtained through unwinding so far.
+
+If the readelf --debug-dump=frames-interp output looks wrong, it may also be
+useful to look at readelf --debug-dump=frames, which is closer to the
+information that GHC generated.
+
+It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm
+-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm]
+explains how to interpret it.
+
+Inside gdb there are a couple of useful commands for inspecting frames.
+For example:
+
+ gdb> info frame <num>
+
+It shows the values of registers obtained through unwinding.
+
+Another useful thing to try when debugging the DWARF unwinding is to enable
+extra debugging output in GDB:
+
+ gdb> set debug frame 1
+
+This makes GDB produce a trace of its internal workings. Having gone this far,
+it's just a tiny step to run GDB in GDB. Make sure you install debugging
+symbols for gdb if you obtain it through a package manager.
+
+Keep in mind that the current release of GDB has an instruction pointer handling
+heuristic that works well for C-like languages, but doesn't always work for
+Haskell. See Note [Info Offset] in Dwarf.Types for more details.
+
+Note [Unwind pseudo-instruction in Cmm]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't
+generate any assembly, but controls what DWARF unwinding information gets
+generated.
+
+It's important to understand what ranges of code the unwind pseudo-instruction
+refers to.
+For a sequence of CmmNodes like:
+
+ A // starts at addr X and ends at addr Y-1
+ unwind Sp = Just Sp + 16;
+ B // starts at addr Y and ends at addr Z
+
+the unwind statement reflects the state after A has executed, but before B
+has executed. In the terminology of Note [Debugging DWARF unwinding info],
+the LOC this information will end up attached to is Y.
+-}
+
+-- | A label associated with an 'UnwindTable'
+data UnwindPoint = UnwindPoint !CLabel !UnwindTable
+
+instance Outputable UnwindPoint where
+ ppr (UnwindPoint lbl uws) =
+ braces $ ppr lbl<>colon
+ <+> hsep (punctuate comma $ map pprUw $ Map.toList uws)
+ where
+ pprUw (g, expr) = ppr g <> char '=' <> ppr expr
+
+-- | Maps registers to expressions that yield their "old" values
+-- further up the stack. Most interesting for the stack pointer @Sp@,
+-- but might be useful to document saved registers, too. Note that a
+-- register's value will be 'Nothing' when the register's previous
+-- value cannot be reconstructed.
+type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr)
+
+-- | Expressions, used for unwind information
+data UnwindExpr = UwConst !Int -- ^ literal value
+ | UwReg !GlobalReg !Int -- ^ register plus offset
+ | UwDeref UnwindExpr -- ^ pointer dereferencing
+ | UwLabel CLabel
+ | UwPlus UnwindExpr UnwindExpr
+ | UwMinus UnwindExpr UnwindExpr
+ | UwTimes UnwindExpr UnwindExpr
+ deriving (Eq)
+
+instance Outputable UnwindExpr where
+ pprPrec _ (UwConst i) = ppr i
+ pprPrec _ (UwReg g 0) = ppr g
+ pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x))
+ pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e
+ pprPrec _ (UwLabel l) = pprPrec 3 l
+ pprPrec p (UwPlus e0 e1) | p <= 0
+ = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1
+ pprPrec p (UwMinus e0 e1) | p <= 0
+ = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1
+ pprPrec p (UwTimes e0 e1) | p <= 1
+ = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1
+ pprPrec _ other = parens (pprPrec 0 other)
+
+-- | Conversion of Cmm expressions to unwind expressions. We check for
+-- unsupported operator usages and simplify the expression as far as
+-- possible.
+toUnwindExpr :: CmmExpr -> UnwindExpr
+toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i)
+toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l
+toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i
+toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0
+toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e)
+toUnwindExpr e@(CmmMachOp op [e1, e2]) =
+ case (op, toUnwindExpr e1, toUnwindExpr e2) of
+ (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y)
+ (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y)
+ (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y)
+ (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y)
+ (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y)
+ (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y)
+ (MO_Add{}, u1, u2 ) -> UwPlus u1 u2
+ (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2
+ (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2
+ _otherwise -> pprPanic "Unsupported operator in unwind expression!"
+ (pprExpr e)
+toUnwindExpr e
+ = pprPanic "Unsupported unwind expression!" (ppr e)
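+
+-- For example (a sketch of the simplification above): for any width @w@,
+-- an expression such as @Sp + 8@ collapses to a register-plus-offset form
+-- rather than a syntactic 'UwPlus':
+--
+-- toUnwindExpr (CmmMachOp (MO_Add w) [CmmReg (CmmGlobal Sp), CmmLit (CmmInt 8 w)])
+--   == UwReg Sp 8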
diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs
new file mode 100644
index 0000000000..3b4f0156a0
--- /dev/null
+++ b/compiler/GHC/Cmm/Expr.hs
@@ -0,0 +1,619 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE UndecidableInstances #-}
+
+module GHC.Cmm.Expr
+ ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr
+ , CmmReg(..), cmmRegType, cmmRegWidth
+ , CmmLit(..), cmmLitType
+ , LocalReg(..), localRegType
+ , GlobalReg(..), isArgReg, globalRegType
+ , spReg, hpReg, spLimReg, hpLimReg, nodeReg
+ , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg
+ , node, baseReg
+ , VGcPtr(..)
+
+ , DefinerOfRegs, UserOfRegs
+ , foldRegsDefd, foldRegsUsed
+ , foldLocalRegsDefd, foldLocalRegsUsed
+
+ , RegSet, LocalRegSet, GlobalRegSet
+ , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
+ , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
+ , regSetToList
+
+ , Area(..)
+ , module GHC.Cmm.MachOp
+ , module GHC.Cmm.Type
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.MachOp
+import GHC.Cmm.Type
+import DynFlags
+import Outputable (panic)
+import Unique
+
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import BasicTypes (Alignment, mkAlignment, alignmentOf)
+
+-----------------------------------------------------------------------------
+-- CmmExpr
+-- An expression. Expressions have no side effects.
+-----------------------------------------------------------------------------
+
+data CmmExpr
+ = CmmLit CmmLit -- Literal
+ | CmmLoad !CmmExpr !CmmType -- Read memory location
+ | CmmReg !CmmReg -- Contents of register
+ | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
+ | CmmStackSlot Area {-# UNPACK #-} !Int
+ -- addressing expression of a stack slot
+ -- See Note [CmmStackSlot aliasing]
+ | CmmRegOff !CmmReg Int
+ -- CmmRegOff reg i
+ -- ** is shorthand only, meaning **
+ -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+ -- where rep = typeWidth (cmmRegType reg)
+
+instance Eq CmmExpr where -- Equality ignores the types
+ CmmLit l1 == CmmLit l2 = l1==l2
+ CmmLoad e1 _ == CmmLoad e2 _ = e1==e2
+ CmmReg r1 == CmmReg r2 = r1==r2
+ CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2
+ CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2
+ CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
+ _e1 == _e2 = False
+
+data CmmReg
+ = CmmLocal {-# UNPACK #-} !LocalReg
+ | CmmGlobal GlobalReg
+ deriving( Eq, Ord )
+
+-- | A stack area is either the stack slot where a variable is spilled
+-- or the stack space where function arguments and results are passed.
+data Area
+ = Old -- See Note [Old Area]
+ | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+ deriving (Eq, Ord)
+
+{- Note [Old Area]
+~~~~~~~~~~~~~~~~~~
+There is a single call area 'Old', allocated at the extreme old
+end of the stack frame (ie just younger than the return address)
+which holds:
+ * incoming (overflow) parameters,
+ * outgoing (overflow) parameter to tail calls,
+ * outgoing (overflow) result values
+ * the update frame (if any)
+
+Its size is the max of all these requirements. On entry, the stack
+pointer will point to the youngest incoming parameter, which is not
+necessarily at the young end of the Old area.
+
+End of note -}
+
+
+{- Note [CmmStackSlot aliasing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When do two CmmStackSlots alias?
+
+ - T[old+N] aliases with U[young(L)+M] for all T, U, L, N and M
+ - T[old+N] aliases with U[old+M] only if the areas actually overlap
+
+Or more informally, different Areas may overlap with each other.
+
+An alternative semantics, which we previously had, was that different
+Areas do not overlap. The problem that led to redefining the
+semantics of stack areas is described below.
+
+e.g. if we had
+
+ x = Sp[old + 8]
+ y = Sp[old + 16]
+
+ Sp[young(L) + 8] = L
+ Sp[young(L) + 16] = y
+ Sp[young(L) + 24] = x
+ call f() returns to L
+
+if areas semantically do not overlap, then we might optimise this to
+
+ Sp[young(L) + 8] = L
+ Sp[young(L) + 16] = Sp[old + 8]
+ Sp[young(L) + 24] = Sp[old + 16]
+ call f() returns to L
+
+and now young(L) cannot be allocated at the same place as old, and we
+are doomed to use more stack.
+
+ - old+8 conflicts with young(L)+8
+ - old+16 conflicts with young(L)+16 and young(L)+8
+
+so young(L)+8 == old+24 and we get
+
+ Sp[-8] = L
+ Sp[-16] = Sp[8]
+ Sp[-24] = Sp[0]
+ Sp -= 24
+ call f() returns to L
+
+However, if areas are defined to be "possibly overlapping" in the
+semantics, then we cannot commute any loads/stores of old with
+young(L), and we will be able to re-use both old+8 and old+16 for
+young(L).
+
+ x = Sp[8]
+ y = Sp[0]
+
+ Sp[8] = L
+ Sp[0] = y
+ Sp[-8] = x
+ Sp = Sp - 8
+ call f() returns to L
+
+Now, the assignments of y go away,
+
+ x = Sp[8]
+ Sp[8] = L
+ Sp[-8] = x
+ Sp = Sp - 8
+ call f() returns to L
+-}
+
+data CmmLit
+ = CmmInt !Integer Width
+ -- Interpretation: the 2's complement representation of the value
+ -- is truncated to the specified size. This is easier than trying
+ -- to keep the value within range, because we don't know whether
+ -- it will be used as a signed or unsigned value (the CmmType doesn't
+ -- distinguish between signed & unsigned).
+ | CmmFloat Rational Width
+ | CmmVec [CmmLit] -- Vector literal
+ | CmmLabel CLabel -- Address of label
+ | CmmLabelOff CLabel Int -- Address of label + byte offset
+
+ -- Due to limitations in the C backend, the following
+ -- MUST ONLY be used inside the info table indicated by label2
+ -- (label2 must be the info label), and label1 must be an
+ -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
+ -- Don't use it at all unless tablesNextToCode.
+ -- It is also used inside the NCG when generating
+ -- position-independent code.
+ | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset
+ -- In an expression, the width just has the effect of MO_SS_Conv
+ -- from wordWidth to the desired width.
+ --
+ -- In a static literal, the supported Widths depend on the
+ -- architecture: wordWidth is supported on all
+ -- architectures. Additionally W32 is supported on x86_64 when
+ -- using the small memory model.
+
+ | CmmBlock {-# UNPACK #-} !BlockId -- Code label
+ -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+
+ | CmmHighStackMark -- A late-bound constant that stands for the max
+ -- #bytes of stack space used during a procedure.
+ -- During the stack-layout pass, CmmHighStackMark
+ -- is replaced by a CmmInt for the actual number
+ -- of bytes used
+ deriving Eq
+
+cmmExprType :: DynFlags -> CmmExpr -> CmmType
+cmmExprType dflags (CmmLit lit) = cmmLitType dflags lit
+cmmExprType _ (CmmLoad _ rep) = rep
+cmmExprType dflags (CmmReg reg) = cmmRegType dflags reg
+cmmExprType dflags (CmmMachOp op args) = machOpResultType dflags op (map (cmmExprType dflags) args)
+cmmExprType dflags (CmmRegOff reg _) = cmmRegType dflags reg
+cmmExprType dflags (CmmStackSlot _ _) = bWord dflags -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
+
+cmmLitType :: DynFlags -> CmmLit -> CmmType
+cmmLitType _ (CmmInt _ width) = cmmBits width
+cmmLitType _ (CmmFloat _ width) = cmmFloat width
+cmmLitType _ (CmmVec []) = panic "cmmLitType: CmmVec []"
+cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l
+ in if all (`cmmEqType` ty) (map (cmmLitType cflags) ls)
+ then cmmVec (1+length ls) ty
+ else panic "cmmLitType: CmmVec"
+cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl
+cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl
+cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width
+cmmLitType dflags (CmmBlock _) = bWord dflags
+cmmLitType dflags (CmmHighStackMark) = bWord dflags
+
+cmmLabelType :: DynFlags -> CLabel -> CmmType
+cmmLabelType dflags lbl
+ | isGcPtrLabel lbl = gcWord dflags
+ | otherwise = bWord dflags
+
+cmmExprWidth :: DynFlags -> CmmExpr -> Width
+cmmExprWidth dflags e = typeWidth (cmmExprType dflags e)
+
+-- | Returns an alignment in bytes of a CmmExpr when it's a statically
+-- known integer constant, otherwise returns an alignment of 1 byte.
+-- The caller is responsible for using it with a sensible CmmExpr
+-- argument.
+cmmExprAlignment :: CmmExpr -> Alignment
+cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff)
+cmmExprAlignment _ = mkAlignment 1
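+
+-- For example (a sketch): a literal offset of 24 yields an 8-byte alignment
+-- (the largest power of two dividing the value), while any non-literal
+-- expression conservatively gets a 1-byte alignment.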
+
+--------
+--- Negation for conditional branches
+
+maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
+maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op
+ return (CmmMachOp op' args)
+maybeInvertCmmExpr _ = Nothing
+
+-----------------------------------------------------------------------------
+-- Local registers
+-----------------------------------------------------------------------------
+
+data LocalReg
+ = LocalReg {-# UNPACK #-} !Unique CmmType
+ -- ^ Parameters:
+ -- 1. Identifier
+ -- 2. Type
+
+instance Eq LocalReg where
+ (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2
+
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+-- See Note [No Ord for Unique]
+instance Ord LocalReg where
+ compare (LocalReg u1 _) (LocalReg u2 _) = nonDetCmpUnique u1 u2
+
+instance Uniquable LocalReg where
+ getUnique (LocalReg uniq _) = uniq
+
+cmmRegType :: DynFlags -> CmmReg -> CmmType
+cmmRegType _ (CmmLocal reg) = localRegType reg
+cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg
+
+cmmRegWidth :: DynFlags -> CmmReg -> Width
+cmmRegWidth dflags = typeWidth . cmmRegType dflags
+
+localRegType :: LocalReg -> CmmType
+localRegType (LocalReg _ rep) = rep
+
+-----------------------------------------------------------------------------
+-- Register-use information for expressions and other types
+-----------------------------------------------------------------------------
+
+-- | Sets of registers
+
+-- These are used for dataflow facts, and a common operation is taking
+-- the union of two RegSets and then asking whether the union is the
+-- same as one of the inputs. UniqSet isn't good here, because
+-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary
+-- Sets.
+
+type RegSet r = Set r
+type LocalRegSet = RegSet LocalReg
+type GlobalRegSet = RegSet GlobalReg
+
+emptyRegSet :: RegSet r
+nullRegSet :: RegSet r -> Bool
+elemRegSet :: Ord r => r -> RegSet r -> Bool
+extendRegSet :: Ord r => RegSet r -> r -> RegSet r
+deleteFromRegSet :: Ord r => RegSet r -> r -> RegSet r
+mkRegSet :: Ord r => [r] -> RegSet r
+minusRegSet, plusRegSet, timesRegSet :: Ord r => RegSet r -> RegSet r -> RegSet r
+sizeRegSet :: RegSet r -> Int
+regSetToList :: RegSet r -> [r]
+
+emptyRegSet = Set.empty
+nullRegSet = Set.null
+elemRegSet = Set.member
+extendRegSet = flip Set.insert
+deleteFromRegSet = flip Set.delete
+mkRegSet = Set.fromList
+minusRegSet = Set.difference
+plusRegSet = Set.union
+timesRegSet = Set.intersection
+sizeRegSet = Set.size
+regSetToList = Set.toList
+
+class Ord r => UserOfRegs r a where
+ foldRegsUsed :: DynFlags -> (b -> r -> b) -> b -> a -> b
+
+foldLocalRegsUsed :: UserOfRegs LocalReg a
+ => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b
+foldLocalRegsUsed = foldRegsUsed
+
+class Ord r => DefinerOfRegs r a where
+ foldRegsDefd :: DynFlags -> (b -> r -> b) -> b -> a -> b
+
+foldLocalRegsDefd :: DefinerOfRegs LocalReg a
+ => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b
+foldLocalRegsDefd = foldRegsDefd
+
+instance UserOfRegs LocalReg CmmReg where
+ foldRegsUsed _ f z (CmmLocal reg) = f z reg
+ foldRegsUsed _ _ z (CmmGlobal _) = z
+
+instance DefinerOfRegs LocalReg CmmReg where
+ foldRegsDefd _ f z (CmmLocal reg) = f z reg
+ foldRegsDefd _ _ z (CmmGlobal _) = z
+
+instance UserOfRegs GlobalReg CmmReg where
+ foldRegsUsed _ _ z (CmmLocal _) = z
+ foldRegsUsed _ f z (CmmGlobal reg) = f z reg
+
+instance DefinerOfRegs GlobalReg CmmReg where
+ foldRegsDefd _ _ z (CmmLocal _) = z
+ foldRegsDefd _ f z (CmmGlobal reg) = f z reg
+
+instance Ord r => UserOfRegs r r where
+ foldRegsUsed _ f z r = f z r
+
+instance Ord r => DefinerOfRegs r r where
+ foldRegsDefd _ f z r = f z r
+
+instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where
+ -- The (Ord r) in the context is necessary here
+ -- See Note [Recursive superclasses] in TcInstDcls
+ foldRegsUsed dflags f !z e = expr z e
+ where expr z (CmmLit _) = z
+ expr z (CmmLoad addr _) = foldRegsUsed dflags f z addr
+ expr z (CmmReg r) = foldRegsUsed dflags f z r
+ expr z (CmmMachOp _ exprs) = foldRegsUsed dflags f z exprs
+ expr z (CmmRegOff r _) = foldRegsUsed dflags f z r
+ expr z (CmmStackSlot _ _) = z
+
+instance UserOfRegs r a => UserOfRegs r [a] where
+ foldRegsUsed dflags f set as = foldl' (foldRegsUsed dflags f) set as
+ {-# INLINABLE foldRegsUsed #-}
+
+instance DefinerOfRegs r a => DefinerOfRegs r [a] where
+ foldRegsDefd dflags f set as = foldl' (foldRegsDefd dflags f) set as
+ {-# INLINABLE foldRegsDefd #-}
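+
+-- For example, the local registers read by an expression can be collected
+-- into a set (a sketch; 'dflags' and 'e' are assumed to be in scope):
+--
+--   usedLocals :: DynFlags -> CmmExpr -> LocalRegSet
+--   usedLocals dflags e = foldLocalRegsUsed dflags extendRegSet emptyRegSet e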
+
+-----------------------------------------------------------------------------
+-- Global STG registers
+-----------------------------------------------------------------------------
+
+data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
+
+{-
+Note [Overlapping global registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The backend might not faithfully implement the abstraction of the STG
+machine with independent registers for different values of type
+GlobalReg. Specifically, certain pairs of registers (r1, r2) may
+overlap in the sense that a store to r1 invalidates the value in r2,
+and vice versa.
+
+Currently this occurs only on the x86_64 architecture where FloatReg n
+and DoubleReg n are assigned the same microarchitectural register, in
+order to allow functions to receive more Float# or Double# arguments
+in registers (as opposed to on the stack).
+
+There are no specific rules about which registers might overlap with
+which other registers, but presumably it's safe to assume that nothing
+will overlap with special registers like Sp or BaseReg.
+
+Use GHC.Cmm.Utils.regsOverlap to determine whether two GlobalRegs overlap
+on a particular platform. The instance Eq GlobalReg is syntactic
+equality of STG registers and does not take overlap into
+account. However, it is still used in UserOfRegs/DefinerOfRegs and
+there are likely still bugs there, beware!
+-}
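+
+-- For instance, on x86_64, FloatReg 1 and DoubleReg 1 are distinct under
+-- the syntactic Eq instance (FloatReg 1 == DoubleReg 1 is False), yet they
+-- occupy the same machine register. A sketch of the overlap check:
+--
+--   regsOverlap dflags (CmmGlobal (FloatReg 1)) (CmmGlobal (DoubleReg 1))
+--     -- True on x86_64; see GHC.Cmm.Utils.regsOverlap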
+
+data GlobalReg
+ -- Argument and return registers
+ = VanillaReg -- pointers, unboxed ints and chars
+ {-# UNPACK #-} !Int -- its number
+ VGcPtr
+
+ | FloatReg -- single-precision floating-point registers
+ {-# UNPACK #-} !Int -- its number
+
+ | DoubleReg -- double-precision floating-point registers
+ {-# UNPACK #-} !Int -- its number
+
+ | LongReg -- long int registers (64-bit, really)
+ {-# UNPACK #-} !Int -- its number
+
+ | XmmReg -- 128-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
+ | YmmReg -- 256-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
+ | ZmmReg -- 512-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
+ -- STG registers
+ | Sp -- Stack ptr; points to last occupied stack location.
+ | SpLim -- Stack limit
+ | Hp -- Heap ptr; points to last occupied heap location.
+ | HpLim -- Heap limit register
+ | CCCS -- Current cost-centre stack
+ | CurrentTSO -- pointer to current thread's TSO
+ | CurrentNursery -- pointer to allocation area
+ | HpAlloc -- allocation count for heap check failure
+
+ -- We keep the address of some commonly-called
+ -- functions in the register table, to keep code
+ -- size down:
+ | EagerBlackholeInfo -- stg_EAGER_BLACKHOLE_info
+ | GCEnter1 -- stg_gc_enter_1
+ | GCFun -- stg_gc_fun
+
+ -- Base offset for the register table, used for accessing registers
+ -- which do not have real registers assigned to them. This register
+ -- will only appear after we have expanded GlobalReg into memory accesses
+ -- (where necessary) in the native code generator.
+ | BaseReg
+
+ -- The register used by the platform for the C stack pointer. This is
+ -- a break in the STG abstraction used exclusively to setup stack unwinding
+ -- information.
+ | MachSp
+
+ -- This is a dummy register used to indicate to the stack unwinder where
+ -- a routine would return to.
+ | UnwindReturnReg
+
+ -- Base Register for PIC (position-independent code) calculations
+ -- Only used inside the native code generator. Its exact meaning differs
+ -- from platform to platform (see module PositionIndependentCode).
+ | PicBaseReg
+
+ deriving( Show )
+
+instance Eq GlobalReg where
+ VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
+ FloatReg i == FloatReg j = i==j
+ DoubleReg i == DoubleReg j = i==j
+ LongReg i == LongReg j = i==j
+ -- NOTE: XMM, YMM and ZMM registers with the same index overlap: a
+ -- store to YmmReg i is visible when XmmReg i is subsequently read,
+ -- and similarly for ZmmReg i.
+ XmmReg i == XmmReg j = i==j
+ YmmReg i == YmmReg j = i==j
+ ZmmReg i == ZmmReg j = i==j
+ Sp == Sp = True
+ SpLim == SpLim = True
+ Hp == Hp = True
+ HpLim == HpLim = True
+ CCCS == CCCS = True
+ CurrentTSO == CurrentTSO = True
+ CurrentNursery == CurrentNursery = True
+ HpAlloc == HpAlloc = True
+ EagerBlackholeInfo == EagerBlackholeInfo = True
+ GCEnter1 == GCEnter1 = True
+ GCFun == GCFun = True
+ BaseReg == BaseReg = True
+ MachSp == MachSp = True
+ UnwindReturnReg == UnwindReturnReg = True
+ PicBaseReg == PicBaseReg = True
+ _r1 == _r2 = False
+
+instance Ord GlobalReg where
+ compare (VanillaReg i _) (VanillaReg j _) = compare i j
+ -- Ignore type when seeking clashes
+ compare (FloatReg i) (FloatReg j) = compare i j
+ compare (DoubleReg i) (DoubleReg j) = compare i j
+ compare (LongReg i) (LongReg j) = compare i j
+ compare (XmmReg i) (XmmReg j) = compare i j
+ compare (YmmReg i) (YmmReg j) = compare i j
+ compare (ZmmReg i) (ZmmReg j) = compare i j
+ compare Sp Sp = EQ
+ compare SpLim SpLim = EQ
+ compare Hp Hp = EQ
+ compare HpLim HpLim = EQ
+ compare CCCS CCCS = EQ
+ compare CurrentTSO CurrentTSO = EQ
+ compare CurrentNursery CurrentNursery = EQ
+ compare HpAlloc HpAlloc = EQ
+ compare EagerBlackholeInfo EagerBlackholeInfo = EQ
+ compare GCEnter1 GCEnter1 = EQ
+ compare GCFun GCFun = EQ
+ compare BaseReg BaseReg = EQ
+ compare MachSp MachSp = EQ
+ compare UnwindReturnReg UnwindReturnReg = EQ
+ compare PicBaseReg PicBaseReg = EQ
+ compare (VanillaReg _ _) _ = LT
+ compare _ (VanillaReg _ _) = GT
+ compare (FloatReg _) _ = LT
+ compare _ (FloatReg _) = GT
+ compare (DoubleReg _) _ = LT
+ compare _ (DoubleReg _) = GT
+ compare (LongReg _) _ = LT
+ compare _ (LongReg _) = GT
+ compare (XmmReg _) _ = LT
+ compare _ (XmmReg _) = GT
+ compare (YmmReg _) _ = LT
+ compare _ (YmmReg _) = GT
+ compare (ZmmReg _) _ = LT
+ compare _ (ZmmReg _) = GT
+ compare Sp _ = LT
+ compare _ Sp = GT
+ compare SpLim _ = LT
+ compare _ SpLim = GT
+ compare Hp _ = LT
+ compare _ Hp = GT
+ compare HpLim _ = LT
+ compare _ HpLim = GT
+ compare CCCS _ = LT
+ compare _ CCCS = GT
+ compare CurrentTSO _ = LT
+ compare _ CurrentTSO = GT
+ compare CurrentNursery _ = LT
+ compare _ CurrentNursery = GT
+ compare HpAlloc _ = LT
+ compare _ HpAlloc = GT
+ compare GCEnter1 _ = LT
+ compare _ GCEnter1 = GT
+ compare GCFun _ = LT
+ compare _ GCFun = GT
+ compare BaseReg _ = LT
+ compare _ BaseReg = GT
+ compare MachSp _ = LT
+ compare _ MachSp = GT
+ compare UnwindReturnReg _ = LT
+ compare _ UnwindReturnReg = GT
+ compare EagerBlackholeInfo _ = LT
+ compare _ EagerBlackholeInfo = GT
+
+-- convenient aliases
+baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg,
+ currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg
+baseReg = CmmGlobal BaseReg
+spReg = CmmGlobal Sp
+hpReg = CmmGlobal Hp
+hpLimReg = CmmGlobal HpLim
+spLimReg = CmmGlobal SpLim
+nodeReg = CmmGlobal node
+currentTSOReg = CmmGlobal CurrentTSO
+currentNurseryReg = CmmGlobal CurrentNursery
+hpAllocReg = CmmGlobal HpAlloc
+cccsReg = CmmGlobal CCCS
+
+node :: GlobalReg
+node = VanillaReg 1 VGcPtr
+
+globalRegType :: DynFlags -> GlobalReg -> CmmType
+globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags
+globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
+globalRegType _ (FloatReg _) = cmmFloat W32
+globalRegType _ (DoubleReg _) = cmmFloat W64
+globalRegType _ (LongReg _) = cmmBits W64
+-- TODO: improve the internal model of SIMD/vectorized registers;
+-- the right design should improve handling of float and double code too.
+-- See remarks in Note [SIMD Design for the future] in GHC.StgToCmm.Prim
+globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
+globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
+
+globalRegType dflags Hp = gcWord dflags
+ -- The initialiser for all
+ -- dynamically allocated closures
+globalRegType dflags _ = bWord dflags
+
+isArgReg :: GlobalReg -> Bool
+isArgReg (VanillaReg {}) = True
+isArgReg (FloatReg {}) = True
+isArgReg (DoubleReg {}) = True
+isArgReg (LongReg {}) = True
+isArgReg (XmmReg {}) = True
+isArgReg (YmmReg {}) = True
+isArgReg (ZmmReg {}) = True
+isArgReg _ = False
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
new file mode 100644
index 0000000000..8d19e7fdb9
--- /dev/null
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -0,0 +1,484 @@
+{-# LANGUAGE BangPatterns, GADTs #-}
+
+module GHC.Cmm.Graph
+ ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
+ , (<*>), catAGraphs
+ , mkLabel, mkMiddle, mkLast, outOfLine
+ , lgraphOfAGraph, labelAGraph
+
+ , stackStubExpr
+ , mkNop, mkAssign, mkStore
+ , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJumpReturnsTo
+ , mkJump, mkJumpExtra
+ , mkRawJump
+ , mkCbranch, mkSwitch
+ , mkReturn, mkComment, mkCallEntry, mkBranch
+ , mkUnwind
+ , copyInOflow, copyOutOflow
+ , noExtraStack
+ , toCall, Transfer(..)
+ )
+where
+
+import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.CallConv
+import GHC.Cmm.Switch (SwitchTargets)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import DynFlags
+import FastString
+import ForeignCall
+import OrdList
+import GHC.Runtime.Layout (ByteOff)
+import UniqSupply
+import Util
+import Panic
+
+
+-----------------------------------------------------------------------------
+-- Building Graphs
+
+
+-- | CmmAGraph is a chunk of code consisting of:
+--
+-- * ordinary statements (assignments, stores etc.)
+-- * jumps
+-- * labels
+-- * out-of-line labelled blocks
+--
+-- The semantics is that control falls through labels and out-of-line
+-- blocks. Everything after a jump up to the next label is by
+-- definition unreachable code, and will be discarded.
+--
+-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
+-- control flows from the first to the second.
+--
+-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
+-- by providing a label for the entry point and a tick scope; see
+-- 'labelAGraph'.
+type CmmAGraph = OrdList CgStmt
+-- | Unlabeled graph with tick scope
+type CmmAGraphScoped = (CmmAGraph, CmmTickScope)
+
+data CgStmt
+ = CgLabel BlockId CmmTickScope
+ | CgStmt (CmmNode O O)
+ | CgLast (CmmNode O C)
+ | CgFork BlockId CmmAGraph CmmTickScope
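+
+-- For example (labels and statements are hypothetical), the AGraph
+--
+--   mkMiddle s1 <*> mkLast (CmmBranch k) <*> mkLabel k scp <*> mkMiddle s2
+--
+-- flattens into two blocks: one ending in a branch to k, and a block
+-- labelled k containing s2. Since that second block has no last node,
+-- flatten1 below closes it with a self-branch placeholder that dead-code
+-- removal cleans up later.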
+
+flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
+flattenCmmAGraph id (stmts_t, tscope) =
+ CmmGraph { g_entry = id,
+ g_graph = GMany NothingO body NothingO }
+ where
+ body = foldr addBlock emptyBody $ flatten id stmts_t tscope []
+
+ --
+ -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
+ --
+ -- NB. avoid the quadratic-append trap by passing in the tail of the
+ -- list. This is important for Very Long Functions (e.g. in T783).
+ --
+ flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C]
+ -> [Block CmmNode C C]
+ flatten id g tscope blocks
+ = flatten1 (fromOL g) block' blocks
+ where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock
+ --
+ -- flatten0: we are outside a block at this point: any code before
+ -- the first label is unreachable, so just drop it.
+ --
+ flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
+ flatten0 [] blocks = blocks
+
+ flatten0 (CgLabel id tscope : stmts) blocks
+ = flatten1 stmts block blocks
+ where !block = blockJoinHead (CmmEntry id tscope) emptyBlock
+
+ flatten0 (CgFork fork_id stmts_t tscope : rest) blocks
+ = flatten fork_id stmts_t tscope $ flatten0 rest blocks
+
+ flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
+ flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
+
+ --
+ -- flatten1: we have a partial block, collect statements until the
+ -- next last node to make a block, then call flatten0 to get the rest
+ -- of the blocks
+ --
+ flatten1 :: [CgStmt] -> Block CmmNode C O
+ -> [Block CmmNode C C] -> [Block CmmNode C C]
+
+ -- The current block falls through to the end of a function or fork:
+ -- this code should not be reachable, but it may be referenced by
+ -- other code that is not reachable. We'll remove it later with
+ -- dead-code analysis, but for now we have to keep the graph
+ -- well-formed, so we terminate the block with a branch to the
+ -- beginning of the current block.
+ flatten1 [] block blocks
+ = blockJoinTail block (CmmBranch (entryLabel block)) : blocks
+
+ flatten1 (CgLast stmt : stmts) block blocks
+ = block' : flatten0 stmts blocks
+ where !block' = blockJoinTail block stmt
+
+ flatten1 (CgStmt stmt : stmts) block blocks
+ = flatten1 stmts block' blocks
+ where !block' = blockSnoc block stmt
+
+ flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks
+ = flatten fork_id stmts_t tscope $ flatten1 rest block blocks
+
+ -- a label here means that we should start a new block, and the
+ -- current block should fall through to the new block.
+ flatten1 (CgLabel id tscp : stmts) block blocks
+ = blockJoinTail block (CmmBranch id) :
+ flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks
+
+
+
+---------- AGraph manipulation
+
+(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph
+(<*>) = appOL
+
+catAGraphs :: [CmmAGraph] -> CmmAGraph
+catAGraphs = concatOL
+
+-- | creates a sequence "goto id; id:" as an AGraph
+mkLabel :: BlockId -> CmmTickScope -> CmmAGraph
+mkLabel bid scp = unitOL (CgLabel bid scp)
+
+-- | creates an open AGraph from a given node
+mkMiddle :: CmmNode O O -> CmmAGraph
+mkMiddle middle = unitOL (CgStmt middle)
+
+-- | creates a closed AGraph from a given node
+mkLast :: CmmNode O C -> CmmAGraph
+mkLast last = unitOL (CgLast last)
+
+-- | A labelled code block; should end in a last node
+outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph
+outOfLine l (c,s) = unitOL (CgFork l c s)
+
+-- | allocate a fresh label for the entry point
+lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph
+lgraphOfAGraph g = do
+ u <- getUniqueM
+ return (labelAGraph (mkBlockId u) g)
+
+-- | use the given BlockId as the label of the entry point
+labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
+labelAGraph lbl ag = flattenCmmAGraph lbl ag
+
+---------- No-ops
+mkNop :: CmmAGraph
+mkNop = nilOL
+
+mkComment :: FastString -> CmmAGraph
+mkComment fs
+ -- SDM: generating all those comments takes time, this saved about 4% for me
+ | debugIsOn = mkMiddle $ CmmComment fs
+ | otherwise = nilOL
+
+---------- Assignment and store
+mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
+mkAssign l (CmmReg r) | l == r = mkNop
+mkAssign l r = mkMiddle $ CmmAssign l r
+
+mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
+mkStore l r = mkMiddle $ CmmStore l r
+
+---------- Control transfer
+mkJump :: DynFlags -> Convention -> CmmExpr
+ -> [CmmExpr]
+ -> UpdFrameOffset
+ -> CmmAGraph
+mkJump dflags conv e actuals updfr_off =
+ lastWithArgs dflags Jump Old conv actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+-- | A jump where the caller says what the live GlobalRegs are. Used
+-- for low-level hand-written Cmm.
+mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
+ -> CmmAGraph
+mkRawJump dflags e updfr_off vols =
+ lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $
+ \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols
+
+
+mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr]
+ -> UpdFrameOffset -> [CmmExpr]
+ -> CmmAGraph
+mkJumpExtra dflags conv e actuals updfr_off extra_stack =
+ lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $
+ toCall e Nothing updfr_off 0
+
+mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph
+mkCbranch pred ifso ifnot likely =
+ mkLast (CmmCondBranch pred ifso ifnot likely)
+
+mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph
+mkSwitch e tbl = mkLast $ CmmSwitch e tbl
+
+mkReturn :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset
+ -> CmmAGraph
+mkReturn dflags e actuals updfr_off =
+ lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkBranch :: BlockId -> CmmAGraph
+mkBranch bid = mkLast (CmmBranch bid)
+
+mkFinalCall :: DynFlags
+ -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset
+ -> CmmAGraph
+mkFinalCall dflags f _ actuals updfr_off =
+ lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $
+ toCall f Nothing updfr_off 0
+
+mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> [CmmExpr]
+ -> CmmAGraph
+mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack =
+ lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals
+ updfr_off extra_stack $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
+-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be
+-- already on the stack).
+mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> CmmAGraph
+mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off =
+ lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
+mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
+mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
+
+-- | Construct a 'CmmUnwind' node for the given register and unwinding
+-- expression.
+mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph
+mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)]
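+
+-- e.g. mkUnwind Sp (CmmRegOff spReg 16) records that, at this point, the
+-- unwound value of Sp is 16 bytes above the current Sp (a sketch with a
+-- hypothetical offset).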
+
+--------------------------------------------------------------------------
+
+
+
+
+-- Why are we inserting extra blocks that simply branch to the successors?
+-- Because in addition to the branch instruction, @mkBranch@ will insert
+-- a necessary adjustment to the stack pointer.
+
+
+-- For debugging purposes, we can stub out dead stack slots:
+stackStubExpr :: Width -> CmmExpr
+stackStubExpr w = CmmLit (CmmInt 0 w)
+
+-- When we copy in parameters, we usually want to put overflow
+-- parameters on the stack, but sometimes we want to pass the
+-- variables in their spill slots. Therefore, for copying arguments
+-- and results, we provide different functions to pass the arguments
+-- in an overflow area and to pass them in spill slots.
+copyInOflow :: DynFlags -> Convention -> Area
+ -> [CmmFormal]
+ -> [CmmFormal]
+ -> (Int, [GlobalReg], CmmAGraph)
+
+copyInOflow dflags conv area formals extra_stk
+ = (offset, gregs, catAGraphs $ map mkMiddle nodes)
+ where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk
+
+-- Return the number of bytes used for copying arguments, as well as the
+-- instructions to copy the arguments.
+copyIn :: DynFlags -> Convention -> Area
+ -> [CmmFormal]
+ -> [CmmFormal]
+ -> (ByteOff, [GlobalReg], [CmmNode O O])
+copyIn dflags conv area formals extra_stk
+ = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
+ where
+ -- See Note [Width of parameters]
+ ci (reg, RegisterParam r@(VanillaReg {})) =
+ let local = CmmLocal reg
+ global = CmmReg (CmmGlobal r)
+ width = cmmRegWidth dflags local
+ expr
+ | width == wordWidth dflags = global
+ | width < wordWidth dflags =
+ CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global]
+ | otherwise = panic "Parameter width greater than word width"
+
+ in CmmAssign local expr
+
+ -- Non VanillaRegs
+ ci (reg, RegisterParam r) =
+ CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))
+
+ ci (reg, StackParam off)
+ | isBitsType $ localRegType reg
+ , typeWidth (localRegType reg) < wordWidth dflags =
+ let
+ stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags))
+ local = CmmLocal reg
+ width = cmmRegWidth dflags local
+ expr = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot]
+ in CmmAssign local expr
+
+ | otherwise =
+ CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
+ where ty = localRegType reg
+
+ init_offset = widthInBytes (wordWidth dflags) -- infotable
+
+ (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk
+
+ (stk_size, args) = assignArgumentsPos dflags stk_off conv
+ localRegType formals
+
+-- Factoring out the common parts of the copyout functions yielded something
+-- more complicated:
+
+data Transfer = Call | JumpRet | Jump | Ret deriving Eq
+
+copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr]
+ -> UpdFrameOffset
+ -> [CmmExpr] -- extra stack args
+ -> (Int, [GlobalReg], CmmAGraph)
+
+-- Generate code to move the actual parameters into the locations
+-- required by the calling convention. This includes a store for the
+-- return address.
+--
+-- The argument layout function ignores the pointer to the info table,
+-- so we slot that in here. When copying-out to a young area, we set
+-- the info table for return and adjust the offsets of the other
+-- parameters. If this is a call instruction, we adjust the offsets
+-- of the other parameters.
+copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
+ = (stk_size, regs, graph)
+ where
+ (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)
+
+ -- See Note [Width of parameters]
+ co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
+ let width = cmmExprWidth dflags v
+ value
+ | width == wordWidth dflags = v
+ | width < wordWidth dflags =
+ CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v]
+ | otherwise = panic "Parameter width greater than word width"
+
+ in (r:rs, mkAssign (CmmGlobal r) value <*> ms)
+
+ -- Non VanillaRegs
+ co (v, RegisterParam r) (rs, ms) =
+ (r:rs, mkAssign (CmmGlobal r) v <*> ms)
+
+ -- See Note [Width of parameters]
+ co (v, StackParam off) (rs, ms)
+ = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)
+
+ width v = cmmExprWidth dflags v
+ value v
+ | isBitsType $ cmmExprType dflags v
+ , width v < wordWidth dflags =
+ CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v]
+ | otherwise = v
+
+ (setRA, init_offset) =
+ case area of
+ Young id -> -- Generate a store instruction for
+ -- the return address if making a call
+ case transfer of
+ Call ->
+ ([(CmmLit (CmmBlock id), StackParam init_offset)],
+ widthInBytes (wordWidth dflags))
+ JumpRet ->
+ ([],
+ widthInBytes (wordWidth dflags))
+ _other ->
+ ([], 0)
+ Old -> ([], updfr_off)
+
+ (extra_stack_off, stack_params) =
+ assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff
+
+ args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it
+ (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv
+ (cmmExprType dflags) actuals
+
+
+-- Note [Width of parameters]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider passing a small (< word width) primitive like Int8# to a function.
+-- It's actually non-trivial to do this without extending/narrowing:
+-- * Global registers are considered to have native word width (i.e., 64-bits on
+-- x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a
+-- global register.
+-- * Same problem exists with LLVM IR.
+-- * Lowering gets harder since on x86-32 not every register exposes its lower
+-- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
+-- 8-bit register for %edi). So we would either need to extend/narrow anyway,
+-- or complicate the calling convention.
+-- * Passing a small integer in a stack slot, which has native word width,
+-- requires extending to word width when writing to the stack and narrowing
+-- when reading off the stack (see #16258).
+-- So instead, we always extend every parameter smaller than native word width
+-- in copyOutOflow and then truncate it back to the expected width in copyIn.
+-- Note that we do this in Cmm using MO_XX_Conv, which does not commit to
+-- zero- or sign-extension - it's up to the backend to handle this in the
+-- most efficient way (e.g., a simple register move or a smaller-size store).
+-- This convention (of ignoring the upper bits) is different from some C ABIs,
+-- e.g. all PowerPC ELF ABIs, which require sign- or zero-extending parameters.
+--
+-- There was some discussion about this on this PR:
+-- https://github.com/ghc-proposals/ghc-proposals/pull/74
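+--
+-- A sketch of the two conversions on a 64-bit target, for a hypothetical
+-- 8-bit actual 'v8' passed in a vanilla register 'r':
+--
+--   CmmAssign (CmmGlobal r) (CmmMachOp (MO_XX_Conv W8 W64) [v8])
+--       -- copyOutOflow: widen before the transfer
+--   CmmAssign (CmmLocal reg) (CmmMachOp (MO_XX_Conv W64 W8)
+--                                       [CmmReg (CmmGlobal r)])
+--       -- copyIn: truncate back in the callee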
+
+
+mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal]
+ -> (Int, [GlobalReg], CmmAGraph)
+mkCallEntry dflags conv formals extra_stk
+ = copyInOflow dflags conv Old formals extra_stk
+
+lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr]
+ -> UpdFrameOffset
+ -> (ByteOff -> [GlobalReg] -> CmmAGraph)
+ -> CmmAGraph
+lastWithArgs dflags transfer area conv actuals updfr_off last =
+ lastWithArgsAndExtraStack dflags transfer area conv actuals
+ updfr_off noExtraStack last
+
+lastWithArgsAndExtraStack :: DynFlags
+ -> Transfer -> Area -> Convention -> [CmmExpr]
+ -> UpdFrameOffset -> [CmmExpr]
+ -> (ByteOff -> [GlobalReg] -> CmmAGraph)
+ -> CmmAGraph
+lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off
+ extra_stack last =
+ copies <*> last outArgs regs
+ where
+ (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals
+ updfr_off extra_stack
+
+
+noExtraStack :: [CmmExpr]
+noExtraStack = []
+
+toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff
+ -> ByteOff -> [GlobalReg]
+ -> CmmAGraph
+toCall e cont updfr_off res_space arg_space regs =
+ mkLast $ CmmCall e cont regs arg_space res_space updfr_off
diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs
new file mode 100644
index 0000000000..a10db2b292
--- /dev/null
+++ b/compiler/GHC/Cmm/Info.hs
@@ -0,0 +1,593 @@
+{-# LANGUAGE CPP #-}
+module GHC.Cmm.Info (
+ mkEmptyContInfoTable,
+ cmmToRawCmm,
+ mkInfoTable,
+ srtEscape,
+
+ -- info table accessors
+ closureInfoPtr,
+ entryCode,
+ getConstrTag,
+ cmmGetClosureType,
+ infoTable,
+ infoTableConstrTag,
+ infoTableSrtBitmap,
+ infoTableClosureType,
+ infoTablePtrs,
+ infoTableNonPtrs,
+ funInfoTable,
+ funInfoArity,
+
+ -- info table sizes and offsets
+ stdInfoTableSizeW,
+ fixedInfoTableSizeW,
+ profInfoTableSizeW,
+ maxStdInfoTableSizeW,
+ maxRetInfoTableSizeW,
+ stdInfoTableSizeB,
+ conInfoTableSizeB,
+ stdSrtBitmapOffset,
+ stdClosureTypeOffset,
+ stdPtrsOffset, stdNonPtrsOffset,
+) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.CLabel
+import GHC.Runtime.Layout
+import GHC.Data.Bitmap
+import Stream (Stream)
+import qualified Stream
+import GHC.Cmm.Dataflow.Collections
+
+import GHC.Platform
+import Maybes
+import DynFlags
+import ErrUtils (withTimingSilent)
+import Panic
+import UniqSupply
+import MonadUtils
+import Util
+import Outputable
+
+import Data.ByteString (ByteString)
+import Data.Bits
+
+-- When we split at proc points, we need an empty info table.
+mkEmptyContInfoTable :: CLabel -> CmmInfoTable
+mkEmptyContInfoTable info_lbl
+ = CmmInfoTable { cit_lbl = info_lbl
+ , cit_rep = mkStackRep []
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing }
+
+cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a
+ -> IO (Stream IO RawCmmGroup a)
+cmmToRawCmm dflags cmms
+ = do { uniqs <- mkSplitUniqSupply 'i'
+ ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl])
+ do_one uniqs cmm =
+ -- NB. strictness fixes a space leak. DO NOT REMOVE.
+ withTimingSilent dflags (text "Cmm -> Raw Cmm")
+ forceRes $
+ case initUs uniqs $ concatMapM (mkInfoTable dflags) cmm of
+ (b,uniqs') -> return (uniqs',b)
+ ; return (snd <$> Stream.mapAccumL_ do_one uniqs cmms)
+ }
+
+ where forceRes (uniqs, rawcmms) =
+ uniqs `seq` foldr (\decl r -> decl `seq` r) () rawcmms
+
+-- Make a concrete info table, represented as a list of CmmStatic
+-- (it can't be simply a list of Word, because the SRT field is
+-- represented by a label+offset expression).
+--
+-- With tablesNextToCode, the layout is
+-- <reversed variable part>
+-- <normal forward StgInfoTable, but without
+-- an entry point at the front>
+-- <code>
+--
+-- Without tablesNextToCode, the layout of an info table is
+-- <entry label>
+-- <normal forward rest of StgInfoTable>
+-- <forward variable part>
+--
+-- See includes/rts/storage/InfoTables.h
+--
+-- For return-points these are as follows
+--
+-- Tables next to code:
+--
+-- <srt slot>
+-- <standard info table>
+-- ret-addr --> <entry code (if any)>
+--
+-- Not tables-next-to-code:
+--
+-- ret-addr --> <ptr to entry code>
+-- <standard info table>
+-- <srt slot>
+--
+-- * The SRT slot is only there if there is SRT info to record
+
+mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl]
+mkInfoTable _ (CmmData sec dat)
+ = return [CmmData sec dat]
+
+mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
+ --
+ -- in the non-tables-next-to-code case, procs can have at most a
+ -- single info table associated with the entry label of the proc.
+ --
+ | not (tablesNextToCode dflags)
+ = case topInfoTable proc of -- must be at most one
+ -- no info table
+ Nothing ->
+ return [CmmProc mapEmpty entry_lbl live blocks]
+
+ Just info@CmmInfoTable { cit_lbl = info_lbl } -> do
+ (top_decls, (std_info, extra_bits)) <-
+ mkInfoTableContents dflags info Nothing
+ let
+ rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info
+ rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
+ --
+ -- Separately emit info table (with the function entry
+ -- point as first entry) and the entry code
+ --
+ return (top_decls ++
+ [CmmProc mapEmpty entry_lbl live blocks,
+ mkRODataLits info_lbl
+ (CmmLabel entry_lbl : rel_std_info ++ rel_extra_bits)])
+
+ --
+ -- With tables-next-to-code, we can have many info tables,
+ -- associated with some of the BlockIds of the proc. For each info
+ -- table we need to turn it into CmmStatics, and collect any new
+ -- CmmDecls that arise from doing so.
+ --
+ | otherwise
+ = do
+ (top_declss, raw_infos) <-
+ unzip `fmap` mapM do_one_info (mapToList (info_tbls infos))
+ return (concat top_declss ++
+ [CmmProc (mapFromList raw_infos) entry_lbl live blocks])
+
+ where
+ do_one_info (lbl,itbl) = do
+ (top_decls, (std_info, extra_bits)) <-
+ mkInfoTableContents dflags itbl Nothing
+ let
+ info_lbl = cit_lbl itbl
+ rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info
+ rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
+ --
+ return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $
+ reverse rel_extra_bits ++ rel_std_info))
+
+-----------------------------------------------------
+type InfoTableContents = ( [CmmLit] -- The standard part
+ , [CmmLit] ) -- The "extra bits"
+-- These Lits have *not* had mkRelativeTo applied to them
+
+mkInfoTableContents :: DynFlags
+ -> CmmInfoTable
+ -> Maybe Int -- Override default RTS type tag?
+ -> UniqSM ([RawCmmDecl], -- Auxiliary top decls
+ InfoTableContents) -- Info tbl + extra bits
+
+mkInfoTableContents dflags
+ info@(CmmInfoTable { cit_lbl = info_lbl
+ , cit_rep = smrep
+ , cit_prof = prof
+ , cit_srt = srt })
+ mb_rts_tag
+ | RTSRep rts_tag rep <- smrep
+ = mkInfoTableContents dflags info{cit_rep = rep} (Just rts_tag)
+ -- Completely override the rts_tag that mkInfoTableContents would
+ -- otherwise compute, with the rts_tag stored in the RTSRep
+ -- (which in turn came from a handwritten .cmm file)
+
+ | StackRep frame <- smrep
+ = do { (prof_lits, prof_data) <- mkProfLits dflags prof
+ ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt
+ ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame
+ ; let
+ std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit
+ rts_tag | Just tag <- mb_rts_tag = tag
+ | null liveness_data = rET_SMALL -- Fits in extra_bits
+ | otherwise = rET_BIG -- Does not; extra_bits is
+ -- a label
+ ; return (prof_data ++ liveness_data, (std_info, srt_label)) }
+
+ | HeapRep _ ptrs nonptrs closure_type <- smrep
+ = do { let layout = packIntsCLit dflags ptrs nonptrs
+ ; (prof_lits, prof_data) <- mkProfLits dflags prof
+ ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt
+ ; (mb_srt_field, mb_layout, extra_bits, ct_data)
+ <- mk_pieces closure_type srt_label
+ ; let std_info = mkStdInfoTable dflags prof_lits
+ (mb_rts_tag `orElse` rtsClosureType smrep)
+ (mb_srt_field `orElse` srt_bitmap)
+ (mb_layout `orElse` layout)
+ ; return (prof_data ++ ct_data, (std_info, extra_bits)) }
+ where
+ mk_pieces :: ClosureTypeInfo -> [CmmLit]
+ -> UniqSM ( Maybe CmmLit -- Override the SRT field with this
+ , Maybe CmmLit -- Override the layout field with this
+ , [CmmLit] -- "Extra bits" for info table
+ , [RawCmmDecl]) -- Auxiliary data decls
+ mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor
+ = do { (descr_lit, decl) <- newStringLit con_descr
+ ; return ( Just (CmmInt (fromIntegral con_tag)
+ (halfWordWidth dflags))
+ , Nothing, [descr_lit], [decl]) }
+
+ mk_pieces Thunk srt_label
+ = return (Nothing, Nothing, srt_label, [])
+
+ mk_pieces (ThunkSelector offset) _no_srt
+ = return (Just (CmmInt 0 (halfWordWidth dflags)),
+ Just (mkWordCLit dflags (fromIntegral offset)), [], [])
+ -- Layout known (one free var); we use the layout field for offset
+
+ mk_pieces (Fun arity (ArgSpec fun_type)) srt_label
+ = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label
+ ; return (Nothing, Nothing, extra_bits, []) }
+
+ mk_pieces (Fun arity (ArgGen arg_bits)) srt_label
+ = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits
+ ; let fun_type | null liveness_data = aRG_GEN
+ | otherwise = aRG_GEN_BIG
+ extra_bits = [ packIntsCLit dflags fun_type arity ]
+ ++ (if inlineSRT dflags then [] else [ srt_lit ])
+ ++ [ liveness_lit, slow_entry ]
+ ; return (Nothing, Nothing, extra_bits, liveness_data) }
+ where
+ slow_entry = CmmLabel (toSlowEntryLbl info_lbl)
+ srt_lit = case srt_label of
+ [] -> mkIntCLit dflags 0
+ (lit:_rest) -> ASSERT( null _rest ) lit
+
+ mk_pieces other _ = pprPanic "mk_pieces" (ppr other)
+
+mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier
+
+packIntsCLit :: DynFlags -> Int -> Int -> CmmLit
+packIntsCLit dflags a b = packHalfWordsCLit dflags
+ (toStgHalfWord dflags (fromIntegral a))
+ (toStgHalfWord dflags (fromIntegral b))
+
+
+mkSRTLit :: DynFlags
+ -> CLabel
+ -> Maybe CLabel
+ -> ([CmmLit], -- srt_label, if any
+ CmmLit) -- srt_bitmap
+mkSRTLit dflags info_lbl (Just lbl)
+ | inlineSRT dflags
+ = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags))
+mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags))
+mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags))
+
+
+-- | Is the SRT offset field inline in the info table on this platform?
+--
+-- See the section "Referring to an SRT from the info table" in
+-- Note [SRTs] in GHC.Cmm.Info.Build
+inlineSRT :: DynFlags -> Bool
+inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64
+ && tablesNextToCode dflags
+
+-------------------------------------------------------------------------
+--
+-- Lay out the info table and handle relative offsets
+--
+-------------------------------------------------------------------------
+
+-- This function takes
+-- * the standard info table portion (StgInfoTable)
+-- * the "extra bits" (StgFunInfoExtraRev etc.)
+-- * the entry label
+-- * the code
+-- and lays them out in memory, producing a list of RawCmmDecl
+
+-------------------------------------------------------------------------
+--
+-- Position independent code
+--
+-------------------------------------------------------------------------
+-- In order to support position independent code, we mustn't put absolute
+-- references into read-only space. Info tables in the tablesNextToCode
+-- case must be in .text, which is read-only, so we doctor the CmmLits
+-- to use relative offsets instead.
+
+-- Note that this is done even when the -fPIC flag is not specified,
+-- as we want to keep binary compatibility between PIC and non-PIC.
+
+makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit
+
+makeRelativeRefTo dflags info_lbl (CmmLabel lbl)
+ | tablesNextToCode dflags
+ = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags)
+makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off)
+ | tablesNextToCode dflags
+ = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags)
+makeRelativeRefTo _ _ lit = lit
+
+
+-------------------------------------------------------------------------
+--
+-- Build a liveness mask for the stack layout
+--
+-------------------------------------------------------------------------
+
+-- There are four kinds of things on the stack:
+--
+-- - pointer variables (bound in the environment)
+-- - non-pointer variables (bound in the environment)
+-- - free slots (recorded in the stack free list)
+-- - non-pointer data slots (recorded in the stack free list)
+--
+-- The first two are represented with a 'Just' of a 'LocalReg'.
+-- The last two with one or more 'Nothing' constructors.
+-- Each 'Nothing' represents one used word.
+--
+-- The head of the stack layout is the top of the stack and
+-- the least-significant bit.
+
+mkLivenessBits :: DynFlags -> Liveness -> UniqSM (CmmLit, [RawCmmDecl])
+ -- ^ Returns:
+ -- 1. The bitmap (literal value or label)
+ -- 2. Large bitmap CmmData if needed
+
+mkLivenessBits dflags liveness
+ | n_bits > mAX_SMALL_BITMAP_SIZE dflags -- does not fit in one word
+ = do { uniq <- getUniqueM
+ ; let bitmap_lbl = mkBitmapLabel uniq
+ ; return (CmmLabel bitmap_lbl,
+ [mkRODataLits bitmap_lbl lits]) }
+
+ | otherwise -- Fits in one word
+ = return (mkStgWordCLit dflags bitmap_word, [])
+ where
+ n_bits = length liveness
+
+ bitmap :: Bitmap
+ bitmap = mkBitmap dflags liveness
+
+ small_bitmap = case bitmap of
+ [] -> toStgWord dflags 0
+ [b] -> b
+ _ -> panic "mkLivenessBits"
+ bitmap_word = toStgWord dflags (fromIntegral n_bits)
+ .|. (small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags)
+
+ lits = mkWordCLit dflags (fromIntegral n_bits)
+ : map (mkStgWordCLit dflags) bitmap
+ -- The first word is the size. The structure must match
+ -- StgLargeBitmap in includes/rts/storage/InfoTable.h
+
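+-- For example, a 3-slot liveness fits in one word; with a typical 64-bit
+-- bITMAP_BITS_SHIFT of 6, the encoded word is, schematically,
+--
+--   toStgWord dflags 3  .|.  (small_bitmap `shiftL` 6)
+--
+-- i.e. the size in the low bits and the bitmap above it (a sketch; the
+-- exact shift is the platform's bITMAP_BITS_SHIFT).
+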
+-------------------------------------------------------------------------
+--
+-- Generating a standard info table
+--
+-------------------------------------------------------------------------
+
+-- The standard bits of an info table. This part of the info table
+-- corresponds to the StgInfoTable type defined in
+-- includes/rts/storage/InfoTables.h.
+--
+-- Its shape varies with ticky/profiling/tables next to code etc
+-- so we can't use constant offsets from Constants
+
+mkStdInfoTable
+ :: DynFlags
+ -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling)
+ -> Int -- Closure RTS tag
+ -> CmmLit -- SRT length
+ -> CmmLit -- layout field
+ -> [CmmLit]
+
+mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit
+ = -- Parallel revertible-black hole field
+ prof_info
+ -- Ticky info (none at present)
+ -- Debug info (none at present)
+ ++ [layout_lit, tag, srt]
+
+ where
+ prof_info
+ | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr]
+ | otherwise = []
+
+ tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags)
+
+-------------------------------------------------------------------------
+--
+-- Making string literals
+--
+-------------------------------------------------------------------------
+
+mkProfLits :: DynFlags -> ProfilingInfo -> UniqSM ((CmmLit,CmmLit), [RawCmmDecl])
+mkProfLits dflags NoProfilingInfo = return ((zeroCLit dflags, zeroCLit dflags), [])
+mkProfLits _ (ProfilingInfo td cd)
+ = do { (td_lit, td_decl) <- newStringLit td
+ ; (cd_lit, cd_decl) <- newStringLit cd
+ ; return ((td_lit,cd_lit), [td_decl,cd_decl]) }
+
+newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt)
+newStringLit bytes
+ = do { uniq <- getUniqueM
+ ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) }
+
+
+-- Misc utils
+
+-- | Value of the srt field of an info table when using an StgLargeSRT
+srtEscape :: DynFlags -> StgHalfWord
+srtEscape dflags = toStgHalfWord dflags (-1)
+
+-------------------------------------------------------------------------
+--
+-- Accessing fields of an info table
+--
+-------------------------------------------------------------------------
+
+-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is
+-- enabled.
+wordAligned :: DynFlags -> CmmExpr -> CmmExpr
+wordAligned dflags e
+ | gopt Opt_AlignmentSanitisation dflags
+ = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e]
+ | otherwise
+ = e
+
+closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer and returns the info table pointer
+closureInfoPtr dflags e =
+ CmmLoad (wordAligned dflags e) (bWord dflags)
+
+entryCode :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns its entry code
+entryCode dflags e
+ | tablesNextToCode dflags = e
+ | otherwise = CmmLoad e (bWord dflags)
+
+getConstrTag :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer, and returns the *zero-indexed*
+-- constructor tag obtained from the info table
+-- This lives in the SRT field of the info table
+-- (constructors don't need SRTs).
+getConstrTag dflags closure_ptr
+ = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableConstrTag dflags info_table]
+ where
+ info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+cmmGetClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer, and returns the closure type
+-- obtained from the info table
+cmmGetClosureType dflags closure_ptr
+ = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableClosureType dflags info_table]
+ where
+ info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+infoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns a pointer to the first word of the standard-form
+-- info table, excluding the entry-code word (if present)
+infoTable dflags info_ptr
+ | tablesNextToCode dflags = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags)
+ | otherwise = cmmOffsetW dflags info_ptr 1 -- Past the entry code pointer
+
+infoTableConstrTag :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the constr tag
+-- field of the info table (same as the srt_bitmap field)
+infoTableConstrTag = infoTableSrtBitmap
+
+infoTableSrtBitmap :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the srt_bitmap
+-- field of the info table
+infoTableSrtBitmap dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdSrtBitmapOffset dflags)) (bHalfWord dflags)
+
+infoTableClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the closure type
+-- field of the info table.
+infoTableClosureType dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdClosureTypeOffset dflags)) (bHalfWord dflags)
+
+infoTablePtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTablePtrs dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdPtrsOffset dflags)) (bHalfWord dflags)
+
+infoTableNonPtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTableNonPtrs dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdNonPtrsOffset dflags)) (bHalfWord dflags)
+
+funInfoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes the info pointer of a function,
+-- and returns a pointer to the first word of the StgFunInfoExtra struct
+-- in the info table.
+funInfoTable dflags info_ptr
+ | tablesNextToCode dflags
+ = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags - sIZEOF_StgFunInfoExtraRev dflags)
+ | otherwise
+ = cmmOffsetW dflags info_ptr (1 + stdInfoTableSizeW dflags)
+ -- Past the entry code pointer
+
+-- Takes the info pointer of a function, returns the function's arity
+funInfoArity :: DynFlags -> CmmExpr -> CmmExpr
+funInfoArity dflags iptr
+ = cmmToWord dflags (cmmLoadIndex dflags rep fun_info (offset `div` rep_bytes))
+ where
+ fun_info = funInfoTable dflags iptr
+ rep = cmmBits (widthFromBytes rep_bytes)
+
+ (rep_bytes, offset)
+ | tablesNextToCode dflags = ( pc_REP_StgFunInfoExtraRev_arity pc
+ , oFFSET_StgFunInfoExtraRev_arity dflags )
+ | otherwise = ( pc_REP_StgFunInfoExtraFwd_arity pc
+ , oFFSET_StgFunInfoExtraFwd_arity dflags )
+
+ pc = platformConstants dflags
+
+-----------------------------------------------------------------------------
+--
+-- Info table sizes & offsets
+--
+-----------------------------------------------------------------------------
+
+stdInfoTableSizeW :: DynFlags -> WordOff
+-- The size of a standard info table varies with profiling/ticky etc,
+-- so we can't get it from Constants
+-- It must vary in sync with mkStdInfoTable
+stdInfoTableSizeW dflags
+ = fixedInfoTableSizeW
+ + if gopt Opt_SccProfilingOn dflags
+ then profInfoTableSizeW
+ else 0
+
+fixedInfoTableSizeW :: WordOff
+fixedInfoTableSizeW = 2 -- layout, type
+
+profInfoTableSizeW :: WordOff
+profInfoTableSizeW = 2
+
+maxStdInfoTableSizeW :: WordOff
+maxStdInfoTableSizeW =
+ 1 {- entry, when !tablesNextToCode -}
+ + fixedInfoTableSizeW
+ + profInfoTableSizeW
+
+maxRetInfoTableSizeW :: WordOff
+maxRetInfoTableSizeW =
+ maxStdInfoTableSizeW
+ + 1 {- srt label -}
+
+stdInfoTableSizeB :: DynFlags -> ByteOff
+stdInfoTableSizeB dflags = stdInfoTableSizeW dflags * wORD_SIZE dflags
+
+stdSrtBitmapOffset :: DynFlags -> ByteOff
+-- Byte offset of the SRT bitmap half-word which is
+-- in the *higher-addressed* part of the type_lit
+stdSrtBitmapOffset dflags = stdInfoTableSizeB dflags - halfWordSize dflags
+
+stdClosureTypeOffset :: DynFlags -> ByteOff
+-- Byte offset of the closure type half-word
+stdClosureTypeOffset dflags = stdInfoTableSizeB dflags - wORD_SIZE dflags
+
+stdPtrsOffset, stdNonPtrsOffset :: DynFlags -> ByteOff
+stdPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags
+stdNonPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags + halfWordSize dflags
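+
+-- Worked example (a sketch): on a 64-bit platform without profiling,
+-- stdInfoTableSizeW = 2 and stdInfoTableSizeB = 16, giving
+--
+--   stdPtrsOffset        = 16 - 16     = 0
+--   stdNonPtrsOffset     = 16 - 16 + 4 = 4
+--   stdClosureTypeOffset = 16 - 8      = 8
+--   stdSrtBitmapOffset   = 16 - 4      = 12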
+
+conInfoTableSizeB :: DynFlags -> Int
+conInfoTableSizeB dflags = stdInfoTableSizeB dflags + wORD_SIZE dflags
diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs
new file mode 100644
index 0000000000..1ba79befcd
--- /dev/null
+++ b/compiler/GHC/Cmm/Info/Build.hs
@@ -0,0 +1,892 @@
+{-# LANGUAGE GADTs, BangPatterns, RecordWildCards,
+ GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-}
+
+module GHC.Cmm.Info.Build
+ ( CAFSet, CAFEnv, cafAnal
+ , doSRTs, ModuleSRTInfo, emptySRT
+ ) where
+
+import GhcPrelude hiding (succ)
+
+import Id
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow
+import Module
+import GHC.Platform
+import Digraph
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Cmm.Utils
+import DynFlags
+import Maybes
+import Outputable
+import GHC.Runtime.Layout
+import UniqSupply
+import CostCentre
+import GHC.StgToCmm.Heap
+
+import Control.Monad
+import Data.Map (Map)
+import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+import Data.Tuple
+import Control.Monad.Trans.State
+import Control.Monad.Trans.Class
+
+
+{- Note [SRTs]
+
+SRTs are the mechanism by which the garbage collector can determine
+the live CAFs in the program.
+
+Representation
+^^^^^^^^^^^^^^
+
++------+
+| info |
+| | +-----+---+---+---+
+| -------->|SRT_2| | | | | 0 |
+|------| +-----+-|-+-|-+---+
+| | | |
+| code | | |
+| | v v
+
+An SRT is simply an object in the program's data segment. It has the
+same representation as a static constructor. There are 16
+pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info,
+representing SRT objects with 1-16 pointers, respectively.
+
+The entries of an SRT object point to static closures, which are either
+- FUN_STATIC, THUNK_STATIC or CONSTR
+- Another SRT (actually just a CONSTR)
+
+The final field of the SRT is the static link field, used by the
+garbage collector to chain together static closures that it visits and
+to determine whether a static closure has been visited or not. (see
+Note [STATIC_LINK fields])
+
+By traversing the transitive closure of an SRT, the GC will reach all
+of the CAFs that are reachable from the code associated with this SRT.
+
+If we need to create an SRT with more than 16 entries, we build a
+chain of SRT objects with all but the last having 16 entries.
+
++-----+---+- -+---+---+
+|SRT16| | | | | | 0 |
++-----+-|-+- -+-|-+---+
+ | |
+ v v
+ +----+---+---+---+
+ |SRT2| | | | | 0 |
+ +----+-|-+-|-+---+
+ | |
+ | |
+ v v
+
+Referring to an SRT from the info table
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following things have SRTs:
+
+- Static functions (FUN)
+- Static thunks (THUNK), ie. CAFs
+- Continuations (RET_SMALL, etc.)
+
+In each case, the info table points to the SRT.
+
+- info->srt is zero if there's no SRT, otherwise:
+- info->srt == 1 and info->f.srt_offset points to the SRT
+
+e.g. for a FUN with an SRT:
+
+StgFunInfoTable +------+
+ info->f.srt_offset | ------------> offset to SRT object
+StgStdInfoTable +------+
+ info->layout.ptrs | ... |
+ info->layout.nptrs | ... |
+ info->srt | 1 |
+ info->type | ... |
+ |------|
+
+On x86_64, we optimise the info table representation further. The
+offset to the SRT can be stored in 32 bits (all code lives within a
+2GB region in x86_64's small memory model), so we can save a word in
+the info table by storing the srt_offset in the srt field, which is
+half a word.
+
+On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169):
+
+- info->srt is zero if there's no SRT, otherwise:
+- info->srt is an offset from the info pointer to the SRT object
+
+StgStdInfoTable +------+
+ info->layout.ptrs | |
+ info->layout.nptrs | |
+ info->srt | ------------> offset to SRT object
+ |------|
+
+
+EXAMPLE
+^^^^^^^
+
+f = \x. ... g ...
+ where
+ g = \y. ... h ... c1 ...
+ h = \z. ... c2 ...
+
+c1 & c2 are CAFs
+
+g and h are local functions, but they have no static closures. When
+we generate code for f, we start with a CmmGroup of four CmmDecls:
+
+ [ f_closure, f_entry, g_entry, h_entry ]
+
+we process each CmmDecl separately in cpsTop, giving us a list of
+CmmDecls. e.g. for f_entry, we might end up with
+
+ [ f_entry, f1_ret, f2_proc ]
+
+where f1_ret is a return point, and f2_proc is a proc-point. We have
+a CAFSet for each of these CmmDecls, let's suppose they are
+
+ [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ]
+ [ g_entry{h_info, c1_closure} ]
+ [ h_entry{c2_closure} ]
+
+Next, we make an SRT for each of these functions:
+
+ f_srt : [g_info]
+ g_srt : [h_info, c1_closure]
+ h_srt : [c2_closure]
+
+Now, for g_info and h_info, we want to refer to the SRTs for g and h
+respectively, which we'll label g_srt and h_srt:
+
+ f_srt : [g_srt]
+ g_srt : [h_srt, c1_closure]
+ h_srt : [c2_closure]
+
+Now, when an SRT has a single entry, we don't actually generate an SRT
+closure for it, instead we just replace references to it with its
+single element. So, since h_srt == c2_closure, we have
+
+ f_srt : [g_srt]
+ g_srt : [c2_closure, c1_closure]
+ h_srt : [c2_closure]
+
+and the only SRT closure we generate is
+
+ g_srt = SRT_2 [c2_closure, c1_closure]
+
+
+Optimisations
+^^^^^^^^^^^^^
+
+To reduce the code size overhead and the cost of traversing SRTs in
+the GC, we want to simplify SRTs where possible. We therefore apply
+the following optimisations. Each has a [keyword]; search for the
+keyword in the code below to see where the optimisation is
+implemented.
+
+1. [Inline] we never create an SRT with a single entry, instead we
+ point to the single entry directly from the info table.
+
+ i.e. instead of
+
+ +------+
+ | info |
+ | | +-----+---+---+
+ | -------->|SRT_1| | | 0 |
+ |------| +-----+-|-+---+
+ | | |
+ | code | |
+ | | v
+ C
+
+ we can point directly to the closure:
+
+ +------+
+ | info |
+ | |
+ | -------->C
+ |------|
+ | |
+ | code |
+ | |
+
+
+ Furthermore, the SRT for any code that refers to this info table
+ can point directly to C.
+
+ The exception to this is when we're doing dynamic linking. In that
+ case, if the closure is not locally defined then we can't point to
+ it directly from the info table, because this is the text section
+ which cannot contain runtime relocations. In this case we skip this
+ optimisation and generate the singleton SRT, because SRTs are in the
+ data section and *can* have relocatable references.
+
+2. [FUN] A static function closure can also be an SRT, we simply put
+ the SRT entries as fields in the static closure. This makes a lot
+ of sense: the static references are just like the free variables of
+ the FUN closure.
+
+ i.e. instead of
+
+ f_closure:
+ +-----+---+
+ | | | 0 |
+ +- |--+---+
+ | +------+
+ | | info | f_srt:
+ | | | +-----+---+---+---+
+ | | -------->|SRT_2| | | | | 0 |
+ `----------->|------| +-----+-|-+-|-+---+
+ | | | |
+ | code | | |
+ | | v v
+
+
+ We can generate:
+
+ f_closure:
+ +-----+---+---+---+
+ | | | | | | | 0 |
+ +- |--+-|-+-|-+---+
+ | | | +------+
+ | v v | info |
+ | | |
+ | | 0 |
+ `----------->|------|
+ | |
+ | code |
+ | |
+
+
+ (note: we can't do this for THUNKs, because the thunk gets
+ overwritten when it is entered, so we wouldn't be able to share
+ this SRT with other info tables that want to refer to it (see
+ [Common] below). FUNs are immutable so don't have this problem.)
+
+3. [Common] Identical SRTs can be commoned up.
+
+4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also
+ refers to C (perhaps transitively), then we can omit the reference
+ to C from A.
+
+
+Note that there are many other optimisations that we could do, but
+aren't implemented. In general, we could omit any reference from an
+SRT if everything reachable from it is also reachable from the other
+fields in the SRT. Our [Filter] optimisation is a special case of
+this.
+
+Another opportunity we don't exploit is this:
+
+A = {X,Y,Z}
+B = {Y,Z}
+C = {X,B}
+
+Here we could use C = {A} and therefore [Inline] C = A.
+-}
+
+-- ---------------------------------------------------------------------
+{- Note [Invalid optimisation: shortcutting]
+
+You might think that if we have something like
+
+A's SRT = {B}
+B's SRT = {X}
+
+that we could replace the reference to B in A's SRT with X.
+
+A's SRT = {X}
+B's SRT = {X}
+
+and thereby perhaps save a little work at runtime, because we don't
+have to visit B.
+
+But this is NOT valid.
+
+Consider these cases:
+
+0. B can't be a constructor, because constructors don't have SRTs
+
+1. B is a CAF. This is the easy one. Obviously we want A's SRT to
+ point to B, so that it keeps B alive.
+
+2. B is a function. This is the tricky one. The reason we can't
+shortcut in this case is that we aren't allowed to resurrect static
+objects.
+
+== How does this cause a problem? ==
+
+The particular case that cropped up when we tried this was #15544.
+- A is a thunk
+- B is a static function
+- X is a CAF
+- suppose we GC when A is alive, and B is not otherwise reachable.
+- B is "collected", meaning that it doesn't make it onto the static
+ objects list during this GC, but nothing bad happens yet.
+- Next, suppose we enter A, and then call B. (remember that A refers to B)
+ At the entry point to B, we GC. This puts B on the stack, as part of the
+ RET_FUN stack frame that gets pushed when we GC at a function entry point.
+- This GC will now reach B
+- But because B was previously "collected", it breaks the assumption
+ that static objects are never resurrected. See Note [STATIC_LINK
+ fields] in rts/sm/Storage.h for why this is bad.
+- In practice, the GC thinks that B has already been visited, and so
+ doesn't visit X, and catastrophe ensues.
+
+== Isn't this caused by the RET_FUN business? ==
+
+Maybe, but could you prove that RET_FUN is the only way that
+resurrection can occur?
+
+So, no shortcutting.
+-}
+
+-- ---------------------------------------------------------------------
+-- Label types
+
+-- Labels that come from cafAnal can be:
+-- - _closure labels for static functions or CAFs
+-- - _info labels for dynamic functions, thunks, or continuations
+-- - _entry labels for functions or thunks
+--
+-- Meanwhile the labels on top-level blocks are _entry labels.
+--
+-- To put everything in the same namespace we convert all labels to
+-- closure labels using toClosureLbl. Note that some of these
+-- labels will not actually exist; that's ok because we're going to
+-- map them to SRTEntry later, which ranges over labels that do exist.
+--
+newtype CAFLabel = CAFLabel CLabel
+ deriving (Eq,Ord,Outputable)
+
+type CAFSet = Set CAFLabel
+type CAFEnv = LabelMap CAFSet
+
+mkCAFLabel :: CLabel -> CAFLabel
+mkCAFLabel lbl = CAFLabel (toClosureLbl lbl)
+
+-- This is a label that we can put in an SRT. It *must* be a closure label,
+-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR.
+newtype SRTEntry = SRTEntry CLabel
+ deriving (Eq, Ord, Outputable)
+
+-- ---------------------------------------------------------------------
+-- CAF analysis
+
+-- |
+-- For each code block:
+-- - collect the references reachable from this code block to FUN,
+-- THUNK or RET labels for which hasCAF == True
+--
+-- This gives us a `CAFEnv`: a mapping from code block to sets of labels
+--
+cafAnal
+ :: LabelSet -- The blocks representing continuations, i.e. those
+ -- that will get RET info tables. These labels will
+ -- get their own SRTs, so we don't aggregate CAFs from
+ -- references to these labels, we just use the label.
+ -> CLabel -- The top label of the proc
+ -> CmmGraph
+ -> CAFEnv
+cafAnal contLbls topLbl cmmGraph =
+ analyzeCmmBwd cafLattice
+ (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty
+
+
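+-- The lattice for the analysis: CAF sets ordered by inclusion. The
+-- join is plain set union, and a fact counts as changed only when it
+-- strictly grows, so the backward fixed-point iteration terminates.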
+cafLattice :: DataflowLattice CAFSet
+cafLattice = DataflowLattice Set.empty add
+ where
+ add (OldFact old) (NewFact new) =
+ let !new' = old `Set.union` new
+ in changedIf (Set.size new' > Set.size old) new'
+
+
+cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet
+cafTransfers contLbls entry topLbl
+ (BlockCC eNode middle xNode) fBase =
+ let joined = cafsInNode xNode $! live'
+ !result = foldNodesBwdOO cafsInNode middle joined
+
+ facts = mapMaybe successorFact (successors xNode)
+ live' = joinFacts cafLattice facts
+
+ successorFact s
+ -- If this is a loop back to the entry, we can refer to the
+ -- entry label.
+ | s == entry = Just (add topLbl Set.empty)
+ -- If this is a continuation, we want to refer to the
+ -- SRT for the continuation's info table
+ | s `setMember` contLbls
+ = Just (Set.singleton (mkCAFLabel (infoTblLbl s)))
+ -- Otherwise, take the CAF references from the destination
+ | otherwise
+ = lookupFact s fBase
+
+ cafsInNode :: CmmNode e x -> CAFSet -> CAFSet
+ cafsInNode node set = foldExpDeep addCaf node set
+
+ addCaf expr !set =
+ case expr of
+ CmmLit (CmmLabel c) -> add c set
+ CmmLit (CmmLabelOff c _) -> add c set
+ CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set
+ _ -> set
+ add l s | hasCAF l = Set.insert (mkCAFLabel l) s
+ | otherwise = s
+
+ in mapSingleton (entryLabel eNode) result
+
+
+-- -----------------------------------------------------------------------------
+-- ModuleSRTInfo
+
+data ModuleSRTInfo = ModuleSRTInfo
+ { thisModule :: Module
+ -- ^ Current module being compiled. Required for calling labelDynamic.
+ , dedupSRTs :: Map (Set SRTEntry) SRTEntry
+ -- ^ previous SRTs we've emitted, so we can de-duplicate.
+ -- Used to implement the [Common] optimisation.
+ , flatSRTs :: Map SRTEntry (Set SRTEntry)
+ -- ^ The reverse mapping, so that we can remove redundant
+ -- entries. e.g. if we have an SRT [a,b,c], and we know that b
+ -- points to [c,d], we can omit c and emit [a,b].
+ -- Used to implement the [Filter] optimisation.
+ }
+instance Outputable ModuleSRTInfo where
+ ppr ModuleSRTInfo{..} =
+ text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs
+
+emptySRT :: Module -> ModuleSRTInfo
+emptySRT mod =
+ ModuleSRTInfo
+ { thisModule = mod
+ , dedupSRTs = Map.empty
+ , flatSRTs = Map.empty }
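+
+-- A minimal standalone model of how 'flatSRTs' supports the [Filter]
+-- optimisation (an illustrative sketch only; the real logic, including
+-- the interaction with 'dedupSRTs', lives in oneSRT below):
+--
+-- > filterSRT :: Map SRTEntry (Set SRTEntry) -> Set SRTEntry -> Set SRTEntry
+-- > filterSRT flat srt = srt `Set.difference` below
+-- >   where
+-- >     -- everything already reachable via some other entry of srt
+-- >     below = Set.unions [ s | e <- Set.toList srt
+-- >                            , Just s <- [Map.lookup e flat] ]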
+
+-- -----------------------------------------------------------------------------
+-- Constructing SRTs
+
+{- Implementation notes
+
+- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable
+
+- The entry in info_tbls corresponding to g_entry is the closure info
+ table, the rest are continuations.
+
+- Each entry in info_tbls possibly needs an SRT. We need to make a
+ label for each of these.
+
+- We get the CAFSet for each entry from the CAFEnv
+
+-}
+
+-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl,
+-- where the label is
+-- - the info label for a continuation or dynamic closure
+-- - the closure label for a top-level function (not a CAF)
+getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)]
+getLabelledBlocks (CmmData _ _) = []
+getLabelledBlocks (CmmProc top_info _ _ _) =
+ [ (blockId, mkCAFLabel (cit_lbl info))
+ | (blockId, info) <- mapToList (info_tbls top_info)
+ , let rep = cit_rep info
+ , not (isStaticRep rep) || not (isThunkRep rep)
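+    -- i.e. skip static thunks (CAFs); those are collected separately
+    -- by getCAFs below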
+ ]
+
+
+-- | Put the labelled blocks that we will be annotating with SRTs into
+-- dependency order. This is so that we can process them one at a
+-- time, resolving references to earlier blocks to point to their
+-- SRTs. CAFs themselves are not included here; see getCAFs below.
+depAnalSRTs
+ :: CAFEnv
+ -> [CmmDecl]
+ -> [SCC (Label, CAFLabel, Set CAFLabel)]
+depAnalSRTs cafEnv decls =
+ srtTrace "depAnalSRTs" (ppr graph) graph
+ where
+ labelledBlocks = concatMap getLabelledBlocks decls
+ labelToBlock = Map.fromList (map swap labelledBlocks)
+ graph = stronglyConnCompFromEdgedVerticesOrd
+ [ let cafs' = Set.delete lbl cafs in
+ DigraphNode (l,lbl,cafs') l
+ (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
+ | (l, lbl) <- labelledBlocks
+ , Just cafs <- [mapLookup l cafEnv] ]
+
+
+-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF.
+-- These are treated differently from other labelled blocks:
+-- - we never shortcut a reference to a CAF to the contents of its
+-- SRT, since the point of SRTs is to keep CAFs alive.
+-- - CAFs therefore don't take part in the dependency analysis in
+--   depAnalSRTs; instead we generate their SRTs after everything else.
+getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)]
+getCAFs cafEnv decls =
+ [ (g_entry g, mkCAFLabel topLbl, cafs)
+ | CmmProc top_info topLbl _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , let rep = cit_rep info
+ , isStaticRep rep && isThunkRep rep
+ , Just cafs <- [mapLookup (g_entry g) cafEnv]
+ ]
+
+
+-- | Get the list of blocks that correspond to the entry points for
+-- FUN_STATIC closures. These are the blocks for which if we have an
+-- SRT we can merge it with the static closure. [FUN]
+getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)]
+getStaticFuns decls =
+ [ (g_entry g, lbl)
+ | CmmProc top_info _ _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , Just (id, _) <- [cit_clo info]
+ , let rep = cit_rep info
+ , isStaticRep rep && isFunRep rep
+ , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ ]
+
+
+-- | Maps labels from 'cafAnal' to the final CLabel that will appear
+-- in the SRT.
+-- - closures with singleton SRTs resolve to their single entry
+-- - closures with larger SRTs map to the label for that SRT
+-- - CAFs must not map to anything!
+-- - if a label maps to Nothing, we found that this label's SRT
+-- is empty, so we don't need to refer to it from other SRTs.
+type SRTMap = Map CAFLabel (Maybe SRTEntry)
+
+-- | Resolve a CAFLabel to its SRTEntry using the SRTMap. A label that
+-- is not in the map (in particular a CAF, which is never added to it)
+-- resolves to its own closure label.
+resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry
+resolveCAF srtMap lbl@(CAFLabel l) =
+ Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
+
+
+-- | Attach SRTs to all info tables in the CmmDecls, and add SRT
+-- declarations to the ModuleSRTInfo.
+--
+doSRTs
+ :: DynFlags
+ -> ModuleSRTInfo
+ -> [(CAFEnv, [CmmDecl])]
+ -> IO (ModuleSRTInfo, [CmmDecl])
+
+doSRTs dflags moduleSRTInfo tops = do
+ us <- mkSplitUniqSupply 'u'
+
+ -- Ignore the original grouping of decls, and combine all the
+ -- CAFEnvs into a single CAFEnv.
+ let (cafEnvs, declss) = unzip tops
+ cafEnv = mapUnions cafEnvs
+ decls = concat declss
+ staticFuns = mapFromList (getStaticFuns decls)
+
+ -- Put the decls in dependency order. Why? So that we can implement
+ -- [Inline] and [Filter]. If we need to refer to an SRT that has
+ -- a single entry, we use the entry itself, which means that we
+ -- don't need to generate the singleton SRT in the first place. But
+ -- to do this we need to process blocks before things that depend on
+ -- them.
+ let
+ sccs = depAnalSRTs cafEnv decls
+ cafsWithSRTs = getCAFs cafEnv decls
+
+ -- On each strongly-connected group of decls, construct the SRT
+ -- closures and the SRT fields for info tables.
+ let result ::
+ [ ( [CmmDecl] -- generated SRTs
+ , [(Label, CLabel)] -- SRT fields for info tables
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ ) ]
+ ((result, _srtMap), moduleSRTInfo') =
+ initUs_ us $
+ flip runStateT moduleSRTInfo $
+ flip runStateT Map.empty $ do
+ nonCAFs <- mapM (doSCC dflags staticFuns) sccs
+ cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) ->
+ oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs
+ return (nonCAFs ++ cAFs)
+
+ (declss, pairs, funSRTs) = unzip3 result
+
+ -- Next, update the info tables with the SRTs
+ let
+ srtFieldMap = mapFromList (concat pairs)
+ funSRTMap = mapFromList (concat funSRTs)
+ decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls
+
+ return (moduleSRTInfo', concat declss ++ decls')
+
+
+-- | Build the SRT for a strongly-connected component of blocks
+doSCC
+ :: DynFlags
+ -> LabelMap CLabel -- which blocks are static function entry points
+ -> SCC (Label, CAFLabel, Set CAFLabel)
+ -> StateT SRTMap
+ (StateT ModuleSRTInfo UniqSM)
+ ( [CmmDecl] -- generated SRTs
+ , [(Label, CLabel)] -- SRT fields for info tables
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ )
+
+doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) =
+ oneSRT dflags staticFuns [l] [cafLbl] False cafs
+
+doSCC dflags staticFuns (CyclicSCC nodes) = do
+ -- build a single SRT for the whole cycle; see Note [recursive SRTs]
+ let (blockids, lbls, cafsets) = unzip3 nodes
+ cafs = Set.unions cafsets
+ oneSRT dflags staticFuns blockids lbls False cafs
+
+
+{- Note [recursive SRTs]
+
+If the dependency analyser has found us a recursive group of
+declarations, then we build a single SRT for the whole group, on the
+grounds that everything in the group is reachable from everything
+else, so we lose nothing by having a single SRT.
+
+However, there are a couple of wrinkles to be aware of.
+
+* The Set CAFLabel for this SRT will contain labels in the group
+itself. The SRTMap will therefore not contain entries for these labels
+yet, so we can't turn them into SRTEntries using resolveCAF. BUT we
+can just remove recursive references from the Set CAFLabel before
+generating the SRT - the SRT will still contain all the CAFLabels that
+we need to refer to from this group's SRT.
+
+* That is, EXCEPT for static function closures. For the same reason
+described in Note [Invalid optimisation: shortcutting], we cannot omit
+references to static function closures.
+ - But, since we will merge the SRT with one of the static function
+ closures (see [FUN]), we can omit references to *that* static
+ function closure from the SRT.
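+
+For example (illustrative): if f and g are mutually recursive, with
+f's CAFs = {g,X} and g's CAFs = {f,Y}, the group gets a single SRT
+{X,Y} - unless, say, g is a static function closure, in which case
+the SRT is merged into g's closure (per [FUN]) and f's SRT field
+points to g's closure.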
+-}
+
+-- | Build an SRT for a set of blocks
+oneSRT
+ :: DynFlags
+ -> LabelMap CLabel -- which blocks are static function entry points
+ -> [Label] -- blocks in this set
+ -> [CAFLabel] -- labels for those blocks
+ -> Bool -- True <=> this SRT is for a CAF
+ -> Set CAFLabel -- SRT for this set
+ -> StateT SRTMap
+ (StateT ModuleSRTInfo UniqSM)
+ ( [CmmDecl] -- SRT objects we built
+ , [(Label, CLabel)] -- SRT fields for these blocks' itbls
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ )
+
+oneSRT dflags staticFuns blockids lbls isCAF cafs = do
+ srtMap <- get
+ topSRT <- lift get
+ let
+ -- Can we merge this SRT with a FUN_STATIC closure?
+ (maybeFunClosure, otherFunLabels) =
+ case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of
+ [] -> (Nothing, [])
+ ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs)
+
+ -- Remove recursive references from the SRT, except for (all but
+ -- one of the) static functions. See Note [recursive SRTs].
+ nonRec = cafs `Set.difference`
+ (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels)
+
+ -- First resolve all the CAFLabels to SRTEntries
+ -- Implements the [Inline] optimisation.
+ resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec)
+
+ -- The set of all SRTEntries in SRTs that we refer to from here.
+ allBelow =
+ Set.unions [ lbls | caf <- resolved
+ , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ]
+
+ -- Remove SRTEntries that are also in an SRT that we refer to.
+ -- Implements the [Filter] optimisation.
+ filtered = Set.difference (Set.fromList resolved) allBelow
+
+ srtTrace "oneSRT:"
+ (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return ()
+
+ let
+ isStaticFun = isJust maybeFunClosure
+
+ -- For a label without a closure (e.g. a continuation), we must
+ -- update the SRTMap for the label to point to a closure. It's
+ -- important that we don't do this for static functions or CAFs,
+ -- see Note [Invalid optimisation: shortcutting].
+ updateSRTMap srtEntry =
+ when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do
+ let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls]
+ put (Map.union newSRTMap srtMap)
+
+ this_mod = thisModule topSRT
+
+ case Set.toList filtered of
+ [] -> do
+ srtTrace "oneSRT: empty" (ppr lbls) $ return ()
+ updateSRTMap Nothing
+ return ([], [], [])
+
+ -- [Inline] - when we have only one entry there is no need to
+ -- build an SRT object at all, instead we put the singleton SRT
+ -- entry in the info table.
+ [one@(SRTEntry lbl)]
+ | -- Info tables refer to SRTs by offset (as noted in the section
+ -- "Referring to an SRT from the info table" of Note [SRTs]). However,
+ -- when dynamic linking is used we cannot guarantee that the offset
+ -- between the SRT and the info table will fit in the offset field.
+ -- Consequently we build a singleton SRT in this case.
+ not (labelDynamic dflags this_mod lbl)
+
+ -- MachO relocations can't express offsets between compilation units at
+ -- all, so we are always forced to build a singleton SRT in this case.
+ && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
+ || isLocalCLabel this_mod lbl) -> do
+
+ -- If we have a static function closure, then it becomes the
+ -- SRT object, and everything else points to it. (the only way
+ -- we could have multiple labels here is if this is a
+ -- recursive group, see Note [recursive SRTs])
+ case maybeFunClosure of
+ Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, [])
+ where
+ withLabels =
+ [ (b, if b == staticFunBlock then lbl else staticFunLbl)
+ | b <- blockids ]
+ Nothing -> do
+ updateSRTMap (Just one)
+ return ([], map (,lbl) blockids, [])
+
+ cafList ->
+ -- Check whether an SRT with the same entries has been emitted already.
+ -- Implements the [Common] optimisation.
+ case Map.lookup filtered (dedupSRTs topSRT) of
+ Just srtEntry@(SRTEntry srtLbl) -> do
+ srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return ()
+ updateSRTMap (Just srtEntry)
+ return ([], map (,srtLbl) blockids, [])
+ Nothing -> do
+ -- No duplicates: we have to build a new SRT object
+ srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return ()
+ (decls, funSRTs, srtEntry) <-
+ case maybeFunClosure of
+ Just (fun,block) ->
+ return ( [], [(block, cafList)], SRTEntry fun )
+ Nothing -> do
+ (decls, entry) <- lift . lift $ buildSRTChain dflags cafList
+ return (decls, [], entry)
+ updateSRTMap (Just srtEntry)
+ let allBelowThis = Set.union allBelow filtered
+ oldFlatSRTs = flatSRTs topSRT
+ newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs
+ newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT)
+ lift (put (topSRT { dedupSRTs = newDedupSRTs
+ , flatSRTs = newFlatSRTs }))
+ let SRTEntry lbl = srtEntry
+ return (decls, map (,lbl) blockids, funSRTs)
+
+
+-- | build a static SRT object (or a chain of objects) from a list of
+-- SRTEntries.
+buildSRTChain
+ :: DynFlags
+ -> [SRTEntry]
+ -> UniqSM
+ ( [CmmDecl] -- The SRT object(s)
+ , SRTEntry -- label to use in the info table
+ )
+buildSRTChain _ [] = panic "buildSRTChain: empty"
+buildSRTChain dflags cafSet =
+ case splitAt mAX_SRT_SIZE cafSet of
+ (these, []) -> do
+ (decl,lbl) <- buildSRT dflags these
+ return ([decl], lbl)
+ (these,those) -> do
+ (rest, rest_lbl) <- buildSRTChain dflags (head these : those)
+ (decl,lbl) <- buildSRT dflags (rest_lbl : tail these)
+ return (decl:rest, lbl)
+ where
+ mAX_SRT_SIZE = 16
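+
+-- A worked example of the chaining (illustrative): given 20 entries
+-- [e1..e20] with mAX_SRT_SIZE = 16, we first build srt2 from
+-- [e1,e17,e18,e19,e20], then srt1 from [srt2,e2..e16] (16 entries in
+-- total); srt1's label is the one that goes in the info table.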
+
+
+buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry)
+buildSRT dflags refs = do
+ id <- getUniqueM
+ let
+ lbl = mkSRTLabel id
+ srt_n_info = mkSRTInfoLabel (length refs)
+ fields =
+ mkStaticClosure dflags srt_n_info dontCareCCS
+ [ CmmLabel lbl | SRTEntry lbl <- refs ]
+ [] -- no padding
+ [mkIntCLit dflags 0] -- link field
+ [] -- no saved info
+ return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl)
+
+
+-- | Update info tables with references to their SRTs. Also generate
+-- static closures, splicing in SRT fields as necessary.
+updInfoSRTs
+ :: DynFlags
+ -> LabelMap CLabel -- SRT labels for each block
+ -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures
+ -> CmmDecl
+ -> [CmmDecl]
+
+updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g)
+ | Just (_,closure) <- maybeStaticClosure = [ proc, closure ]
+ | otherwise = [ proc ]
+ where
+ proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g
+ newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info)
+ updInfoTbl l info_tbl
+ | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf
+ | otherwise = info_tbl { cit_srt = mapLookup l srt_env }
+
+ -- Generate static closures [FUN]. Note that this also generates
+ -- static closures for thunks (CAFs), because it's easier to treat
+ -- them uniformly in the code generator.
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl)
+ maybeStaticClosure
+ | Just info_tbl@CmmInfoTable{..} <-
+ mapLookup (g_entry g) (info_tbls top_info)
+ , Just (id, ccs) <- cit_clo
+ , isStaticRep cit_rep =
+ let
+ (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of
+ Nothing ->
+ -- if we don't add SRT entries to this closure, then we
+ -- want to set the srt field in its info table as usual
+ (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, [])
+ Just srtEntries -> srtTrace "maybeStaticFun" (ppr res)
+ (info_tbl { cit_rep = new_rep }, res)
+ where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ]
+ fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id)
+ srtEntries
+ new_rep = case cit_rep of
+ HeapRep sta ptrs nptrs ty ->
+ HeapRep sta (ptrs + length srtEntries) nptrs ty
+ _other -> panic "maybeStaticFun"
+ lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ in
+ Just (newInfo, mkDataLits (Section Data lbl) lbl fields)
+ | otherwise = Nothing
+
+updInfoSRTs _ _ _ t = [t]
+
+
+srtTrace :: String -> SDoc -> b -> b
+-- srtTrace = pprTrace
+srtTrace _ _ b = b
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
new file mode 100644
index 0000000000..f6dda7728c
--- /dev/null
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -0,0 +1,1236 @@
+{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-}
+module GHC.Cmm.LayoutStack (
+ cmmLayoutStack, setInfoTableStackMap
+ ) where
+
+import GhcPrelude hiding ((<*>))
+
+import GHC.StgToCmm.Utils ( callerSaveVolatileRegs, newTemp ) -- XXX layering violation
+import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation
+
+import BasicTypes
+import GHC.Cmm
+import GHC.Cmm.Info
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.Utils
+import GHC.Cmm.Graph
+import ForeignCall
+import GHC.Cmm.Liveness
+import GHC.Cmm.ProcPoint
+import GHC.Runtime.Layout
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import UniqSupply
+import Maybes
+import UniqFM
+import Util
+
+import DynFlags
+import FastString
+import Outputable hiding ( isEmpty )
+import qualified Data.Set as Set
+import Control.Monad.Fix
+import Data.Array as Array
+import Data.Bits
+import Data.List (nub)
+
+{- Note [Stack Layout]
+
+The job of this pass is to
+
+ - replace references to abstract stack Areas with fixed offsets from Sp.
+
+ - replace the CmmHighStackMark constant used in the stack check with
+ the maximum stack usage of the proc.
+
+ - save any variables that are live across a call, and reload them as
+ necessary.
+
+Before stack allocation, local variables remain live across native
+calls (CmmCall{ cmm_cont = Just _ }), and after stack allocation local
+variables are clobbered by native calls.
+
+We want to do stack allocation so that as far as possible
+ - stack use is minimized, and
+ - unnecessary stack saves and loads are avoided.
+
+The algorithm we use is a variant of linear-scan register allocation,
+where the stack is our register file.
+
+We proceed in two passes; see Note [Two pass approach] for why they are
+not easy to merge into one.
+
+Pass 1:
+
+ - First, we do a liveness analysis, which annotates every block with
+ the variables live on entry to the block.
+
+ - We traverse blocks in reverse postorder DFS; that is, we visit at
+ least one predecessor of a block before the block itself. The
+ stack layout flowing from the predecessor of the block will
+ determine the stack layout on entry to the block.
+
+ - We maintain a data structure
+
+ Map Label StackMap
+
+ which describes the contents of the stack and the stack pointer on
+ entry to each block that is a successor of a block that we have
+ visited.
+
+ - For each block we visit:
+
+ - Look up the StackMap for this block.
+
+ - If this block is a proc point (or a call continuation, if we aren't
+ splitting proc points), we need to reload all the live variables from the
+ stack - but this is done in Pass 2, which calculates more precise liveness
+ information (see description of Pass 2).
+
+ - Walk forwards through the instructions:
+ - At an assignment x = Sp[loc]
+ - Record the fact that Sp[loc] contains x, so that we won't
+ need to save x if it ever needs to be spilled.
+ - At an assignment x = E
+ - If x was previously on the stack, it isn't any more
+ - At the last node, if it is a call or a jump to a proc point
+ - Lay out the stack frame for the call (see setupStackFrame)
+ - emit instructions to save all the live variables
+ - Remember the StackMaps for all the successors
+ - emit an instruction to adjust Sp
+ - If the last node is a branch, then the current StackMap is the
+ StackMap for the successors.
+
+ - Manifest Sp: replace references to stack areas in this block
+ with real Sp offsets. We cannot do this until we have laid out
+ the stack area for the successors above.
+
+ In this phase we also eliminate redundant stores to the stack;
+ see elimStackStores.
+
+ - There is one important gotcha: sometimes we'll encounter a control
+ transfer to a block that we've already processed (a join point),
+ and in that case we might need to rearrange the stack to match
+ what the block is expecting. (exactly the same as in linear-scan
+ register allocation, except here we have the luxury of an infinite
+ supply of temporary variables).
+
+ - Finally, we update the magic CmmHighStackMark constant with the
+ stack usage of the function, and eliminate the whole stack check
+ if there was no stack use. (in fact this is done as part of the
+ main traversal, by feeding the high-water-mark output back in as
+ an input. I hate cyclic programming, but it's just too convenient
+ sometimes.)
+
+ There are plenty of tricky details: update frames, proc points, return
+ addresses, foreign calls, and some ad-hoc optimisations that are
+ convenient to do here and effective in common cases. Comments in the
+ code below explain these.
+
+Pass 2:
+
+- Calculate live registers, but taking into account that nothing is live at the
+ entry to a proc point.
+
+- At each proc point and call continuation insert reloads of live registers from
+ the stack (they were saved by Pass 1).
+
+
+Note [Two pass approach]
+
+The main reason for Pass 2 is being able to insert only the reloads that are
+needed and the fact that the two passes need different liveness information.
+Let's consider an example:
+
+      .....
+       \ /
+        D   <- proc point
+       / \
+      E   F
+       \ /
+        G   <- proc point
+        |
+        X
+
+Pass 1 needs liveness assuming that local variables are preserved across calls.
+This is important because it needs to save any local registers to the stack
+(e.g., if register a is used in block X, it must be saved before any native
+call).
+However, for Pass 2, where we want to reload registers from stack (in a proc
+point), this is overly conservative and would lead us to generate reloads in D
+for things used in X, even though we're going to generate reloads in G anyway
+(since it's also a proc point).
+So Pass 2 calculates liveness knowing that nothing is live at the entry to a
+proc point. This means that in D we only need to reload things used in E or F.
+This can be quite important; for an extreme example see the testcase for #3294.
+
+Merging the two passes is not trivial - Pass 2 is a backward rewrite and Pass 1
+is a forward one. Furthermore, Pass 1 is creating code that uses local registers
+(saving them before a call), which the liveness analysis for Pass 2 must see to
+be correct.
+
+-}
+
+
+-- All stack locations are expressed as positive byte offsets from the
+-- "base", which is defined to be the address above the return address
+-- on the stack on entry to this CmmProc.
+--
+-- Lower addresses have higher StackLocs.
+--
+type StackLoc = ByteOff
+
+{-
+ A StackMap describes the stack at any given point. At a continuation
+ it has a particular layout, like this:
+
+         |             |   <- base
+         |-------------|
+         |    ret0     |   <- base + 8
+         |-------------|
+         .  upd frame  .   <- base + sm_ret_off
+         |-------------|
+         |             |
+         .    vars     .
+         . (live/dead) .
+         |             |   <- base + sm_sp - sm_args
+         |-------------|
+         |    ret1     |
+         .  ret vals   .   <- base + sm_sp (<--- Sp points here)
+         |-------------|
+
+Why do we include the final return address (ret0) in our stack map? I
+have absolutely no idea, but it seems to be done that way consistently
+in the rest of the code generator, so I played along here. --SDM
+
+Note that we will be constructing an info table for the continuation
+(ret1), which needs to describe the stack down to, but not including,
+the update frame (or ret0, if there is no update frame).
+-}
+
+data StackMap = StackMap
+ { sm_sp :: StackLoc
+ -- ^ the offset of Sp relative to the base on entry
+ -- to this block.
+ , sm_args :: ByteOff
+ -- ^ the number of bytes of arguments in the area for this block
+ -- Defn: the offset of young(L) relative to the base is given by
+ -- (sm_sp - sm_args) of the StackMap for block L.
+ , sm_ret_off :: ByteOff
+ -- ^ Number of bytes of stack that we do not describe with an info
+ -- table, because it contains an update frame.
+ , sm_regs :: UniqFM (LocalReg,StackLoc)
+ -- ^ regs on the stack
+ }
+
+instance Outputable StackMap where
+ ppr StackMap{..} =
+ text "Sp = " <> int sm_sp $$
+ text "sm_args = " <> int sm_args $$
+ text "sm_ret_off = " <> int sm_ret_off $$
+ text "sm_regs = " <> pprUFM sm_regs ppr
+
+
+cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
+ -> UniqSM (CmmGraph, LabelMap StackMap)
+cmmLayoutStack dflags procpoints entry_args
+ graph@(CmmGraph { g_entry = entry })
+ = do
+ -- We need liveness info. Dead assignments are removed later
+ -- by the sinking pass.
+ let liveness = cmmLocalLiveness dflags graph
+ blocks = revPostorder graph
+
+ (final_stackmaps, _final_high_sp, new_blocks) <-
+ mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
+ layout dflags procpoints liveness entry entry_args
+ rec_stackmaps rec_high_sp blocks
+
+ blocks_with_reloads <-
+ insertReloadsAsNeeded dflags procpoints final_stackmaps entry new_blocks
+ new_blocks' <- mapM (lowerSafeForeignCall dflags) blocks_with_reloads
+ return (ofBlockList entry new_blocks', final_stackmaps)
+
+-- -----------------------------------------------------------------------------
+-- Pass 1
+-- -----------------------------------------------------------------------------
+
+layout :: DynFlags
+ -> LabelSet -- proc points
+ -> LabelMap CmmLocalLive -- liveness
+ -> BlockId -- entry
+ -> ByteOff -- stack args on entry
+
+ -> LabelMap StackMap -- [final] stack maps
+ -> ByteOff -- [final] Sp high water mark
+
+ -> [CmmBlock] -- [in] blocks
+
+ -> UniqSM
+ ( LabelMap StackMap -- [out] stack maps
+ , ByteOff -- [out] Sp high water mark
+ , [CmmBlock] -- [out] new blocks
+ )
+
+layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks
+ = go blocks init_stackmap entry_args []
+ where
+ (updfr, cont_info) = collectContInfo blocks
+
+ init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
+ , sm_args = entry_args
+ , sm_ret_off = updfr
+ , sm_regs = emptyUFM
+ }
+
+ go [] acc_stackmaps acc_hwm acc_blocks
+ = return (acc_stackmaps, acc_hwm, acc_blocks)
+
+ go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
+ = do
+ let (entry0@(CmmEntry entry_lbl tscope), middle0, last0) = blockSplit b0
+
+ let stack0@StackMap { sm_sp = sp0 }
+ = mapFindWithDefault
+ (pprPanic "no stack map for" (ppr entry_lbl))
+ entry_lbl acc_stackmaps
+
+ -- (a) Update the stack map to include the effects of
+ -- assignments in this block
+ let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
+
+ -- (b) Look at the last node and if we are making a call or
+ -- jumping to a proc point, we must save the live
+ -- variables, adjust Sp, and construct the StackMaps for
+ -- each of the successor blocks. See handleLastNode for
+ -- details.
+ (middle1, sp_off, last1, fixup_blocks, out)
+ <- handleLastNode dflags procpoints liveness cont_info
+ acc_stackmaps stack1 tscope middle0 last0
+
+ -- (c) Manifest Sp: run over the nodes in the block and replace
+ -- CmmStackSlot with CmmLoad from Sp with a concrete offset.
+ --
+ -- our block:
+ -- middle0 -- the original middle nodes
+ -- middle1 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- last1 -- the last node
+ --
+ let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1
+
+ let final_blocks =
+ manifestSp dflags final_stackmaps stack0 sp0 final_sp_high
+ entry0 middle_pre sp_off last1 fixup_blocks
+
+ let acc_stackmaps' = mapUnion acc_stackmaps out
+
+ -- If this block jumps to the GC, then we do not take its
+ -- stack usage into account for the high-water mark.
+ -- Otherwise, if the only stack usage is in the stack-check
+ -- failure block itself, we will do a redundant stack
+ -- check. The stack has a buffer designed to accommodate
+ -- the largest amount of stack needed for calling the GC.
+ --
+ this_sp_hwm | isGcJump last0 = 0
+ | otherwise = sp0 - sp_off
+
+ hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))
+
+ go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
+
+
+-- -----------------------------------------------------------------------------
+
+-- Not foolproof, but GCFun is the culprit we most want to catch
+isGcJump :: CmmNode O C -> Bool
+isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) })
+ = l == GCFun || l == GCEnter1
+isGcJump _something_else = False
+
+-- -----------------------------------------------------------------------------
+
+-- This doesn't seem right somehow. We need to find out whether this
+-- proc will push some update frame material at some point, so that we
+-- can avoid using that area of the stack for spilling. The
+-- updfr_space field of the CmmProc *should* tell us, but it doesn't
+-- (I think maybe it gets filled in later when we do proc-point
+-- splitting).
+--
+-- So we'll just take the max of all the cml_ret_offs. This could be
+-- unnecessarily pessimistic, but probably not in the code we
+-- generate.
+
+collectContInfo :: [CmmBlock] -> (ByteOff, LabelMap ByteOff)
+collectContInfo blocks
+ = (maximum ret_offs, mapFromList (catMaybes mb_argss))
+ where
+ (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
+
+ get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff)
+ get_cont b =
+ case lastNode b of
+ CmmCall { cml_cont = Just l, .. }
+ -> (Just (l, cml_ret_args), cml_ret_off)
+ CmmForeignCall { .. }
+ -> (Just (succ, ret_args), ret_off)
+ _other -> (Nothing, 0)
+
+
+-- -----------------------------------------------------------------------------
+-- Updating the StackMap from middle nodes
+
+-- Look for loads from stack slots, and update the StackMap. This is
+-- purely for optimisation reasons, so that we can avoid saving a
+-- variable back to a different stack slot if it is already on the
+-- stack.
+--
+-- This happens a lot: for example when function arguments are passed
+-- on the stack and need to be immediately saved across a call, we
+-- want to just leave them where they are on the stack.
+--
+procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap
+procMiddle stackmaps node sm
+ = case node of
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
+ -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
+ where loc = getStackLoc area off stackmaps
+ CmmAssign (CmmLocal r) _other
+ -> sm { sm_regs = delFromUFM (sm_regs sm) r }
+ _other
+ -> sm
+
+getStackLoc :: Area -> ByteOff -> LabelMap StackMap -> StackLoc
+getStackLoc Old n _ = n
+getStackLoc (Young l) n stackmaps =
+ case mapLookup l stackmaps of
+ Nothing -> pprPanic "getStackLoc" (ppr l)
+ Just sm -> sm_sp sm - sm_args sm + n
+
+
+-- -----------------------------------------------------------------------------
+-- Handling stack allocation for a last node
+
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
+handleLastNode
+ :: DynFlags -> ProcPointSet -> LabelMap CmmLocalLive -> LabelMap ByteOff
+ -> LabelMap StackMap -> StackMap -> CmmTickScope
+ -> Block CmmNode O O
+ -> CmmNode O C
+ -> UniqSM
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , CmmNode O C -- new last node
+ , [CmmBlock] -- new blocks
+ , LabelMap StackMap -- stackmaps for the continuations
+ )
+
+handleLastNode dflags procpoints liveness cont_info stackmaps
+ stack0@StackMap { sm_sp = sp0 } tscp middle last
+ = case last of
+ -- At each return / tail call,
+ -- adjust Sp to point to the last argument pushed, which
+ -- is cml_args, after popping any other junk from the stack.
+ CmmCall{ cml_cont = Nothing, .. } -> do
+ let sp_off = sp0 - cml_args
+ return ([], sp_off, last, [], mapEmpty)
+
+ -- At each CmmCall with a continuation:
+ CmmCall{ cml_cont = Just cont_lbl, .. } ->
+ return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off
+ -- one word of args: the return address
+
+ CmmBranch {} -> handleBranches
+ CmmCondBranch {} -> handleBranches
+ CmmSwitch {} -> handleBranches
+
+ where
+ -- Calls and ForeignCalls are handled the same way:
+ lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ -> ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , LabelMap StackMap
+ )
+ lastCall lbl cml_args cml_ret_args cml_ret_off
+ = ( assignments
+ , spOffsetForCall sp0 cont_stack cml_args
+ , last
+ , [] -- no new blocks
+ , mapSingleton lbl cont_stack )
+ where
+ (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off
+
+
+ prepareStack lbl cml_ret_args cml_ret_off
+ | Just cont_stack <- mapLookup lbl stackmaps
+ -- If we have already seen this continuation before, then
+ -- we just have to make the stack look the same:
+ = (fixupStack stack0 cont_stack, cont_stack)
+ -- Otherwise, we have to allocate the stack frame
+ | otherwise
+ = (save_assignments, new_cont_stack)
+ where
+ (new_cont_stack, save_assignments)
+ = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0
+
+
+ -- For other last nodes (branches), if any of the targets is a
+ -- proc point, we have to set up the stack to match what the proc
+ -- point is expecting.
+ --
+ handleBranches :: UniqSM ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , LabelMap StackMap )
+
+ handleBranches
+ -- Note [diamond proc point]
+ | Just l <- futureContinuation middle
+ , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
+ = do
+ let cont_args = mapFindWithDefault 0 l cont_info
+ (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
+ out = mapFromList [ (l', cont_stack)
+ | l' <- successors last ]
+ return ( assigs
+ , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags)
+ , last
+ , []
+ , out)
+
+ | otherwise = do
+ pps <- mapM handleBranch (successors last)
+ let lbl_map :: LabelMap Label
+ lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
+ fix_lbl l = mapFindWithDefault l l lbl_map
+ return ( []
+ , 0
+ , mapSuccessors fix_lbl last
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
+
+ -- For each successor of this block
+ handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
+ handleBranch l
+ -- (a) if the successor already has a stackmap, we need to
+ -- shuffle the current stack to make it look the same.
+ -- We have to insert a new block to make this happen.
+ | Just stack2 <- mapLookup l stackmaps
+ = do
+ let assigs = fixupStack stack0 stack2
+ (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs
+ return (l, tmp_lbl, stack2, block)
+
+ -- (b) if the successor is a proc point, save everything
+ -- on the stack.
+ | l `setMember` procpoints
+ = do
+ let cont_args = mapFindWithDefault 0 l cont_info
+ (stack2, assigs) =
+ setupStackFrame dflags l liveness (sm_ret_off stack0)
+ cont_args stack0
+ (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs
+ return (l, tmp_lbl, stack2, block)
+
+ -- (c) otherwise, the current StackMap is the StackMap for
+ -- the continuation. But we must remember to remove any
+ -- variables from the StackMap that are *not* live at
+ -- the destination, because this StackMap might be used
+ -- by fixupStack if this is a join point.
+ | otherwise = return (l, l, stack1, [])
+ where live = mapFindWithDefault (panic "handleBranch") l liveness
+ stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
+ is_live (r,_) = r `elemRegSet` live
+
+
+makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap
+ -> CmmTickScope -> [CmmNode O O]
+ -> UniqSM (Label, [CmmBlock])
+makeFixupBlock dflags sp0 l stack tscope assigs
+ | null assigs && sp0 == sm_sp stack = return (l, [])
+ | otherwise = do
+ tmp_lbl <- newBlockId
+ let sp_off = sp0 - sm_sp stack
+ block = blockJoin (CmmEntry tmp_lbl tscope)
+ ( maybeAddSpAdj dflags sp0 sp_off
+ $ blockFromList assigs )
+ (CmmBranch l)
+ return (tmp_lbl, [block])
+
+
+-- Sp is currently pointing to current_sp,
+-- we want it to point to
+-- (sm_sp cont_stack - sm_args cont_stack + args)
+-- so the difference is
+-- current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
+spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
+spOffsetForCall current_sp cont_stack args
+ = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
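+
+-- Worked example (illustrative numbers): with current_sp = 24, a
+-- continuation whose sm_sp = 16 and sm_args = 8, and args = 8 bytes
+-- pushed, the offset is 24 - (16 - 8 + 8) = 8, so the block ends with
+-- an adjustment Sp = Sp + 8.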
+
+
+-- | create a sequence of assignments to establish the new StackMap,
+-- given the old StackMap.
+fixupStack :: StackMap -> StackMap -> [CmmNode O O]
+fixupStack old_stack new_stack = concatMap move new_locs
+ where
+ old_map = sm_regs old_stack
+ new_locs = stackSlotRegs new_stack
+
+ move (r,n)
+ | Just (_,m) <- lookupUFM old_map r, n == m = []
+ | otherwise = [CmmStore (CmmStackSlot Old n)
+ (CmmReg (CmmLocal r))]
+
+
+
+setupStackFrame
+ :: DynFlags
+ -> BlockId -- label of continuation
+ -> LabelMap CmmLocalLive -- liveness
+ -> ByteOff -- updfr
+ -> ByteOff -- bytes of return values on stack
+ -> StackMap -- current StackMap
+ -> (StackMap, [CmmNode O O])
+
+setupStackFrame dflags lbl liveness updfr_off ret_args stack0
+ = (cont_stack, assignments)
+ where
+ -- get the set of LocalRegs live in the continuation
+ live = mapFindWithDefault Set.empty lbl liveness
+
+ -- the stack from the base to updfr_off is off-limits.
+ -- our new stack frame contains:
+ -- * saved live variables
+ -- * the return address [young(C) + 8]
+ -- * the args for the call,
+ -- which are replaced by the return values at the return
+ -- point.
+
+ -- everything up to updfr_off is off-limits
+ -- stack1 contains updfr_off, plus everything we need to save
+ (stack1, assignments) = allocate dflags updfr_off live stack0
+
+ -- And the Sp at the continuation is:
+ -- sm_sp stack1 + ret_args
+ cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
+ , sm_args = ret_args
+ , sm_ret_off = updfr_off
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- Note [diamond proc point]
+--
+-- This special case looks for the pattern we get from a typical
+-- tagged case expression:
+--
+-- Sp[young(L1)] = L1
+-- if (R1 & 7) != 0 goto L1 else goto L2
+-- L2:
+-- call [R1] returns to L1
+-- L1: live: {y}
+-- x = R1
+--
+-- If we let the generic case handle this, we get
+--
+-- Sp[-16] = L1
+-- if (R1 & 7) != 0 goto L1a else goto L2
+-- L2:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- call [R1] returns to L1
+-- L1a:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- goto L1
+-- L1:
+-- x = R1
+--
+-- The code for saving the live vars is duplicated in each branch, and
+-- furthermore there is an extra jump in the fast path (assuming L1 is
+-- a proc point, which it probably is if there is a heap check).
+--
+-- So to fix this we want to set up the stack frame before the
+-- conditional jump. How do we know when to do this, and when it is
+-- safe? The basic idea is, when we see the assignment
+--
+-- Sp[young(L)] = L
+--
+-- we know that
+-- * we are definitely heading for L
+-- * there can be no more reads from another stack area, because young(L)
+-- overlaps with it.
+--
+-- We don't necessarily know that everything live at L is live now
+-- (some might be assigned between here and the jump to L). So we
+-- simplify and only do the optimisation when we see
+--
+-- (1) a block containing an assignment of a return address L
+-- (2) ending in a branch where one (and only one) continuation goes to L,
+-- and no other continuations go to proc points.
+--
+-- then we allocate the stack frame for L at the end of the block,
+-- before the branch.
+--
+-- We could generalise (2), but that would make it a bit more
+-- complicated to handle, and this currently catches the common case.
+
+futureContinuation :: Block CmmNode O O -> Maybe BlockId
+futureContinuation middle = foldBlockNodesB f middle Nothing
+ where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
+ f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+ = Just l
+ f _ r = r
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap
+ -> (StackMap, [CmmNode O O])
+allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
+ , sm_regs = regs0 }
+ =
+ -- we only have to save regs that are not already in a slot
+ let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+ regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
+ in
+
+ -- make a map of the stack
+ let stack = reverse $ Array.elems $
+ accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
+ ret_words ++ live_words
+ where ret_words =
+ [ (x, Occupied)
+ | x <- [ 1 .. toWords dflags ret_off] ]
+ live_words =
+ [ (toWords dflags x, Occupied)
+ | (r,off) <- nonDetEltsUFM regs1,
+ -- See Note [Unique Determinism and code generation]
+ let w = localRegBytes dflags r,
+ x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ]
+ in
+
+ -- Pass over the stack: find slots to save all the new live variables,
+ -- choosing the oldest slots first (hence a foldr).
+ let
+ save slot ([], stack, n, assigs, regs) -- no more regs to save
+ = ([], slot:stack, plusW dflags n 1, assigs, regs)
+ save slot (to_save, stack, n, assigs, regs)
+ = case slot of
+ Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs)
+ Empty
+ | Just (stack', r, to_save') <-
+ select_save to_save (slot:stack)
+ -> let assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+ n' = plusW dflags n 1
+ in
+ (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
+
+ | otherwise
+ -> (to_save, slot:stack, plusW dflags n 1, assigs, regs)
+
+ -- we should do better here: right now we'll fit the smallest first,
+ -- but it would make more sense to fit the biggest first.
+ select_save :: [LocalReg] -> [StackSlot]
+ -> Maybe ([StackSlot], LocalReg, [LocalReg])
+ select_save regs stack = go regs []
+ where go [] _no_fit = Nothing
+ go (r:rs) no_fit
+ | Just rest <- dropEmpty words stack
+ = Just (replicate words Occupied ++ rest, r, rs++no_fit)
+ | otherwise
+ = go rs (r:no_fit)
+ where words = localRegWords dflags r
+
+ -- fill in empty slots as much as possible
+ (still_to_save, save_stack, n, save_assigs, save_regs)
+ = foldr save (to_save, [], 0, [], []) stack
+
+ -- push any remaining live vars on the stack
+ (push_sp, push_assigs, push_regs)
+ = foldr push (n, [], []) still_to_save
+ where
+ push r (n, assigs, regs)
+ = (n', assig : assigs, (r,(r,n')) : regs)
+ where
+ n' = n + localRegBytes dflags r
+ assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+
+ trim_sp
+ | not (null push_regs) = push_sp
+ | otherwise
+ = plusW dflags n (- length (takeWhile isEmpty save_stack))
+
+ final_regs = regs1 `addListToUFM` push_regs
+ `addListToUFM` save_regs
+
+ in
+ -- XXX should be an assert
+ if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
+
+ if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else
+
+ ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
+ , push_assigs ++ save_assigs )
+
+
+-- -----------------------------------------------------------------------------
+-- Manifesting Sp
+
+-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
+-- block looks like this:
+--
+-- middle_pre -- the middle nodes
+-- Sp = Sp + sp_off -- Sp adjustment goes here
+-- last -- the last node
+--
+-- And we have some extra blocks too (that don't contain Sp adjustments)
+--
+-- The adjustment for middle_pre will be different from that for
+-- middle_post, because the Sp adjustment intervenes.
+--
+manifestSp
+ :: DynFlags
+ -> LabelMap StackMap -- StackMaps for other blocks
+ -> StackMap -- StackMap for this block
+ -> ByteOff -- Sp on entry to the block
+ -> ByteOff -- SpHigh
+ -> CmmNode C O -- first node
+ -> [CmmNode O O] -- middle
+ -> ByteOff -- sp_off
+ -> CmmNode O C -- last node
+ -> [CmmBlock] -- new blocks
+ -> [CmmBlock] -- final blocks with Sp manifest
+
+manifestSp dflags stackmaps stack0 sp0 sp_high
+ first middle_pre sp_off last fixup_blocks
+ = final_block : fixup_blocks'
+ where
+ area_off = getAreaOff stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)
+
+ final_middle = maybeAddSpAdj dflags sp0 sp_off
+ . blockFromList
+ . map adj_pre_sp
+ . elimStackStores stack0 stackmaps area_off
+ $ middle_pre
+ final_last = optStackCheck (adj_post_sp last)
+
+ final_block = blockJoin first final_middle final_last
+
+ fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+getAreaOff :: LabelMap StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+ case mapLookup l stackmaps of
+ Just sm -> sm_sp sm - sm_args sm
+ Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj
+ :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj dflags sp0 sp_off block =
+ add_initial_unwind $ add_adj_unwind $ adj block
+ where
+ adj block
+ | sp_off /= 0
+ = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off)
+ | otherwise = block
+ -- Add unwind pseudo-instruction at the beginning of each block to
+ -- document Sp level for debugging
+ add_initial_unwind block
+ | debugLevel dflags > 0
+ = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
+ | otherwise
+ = block
+ where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
+
+ -- Add unwind pseudo-instruction right after the Sp adjustment
+ -- if there is one.
+ add_adj_unwind block
+ | debugLevel dflags > 0
+ , sp_off /= 0
+ = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
+ | otherwise
+ = block
+ where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
+
+{- Note [SP old/young offsets]
+
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+ [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+ [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+ to be Sp + Sp(L) - Sp(L')
+-}
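+
+-- A worked example of the rules above (illustrative numbers): with
+-- Sp(L) = 24, a reference in L to [old + 8] is manifested as
+-- [Sp + 24 - 8] = [Sp + 16]; with Sp(L') = 32 and SpArgs(L') = 8, a
+-- reference to [young(L') + 8] becomes [Sp + 24 - 32 + 8 - 8] = [Sp - 8].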
+
+areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+
+areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
+ = cmmOffset dflags spExpr (sp_old - area_off area - n)
+ -- Replace (CmmStackSlot area n) with an offset from Sp
+
+areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
+ = mkIntExpr dflags sp_hwm
+ -- Replace CmmHighStackMark with the number of bytes of stack used,
+ -- the sp_hwm. See Note [Stack usage] in GHC.StgToCmm.Heap
+
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _) args)
+ | falseStackCheck args
+ = zeroExpr dflags
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Ge _) args)
+ | falseStackCheck args
+ = mkIntExpr dflags 1
+ -- Replace a stack-overflow test that cannot fail with a no-op
+ -- See Note [Always false stack check]
+
+areaToSp _ _ _ _ other = other
+
+-- | Determine whether a stack check cannot fail.
+falseStackCheck :: [CmmExpr] -> Bool
+falseStackCheck [ CmmMachOp (MO_Sub _)
+ [ CmmRegOff (CmmGlobal Sp) x_off
+ , CmmLit (CmmInt y_lit _)]
+ , CmmReg (CmmGlobal SpLim)]
+ = fromIntegral x_off >= y_lit
+falseStackCheck _ = False
+
+-- Note [Always false stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We can optimise stack checks of the form
+--
+-- if ((Sp + x) - y < SpLim) then .. else ..
+--
+-- where x and y are non-negative integer byte offsets. Since we know that
+-- SpLim <= Sp (remember the stack grows downwards), this test must
+-- yield False if (x >= y), so we can rewrite the comparison to False.
+-- A subsequent sinking pass will later drop the dead code.
+-- Optimising this away depends on knowing that SpLim <= Sp, so it is
+-- really the job of the stack layout algorithm, hence we do it now.
+--
+-- The control flow optimiser may negate a conditional to increase
+-- the likelihood of a fallthrough if the branch is not taken. But
+-- not every conditional is inverted as the control flow optimiser
+-- places some requirements on the predecessors of both branch targets.
+-- So we had better look for the inverted comparison too.
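+--
+-- For example (a direct instance of falseStackCheck below):
+--
+--   if ((Sp + 16) - 8 < SpLim) ...
+--
+-- has x = 16 >= y = 8; since (Sp + 16) - 8 = Sp + 8 >= Sp >= SpLim,
+-- the test can never succeed, and areaToSp rewrites the comparison
+-- to a constant False (dually, the inverted MO_U_Ge form becomes True).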
+
+optStackCheck :: CmmNode O C -> CmmNode O C
+optStackCheck n = -- Note [Always false stack check]
+ case n of
+ CmmCondBranch (CmmLit (CmmInt 0 _)) _true false _ -> CmmBranch false
+ CmmCondBranch (CmmLit (CmmInt _ _)) true _false _ -> CmmBranch true
+ other -> other
+
+
+-- -----------------------------------------------------------------------------
+
+-- | Eliminate stores of the form
+--
+-- Sp[area+n] = r
+--
+-- when we know that r is already in the same slot as Sp[area+n]. We
+-- could do this in a later optimisation pass, but that would involve
+-- a separate analysis and we already have the information to hand
+-- here. It helps clean up some extra stack stores in common cases.
+--
+-- Note that we may have to modify the StackMap as we walk through the
+-- code using procMiddle, since an assignment to a variable in the
+-- StackMap will invalidate its mapping there.
+--
+elimStackStores :: StackMap
+ -> LabelMap StackMap
+ -> (Area -> ByteOff)
+ -> [CmmNode O O]
+ -> [CmmNode O O]
+elimStackStores stackmap stackmaps area_off nodes
+ = go stackmap nodes
+ where
+ go _stackmap [] = []
+ go stackmap (n:ns)
+ = case n of
+ CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+ | Just (_,off) <- lookupUFM (sm_regs stackmap) r
+ , area_off area + m == off
+ -> go stackmap ns
+ _otherwise
+ -> n : go (procMiddle stackmaps n stackmap) ns
+
+
+-- -----------------------------------------------------------------------------
+-- Update info tables to include stack liveness
+
+
+setInfoTableStackMap :: DynFlags -> LabelMap StackMap -> CmmDecl -> CmmDecl
+setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g)
+ = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g
+ where
+ fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
+ info_tbl { cit_rep = StackRep (get_liveness lbl) }
+ fix_info _ other = other
+
+ get_liveness :: BlockId -> Liveness
+ get_liveness lbl
+ = case mapLookup lbl stackmaps of
+ Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
+ Just sm -> stackMapToLiveness dflags sm
+
+setInfoTableStackMap _ _ d = d
+
+
+stackMapToLiveness :: DynFlags -> StackMap -> Liveness
+stackMapToLiveness dflags StackMap{..} =
+ reverse $ Array.elems $
+ accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1,
+ toWords dflags (sm_sp - sm_args)) live_words
+ where
+ live_words = [ (toWords dflags off, False)
+ | (r,off) <- nonDetEltsUFM sm_regs
+ , isGcPtrType (localRegType r) ]
+ -- See Note [Unique Determinism and code generation]
+
+-- -----------------------------------------------------------------------------
+-- Pass 2
+-- -----------------------------------------------------------------------------
+
+insertReloadsAsNeeded
+ :: DynFlags
+ -> ProcPointSet
+ -> LabelMap StackMap
+ -> BlockId
+ -> [CmmBlock]
+ -> UniqSM [CmmBlock]
+insertReloadsAsNeeded dflags procpoints final_stackmaps entry blocks = do
+ toBlockList . fst <$>
+ rewriteCmmBwd liveLattice rewriteCC (ofBlockList entry blocks) mapEmpty
+ where
+ rewriteCC :: RewriteFun CmmLocalLive
+ rewriteCC (BlockCC e_node middle0 x_node) fact_base0 = do
+ let entry_label = entryLabel e_node
+ stackmap = case mapLookup entry_label final_stackmaps of
+ Just sm -> sm
+ Nothing -> panic "insertReloadsAsNeeded: rewriteCC: stackmap"
+
+ -- Merge the liveness from successor blocks and analyse the last
+ -- node.
+ joined = gen_kill dflags x_node $!
+ joinOutFacts liveLattice x_node fact_base0
+ -- What is live at the start of middle0.
+ live_at_middle0 = foldNodesBwdOO (gen_kill dflags) middle0 joined
+
+ -- If this is a procpoint we need to add the reloads, but only if
+ -- they're actually live. Furthermore, nothing is live at the entry
+ -- to a proc point.
+ (middle1, live_with_reloads)
+ | entry_label `setMember` procpoints
+ = let reloads = insertReloads dflags stackmap live_at_middle0
+ in (foldr blockCons middle0 reloads, emptyRegSet)
+ | otherwise
+ = (middle0, live_at_middle0)
+
+ -- Final liveness for this block.
+ !fact_base2 = mapSingleton entry_label live_with_reloads
+
+ return (BlockCC e_node middle1 x_node, fact_base2)
+
+insertReloads :: DynFlags -> StackMap -> CmmLocalLive -> [CmmNode O O]
+insertReloads dflags stackmap live =
+ [ CmmAssign (CmmLocal reg)
+ -- This cmmOffset basically corresponds to manifesting
+ -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
+ (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off))
+ (localRegType reg))
+ | (reg, reg_off) <- stackSlotRegs stackmap
+ , reg `elemRegSet` live
+ ]
+ where
+ sp_off = sm_sp stackmap
+
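+-- A worked sketch (hypothetical offsets): a pointer register r saved at
+-- byte offset 16, with sm_sp = 24, is reloaded by the node
+--
+--   r = W_[Sp + 8]   -- i.e. CmmLoad (Sp + (24 - 16)) (localRegType r)
+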
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [Lower safe foreign calls]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump Sp[0]
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]). Note also
+that the safe foreign call is replaced by an unsafe one in the Cmm graph.
+-}
+
+lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall dflags block
+ | (entry@(CmmEntry _ tscp), middle, CmmForeignCall { .. }) <- blockSplit block
+ = do
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp (bWord dflags)
+ new_base <- newTemp (cmmRegType dflags baseReg)
+ let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+ save_state_code <- saveThreadState dflags
+ load_state_code <- loadThreadState dflags
+ let suspend = save_state_code <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread dflags id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign baseReg (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ load_state_code
+
+ (_, regs, copyout) =
+ copyOutOflow dflags NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ ret_off []
+
+ -- NB. after resumeThread returns, the top-of-stack probably contains
+ -- the stack frame for succ, but it might not: if the current thread
+ -- received an exception during the call, then the stack might be
+ -- different. Hence we continue by jumping to the top stack frame,
+ -- not by jumping to succ.
+ jump = CmmCall { cml_target = entryCode dflags $
+ CmmLoad spExpr (bWord dflags)
+ , cml_cont = Just succ
+ , cml_args_regs = regs
+ , cml_args = widthInBytes (wordWidth dflags)
+ , cml_ret_args = ret_args
+ , cml_ret_off = ret_off }
+
+ graph' <- lgraphOfAGraph ( suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump, tscp)
+
+ case toBlockList graph' of
+ [one] -> let (_, middle', last) = blockSplit one
+ in return (blockJoin entry (middle `blockAppend` middle') last)
+ _ -> panic "lowerSafeForeignCall0"
+
+ -- Block doesn't end in a safe foreign call:
+ | otherwise = return block
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction))
+
+callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
+callSuspendThread dflags id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn))
+ [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
+
+plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff
+plusW dflags b w = b + w * wORD_SIZE dflags
+
+data StackSlot = Occupied | Empty
+ -- Occupied: a return address or part of an update frame
+
+instance Outputable StackSlot where
+ ppr Occupied = text "XXX"
+ ppr Empty = text "---"
+
+dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
+dropEmpty 0 ss = Just ss
+dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
+dropEmpty _ _ = Nothing
+
+isEmpty :: StackSlot -> Bool
+isEmpty Empty = True
+isEmpty _ = False
+
+localRegBytes :: DynFlags -> LocalReg -> ByteOff
+localRegBytes dflags r
+ = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r)))
+
+localRegWords :: DynFlags -> LocalReg -> WordOff
+localRegWords dflags = toWords dflags . localRegBytes dflags
+
+toWords :: DynFlags -> ByteOff -> WordOff
+toWords dflags x = x `quot` wORD_SIZE dflags
+
+
+stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
+stackSlotRegs sm = nonDetEltsUFM (sm_regs sm)
+ -- See Note [Unique Determinism and code generation]
diff --git a/compiler/GHC/Cmm/Lexer.x b/compiler/GHC/Cmm/Lexer.x
new file mode 100644
index 0000000000..d8f15b916c
--- /dev/null
+++ b/compiler/GHC/Cmm/Lexer.x
@@ -0,0 +1,368 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 2004-2006
+--
+-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there
+-- are a few minor differences:
+--
+-- * extra keywords for our macros, and float32/float64 types
+-- * global registers (Sp,Hp, etc.)
+--
+-----------------------------------------------------------------------------
+
+{
+module GHC.Cmm.Lexer (
+ CmmToken(..), cmmlex,
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.Expr
+
+import Lexer
+import GHC.Cmm.Monad
+import SrcLoc
+import UniqFM
+import StringBuffer
+import FastString
+import Ctype
+import Util
+--import TRACE
+
+import Data.Word
+import Data.Char
+}
+
+$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
+$white_no_nl = $whitechar # \n
+
+$ascdigit = 0-9
+$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
+$digit = [$ascdigit $unidigit]
+$octit = 0-7
+$hexit = [$digit A-F a-f]
+
+$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
+$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
+$large = [$asclarge $unilarge]
+
+$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
+$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
+$small = [$ascsmall $unismall \_]
+
+$namebegin = [$large $small \. \$ \@]
+$namechar = [$namebegin $digit]
+
+@decimal = $digit+
+@octal = $octit+
+@hexadecimal = $hexit+
+@exponent = [eE] [\-\+]? @decimal
+
+@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
+
+@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
+@strchar = ($printable # [\"\\]) | @escape
+
+cmm :-
+
+$white_no_nl+ ;
+^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output
+
+^\# (line)? { begin line_prag }
+
+-- single-line line pragmas, of the form
+-- # <line> "<file>" <extra-stuff> \n
+<line_prag> $digit+ { setLine line_prag1 }
+<line_prag1> \" [^\"]* \" { setFile line_prag2 }
+<line_prag2> .* { pop }
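+
+-- For example, CPP-preprocessed Cmm input may contain a line such as
+--   # 342 "rts/Apply.cmm"
+-- (line number and file illustrative); the three states above consume the
+-- line number, the file name, and the remainder of the line in turn.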
+
+<0> {
+ \n ;
+
+ [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char }
+
+ ".." { kw CmmT_DotDot }
+ "::" { kw CmmT_DoubleColon }
+ ">>" { kw CmmT_Shr }
+ "<<" { kw CmmT_Shl }
+ ">=" { kw CmmT_Ge }
+ "<=" { kw CmmT_Le }
+ "==" { kw CmmT_Eq }
+ "!=" { kw CmmT_Ne }
+ "&&" { kw CmmT_BoolAnd }
+ "||" { kw CmmT_BoolOr }
+
+ "True" { kw CmmT_True }
+ "False" { kw CmmT_False }
+ "likely" { kw CmmT_likely}
+
+ P@decimal { global_regN (\n -> VanillaReg n VGcPtr) }
+ R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) }
+ F@decimal { global_regN FloatReg }
+ D@decimal { global_regN DoubleReg }
+ L@decimal { global_regN LongReg }
+ Sp { global_reg Sp }
+ SpLim { global_reg SpLim }
+ Hp { global_reg Hp }
+ HpLim { global_reg HpLim }
+ CCCS { global_reg CCCS }
+ CurrentTSO { global_reg CurrentTSO }
+ CurrentNursery { global_reg CurrentNursery }
+ HpAlloc { global_reg HpAlloc }
+ BaseReg { global_reg BaseReg }
+ MachSp { global_reg MachSp }
+ UnwindReturnReg { global_reg UnwindReturnReg }
+
+ $namebegin $namechar* { name }
+
+ 0 @octal { tok_octal }
+ @decimal { tok_decimal }
+ 0[xX] @hexadecimal { tok_hexadecimal }
+ @floating_point { strtoken tok_float }
+
+ \" @strchar* \" { strtoken tok_string }
+}
+
+{
+data CmmToken
+ = CmmT_SpecChar Char
+ | CmmT_DotDot
+ | CmmT_DoubleColon
+ | CmmT_Shr
+ | CmmT_Shl
+ | CmmT_Ge
+ | CmmT_Le
+ | CmmT_Eq
+ | CmmT_Ne
+ | CmmT_BoolAnd
+ | CmmT_BoolOr
+ | CmmT_CLOSURE
+ | CmmT_INFO_TABLE
+ | CmmT_INFO_TABLE_RET
+ | CmmT_INFO_TABLE_FUN
+ | CmmT_INFO_TABLE_CONSTR
+ | CmmT_INFO_TABLE_SELECTOR
+ | CmmT_else
+ | CmmT_export
+ | CmmT_section
+ | CmmT_goto
+ | CmmT_if
+ | CmmT_call
+ | CmmT_jump
+ | CmmT_foreign
+ | CmmT_never
+ | CmmT_prim
+ | CmmT_reserve
+ | CmmT_return
+ | CmmT_returns
+ | CmmT_import
+ | CmmT_switch
+ | CmmT_case
+ | CmmT_default
+ | CmmT_push
+ | CmmT_unwind
+ | CmmT_bits8
+ | CmmT_bits16
+ | CmmT_bits32
+ | CmmT_bits64
+ | CmmT_bits128
+ | CmmT_bits256
+ | CmmT_bits512
+ | CmmT_float32
+ | CmmT_float64
+ | CmmT_gcptr
+ | CmmT_GlobalReg GlobalReg
+ | CmmT_Name FastString
+ | CmmT_String String
+ | CmmT_Int Integer
+ | CmmT_Float Rational
+ | CmmT_EOF
+ | CmmT_False
+ | CmmT_True
+ | CmmT_likely
+ deriving (Show)
+
+-- -----------------------------------------------------------------------------
+-- Lexer actions
+
+type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken)
+
+begin :: Int -> Action
+begin code _span _str _len = do liftP (pushLexState code); lexToken
+
+pop :: Action
+pop _span _buf _len = liftP popLexState >> lexToken
+
+special_char :: Action
+special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf)))
+
+kw :: CmmToken -> Action
+kw tok span _buf _len = return (L span tok)
+
+global_regN :: (Int -> GlobalReg) -> Action
+global_regN con span buf len
+ = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
+ where buf' = stepOn buf
+ n = parseUnsignedInteger buf' (len-1) 10 octDecDigit
+
+global_reg :: GlobalReg -> Action
+global_reg r span _buf _len = return (L span (CmmT_GlobalReg r))
+
+strtoken :: (String -> CmmToken) -> Action
+strtoken f span buf len =
+ return (L span $! (f $! lexemeToString buf len))
+
+name :: Action
+name span buf len =
+ case lookupUFM reservedWordsFM fs of
+ Just tok -> return (L span tok)
+ Nothing -> return (L span (CmmT_Name fs))
+ where
+ fs = lexemeToFastString buf len
+
+reservedWordsFM :: UniqFM CmmToken
+reservedWordsFM = listToUFM $
+ map (\(x, y) -> (mkFastString x, y)) [
+ ( "CLOSURE", CmmT_CLOSURE ),
+ ( "INFO_TABLE", CmmT_INFO_TABLE ),
+ ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ),
+ ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ),
+ ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ),
+ ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
+ ( "else", CmmT_else ),
+ ( "export", CmmT_export ),
+ ( "section", CmmT_section ),
+ ( "goto", CmmT_goto ),
+ ( "if", CmmT_if ),
+ ( "call", CmmT_call ),
+ ( "jump", CmmT_jump ),
+ ( "foreign", CmmT_foreign ),
+ ( "never", CmmT_never ),
+ ( "prim", CmmT_prim ),
+ ( "reserve", CmmT_reserve ),
+ ( "return", CmmT_return ),
+ ( "returns", CmmT_returns ),
+ ( "import", CmmT_import ),
+ ( "switch", CmmT_switch ),
+ ( "case", CmmT_case ),
+ ( "default", CmmT_default ),
+ ( "push", CmmT_push ),
+ ( "unwind", CmmT_unwind ),
+ ( "bits8", CmmT_bits8 ),
+ ( "bits16", CmmT_bits16 ),
+ ( "bits32", CmmT_bits32 ),
+ ( "bits64", CmmT_bits64 ),
+ ( "bits128", CmmT_bits128 ),
+ ( "bits256", CmmT_bits256 ),
+ ( "bits512", CmmT_bits512 ),
+ ( "float32", CmmT_float32 ),
+ ( "float64", CmmT_float64 ),
+-- New forms
+ ( "b8", CmmT_bits8 ),
+ ( "b16", CmmT_bits16 ),
+ ( "b32", CmmT_bits32 ),
+ ( "b64", CmmT_bits64 ),
+ ( "b128", CmmT_bits128 ),
+ ( "b256", CmmT_bits256 ),
+ ( "b512", CmmT_bits512 ),
+ ( "f32", CmmT_float32 ),
+ ( "f64", CmmT_float64 ),
+ ( "gcptr", CmmT_gcptr ),
+ ( "likely", CmmT_likely),
+ ( "True", CmmT_True ),
+ ( "False", CmmT_False )
+ ]
+
+tok_decimal :: Action
+tok_decimal span buf len
+  = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit))
+
+tok_octal :: Action
+tok_octal span buf len
+  = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
+
+tok_hexadecimal :: Action
+tok_hexadecimal span buf len
+  = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
+
+tok_float :: String -> CmmToken
+tok_float str = CmmT_Float $! readRational str
+
+tok_string :: String -> CmmToken
+tok_string str = CmmT_String (read str)
+  -- urk, not quite right, but it'll do for now
+
+-- -----------------------------------------------------------------------------
+-- Line pragmas
+
+setLine :: Int -> Action
+setLine code span buf len = do
+ let line = parseUnsignedInteger buf len 10 octDecDigit
+ liftP $ do
+ setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
+ -- subtract one: the line number refers to the *following* line
+ -- trace ("setLine " ++ show line) $ do
+ popLexState >> pushLexState code
+ lexToken
+
+setFile :: Int -> Action
+setFile code span buf len = do
+ let file = lexemeToFastString (stepOn buf) (len-2)
+ liftP $ do
+ setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
+ popLexState >> pushLexState code
+ lexToken
+
+-- -----------------------------------------------------------------------------
+-- This is the top-level function: called from the parser each time a
+-- new token is to be read from the input.
+
+cmmlex :: (Located CmmToken -> PD a) -> PD a
+cmmlex cont = do
+ (L span tok) <- lexToken
+ --trace ("token: " ++ show tok) $ do
+ cont (L (RealSrcSpan span) tok)
+
+lexToken :: PD (RealLocated CmmToken)
+lexToken = do
+ inp@(loc1,buf) <- getInput
+ sc <- liftP getLexState
+ case alexScan inp sc of
+ AlexEOF -> do let span = mkRealSrcSpan loc1 loc1
+ liftP (setLastToken span 0)
+ return (L span CmmT_EOF)
+ AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error"
+ AlexSkip inp2 _ -> do
+ setInput inp2
+ lexToken
+ AlexToken inp2@(end,_buf2) len t -> do
+ setInput inp2
+ let span = mkRealSrcSpan loc1 end
+ span `seq` liftP (setLastToken span len)
+ t span buf len
+
+-- -----------------------------------------------------------------------------
+-- Monad stuff
+
+-- Stuff that Alex needs to know about our input type:
+type AlexInput = (RealSrcLoc,StringBuffer)
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (_,s) = prevChar s '\n'
+
+-- backwards compatibility for Alex 2.x
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar inp = case alexGetByte inp of
+ Nothing -> Nothing
+ Just (b,i) -> c `seq` Just (c,i)
+ where c = chr $ fromIntegral b
+
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (loc,s)
+ | atEnd s = Nothing
+ | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s'))
+ where c = currentChar s
+ b = fromIntegral $ ord $ c
+ loc' = advanceSrcLoc loc c
+ s' = stepOn s
+
+getInput :: PD AlexInput
+getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b)
+
+setInput :: AlexInput -> PD ()
+setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } ()
+}
diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs
new file mode 100644
index 0000000000..d70fed3b9e
--- /dev/null
+++ b/compiler/GHC/Cmm/Lint.hs
@@ -0,0 +1,261 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 2011
+--
+-- CmmLint: checking the correctness of Cmm statements and expressions
+--
+-----------------------------------------------------------------------------
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Lint (
+ cmmLint, cmmLintGraph
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Liveness
+import GHC.Cmm.Switch (switchTargetsToList)
+import GHC.Cmm.Ppr () -- For Outputable instances
+import Outputable
+import DynFlags
+
+import Control.Monad (ap)
+
+-- Things to check:
+-- - invariant on CmmBlock in GHC.Cmm.Expr (see comment there)
+-- - check for branches to blocks that don't exist
+-- - check types
+
+-- -----------------------------------------------------------------------------
+-- Exported entry points:
+
+cmmLint :: (Outputable d, Outputable h)
+ => DynFlags -> GenCmmGroup d h CmmGraph -> Maybe SDoc
+cmmLint dflags tops = runCmmLint dflags (mapM_ (lintCmmDecl dflags)) tops
+
+cmmLintGraph :: DynFlags -> CmmGraph -> Maybe SDoc
+cmmLintGraph dflags g = runCmmLint dflags (lintCmmGraph dflags) g
+
+runCmmLint :: Outputable a => DynFlags -> (a -> CmmLint b) -> a -> Maybe SDoc
+runCmmLint dflags l p =
+ case unCL (l p) dflags of
+ Left err -> Just (vcat [text "Cmm lint error:",
+ nest 2 err,
+ text "Program was:",
+ nest 2 (ppr p)])
+ Right _ -> Nothing
+
+lintCmmDecl :: DynFlags -> GenCmmDecl h i CmmGraph -> CmmLint ()
+lintCmmDecl dflags (CmmProc _ lbl _ g)
+ = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph dflags g
+lintCmmDecl _ (CmmData {})
+ = return ()
+
+
+lintCmmGraph :: DynFlags -> CmmGraph -> CmmLint ()
+lintCmmGraph dflags g =
+ cmmLocalLiveness dflags g `seq` mapM_ (lintCmmBlock labels) blocks
+ -- cmmLiveness throws an error if there are registers
+ -- live on entry to the graph (i.e. undefined
+ -- variables)
+ where
+ blocks = toBlockList g
+ labels = setFromList (map entryLabel blocks)
+
+
+lintCmmBlock :: LabelSet -> CmmBlock -> CmmLint ()
+lintCmmBlock labels block
+ = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do
+ let (_, middle, last) = blockSplit block
+ mapM_ lintCmmMiddle (blockToList middle)
+ lintCmmLast labels last
+
+-- -----------------------------------------------------------------------------
+-- lintCmmExpr
+
+-- Checks whether a CmmExpr is "type-correct", and checks for obvious-looking
+-- byte/word mismatches.
+
+lintCmmExpr :: CmmExpr -> CmmLint CmmType
+lintCmmExpr (CmmLoad expr rep) = do
+ _ <- lintCmmExpr expr
+ -- Disabled, if we have the inlining phase before the lint phase,
+ -- we can have funny offsets due to pointer tagging. -- EZY
+ -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
+ -- cmmCheckWordAddress expr
+ return rep
+lintCmmExpr expr@(CmmMachOp op args) = do
+ dflags <- getDynFlags
+ tys <- mapM lintCmmExpr args
+ if map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op
+ then cmmCheckMachOp op args tys
+ else cmmLintMachOpErr expr (map (cmmExprType dflags) args) (machOpArgReps dflags op)
+lintCmmExpr (CmmRegOff reg offset)
+ = do dflags <- getDynFlags
+ let rep = typeWidth (cmmRegType dflags reg)
+ lintCmmExpr (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
+lintCmmExpr expr =
+ do dflags <- getDynFlags
+ return (cmmExprType dflags expr)
+
+-- Check for some common byte/word mismatches (eg. Sp + 1)
+cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType
+cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys
+ = cmmCheckMachOp op [reg, lit] tys
+cmmCheckMachOp op _ tys
+ = do dflags <- getDynFlags
+ return (machOpResultType dflags op tys)
+
+{-
+isOffsetOp :: MachOp -> Bool
+isOffsetOp (MO_Add _) = True
+isOffsetOp (MO_Sub _) = True
+isOffsetOp _ = False
+
+-- This expression should be an address from which a word can be loaded:
+-- check for funny-looking sub-word offsets.
+_cmmCheckWordAddress :: CmmExpr -> CmmLint ()
+_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress _
+ = return ()
+
+-- No warnings for unaligned arithmetic with the node register,
+-- which is used to extract fields from tagged constructor closures.
+notNodeReg :: CmmExpr -> Bool
+notNodeReg (CmmReg reg) | reg == nodeReg = False
+notNodeReg _ = True
+-}
+
+lintCmmMiddle :: CmmNode O O -> CmmLint ()
+lintCmmMiddle node = case node of
+ CmmComment _ -> return ()
+ CmmTick _ -> return ()
+ CmmUnwind{} -> return ()
+
+ CmmAssign reg expr -> do
+ dflags <- getDynFlags
+ erep <- lintCmmExpr expr
+ let reg_ty = cmmRegType dflags reg
+ if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
+ then return ()
+ else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
+
+ CmmStore l r -> do
+ _ <- lintCmmExpr l
+ _ <- lintCmmExpr r
+ return ()
+
+ CmmUnsafeForeignCall target _formals actuals -> do
+ lintTarget target
+ mapM_ lintCmmExpr actuals
+
+
+lintCmmLast :: LabelSet -> CmmNode O C -> CmmLint ()
+lintCmmLast labels node = case node of
+ CmmBranch id -> checkTarget id
+
+ CmmCondBranch e t f _ -> do
+ dflags <- getDynFlags
+ mapM_ checkTarget [t,f]
+ _ <- lintCmmExpr e
+ checkCond dflags e
+
+ CmmSwitch e ids -> do
+ dflags <- getDynFlags
+ mapM_ checkTarget $ switchTargetsToList ids
+ erep <- lintCmmExpr e
+ if (erep `cmmEqType_ignoring_ptrhood` bWord dflags)
+ then return ()
+ else cmmLintErr (text "switch scrutinee is not a word: " <>
+ ppr e <> text " :: " <> ppr erep)
+
+ CmmCall { cml_target = target, cml_cont = cont } -> do
+ _ <- lintCmmExpr target
+ maybe (return ()) checkTarget cont
+
+ CmmForeignCall tgt _ args succ _ _ _ -> do
+ lintTarget tgt
+ mapM_ lintCmmExpr args
+ checkTarget succ
+ where
+ checkTarget id
+ | setMember id labels = return ()
+ | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
+
+
+lintTarget :: ForeignTarget -> CmmLint ()
+lintTarget (ForeignTarget e _) = lintCmmExpr e >> return ()
+lintTarget (PrimTarget {}) = return ()
+
+
+checkCond :: DynFlags -> CmmExpr -> CmmLint ()
+checkCond _ (CmmMachOp mop _) | isComparisonMachOp mop = return ()
+checkCond dflags (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth dflags = return () -- constant values
+checkCond _ expr
+ = cmmLintErr (hang (text "expression is not a conditional:") 2
+ (ppr expr))
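+
+-- For example (a sketch): "if (x >= 0)" lints because MO_S_Ge is a
+-- comparison, and a literal 1 of word width is accepted as a constant
+-- conditional; a bare "if (x + 1)" is rejected with the message above.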
+
+-- -----------------------------------------------------------------------------
+-- CmmLint monad
+
+-- just a basic error monad:
+
+newtype CmmLint a = CmmLint { unCL :: DynFlags -> Either SDoc a }
+ deriving (Functor)
+
+instance Applicative CmmLint where
+ pure a = CmmLint (\_ -> Right a)
+ (<*>) = ap
+
+instance Monad CmmLint where
+ CmmLint m >>= k = CmmLint $ \dflags ->
+ case m dflags of
+ Left e -> Left e
+ Right a -> unCL (k a) dflags
+
+instance HasDynFlags CmmLint where
+ getDynFlags = CmmLint (\dflags -> Right dflags)
+
+cmmLintErr :: SDoc -> CmmLint a
+cmmLintErr msg = CmmLint (\_ -> Left msg)
+
+addLintInfo :: SDoc -> CmmLint a -> CmmLint a
+addLintInfo info thing = CmmLint $ \dflags ->
+ case unCL thing dflags of
+ Left err -> Left (hang info 2 err)
+ Right a -> Right a
+
+cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a
+cmmLintMachOpErr expr argsRep opExpectsRep
+ = cmmLintErr (text "in MachOp application: " $$
+ nest 2 (ppr expr) $$
+ (text "op is expecting: " <+> ppr opExpectsRep) $$
+ (text "arguments provide: " <+> ppr argsRep))
+
+cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a
+cmmLintAssignErr stmt e_ty r_ty
+ = cmmLintErr (text "in assignment: " $$
+ nest 2 (vcat [ppr stmt,
+ text "Reg ty:" <+> ppr r_ty,
+ text "Rhs ty:" <+> ppr e_ty]))
+
+
+{-
+cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a
+cmmLintDubiousWordOffset expr
+ = cmmLintErr (text "offset is not a multiple of words: " $$
+ nest 2 (ppr expr))
+-}
+
diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs
new file mode 100644
index 0000000000..2b598f52e5
--- /dev/null
+++ b/compiler/GHC/Cmm/Liveness.hs
@@ -0,0 +1,93 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module GHC.Cmm.Liveness
+ ( CmmLocalLive
+ , cmmLocalLiveness
+ , cmmGlobalLiveness
+ , liveLattice
+ , gen_kill
+ )
+where
+
+import GhcPrelude
+
+import DynFlags
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Ppr.Expr () -- For Outputable instances
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow
+import GHC.Cmm.Dataflow.Label
+
+import Maybes
+import Outputable
+
+-----------------------------------------------------------------------------
+-- Calculating what variables are live on entry to a basic block
+-----------------------------------------------------------------------------
+
+-- | The variables live on entry to a block
+type CmmLive r = RegSet r
+type CmmLocalLive = CmmLive LocalReg
+
+-- | The dataflow lattice
+liveLattice :: Ord r => DataflowLattice (CmmLive r)
+{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive LocalReg) #-}
+{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive GlobalReg) #-}
+liveLattice = DataflowLattice emptyRegSet add
+ where
+ add (OldFact old) (NewFact new) =
+ let !join = plusRegSet old new
+ in changedIf (sizeRegSet join > sizeRegSet old) join
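+
+-- A small worked sketch of the join, writing {..} informally for register
+-- sets:
+--
+--   add (OldFact {r1})    (NewFact {r1,r2})  ==  Changed    {r1,r2}
+--   add (OldFact {r1,r2}) (NewFact {r1})     ==  NotChanged {r1,r2}
+--
+-- so a block is re-analysed only when its live-in set actually grows.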
+
+-- | A mapping from block labels to the variables live on entry
+type BlockEntryLiveness r = LabelMap (CmmLive r)
+
+-----------------------------------------------------------------------------
+-- | Calculated liveness info for a CmmGraph
+-----------------------------------------------------------------------------
+
+cmmLocalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness LocalReg
+cmmLocalLiveness dflags graph =
+ check $ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty
+ where
+ entry = g_entry graph
+ check facts =
+ noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
+
+cmmGlobalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness GlobalReg
+cmmGlobalLiveness dflags graph =
+ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty
+
+-- | On entry to the procedure, there had better not be any LocalRegs live-in.
+noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a
+noLiveOnEntry bid in_fact x =
+ if nullRegSet in_fact then x
+ else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact)
+
+gen_kill
+ :: (DefinerOfRegs r n, UserOfRegs r n)
+ => DynFlags -> n -> CmmLive r -> CmmLive r
+gen_kill dflags node set =
+ let !afterKill = foldRegsDefd dflags deleteFromRegSet set node
+ in foldRegsUsed dflags extendRegSet afterKill node
+{-# INLINE gen_kill #-}
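+
+-- A worked sketch: flowing the fact {x} backwards through the node
+--
+--   x = y + z
+--
+-- first deletes the defined register (leaving {}), then adds the registers
+-- used on the right-hand side, so {y, z} is live before the node.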
+
+xferLive
+ :: forall r.
+ ( UserOfRegs r (CmmNode O O)
+ , DefinerOfRegs r (CmmNode O O)
+ , UserOfRegs r (CmmNode O C)
+ , DefinerOfRegs r (CmmNode O C)
+ )
+ => DynFlags -> TransferFun (CmmLive r)
+xferLive dflags (BlockCC eNode middle xNode) fBase =
+ let joined = gen_kill dflags xNode $! joinOutFacts liveLattice xNode fBase
+ !result = foldNodesBwdOO (gen_kill dflags) middle joined
+ in mapSingleton (entryLabel eNode) result
+{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive LocalReg) #-}
+{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive GlobalReg) #-}
diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs
new file mode 100644
index 0000000000..234001545c
--- /dev/null
+++ b/compiler/GHC/Cmm/MachOp.hs
@@ -0,0 +1,664 @@
+module GHC.Cmm.MachOp
+ ( MachOp(..)
+ , pprMachOp, isCommutableMachOp, isAssociativeMachOp
+ , isComparisonMachOp, maybeIntComparison, machOpResultType
+ , machOpArgReps, maybeInvertComparison, isFloatComparison
+
+ -- MachOp builders
+ , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
+ , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
+ , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
+ , mo_wordULe, mo_wordUGt, mo_wordULt
+ , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot
+ , mo_wordShl, mo_wordSShr, mo_wordUShr
+ , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
+ , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord
+ , mo_u_32ToWord, mo_s_32ToWord
+ , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
+
+ -- CallishMachOp
+ , CallishMachOp(..), callishMachOpHints
+ , pprCallishMachOp
+ , machOpMemcpyishAlign
+
+ -- Atomic read-modify-write
+ , AtomicMachOp(..)
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Type
+import Outputable
+import DynFlags
+
+-----------------------------------------------------------------------------
+-- MachOp
+-----------------------------------------------------------------------------
+
+{- |
+Machine-level primops; ones which we can reasonably delegate to the
+native code generators to handle.
+
+Most operations are parameterised by the 'Width' that they operate on.
+Some operations have separate signed and unsigned versions, and float
+and integer versions.
+-}
+
+data MachOp
+ -- Integer operations (insensitive to signed/unsigned)
+ = MO_Add Width
+ | MO_Sub Width
+ | MO_Eq Width
+ | MO_Ne Width
+ | MO_Mul Width -- low word of multiply
+
+ -- Signed multiply/divide
+ | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows
+ | MO_S_Quot Width -- signed / (same semantics as IntQuotOp)
+ | MO_S_Rem Width -- signed % (same semantics as IntRemOp)
+ | MO_S_Neg Width -- unary -
+
+ -- Unsigned multiply/divide
+ | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows
+ | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp)
+ | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp)
+
+ -- Signed comparisons
+ | MO_S_Ge Width
+ | MO_S_Le Width
+ | MO_S_Gt Width
+ | MO_S_Lt Width
+
+ -- Unsigned comparisons
+ | MO_U_Ge Width
+ | MO_U_Le Width
+ | MO_U_Gt Width
+ | MO_U_Lt Width
+
+ -- Floating point arithmetic
+ | MO_F_Add Width
+ | MO_F_Sub Width
+ | MO_F_Neg Width -- unary -
+ | MO_F_Mul Width
+ | MO_F_Quot Width
+
+ -- Floating point comparison
+ | MO_F_Eq Width
+ | MO_F_Ne Width
+ | MO_F_Ge Width
+ | MO_F_Le Width
+ | MO_F_Gt Width
+ | MO_F_Lt Width
+
+ -- Bitwise operations. Not all of these may be supported
+ -- at all sizes, and only integral Widths are valid.
+ | MO_And Width
+ | MO_Or Width
+ | MO_Xor Width
+ | MO_Not Width
+ | MO_Shl Width
+ | MO_U_Shr Width -- unsigned shift right
+ | MO_S_Shr Width -- signed shift right
+
+ -- Conversions. Some of these will be NOPs.
+ -- Floating-point conversions use the signed variant.
+ | MO_SF_Conv Width Width -- Signed int -> Float
+ | MO_FS_Conv Width Width -- Float -> Signed int
+ | MO_SS_Conv Width Width -- Signed int -> Signed int
+ | MO_UU_Conv Width Width -- unsigned int -> unsigned int
+ | MO_XX_Conv Width Width -- int -> int; puts no requirements on the
+ -- contents of upper bits when extending;
+ -- narrowing is simply truncation; the only
+ -- expectation is that we can recover the
+ -- original value by applying the opposite
+ -- MO_XX_Conv, e.g.,
+ -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x)
+ -- is equivalent to just x.
+ | MO_FF_Conv Width Width -- Float -> Float
+
+ -- Vector element insertion and extraction operations
+ | MO_V_Insert Length Width -- Insert scalar into vector
+ | MO_V_Extract Length Width -- Extract scalar from vector
+
+ -- Integer vector operations
+ | MO_V_Add Length Width
+ | MO_V_Sub Length Width
+ | MO_V_Mul Length Width
+
+ -- Signed vector multiply/divide
+ | MO_VS_Quot Length Width
+ | MO_VS_Rem Length Width
+ | MO_VS_Neg Length Width
+
+ -- Unsigned vector multiply/divide
+ | MO_VU_Quot Length Width
+ | MO_VU_Rem Length Width
+
+ -- Floating point vector element insertion and extraction operations
+ | MO_VF_Insert Length Width -- Insert scalar into vector
+ | MO_VF_Extract Length Width -- Extract scalar from vector
+
+ -- Floating point vector operations
+ | MO_VF_Add Length Width
+ | MO_VF_Sub Length Width
+ | MO_VF_Neg Length Width -- unary negation
+ | MO_VF_Mul Length Width
+ | MO_VF_Quot Length Width
+
+ -- Alignment check (for -falignment-sanitisation)
+ | MO_AlignmentCheck Int Width
+ deriving (Eq, Show)
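+
+-- An illustrative sketch ('CmmMachOp' itself lives in GHC.Cmm.Expr; it is
+-- shown here only to indicate how these constructors are applied):
+--
+--   CmmMachOp (MO_Add W64) [x, y]        -- 64-bit addition of x and y
+--   CmmMachOp (MO_SS_Conv W32 W64) [x]   -- signed widening of x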
+
+pprMachOp :: MachOp -> SDoc
+pprMachOp mo = text (show mo)
+
+
+
+-- -----------------------------------------------------------------------------
+-- Some common MachReps
+
+-- A 'wordRep' is a machine word on the target architecture
+-- Specifically, it is the size of an Int#, Word#, Addr#
+-- and the unit of allocation on the stack and the heap
+-- Any pointer is also guaranteed to be a wordRep.
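+--
+-- For example (a sketch; the width depends on the target): on a 64-bit
+-- target, mo_wordAdd dflags is MO_Add W64, while on a 32-bit target it is
+-- MO_Add W32.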
+
+mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
+ , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
+ , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
+ , mo_wordULe, mo_wordUGt, mo_wordULt
+ , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
+ , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
+ , mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
+ :: DynFlags -> MachOp
+
+mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
+ , mo_32To8, mo_32To16
+ :: MachOp
+
+mo_wordAdd dflags = MO_Add (wordWidth dflags)
+mo_wordSub dflags = MO_Sub (wordWidth dflags)
+mo_wordEq dflags = MO_Eq (wordWidth dflags)
+mo_wordNe dflags = MO_Ne (wordWidth dflags)
+mo_wordMul dflags = MO_Mul (wordWidth dflags)
+mo_wordSQuot dflags = MO_S_Quot (wordWidth dflags)
+mo_wordSRem dflags = MO_S_Rem (wordWidth dflags)
+mo_wordSNeg dflags = MO_S_Neg (wordWidth dflags)
+mo_wordUQuot dflags = MO_U_Quot (wordWidth dflags)
+mo_wordURem dflags = MO_U_Rem (wordWidth dflags)
+
+mo_wordSGe dflags = MO_S_Ge (wordWidth dflags)
+mo_wordSLe dflags = MO_S_Le (wordWidth dflags)
+mo_wordSGt dflags = MO_S_Gt (wordWidth dflags)
+mo_wordSLt dflags = MO_S_Lt (wordWidth dflags)
+
+mo_wordUGe dflags = MO_U_Ge (wordWidth dflags)
+mo_wordULe dflags = MO_U_Le (wordWidth dflags)
+mo_wordUGt dflags = MO_U_Gt (wordWidth dflags)
+mo_wordULt dflags = MO_U_Lt (wordWidth dflags)
+
+mo_wordAnd dflags = MO_And (wordWidth dflags)
+mo_wordOr dflags = MO_Or (wordWidth dflags)
+mo_wordXor dflags = MO_Xor (wordWidth dflags)
+mo_wordNot dflags = MO_Not (wordWidth dflags)
+mo_wordShl dflags = MO_Shl (wordWidth dflags)
+mo_wordSShr dflags = MO_S_Shr (wordWidth dflags)
+mo_wordUShr dflags = MO_U_Shr (wordWidth dflags)
+
+mo_u_8To32 = MO_UU_Conv W8 W32
+mo_s_8To32 = MO_SS_Conv W8 W32
+mo_u_16To32 = MO_UU_Conv W16 W32
+mo_s_16To32 = MO_SS_Conv W16 W32
+
+mo_u_8ToWord dflags = MO_UU_Conv W8 (wordWidth dflags)
+mo_s_8ToWord dflags = MO_SS_Conv W8 (wordWidth dflags)
+mo_u_16ToWord dflags = MO_UU_Conv W16 (wordWidth dflags)
+mo_s_16ToWord dflags = MO_SS_Conv W16 (wordWidth dflags)
+mo_s_32ToWord dflags = MO_SS_Conv W32 (wordWidth dflags)
+mo_u_32ToWord dflags = MO_UU_Conv W32 (wordWidth dflags)
+
+mo_WordTo8 dflags = MO_UU_Conv (wordWidth dflags) W8
+mo_WordTo16 dflags = MO_UU_Conv (wordWidth dflags) W16
+mo_WordTo32 dflags = MO_UU_Conv (wordWidth dflags) W32
+mo_WordTo64 dflags = MO_UU_Conv (wordWidth dflags) W64
+
+mo_32To8 = MO_UU_Conv W32 W8
+mo_32To16 = MO_UU_Conv W32 W16
+
+
+-- ----------------------------------------------------------------------------
+-- isCommutableMachOp
+
+{- |
+Returns 'True' if the MachOp has commutable arguments. This is used
+in the platform-independent Cmm optimisations.
+
+If in doubt, return 'False'. This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isCommutableMachOp :: MachOp -> Bool
+isCommutableMachOp mop =
+ case mop of
+ MO_Add _ -> True
+ MO_Eq _ -> True
+ MO_Ne _ -> True
+ MO_Mul _ -> True
+ MO_S_MulMayOflo _ -> True
+ MO_U_MulMayOflo _ -> True
+ MO_And _ -> True
+ MO_Or _ -> True
+ MO_Xor _ -> True
+ MO_F_Add _ -> True
+ MO_F_Mul _ -> True
+ _other -> False
+
+-- ----------------------------------------------------------------------------
+-- isAssociativeMachOp
+
+{- |
+Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@)
+This is used in the platform-independent Cmm optimisations.
+
+If in doubt, return 'False'. This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isAssociativeMachOp :: MachOp -> Bool
+isAssociativeMachOp mop =
+ case mop of
+        MO_Add {} -> True   -- NB: does not include
+        MO_Mul {} -> True   --     floating point!
+ MO_And {} -> True
+ MO_Or {} -> True
+ MO_Xor {} -> True
+ _other -> False
+
+
+-- ----------------------------------------------------------------------------
+-- isComparisonMachOp
+
+{- |
+Returns 'True' if the MachOp is a comparison.
+
+If in doubt, return False. This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isComparisonMachOp :: MachOp -> Bool
+isComparisonMachOp mop =
+ case mop of
+ MO_Eq _ -> True
+ MO_Ne _ -> True
+ MO_S_Ge _ -> True
+ MO_S_Le _ -> True
+ MO_S_Gt _ -> True
+ MO_S_Lt _ -> True
+ MO_U_Ge _ -> True
+ MO_U_Le _ -> True
+ MO_U_Gt _ -> True
+ MO_U_Lt _ -> True
+ MO_F_Eq {} -> True
+ MO_F_Ne {} -> True
+ MO_F_Ge {} -> True
+ MO_F_Le {} -> True
+ MO_F_Gt {} -> True
+ MO_F_Lt {} -> True
+ _other -> False
+
+{- |
+Returns @Just w@ if the operation is an integer comparison with width
+@w@, or @Nothing@ otherwise.
+-}
+maybeIntComparison :: MachOp -> Maybe Width
+maybeIntComparison mop =
+ case mop of
+ MO_Eq w -> Just w
+ MO_Ne w -> Just w
+ MO_S_Ge w -> Just w
+ MO_S_Le w -> Just w
+ MO_S_Gt w -> Just w
+ MO_S_Lt w -> Just w
+ MO_U_Ge w -> Just w
+ MO_U_Le w -> Just w
+ MO_U_Gt w -> Just w
+ MO_U_Lt w -> Just w
+ _ -> Nothing
+
+isFloatComparison :: MachOp -> Bool
+isFloatComparison mop =
+ case mop of
+ MO_F_Eq {} -> True
+ MO_F_Ne {} -> True
+ MO_F_Ge {} -> True
+ MO_F_Le {} -> True
+ MO_F_Gt {} -> True
+ MO_F_Lt {} -> True
+ _other -> False
+
+-- -----------------------------------------------------------------------------
+-- Inverting conditions
+
+-- Sometimes it's useful to be able to invert the sense of a
+-- condition. Not all conditional tests are invertible: in
+-- particular, floating point conditionals cannot be inverted, because
+-- there exist floating-point values for which both senses of a condition
+-- return False (eg. NaN > NaN and NaN <= NaN are both False, so negating
+-- one does not yield the other).
+
+maybeInvertComparison :: MachOp -> Maybe MachOp
+maybeInvertComparison op
+ = case op of -- None of these Just cases include floating point
+ MO_Eq r -> Just (MO_Ne r)
+ MO_Ne r -> Just (MO_Eq r)
+ MO_U_Lt r -> Just (MO_U_Ge r)
+ MO_U_Gt r -> Just (MO_U_Le r)
+ MO_U_Le r -> Just (MO_U_Gt r)
+ MO_U_Ge r -> Just (MO_U_Lt r)
+ MO_S_Lt r -> Just (MO_S_Ge r)
+ MO_S_Gt r -> Just (MO_S_Le r)
+ MO_S_Le r -> Just (MO_S_Gt r)
+ MO_S_Ge r -> Just (MO_S_Lt r)
+ _other -> Nothing
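+
+-- For example: maybeInvertComparison (MO_S_Lt W64) == Just (MO_S_Ge W64),
+-- whereas maybeInvertComparison (MO_F_Lt W64) == Nothing, because of the
+-- NaN behaviour described above.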
+
+-- ----------------------------------------------------------------------------
+-- machOpResultType
+
+{- |
+Returns the MachRep of the result of a MachOp.
+-}
+machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType
+machOpResultType dflags mop tys =
+ case mop of
+ MO_Add {} -> ty1 -- Preserve GC-ptr-hood
+ MO_Sub {} -> ty1 -- of first arg
+ MO_Mul r -> cmmBits r
+ MO_S_MulMayOflo r -> cmmBits r
+ MO_S_Quot r -> cmmBits r
+ MO_S_Rem r -> cmmBits r
+ MO_S_Neg r -> cmmBits r
+ MO_U_MulMayOflo r -> cmmBits r
+ MO_U_Quot r -> cmmBits r
+ MO_U_Rem r -> cmmBits r
+
+ MO_Eq {} -> comparisonResultRep dflags
+ MO_Ne {} -> comparisonResultRep dflags
+ MO_S_Ge {} -> comparisonResultRep dflags
+ MO_S_Le {} -> comparisonResultRep dflags
+ MO_S_Gt {} -> comparisonResultRep dflags
+ MO_S_Lt {} -> comparisonResultRep dflags
+
+ MO_U_Ge {} -> comparisonResultRep dflags
+ MO_U_Le {} -> comparisonResultRep dflags
+ MO_U_Gt {} -> comparisonResultRep dflags
+ MO_U_Lt {} -> comparisonResultRep dflags
+
+ MO_F_Add r -> cmmFloat r
+ MO_F_Sub r -> cmmFloat r
+ MO_F_Mul r -> cmmFloat r
+ MO_F_Quot r -> cmmFloat r
+ MO_F_Neg r -> cmmFloat r
+ MO_F_Eq {} -> comparisonResultRep dflags
+ MO_F_Ne {} -> comparisonResultRep dflags
+ MO_F_Ge {} -> comparisonResultRep dflags
+ MO_F_Le {} -> comparisonResultRep dflags
+ MO_F_Gt {} -> comparisonResultRep dflags
+ MO_F_Lt {} -> comparisonResultRep dflags
+
+ MO_And {} -> ty1 -- Used for pointer masking
+ MO_Or {} -> ty1
+ MO_Xor {} -> ty1
+ MO_Not r -> cmmBits r
+ MO_Shl r -> cmmBits r
+ MO_U_Shr r -> cmmBits r
+ MO_S_Shr r -> cmmBits r
+
+ MO_SS_Conv _ to -> cmmBits to
+ MO_UU_Conv _ to -> cmmBits to
+ MO_XX_Conv _ to -> cmmBits to
+ MO_FS_Conv _ to -> cmmBits to
+ MO_SF_Conv _ to -> cmmFloat to
+ MO_FF_Conv _ to -> cmmFloat to
+
+ MO_V_Insert l w -> cmmVec l (cmmBits w)
+ MO_V_Extract _ w -> cmmBits w
+
+ MO_V_Add l w -> cmmVec l (cmmBits w)
+ MO_V_Sub l w -> cmmVec l (cmmBits w)
+ MO_V_Mul l w -> cmmVec l (cmmBits w)
+
+ MO_VS_Quot l w -> cmmVec l (cmmBits w)
+ MO_VS_Rem l w -> cmmVec l (cmmBits w)
+ MO_VS_Neg l w -> cmmVec l (cmmBits w)
+
+ MO_VU_Quot l w -> cmmVec l (cmmBits w)
+ MO_VU_Rem l w -> cmmVec l (cmmBits w)
+
+ MO_VF_Insert l w -> cmmVec l (cmmFloat w)
+ MO_VF_Extract _ w -> cmmFloat w
+
+ MO_VF_Add l w -> cmmVec l (cmmFloat w)
+ MO_VF_Sub l w -> cmmVec l (cmmFloat w)
+ MO_VF_Mul l w -> cmmVec l (cmmFloat w)
+ MO_VF_Quot l w -> cmmVec l (cmmFloat w)
+ MO_VF_Neg l w -> cmmVec l (cmmFloat w)
+
+ MO_AlignmentCheck _ _ -> ty1
+ where
+ (ty1:_) = tys
+
+comparisonResultRep :: DynFlags -> CmmType
+comparisonResultRep = bWord -- is it?
+
+
+-- -----------------------------------------------------------------------------
+-- machOpArgReps
+
+-- | This function is used for debugging only: we can check whether an
+-- application of a MachOp is "type-correct" by checking that the MachReps of
+-- its arguments are the same as the MachOp expects. This is used when
+-- linting a CmmExpr.
+
+machOpArgReps :: DynFlags -> MachOp -> [Width]
+machOpArgReps dflags op =
+ case op of
+ MO_Add r -> [r,r]
+ MO_Sub r -> [r,r]
+ MO_Eq r -> [r,r]
+ MO_Ne r -> [r,r]
+ MO_Mul r -> [r,r]
+ MO_S_MulMayOflo r -> [r,r]
+ MO_S_Quot r -> [r,r]
+ MO_S_Rem r -> [r,r]
+ MO_S_Neg r -> [r]
+ MO_U_MulMayOflo r -> [r,r]
+ MO_U_Quot r -> [r,r]
+ MO_U_Rem r -> [r,r]
+
+ MO_S_Ge r -> [r,r]
+ MO_S_Le r -> [r,r]
+ MO_S_Gt r -> [r,r]
+ MO_S_Lt r -> [r,r]
+
+ MO_U_Ge r -> [r,r]
+ MO_U_Le r -> [r,r]
+ MO_U_Gt r -> [r,r]
+ MO_U_Lt r -> [r,r]
+
+ MO_F_Add r -> [r,r]
+ MO_F_Sub r -> [r,r]
+ MO_F_Mul r -> [r,r]
+ MO_F_Quot r -> [r,r]
+ MO_F_Neg r -> [r]
+ MO_F_Eq r -> [r,r]
+ MO_F_Ne r -> [r,r]
+ MO_F_Ge r -> [r,r]
+ MO_F_Le r -> [r,r]
+ MO_F_Gt r -> [r,r]
+ MO_F_Lt r -> [r,r]
+
+ MO_And r -> [r,r]
+ MO_Or r -> [r,r]
+ MO_Xor r -> [r,r]
+ MO_Not r -> [r]
+ MO_Shl r -> [r, wordWidth dflags]
+ MO_U_Shr r -> [r, wordWidth dflags]
+ MO_S_Shr r -> [r, wordWidth dflags]
+
+ MO_SS_Conv from _ -> [from]
+ MO_UU_Conv from _ -> [from]
+ MO_XX_Conv from _ -> [from]
+ MO_SF_Conv from _ -> [from]
+ MO_FS_Conv from _ -> [from]
+ MO_FF_Conv from _ -> [from]
+
+ MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
+ MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
+
+ MO_V_Add _ r -> [r,r]
+ MO_V_Sub _ r -> [r,r]
+ MO_V_Mul _ r -> [r,r]
+
+ MO_VS_Quot _ r -> [r,r]
+ MO_VS_Rem _ r -> [r,r]
+ MO_VS_Neg _ r -> [r]
+
+ MO_VU_Quot _ r -> [r,r]
+ MO_VU_Rem _ r -> [r,r]
+
+ MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
+ MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+
+ MO_VF_Add _ r -> [r,r]
+ MO_VF_Sub _ r -> [r,r]
+ MO_VF_Mul _ r -> [r,r]
+ MO_VF_Quot _ r -> [r,r]
+ MO_VF_Neg _ r -> [r]
+
+ MO_AlignmentCheck _ r -> [r]
+
+-----------------------------------------------------------------------------
+-- CallishMachOp
+-----------------------------------------------------------------------------
+
+-- CallishMachOps tend to be implemented by foreign calls in some backends,
+-- so we separate them out. In Cmm, these can only occur in a
+-- statement position, in contrast to an ordinary MachOp which can occur
+-- anywhere in an expression.
+data CallishMachOp
+ = MO_F64_Pwr
+ | MO_F64_Sin
+ | MO_F64_Cos
+ | MO_F64_Tan
+ | MO_F64_Sinh
+ | MO_F64_Cosh
+ | MO_F64_Tanh
+ | MO_F64_Asin
+ | MO_F64_Acos
+ | MO_F64_Atan
+ | MO_F64_Asinh
+ | MO_F64_Acosh
+ | MO_F64_Atanh
+ | MO_F64_Log
+ | MO_F64_Log1P
+ | MO_F64_Exp
+ | MO_F64_ExpM1
+ | MO_F64_Fabs
+ | MO_F64_Sqrt
+ | MO_F32_Pwr
+ | MO_F32_Sin
+ | MO_F32_Cos
+ | MO_F32_Tan
+ | MO_F32_Sinh
+ | MO_F32_Cosh
+ | MO_F32_Tanh
+ | MO_F32_Asin
+ | MO_F32_Acos
+ | MO_F32_Atan
+ | MO_F32_Asinh
+ | MO_F32_Acosh
+ | MO_F32_Atanh
+ | MO_F32_Log
+ | MO_F32_Log1P
+ | MO_F32_Exp
+ | MO_F32_ExpM1
+ | MO_F32_Fabs
+ | MO_F32_Sqrt
+
+ | MO_UF_Conv Width
+
+ | MO_S_Mul2 Width
+ | MO_S_QuotRem Width
+ | MO_U_QuotRem Width
+ | MO_U_QuotRem2 Width
+ | MO_Add2 Width
+ | MO_AddWordC Width
+ | MO_SubWordC Width
+ | MO_AddIntC Width
+ | MO_SubIntC Width
+ | MO_U_Mul2 Width
+
+ | MO_ReadBarrier
+ | MO_WriteBarrier
+ | MO_Touch -- Keep variables live (when using interior pointers)
+
+ -- Prefetch
+  | MO_Prefetch_Data Int -- Prefetch hint. May change program performance
+                         -- but not program behavior.
+                         -- The Int can be 0-3 and needs to be known at
+                         -- compile time to interact with code generation
+                         -- correctly.
+                         -- TODO: add support for prefetch WRITES; currently
+                         -- we only expose prefetch reads, which covers the
+                         -- majority of use cases in GHC anyway.
+
+
+  -- These four MachOps are parameterised by the known alignment
+ -- of the destination and source (for memcpy/memmove) pointers.
+ -- This information may be used for optimisation in backends.
+ | MO_Memcpy Int
+ | MO_Memset Int
+ | MO_Memmove Int
+ | MO_Memcmp Int
+
+ | MO_PopCnt Width
+ | MO_Pdep Width
+ | MO_Pext Width
+ | MO_Clz Width
+ | MO_Ctz Width
+
+ | MO_BSwap Width
+ | MO_BRev Width
+
+ -- Atomic read-modify-write.
+ | MO_AtomicRMW Width AtomicMachOp
+ | MO_AtomicRead Width
+ | MO_AtomicWrite Width
+ | MO_Cmpxchg Width
+ deriving (Eq, Show)
+
+-- | The operation to perform atomically.
+data AtomicMachOp =
+ AMO_Add
+ | AMO_Sub
+ | AMO_And
+ | AMO_Nand
+ | AMO_Or
+ | AMO_Xor
+ deriving (Eq, Show)
+
+pprCallishMachOp :: CallishMachOp -> SDoc
+pprCallishMachOp mo = text (show mo)
+
+callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
+callishMachOpHints op = case op of
+ MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint])
+ MO_Memset _ -> ([], [AddrHint,NoHint,NoHint])
+ MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+ MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint])
+ _ -> ([],[])
+ -- empty lists indicate NoHint
+
+-- | The alignment of a 'memcpy'-ish operation.
+machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
+machOpMemcpyishAlign op = case op of
+ MO_Memcpy align -> Just align
+ MO_Memset align -> Just align
+ MO_Memmove align -> Just align
+ MO_Memcmp align -> Just align
+ _ -> Nothing
diff --git a/compiler/GHC/Cmm/Monad.hs b/compiler/GHC/Cmm/Monad.hs
new file mode 100644
index 0000000000..6b8d00a118
--- /dev/null
+++ b/compiler/GHC/Cmm/Monad.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+-- A Parser monad with access to the 'DynFlags'.
+--
+-- The 'P' monad only has access to the subset of 'DynFlags'
+-- required for parsing Haskell.
+
+-- The parser for C-- requires access to a lot more of the 'DynFlags',
+-- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
+-----------------------------------------------------------------------------
+module GHC.Cmm.Monad (
+ PD(..)
+ , liftP
+ ) where
+
+import GhcPrelude
+
+import Control.Monad
+import qualified Control.Monad.Fail as MonadFail
+
+import DynFlags
+import Lexer
+
+newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a }
+
+instance Functor PD where
+ fmap = liftM
+
+instance Applicative PD where
+ pure = returnPD
+ (<*>) = ap
+
+instance Monad PD where
+ (>>=) = thenPD
+#if !MIN_VERSION_base(4,13,0)
+ fail = MonadFail.fail
+#endif
+
+instance MonadFail.MonadFail PD where
+ fail = failPD
+
+liftP :: P a -> PD a
+liftP (P f) = PD $ \_ s -> f s
+
+returnPD :: a -> PD a
+returnPD = liftP . return
+
+thenPD :: PD a -> (a -> PD b) -> PD b
+(PD m) `thenPD` k = PD $ \d s ->
+ case m d s of
+ POk s1 a -> unPD (k a) d s1
+ PFailed s1 -> PFailed s1
+
+failPD :: String -> PD a
+failPD = liftP . fail
+
+instance HasDynFlags PD where
+ getDynFlags = PD $ \d s -> POk s d
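+
+-- A minimal usage sketch (the action below is hypothetical, not part of
+-- this module): because PD has a HasDynFlags instance, Cmm parser actions
+-- can consult the flags directly, e.g.
+--
+--   exampleAction :: PD Bool
+--   exampleAction = do
+--     dflags <- getDynFlags
+--     return (gopt Opt_SccProfilingOn dflags)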
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
new file mode 100644
index 0000000000..bb74647910
--- /dev/null
+++ b/compiler/GHC/Cmm/Node.hs
@@ -0,0 +1,724 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ExplicitForAll #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+
+-- CmmNode type for representation using Hoopl graphs.
+
+module GHC.Cmm.Node (
+ CmmNode(..), CmmFormal, CmmActual, CmmTickish,
+ UpdFrameOffset, Convention(..),
+ ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
+ CmmReturnInfo(..),
+ mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
+ mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors,
+
+ -- * Tick scopes
+ CmmTickScope(..), isTickSubScope, combineTickScopes,
+ ) where
+
+import GhcPrelude hiding (succ)
+
+import GHC.Platform.Regs
+import GHC.Cmm.Expr
+import GHC.Cmm.Switch
+import DynFlags
+import FastString
+import ForeignCall
+import Outputable
+import GHC.Runtime.Layout
+import CoreSyn (Tickish)
+import qualified Unique as U
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import Data.Maybe
+import Data.List (tails,sortBy)
+import Unique (nonDetCmpUnique)
+import Util
+
+
+------------------------
+-- CmmNode
+
+#define ULabel {-# UNPACK #-} !Label
+
+data CmmNode e x where
+ CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O
+
+ CmmComment :: FastString -> CmmNode O O
+
+ -- Tick annotation, covering Cmm code in our tick scope. We only
+ -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
+ -- See Note [CmmTick scoping details]
+ CmmTick :: !CmmTickish -> CmmNode O O
+
+ -- Unwind pseudo-instruction, encoding stack unwinding
+ -- instructions for a debugger. This describes how to reconstruct
+ -- the "old" value of a register if we want to navigate the stack
+ -- up one frame. Having unwind information for @Sp@ will allow the
+ -- debugger to "walk" the stack.
+ --
+ -- See Note [What is this unwinding business?] in Debug
+ CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O
+
+ CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
+ -- Assign to register
+
+ CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
+ -- Assign to memory location. Size is
+ -- given by cmmExprType of the rhs.
+
+ CmmUnsafeForeignCall :: -- An unsafe foreign call;
+ -- see Note [Foreign calls]
+ -- Like a "fat machine instruction"; can occur
+ -- in the middle of a block
+ ForeignTarget -> -- call target
+ [CmmFormal] -> -- zero or more results
+ [CmmActual] -> -- zero or more arguments
+ CmmNode O O
+ -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
+ -- See Note [Unsafe foreign calls clobber caller-save registers]
+ --
+ -- Invariant: the arguments and the ForeignTarget must not
+ -- mention any registers for which GHC.Platform.callerSaves
+ -- is True. See Note [Register Parameter Passing].
+
+ CmmBranch :: ULabel -> CmmNode O C
+ -- Goto another block in the same procedure
+
+ CmmCondBranch :: { -- conditional branch
+ cml_pred :: CmmExpr,
+ cml_true, cml_false :: ULabel,
+ cml_likely :: Maybe Bool -- likely result of the conditional,
+ -- if known
+ } -> CmmNode O C
+
+ CmmSwitch
+ :: CmmExpr -- Scrutinee, of some integral type
+ -> SwitchTargets -- Cases. See [Note SwitchTargets]
+ -> CmmNode O C
+
+ CmmCall :: { -- A native call or tail call
+ cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp!
+
+ cml_cont :: Maybe Label,
+ -- Label of continuation (Nothing for return or tail call)
+ --
+ -- Note [Continuation BlockIds]: these BlockIds are called
+ -- Continuation BlockIds, and are the only BlockIds that can
+ -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
+ -- (CmmStackSlot (Young b) _).
+
+ cml_args_regs :: [GlobalReg],
+ -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
+ -- to the call. This is essential information for the
+ -- native code generator's register allocator; without
+ -- knowing which GlobalRegs are live it has to assume that
+ -- they are all live. This list should only include
+ -- GlobalRegs that are mapped to real machine registers on
+ -- the target platform.
+
+ cml_args :: ByteOff,
+ -- Byte offset, from the *old* end of the Area associated with
+ -- the Label (if cml_cont = Nothing, then Old area), of
+ -- youngest outgoing arg. Set the stack pointer to this before
+ -- transferring control.
+ -- (NB: an update frame might also have been stored in the Old
+ -- area, but it'll be in an older part than the args.)
+
+ cml_ret_args :: ByteOff,
+ -- For calls *only*, the byte offset for youngest returned value
+ -- This is really needed at the *return* point rather than here
+ -- at the call, but in practice it's convenient to record it here.
+
+ cml_ret_off :: ByteOff
+ -- For calls *only*, the byte offset of the base of the frame that
+ -- must be described by the info table for the return point.
+          -- The older words may contain an update frame, which has its
+          -- own info-table and layout information
+
+ -- From a liveness point of view, the stack words older than
+ -- cml_ret_off are treated as live, even if the sequel of
+ -- the call goes into a loop.
+ } -> CmmNode O C
+
+ CmmForeignCall :: { -- A safe foreign call; see Note [Foreign calls]
+ -- Always the last node of a block
+ tgt :: ForeignTarget, -- call target and convention
+ res :: [CmmFormal], -- zero or more results
+ args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing]
+ succ :: ULabel, -- Label of continuation
+ ret_args :: ByteOff, -- same as cml_ret_args
+ ret_off :: ByteOff, -- same as cml_ret_off
+ intrbl:: Bool -- whether or not the call is interruptible
+ } -> CmmNode O C
+
+{- Note [Foreign calls]
+~~~~~~~~~~~~~~~~~~~~~~~
+A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
+a CmmForeignCall call is used for *safe* foreign calls.
+
+Unsafe ones are mostly easy: think of them as a "fat machine
+instruction". In particular, they do *not* kill all live registers,
+just the registers they return to (there was a bit of code in GHC that
+conservatively assumed otherwise.) However, see [Register parameter passing].
+
+Safe ones are trickier. A safe foreign call
+ r = f(x)
+ultimately expands to
+ push "return address" -- Never used to return to;
+                              -- just points to an info table
+ save registers into TSO
+ call suspendThread
+ r = f(x) -- Make the call
+ call resumeThread
+ restore registers
+ pop "return address"
+We cannot "lower" a safe foreign call to this sequence of Cmms, because
+after we've saved Sp all the Cmm optimiser's assumptions are broken.
+
+Note that a safe foreign call needs an info table.
+
+So Safe Foreign Calls must remain as last nodes until the stack is
+made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above
+sequence.
+-}
+
+{- Note [Unsafe foreign calls clobber caller-save registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A foreign call is defined to clobber any GlobalRegs that are mapped to
+caller-saves machine registers (according to the prevailing C ABI).
+GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
+
+This is a design choice that makes it easier to generate code later.
+We could instead choose to say that foreign calls do *not* clobber
+caller-saves regs, but then we would have to figure out which regs
+were live across the call later and insert some saves/restores.
+
+Furthermore when we generate code we never have any GlobalRegs live
+across a call, because they are always copied-in to LocalRegs and
+copied-out again before making a call/jump. So all we have to do is
+avoid any code motion that would make a caller-saves GlobalReg live
+across a foreign call during subsequent optimisations.
+-}
+
+{- Note [Register parameter passing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On certain architectures, some registers are utilized for parameter
+passing in the C calling convention. For example, in x86-64 Linux
+convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
+argument passing. These are registers R3-R6, which our generated
+code may also be using; as a result, it's necessary to save these
+values before doing a foreign call. This is done during initial
+code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils. However,
+one result of doing this is that the contents of these registers
+may mysteriously change if referenced inside the arguments. This
+is dangerous, so you'll need to disable inlining much in the same
+way as is done in GHC.Cmm.Opt currently. We should fix this!
+-}
+
+---------------------------------------------
+-- Eq instance of CmmNode
+
+deriving instance Eq (CmmNode e x)
+
+----------------------------------------------
+-- Hoopl instances of CmmNode
+
+instance NonLocal CmmNode where
+ entryLabel (CmmEntry l _) = l
+
+ successors (CmmBranch l) = [l]
+ successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
+ successors (CmmSwitch _ ids) = switchTargetsToList ids
+ successors (CmmCall {cml_cont=l}) = maybeToList l
+ successors (CmmForeignCall {succ=l}) = [l]
+
+
+--------------------------------------------------
+-- Various helper types
+
+type CmmActual = CmmExpr
+type CmmFormal = LocalReg
+
+type UpdFrameOffset = ByteOff
+
+-- | A convention maps a list of values (function arguments or return
+-- values) to registers or stack locations.
+data Convention
+ = NativeDirectCall
+ -- ^ top-level Haskell functions use @NativeDirectCall@, which
+ -- maps arguments to registers starting with R2, according to
+ -- how many registers are available on the platform. This
+ -- convention ignores R1, because for a top-level function call
+ -- the function closure is implicit, and doesn't need to be passed.
+ | NativeNodeCall
+ -- ^ non-top-level Haskell functions, which pass the address of
+ -- the function closure in R1 (regardless of whether R1 is a
+ -- real register or not), and the rest of the arguments in
+ -- registers or on the stack.
+ | NativeReturn
+ -- ^ a native return. The convention for returns depends on
+ -- how many values are returned: for just one value returned,
+    -- the appropriate register is used (R1, F1, etc.), regardless
+ -- of whether it is a real register or not. For multiple
+ -- values returned, they are mapped to registers or the stack.
+ | Slow
+ -- ^ Slow entry points: all args pushed on the stack
+ | GC
+ -- ^ Entry to the garbage collector: uses the node reg!
+ -- (TODO: I don't think we need this --SDM)
+ deriving( Eq )
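+
+-- For example (illustrative): a high-level Cmm 'call f (x, y)' passes
+-- its arguments with NativeNodeCall and receives the results back with
+-- NativeReturn; see Note [Syntax of .cmm files] in GHC.Cmm.Parser.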
+
+data ForeignConvention
+ = ForeignConvention
+ CCallConv -- Which foreign-call convention
+ [ForeignHint] -- Extra info about the args
+ [ForeignHint] -- Extra info about the result
+ CmmReturnInfo
+ deriving Eq
+
+data CmmReturnInfo
+ = CmmMayReturn
+ | CmmNeverReturns
+ deriving ( Eq )
+
+data ForeignTarget -- The target of a foreign call
+ = ForeignTarget -- A foreign procedure
+ CmmExpr -- Its address
+ ForeignConvention -- Its calling convention
+ | PrimTarget -- A possibly-side-effecting machine operation
+ CallishMachOp -- Which one
+ deriving Eq
+
+foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
+foreignTargetHints target
+ = ( res_hints ++ repeat NoHint
+ , arg_hints ++ repeat NoHint )
+ where
+ (res_hints, arg_hints) =
+ case target of
+ PrimTarget op -> callishMachOpHints op
+ ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) ->
+ (res_hints, arg_hints)
+
+--------------------------------------------------
+-- Instances of register and slot users / definers
+
+instance UserOfRegs LocalReg (CmmNode e x) where
+ foldRegsUsed dflags f !z n = case n of
+ CmmAssign _ expr -> fold f z expr
+ CmmStore addr rval -> fold f (fold f z addr) rval
+ CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
+ CmmCondBranch expr _ _ _ -> fold f z expr
+ CmmSwitch expr _ -> fold f z expr
+ CmmCall {cml_target=tgt} -> fold f z tgt
+ CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
+ _ -> z
+ where fold :: forall a b. UserOfRegs LocalReg a
+ => (b -> LocalReg -> b) -> b -> a -> b
+ fold f z n = foldRegsUsed dflags f z n
+
+instance UserOfRegs GlobalReg (CmmNode e x) where
+ foldRegsUsed dflags f !z n = case n of
+ CmmAssign _ expr -> fold f z expr
+ CmmStore addr rval -> fold f (fold f z addr) rval
+ CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
+ CmmCondBranch expr _ _ _ -> fold f z expr
+ CmmSwitch expr _ -> fold f z expr
+ CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
+ CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
+ _ -> z
+ where fold :: forall a b. UserOfRegs GlobalReg a
+ => (b -> GlobalReg -> b) -> b -> a -> b
+ fold f z n = foldRegsUsed dflags f z n
+
+instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
+ -- The (Ord r) in the context is necessary here
+ -- See Note [Recursive superclasses] in TcInstDcls
+ foldRegsUsed _ _ !z (PrimTarget _) = z
+ foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e
+
+instance DefinerOfRegs LocalReg (CmmNode e x) where
+ foldRegsDefd dflags f !z n = case n of
+ CmmAssign lhs _ -> fold f z lhs
+ CmmUnsafeForeignCall _ fs _ -> fold f z fs
+ CmmForeignCall {res=res} -> fold f z res
+ _ -> z
+ where fold :: forall a b. DefinerOfRegs LocalReg a
+ => (b -> LocalReg -> b) -> b -> a -> b
+ fold f z n = foldRegsDefd dflags f z n
+
+instance DefinerOfRegs GlobalReg (CmmNode e x) where
+ foldRegsDefd dflags f !z n = case n of
+ CmmAssign lhs _ -> fold f z lhs
+ CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
+ CmmCall {} -> fold f z activeRegs
+ CmmForeignCall {} -> fold f z activeRegs
+ -- See Note [Safe foreign calls clobber STG registers]
+ _ -> z
+ where fold :: forall a b. DefinerOfRegs GlobalReg a
+ => (b -> GlobalReg -> b) -> b -> a -> b
+ fold f z n = foldRegsDefd dflags f z n
+
+ platform = targetPlatform dflags
+ activeRegs = activeStgRegs platform
+ activeCallerSavesRegs = filter (callerSaves platform) activeRegs
+
+ foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
+ foreignTargetRegs _ = activeCallerSavesRegs
+
+-- Note [Safe foreign calls clobber STG registers]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- During the stack layout phase every safe foreign call is expanded into a
+-- block that contains an unsafe foreign call (instead of the safe one) and
+-- ends with a normal call (see Note [Foreign calls]). This means that we
+-- must treat a safe foreign call as if it were a normal call (because
+-- eventually it will be). This is important if we try to run the sinking
+-- pass before the stack layout phase. Consider this example of what might
+-- go wrong (this is Cmm code from the stablename001 test). Here is the
+-- code after common block elimination (before stack layout):
+--
+-- c1q6:
+-- _s1pf::P64 = R1;
+-- _c1q8::I64 = performMajorGC;
+-- I64[(young<c1q9> + 8)] = c1q9;
+-- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...)
+-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- If we run sinking pass now (still before stack layout) we will get this:
+--
+-- c1q6:
+-- I64[(young<c1q9> + 8)] = c1q9;
+-- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...)
+-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that _s1pf was sunk past a foreign call. When we run stack layout,
+-- the safe call to performMajorGC will be turned into:
+--
+-- c1q6:
+-- _s1pc::P64 = P64[Sp + 8];
+-- I64[Sp - 8] = c1q9;
+-- Sp = Sp - 8;
+-- I64[I64[CurrentTSO + 24] + 16] = Sp;
+-- P64[CurrentNursery + 8] = Hp + 8;
+-- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,]
+-- result hints: [PtrHint] suspendThread(BaseReg, 0);
+-- call "ccall" arg hints: [] result hints: [] performMajorGC();
+-- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint]
+-- result hints: [PtrHint] resumeThread(_u1qI::I64);
+-- BaseReg = _u1qJ::I64;
+-- _u1qK::P64 = CurrentTSO;
+-- _u1qL::P64 = I64[_u1qK::P64 + 24];
+-- Sp = I64[_u1qL::P64 + 16];
+-- SpLim = _u1qL::P64 + 192;
+-- HpAlloc = 0;
+-- Hp = I64[CurrentNursery + 8] - 8;
+-- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
+-- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- _s1pf::P64 = R1; <------ INCORRECT!
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
+-- call is clearly incorrect. This is what would happen if we assumed that a
+-- safe foreign call had the same semantics as an unsafe one. To prevent
+-- this we need to treat a safe foreign call as if it were a normal call.
+
+-----------------------------------
+-- mapping Expr in GHC.Cmm.Node
+
+mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
+mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c
+mapForeignTarget _ m@(PrimTarget _) = m
+
+wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
+-- Take a transformer on expressions and apply it recursively.
+-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
+-- then uses f to rewrite the resulting expression
+wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
+wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
+wrapRecExp f e = f e
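+
+-- A usage sketch (illustrative; 'dropZeroOffsets' is a hypothetical
+-- name): rewriting every (CmmRegOff reg 0) back to (CmmReg reg), at
+-- all depths of an expression, is just
+--
+--     dropZeroOffsets :: CmmExpr -> CmmExpr
+--     dropZeroOffsets = wrapRecExp f
+--       where f (CmmRegOff reg 0) = CmmReg reg
+--             f e                 = e
+--
+-- GHC.Cmm.Opt.constantFoldExpr is built in exactly this style.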
+
+mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExp _ f@(CmmEntry{}) = f
+mapExp _ m@(CmmComment _) = m
+mapExp _ m@(CmmTick _) = m
+mapExp f (CmmUnwind regs) = CmmUnwind (map (fmap (fmap f)) regs)
+mapExp f (CmmAssign r e) = CmmAssign r (f e)
+mapExp f (CmmStore addr e) = CmmStore (f addr) (f e)
+mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
+mapExp _ l@(CmmBranch _) = l
+mapExp f (CmmCondBranch e ti fi l) = CmmCondBranch (f e) ti fi l
+mapExp f (CmmSwitch e ids) = CmmSwitch (f e) ids
+mapExp f n@CmmCall {cml_target=tgt} = n{cml_target = f tgt}
+mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
+
+mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExpDeep f = mapExp $ wrapRecExp f
+
+------------------------------------------------------------------------
+-- mapping Expr in GHC.Cmm.Node, but avoiding allocation when nothing changes
+
+mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
+mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
+mapForeignTargetM _ (PrimTarget _) = Nothing
+
+wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
+-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
+-- then gives f a chance to rewrite the resulting expression
+wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
+wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
+wrapRecExpM f e = f e
+
+mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
+mapExpM _ (CmmEntry{}) = Nothing
+mapExpM _ (CmmComment _) = Nothing
+mapExpM _ (CmmTick _) = Nothing
+mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
+mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e
+mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
+mapExpM _ (CmmBranch _) = Nothing
+mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
+mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e
+mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
+mapExpM f (CmmUnsafeForeignCall tgt fs as)
+ = case mapForeignTargetM f tgt of
+ Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
+ Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
+mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
+ = case mapForeignTargetM f tgt of
+ Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
+ Nothing -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as
+
+-- share as much as possible
+mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
+mapListM f xs = let (b, r) = mapListT f xs
+ in if b then Just r else Nothing
+
+mapListJ :: (a -> Maybe a) -> [a] -> [a]
+mapListJ f xs = snd (mapListT f xs)
+
+mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
+mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs))
+ where g (_, y, Nothing) (True, ys) = (True, y:ys)
+ g (_, _, Just y) (True, ys) = (True, y:ys)
+ g (ys', _, Nothing) (False, _) = (False, ys')
+ g (_, _, Just y) (False, ys) = (True, y:ys)
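+
+-- For example (illustrative): if 'f' rewrites only the head of
+-- [a,b,c], mapListT returns (True, a' : rest) where 'rest' *is* the
+-- original [b,c] tail, shared via the 'tails' trick above rather than
+-- rebuilt; if 'f' rewrites nothing at all, the original list itself is
+-- returned with the flag False.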
+
+mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
+mapExpDeepM f = mapExpM $ wrapRecExpM f
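+
+-- A usage sketch (illustrative; 'rewriteNode' is a hypothetical name,
+-- fromMaybe comes from Data.Maybe): a node-level rewrite that keeps
+-- the old node when nothing changes can be spelled
+--
+--     rewriteNode :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> CmmNode e x
+--     rewriteNode f n = fromMaybe n (mapExpDeepM f n)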
+
+-----------------------------------
+-- folding Expr in GHC.Cmm.Node
+
+foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
+foldExpForeignTarget exp (ForeignTarget e _) z = exp e z
+foldExpForeignTarget _ (PrimTarget _) z = z
+
+-- Take a folder on expressions and apply it recursively.
+-- Specifically, (wrapRecExpf f e z) recurses into the sub-expressions
+-- of CmmMachOp and CmmLoad, applying 'f' to every expression it
+-- encounters along the way, including those nodes themselves.
+wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
+wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
+wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
+wrapRecExpf f e z = f e z
+
+foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
+foldExp _ (CmmEntry {}) z = z
+foldExp _ (CmmComment {}) z = z
+foldExp _ (CmmTick {}) z = z
+foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs)
+foldExp f (CmmAssign _ e) z = f e z
+foldExp f (CmmStore addr e) z = f addr $ f e z
+foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as
+foldExp _ (CmmBranch _) z = z
+foldExp f (CmmCondBranch e _ _ _) z = f e z
+foldExp f (CmmSwitch e _) z = f e z
+foldExp f (CmmCall {cml_target=tgt}) z = f tgt z
+foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
+
+foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
+foldExpDeep f = foldExp (wrapRecExpf f)
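+
+-- A usage sketch (illustrative; 'exprCount' is a hypothetical name):
+-- counting every expression mentioned by a node, at any depth:
+--
+--     exprCount :: CmmNode e x -> Int
+--     exprCount n = foldExpDeep (\_ k -> k + 1) n 0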
+
+-- -----------------------------------------------------------------------------
+
+mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
+mapSuccessors f (CmmBranch bid) = CmmBranch (f bid)
+mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l
+mapSuccessors f (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets f ids)
+mapSuccessors _ n = n
+
+mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C
+ -> (CmmNode O C, [a])
+mapCollectSuccessors f (CmmBranch bid)
+ = let (bid', acc) = f bid in (CmmBranch bid', [acc])
+mapCollectSuccessors f (CmmCondBranch p y n l)
+ = let (bidt, acct) = f y
+ (bidf, accf) = f n
+ in (CmmCondBranch p bidt bidf l, [accf, acct])
+mapCollectSuccessors f (CmmSwitch e ids)
+ = let lbls = switchTargetsToList ids :: [Label]
+ lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a)
+ in ( CmmSwitch e
+ (mapSwitchTargets
+ (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids)
+ , map snd (mapElems lblMap)
+ )
+mapCollectSuccessors _ n = (n, [])
+
+-- -----------------------------------------------------------------------------
+
+-- | Tickish in Cmm context (annotations only)
+type CmmTickish = Tickish ()
+
+-- | Tick scope identifier, allowing us to reason about what
+-- annotations in a Cmm block should scope over. We especially take
+-- care to allow optimisations to reorganise blocks without losing
+-- tick association in the process.
+data CmmTickScope
+ = GlobalScope
+  -- ^ The global scope is the "root" of the scope graph. Every
+  -- scope is a sub-scope of the global scope. It doesn't make sense
+  -- to add ticks to this scope. Conversely, giving a block this
+  -- scope means that no ticks apply to it.
+
+ | SubScope !U.Unique CmmTickScope
+ -- ^ Constructs a new sub-scope to an existing scope. This allows
+ -- us to translate Core-style scoping rules (see @tickishScoped@)
+  -- into the Cmm world. Consider the following code:
+ --
+ -- tick<1> case ... of
+ -- A -> tick<2> ...
+ -- B -> tick<3> ...
+ --
+ -- We want the top-level tick annotation to apply to blocks
+ -- generated for the A and B alternatives. We can achieve that by
+ -- generating tick<1> into a block with scope a, while the code
+ -- for alternatives A and B gets generated into sub-scopes a/b and
+ -- a/c respectively.
+
+ | CombinedScope CmmTickScope CmmTickScope
+ -- ^ A combined scope scopes over everything that the two given
+ -- scopes cover. It is therefore a sub-scope of either scope. This
+ -- is required for optimisations. Consider common block elimination:
+ --
+ -- A -> tick<2> case ... of
+ -- C -> [common]
+ -- B -> tick<3> case ... of
+ -- D -> [common]
+ --
+ -- We will generate code for the C and D alternatives, and figure
+ -- out afterwards that it's actually common code. Scoping rules
+ -- dictate that the resulting common block needs to be covered by
+ -- both tick<2> and tick<3>, therefore we need to construct a
+  -- scope that is a child of *both* scopes. Now we can do that - if
+ -- we assign the scopes a/c and b/d to the common-ed up blocks,
+ -- the new block could have a combined tick scope a/c+b/d, which
+ -- both tick<2> and tick<3> apply to.
+
+-- Note [CmmTick scoping details]:
+--
+-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
+-- same block. Note that as a result of this, optimisations making
+-- tick scopes more specific can *reduce* the amount of code a tick
+-- scopes over. Fixing this would require a separate @CmmTickScope@
+-- field for @CmmTick@. Right now we do not do this simply because I
+-- couldn't find an example where it actually mattered -- multiple
+-- blocks within the same scope generally jump to each other, which
+-- prevents common block elimination from happening in the first
+-- place. But this is no strong reason, so if Cmm optimisations become
+-- more involved in future this might have to be revisited.
+
+-- | Return all scope paths.
+scopeToPaths :: CmmTickScope -> [[U.Unique]]
+scopeToPaths GlobalScope = [[]]
+scopeToPaths (SubScope u s) = map (u:) (scopeToPaths s)
+scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2
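+
+-- For example (illustrative): applied to
+-- CombinedScope (SubScope u GlobalScope) (SubScope v GlobalScope)
+-- this yields [[u], [v]].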
+
+-- | Returns the head uniques of the scopes. This is based on the
+-- assumption that the @Unique@ of @SubScope@ identifies the
+-- underlying super-scope. Used for efficient equality and comparison,
+-- see below.
+scopeUniques :: CmmTickScope -> [U.Unique]
+scopeUniques GlobalScope = []
+scopeUniques (SubScope u _) = [u]
+scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2
+
+-- Equality and order is based on the head uniques defined above. We
+-- take care to short-cut the (extremely) common cases.
+instance Eq CmmTickScope where
+ GlobalScope == GlobalScope = True
+ GlobalScope == _ = False
+ _ == GlobalScope = False
+ (SubScope u _) == (SubScope u' _) = u == u'
+ (SubScope _ _) == _ = False
+ _ == (SubScope _ _) = False
+ scope == scope' =
+ sortBy nonDetCmpUnique (scopeUniques scope) ==
+ sortBy nonDetCmpUnique (scopeUniques scope')
+ -- This is still deterministic because
+ -- the order is the same for equal lists
+
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+-- See Note [No Ord for Unique]
+instance Ord CmmTickScope where
+ compare GlobalScope GlobalScope = EQ
+ compare GlobalScope _ = LT
+ compare _ GlobalScope = GT
+ compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
+ compare scope scope' = cmpList nonDetCmpUnique
+ (sortBy nonDetCmpUnique $ scopeUniques scope)
+ (sortBy nonDetCmpUnique $ scopeUniques scope')
+
+instance Outputable CmmTickScope where
+ ppr GlobalScope = text "global"
+ ppr (SubScope us GlobalScope)
+ = ppr us
+ ppr (SubScope us s) = ppr s <> char '/' <> ppr us
+ ppr combined = parens $ hcat $ punctuate (char '+') $
+ map (hcat . punctuate (char '/') . map ppr . reverse) $
+ scopeToPaths combined
+
+-- | Checks whether two tick scopes are sub-scopes of each other. True
+-- if the two scopes are equal.
+isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
+isTickSubScope = cmp
+ where cmp _ GlobalScope = True
+ cmp GlobalScope _ = False
+ cmp (CombinedScope s1 s2) s' = cmp s1 s' && cmp s2 s'
+ cmp s (CombinedScope s1' s2') = cmp s s1' || cmp s s2'
+ cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s'
+
+-- | Combine two tick scopes. The new scope should be a sub-scope of
+-- both parameters. We simplify automatically if one tick scope is a
+-- sub-scope of the other already.
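+-- For example (illustrative): given a = SubScope u GlobalScope and
+-- b = SubScope v a, combineTickScopes a b is just b, because b is
+-- already a sub-scope of a; only unrelated scopes produce a
+-- CombinedScope.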
+combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
+combineTickScopes s1 s2
+ | s1 `isTickSubScope` s2 = s1
+ | s2 `isTickSubScope` s1 = s2
+ | otherwise = CombinedScope s1 s2
diff --git a/compiler/GHC/Cmm/Opt.hs b/compiler/GHC/Cmm/Opt.hs
new file mode 100644
index 0000000000..1db37ae58c
--- /dev/null
+++ b/compiler/GHC/Cmm/Opt.hs
@@ -0,0 +1,423 @@
+-----------------------------------------------------------------------------
+--
+-- Cmm optimisation
+--
+-- (c) The University of Glasgow 2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.Cmm.Opt (
+ constantFoldNode,
+ constantFoldExpr,
+ cmmMachOpFold,
+ cmmMachOpFoldM
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.Utils
+import GHC.Cmm
+import DynFlags
+import Util
+
+import Outputable
+import GHC.Platform
+
+import Data.Bits
+import Data.Maybe
+
+
+constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
+constantFoldNode dflags = mapExp (constantFoldExpr dflags)
+
+constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
+constantFoldExpr dflags = wrapRecExp f
+ where f (CmmMachOp op args) = cmmMachOpFold dflags op args
+ f (CmmRegOff r 0) = CmmReg r
+ f e = e
+
+-- -----------------------------------------------------------------------------
+-- MachOp constant folder
+
+-- Now, try to constant-fold the MachOps. The arguments have already
+-- been optimized and folded.
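+--
+-- For example (illustrative):
+--
+--     cmmMachOpFold dflags (MO_Add W64)
+--                   [CmmLit (CmmInt 1 W64), CmmLit (CmmInt 2 W64)]
+--
+-- yields CmmLit (CmmInt 3 W64) via the two-literal case below.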
+
+cmmMachOpFold
+ :: DynFlags
+   -> MachOp            -- The operation from a CmmMachOp
+ -> [CmmExpr] -- The optimized arguments
+ -> CmmExpr
+
+cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
+
+-- Returns Nothing if no changes were made. This is useful for Hoopl,
+-- and it also reduces allocation!
+cmmMachOpFoldM
+ :: DynFlags
+ -> MachOp
+ -> [CmmExpr]
+ -> Maybe CmmExpr
+
+cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
+ = Just $ case op of
+ MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
+ MO_Not _ -> CmmLit (CmmInt (complement x) rep)
+
+ -- these are interesting: we must first narrow to the
+ -- "from" type, in order to truncate to the correct size.
+ -- The final narrow/widen to the destination type
+ -- is implicit in the CmmLit.
+ MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
+ MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
+ MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
+
+ _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op
+
+
+-- Eliminate conversion NOPs
+cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
+cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
+
+-- Eliminate nested conversions where possible
+cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
+ | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
+ Just (_, rep3,signed2) <- isIntConversion conv_outer
+ = case () of
+ -- widen then narrow to the same size is a nop
+ _ | rep1 < rep2 && rep1 == rep3 -> Just x
+ -- Widen then narrow to different size: collapse to single conversion
+ -- but remember to use the signedness from the widening, just in case
+ -- the final conversion is a widen.
+ | rep1 < rep2 && rep2 > rep3 ->
+ Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
+ -- Nested widenings: collapse if the signedness is the same
+ | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
+ Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
+ -- Nested narrowings: collapse
+ | rep1 > rep2 && rep2 > rep3 ->
+ Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
+ | otherwise ->
+ Nothing
+ where
+ isIntConversion (MO_UU_Conv rep1 rep2)
+ = Just (rep1,rep2,False)
+ isIntConversion (MO_SS_Conv rep1 rep2)
+ = Just (rep1,rep2,True)
+ isIntConversion _ = Nothing
+
+ intconv True = MO_SS_Conv
+ intconv False = MO_UU_Conv
+
+-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
+-- but what if the architecture only supports word-sized loads, should
+-- we do the transformation anyway?
+
+cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
+ = case mop of
+ -- for comparisons: don't forget to narrow the arguments before
+ -- comparing, since they might be out of range.
+ MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
+ MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
+
+ MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags))
+ MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
+ MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags))
+ MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
+
+ MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags))
+ MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
+ MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags))
+ MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
+
+ MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
+ MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
+ MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
+ MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
+ MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
+ MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
+ MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
+
+ MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
+ MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
+ MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
+
+ MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
+ MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
+ MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
+
+ _ -> Nothing
+
+ where
+ x_u = narrowU xrep x
+ y_u = narrowU xrep y
+ x_s = narrowS xrep x
+ y_s = narrowS xrep y
+
+
+-- When possible, shift the constants to the right-hand side, so that we
+-- can match for strength reductions. Note that the code generator will
+-- also assume that constants have been shifted to the right when
+-- possible.
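+--
+-- For example (illustrative): MO_Add applied to [CmmLit l, x] is
+-- commuted to [x, CmmLit l], after which the CmmRegOff and
+-- literal-offset cases further down can fire.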
+
+cmmMachOpFoldM dflags op [x@(CmmLit _), y]
+ | not (isLit y) && isCommutableMachOp op
+ = Just (cmmMachOpFold dflags op [y, x])
+
+-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
+-- moved to the right, it is more likely that we will find
+-- opportunities for constant folding when the expression is
+-- right-associated.
+--
+-- ToDo: this appears to introduce a quadratic behaviour due to the
+-- nested cmmMachOpFold. Can we fix this?
+--
+-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
+-- is also a lit (otherwise arg1 would be on the right). If we
+-- put arg1 on the left of the rearranged expression, we'll get into a
+-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
+--
+-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
+-- PicBaseReg from the corresponding label (or label difference).
+--
+cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
+ | mop2 `associates_with` mop1
+ && not (isLit arg1) && not (isPicReg arg1)
+ = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
+ where
+ MO_Add{} `associates_with` MO_Sub{} = True
+ mop1 `associates_with` mop2 =
+ mop1 == mop2 && isAssociativeMachOp mop1
+
+-- special case: (a - b) + c ==> a + (c - b)
+cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
+ | not (isLit arg1) && not (isPicReg arg1)
+ = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
+
+-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
+--
+-- this is better because lit+N is a single link-time constant (e.g. a
+-- CmmLabelOff), so the right-hand expression needs only one
+-- instruction, whereas the left needs two. This happens when pointer
+-- tagging gives us label+offset, and PIC turns the label into
+-- PicBaseReg + label.
+--
+cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
+ , CmmLit (CmmInt n rep) ]
+ | isPicReg pic
+ = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
+ where off = fromIntegral (narrowS rep n)
+
+-- Make a RegOff if we can
+cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
+cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
+cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
+cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
+
+-- Fold label(+/-)offset into a CmmLit where possible
+
+cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
+ = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
+cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
+ = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
+cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
+ = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
+
+
+-- Comparison of literal with widened operand: perform the comparison
+-- at the smaller width, as long as the literal is within range.
+
+-- We can't do the reverse trick, when the operand is narrowed:
+-- narrowing throws away bits from the operand, there's no way to do
+-- the same comparison at the larger size.
+
+cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
+  | -- PowerPC NCG has a TODO for I8/I16 comparisons, so don't try
+ platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
+ -- if the operand is widened:
+ Just (rep, signed, narrow_fn) <- maybe_conversion conv,
+ -- and this is a comparison operation:
+ Just narrow_cmp <- maybe_comparison cmp rep signed,
+ -- and the literal fits in the smaller size:
+ i == narrow_fn rep i
+ -- then we can do the comparison at the smaller size
+ = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
+ where
+ maybe_conversion (MO_UU_Conv from to)
+ | to > from
+ = Just (from, False, narrowU)
+ maybe_conversion (MO_SS_Conv from to)
+ | to > from
+ = Just (from, True, narrowS)
+
+ -- don't attempt to apply this optimisation when the source
+ -- is a float; see #1916
+ maybe_conversion _ = Nothing
+
+ -- careful (#2080): if the original comparison was signed, but
+ -- we were doing an unsigned widen, then we must do an
+ -- unsigned comparison at the smaller size.
+ maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
+ maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
+ maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
+ maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
+ maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
+ maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
+ maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
+ maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
+ maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
+ maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
+ maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
+ maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
+ maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
+ maybe_comparison _ _ _ = Nothing
+
+-- We can often do something with constants of 0 and 1 ...
+-- See Note [Comparison operators]
+
+cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
+ = case mop of
+ -- Arithmetic
+ MO_Add _ -> Just x -- x + 0 = x
+ MO_Sub _ -> Just x -- x - 0 = x
+ MO_Mul _ -> Just y -- x * 0 = 0
+
+ -- Logical operations
+ MO_And _ -> Just y -- x & 0 = 0
+ MO_Or _ -> Just x -- x | 0 = x
+ MO_Xor _ -> Just x -- x `xor` 0 = x
+
+ -- Shifts
+ MO_Shl _ -> Just x -- x << 0 = x
+ MO_S_Shr _ -> Just x -- ditto shift-right
+ MO_U_Shr _ -> Just x
+
+ -- Comparisons; these ones are trickier
+ -- See Note [Comparison operators]
+ MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y
+ MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y
+ MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y
+ MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto
+ MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0
+ MO_S_Lt _ | isComparisonExpr x -> Just zero
+ MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1
+ MO_S_Ge _ | isComparisonExpr x -> Just one
+
+ MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y
+ MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
+ _ -> Nothing
+ where
+ zero = CmmLit (CmmInt 0 (wordWidth dflags))
+ one = CmmLit (CmmInt 1 (wordWidth dflags))
+
+cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
+ = case mop of
+ -- Arithmetic: x*1 = x, etc
+ MO_Mul _ -> Just x
+ MO_S_Quot _ -> Just x
+ MO_U_Quot _ -> Just x
+ MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
+ MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
+
+ -- Comparisons; trickier
+ -- See Note [Comparison operators]
+ MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y
+ MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y
+ MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y
+ MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto
+ MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0
+ MO_S_Gt _ | isComparisonExpr x -> Just zero
+ MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1
+ MO_S_Le _ | isComparisonExpr x -> Just one
+ MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y
+ MO_S_Ge _ | isComparisonExpr x -> Just x
+ _ -> Nothing
+ where
+ zero = CmmLit (CmmInt 0 (wordWidth dflags))
+ one = CmmLit (CmmInt 1 (wordWidth dflags))
+
+-- Now look for multiplication/division by powers of 2 (integers).
+
+cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
+ = case mop of
+ MO_Mul rep
+ | Just p <- exactLog2 n ->
+ Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
+ MO_U_Quot rep
+ | Just p <- exactLog2 n ->
+ Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
+ MO_U_Rem rep
+ | Just _ <- exactLog2 n ->
+ Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
+ MO_S_Quot rep
+ | Just p <- exactLog2 n,
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
+ -- it is a reg. FIXME: remove this restriction.
+ Just (cmmMachOpFold dflags (MO_S_Shr rep)
+ [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
+ MO_S_Rem rep
+ | Just p <- exactLog2 n,
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
+ -- it is a reg. FIXME: remove this restriction.
+ -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
+ -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
+ -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
+ Just (cmmMachOpFold dflags (MO_Sub rep)
+ [x, cmmMachOpFold dflags (MO_And rep)
+ [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
+ _ -> Nothing
+ where
+ -- In contrast with unsigned integers, for signed ones
+ -- shift right is not the same as quot, because it rounds
+ -- to minus infinity, whereas quot rounds toward zero.
+ -- To fix this up, we add one less than the divisor to the
+ -- dividend if it is a negative number.
+ --
+ -- to avoid a test/jump, we use the following sequence:
+ -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
+      --      x2 = x1 & (divisor-1)
+ -- result = x + x2
+ -- this could be done a bit more simply using conditional moves,
+ -- but we're processor independent here.
+ --
+ -- we optimise the divide by 2 case slightly, generating
+ -- x1 = x >> word_size-1 (unsigned)
+      --      result = x + x1
+ signedQuotRemHelper :: Width -> Integer -> CmmExpr
+ signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+ where
+ bits = fromIntegral (widthInBits rep) - 1
+ shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+ x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+ x2 = if p == 1 then x1 else
+ CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
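+
+    -- Worked example (illustrative): dividing x = -7 by 4 (p = 2) on a
+    -- 64-bit word:
+    --      x1 = -7 >> 63     = -1   (all ones)
+    --      x2 = x1 & 3       =  3
+    --      (-7 + 3) >> 2     = -1   = -7 `quot` 4
+    -- whereas the bare arithmetic shift -7 >> 2 would give -2.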
+
+-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
+-- Unfortunately this needs a unique supply because x might not be a
+-- register. See #2253 (program 6) for an example.
+
+
+-- Anything else is just too hard.
+
+cmmMachOpFoldM _ _ _ = Nothing
+
+{- Note [Comparison operators]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+ CmmCondBranch ((x>#y) == 1) t f
+we really want to convert to
+ CmmCondBranch (x>#y) t f
+
+That's what the constant-folding operations on comparison operators do above.
+-}
+
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+isPicReg :: CmmExpr -> Bool
+isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
+isPicReg _ = False
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
new file mode 100644
index 0000000000..d7235d0167
--- /dev/null
+++ b/compiler/GHC/Cmm/Parser.y
@@ -0,0 +1,1442 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 2004-2012
+--
+-- Parser for concrete Cmm.
+--
+-----------------------------------------------------------------------------
+
+{- -----------------------------------------------------------------------------
+Note [Syntax of .cmm files]
+
+NOTE: You are very much on your own in .cmm. There is very little
+error checking at all:
+
+ * Type errors are detected by the (optional) -dcmm-lint pass, if you
+ don't turn this on then a type error will likely result in a panic
+ from the native code generator.
+
+ * Passing the wrong number of arguments or arguments of the wrong
+ type is not detected.
+
+There are two ways to write .cmm code:
+
+ (1) High-level Cmm code delegates the stack handling to GHC, and
+ never explicitly mentions Sp or registers.
+
+ (2) Low-level Cmm manages the stack itself, and must know about
+ calling conventions.
+
+Whether you want high-level or low-level Cmm is indicated by the
+presence of an argument list on a procedure. For example:
+
+foo ( gcptr a, bits32 b )
+{
+ // this is high-level cmm code
+
+ if (b > 0) {
+ // we can make tail calls passing arguments:
+ jump stg_ap_0_fast(a);
+ }
+
+ push (stg_upd_frame_info, a) {
+ // stack frames can be explicitly pushed
+
+ (x,y) = call wibble(a,b,3,4);
+ // calls pass arguments and return results using the native
+ // Haskell calling convention. The code generator will automatically
+ // construct a stack frame and an info table for the continuation.
+
+ return (x,y);
+ // we can return multiple values from the current proc
+ }
+}
+
+bar
+{
+ // this is low-level cmm code, indicated by the fact that we did not
+ // put an argument list on bar.
+
+ x = R1; // the calling convention is explicit: better be careful
+ // that this works on all platforms!
+
+ jump %ENTRY_CODE(Sp(0))
+}
+
+Here is a list of rules for high-level and low-level code. If you
+break the rules, you get a panic (for using a high-level construct in
+a low-level proc), or wrong code (when using low-level code in a
+high-level proc). This stuff isn't checked! (TODO!)
+
+High-level only:
+
+ - tail-calls with arguments, e.g.
+ jump stg_fun (arg1, arg2);
+
+ - function calls:
+ (ret1,ret2) = call stg_fun (arg1, arg2);
+
+ This makes a call with the NativeNodeCall convention, and the
+ values are returned to the following code using the NativeReturn
+ convention.
+
+ - returning:
+ return (ret1, ret2)
+
+ These use the NativeReturn convention to return zero or more
+ results to the caller.
+
+ - pushing stack frames:
+ push (info_ptr, field1, ..., fieldN) { ... statements ... }
+
+ - reserving temporary stack space:
+
+ reserve N = x { ... }
+
+ this reserves an area of size N (words) on the top of the stack,
+ and binds its address to x (a local register). Typically this is
+ used for allocating temporary storage for passing to foreign
+ functions.
+
+ Note that if you make any native calls or invoke the GC in the
+ scope of the reserve block, you are responsible for ensuring that
+ the stack you reserved is laid out correctly with an info table.
+
+Low-level only:
+
+ - References to Sp, R1-R8, F1-F4 etc.
+
+ NB. foreign calls may clobber the argument registers R1-R8, F1-F4
+ etc., so ensure they are saved into variables around foreign
+ calls.
+
+ - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp
+ directly.
+
+Both high-level and low-level code can use a raw tail-call:
+
+ jump stg_fun [R1,R2]
+
+NB. you *must* specify the list of GlobalRegs that are passed via a
+jump, otherwise the register allocator will assume that all the
+GlobalRegs are dead at the jump.
+
+
+Calling Conventions
+-------------------
+
+High-level procedures use the NativeNode calling convention, or the
+NativeReturn convention if the 'return' keyword is used (see Stack
+Frames below).
+
+Low-level procedures implement their own calling convention, so it can
+be anything at all.
+
+If a low-level procedure implements the NativeNode calling convention,
+then it can be called by high-level code using an ordinary function
+call. In general this is hard to arrange because the calling
+convention depends on the number of physical registers available for
+parameter passing, but there are a few cases where the calling
+convention is platform-independent:
+
+ - Zero arguments.
+
+ - One argument of pointer or non-pointer word type; this is always
+ passed in R1 according to the NativeNode convention.
+
+ - Returning a single value; these conventions are fixed and platform
+ independent.
+
+
+Stack Frames
+------------
+
+A stack frame is written like this:
+
+INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN )
+ return ( arg1, ..., argM )
+{
+ ... code ...
+}
+
+where field1 ... fieldN are the fields of the stack frame (with types),
+and arg1 ... argM are the values returned to the stack frame (with types).
+The return values are assumed to be passed according to the
+NativeReturn convention.
+
+On entry to the code, the stack frame looks like:
+
+ |----------|
+ | fieldN |
+ | ... |
+ | field1 |
+ |----------|
+ | info_ptr |
+ |----------|
+ | argM     |
+ | ... | <- Sp
+
+and some of the args may be in registers.
+
+We prefix the code with a copyIn of the args, and assign all the stack
+frame fields to their formals. The initial "arg offset" for stack
+layout purposes consists of the whole stack frame plus any args that
+might be on the stack.
+
+A tail-call may pass a stack frame to the callee using the following
+syntax:
+
+jump f (info_ptr, field1,..,fieldN) (arg1,..,argN)
+
+where info_ptr and field1..fieldN describe the stack frame, and
+arg1..argN are the arguments passed to f using the NativeNodeCall
+convention. Note if a field is longer than a word (e.g. a D_ on
+a 32-bit machine) then the call will push as many words as
+necessary to the stack to accommodate it (e.g. 2).
+
+
+----------------------------------------------------------------------------- -}
+
+{
+{-# LANGUAGE TupleSections #-}
+
+module GHC.Cmm.Parser ( parseCmmFile ) where
+
+import GhcPrelude
+
+import GHC.StgToCmm.ExtCode
+import GHC.Cmm.CallConv
+import GHC.StgToCmm.Prof
+import GHC.StgToCmm.Heap
+import GHC.StgToCmm.Monad hiding ( getCode, getCodeR, getCodeScoped, emitLabel, emit
+ , emitStore, emitAssign, emitOutOfLine, withUpdFrameOff
+ , getUpdFrameOff )
+import qualified GHC.StgToCmm.Monad as F
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Foreign
+import GHC.StgToCmm.Expr
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Layout hiding (ArgRep(..))
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Bind ( emitBlackHoleCode, emitUpdateFrame )
+import CoreSyn ( Tickish(SourceNote) )
+
+import GHC.Cmm.Opt
+import GHC.Cmm.Graph
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch ( mkSwitchTargets )
+import GHC.Cmm.Info
+import GHC.Cmm.BlockId
+import GHC.Cmm.Lexer
+import GHC.Cmm.CLabel
+import GHC.Cmm.Monad
+import GHC.Runtime.Layout
+import Lexer
+
+import CostCentre
+import ForeignCall
+import Module
+import GHC.Platform
+import Literal
+import Unique
+import UniqFM
+import SrcLoc
+import DynFlags
+import ErrUtils
+import StringBuffer
+import FastString
+import Panic
+import Constants
+import Outputable
+import BasicTypes
+import Bag ( emptyBag, unitBag )
+import Var
+
+import Control.Monad
+import Data.Array
+import Data.Char ( ord )
+import System.Exit
+import Data.Maybe
+import qualified Data.Map as M
+import qualified Data.ByteString.Char8 as BS8
+
+#include "HsVersions.h"
+}
+
+%expect 0
+
+%token
+ ':' { L _ (CmmT_SpecChar ':') }
+ ';' { L _ (CmmT_SpecChar ';') }
+ '{' { L _ (CmmT_SpecChar '{') }
+ '}' { L _ (CmmT_SpecChar '}') }
+ '[' { L _ (CmmT_SpecChar '[') }
+ ']' { L _ (CmmT_SpecChar ']') }
+ '(' { L _ (CmmT_SpecChar '(') }
+ ')' { L _ (CmmT_SpecChar ')') }
+ '=' { L _ (CmmT_SpecChar '=') }
+ '`' { L _ (CmmT_SpecChar '`') }
+ '~' { L _ (CmmT_SpecChar '~') }
+ '/' { L _ (CmmT_SpecChar '/') }
+ '*' { L _ (CmmT_SpecChar '*') }
+ '%' { L _ (CmmT_SpecChar '%') }
+ '-' { L _ (CmmT_SpecChar '-') }
+ '+' { L _ (CmmT_SpecChar '+') }
+ '&' { L _ (CmmT_SpecChar '&') }
+ '^' { L _ (CmmT_SpecChar '^') }
+ '|' { L _ (CmmT_SpecChar '|') }
+ '>' { L _ (CmmT_SpecChar '>') }
+ '<' { L _ (CmmT_SpecChar '<') }
+ ',' { L _ (CmmT_SpecChar ',') }
+ '!' { L _ (CmmT_SpecChar '!') }
+
+ '..' { L _ (CmmT_DotDot) }
+ '::' { L _ (CmmT_DoubleColon) }
+ '>>' { L _ (CmmT_Shr) }
+ '<<' { L _ (CmmT_Shl) }
+ '>=' { L _ (CmmT_Ge) }
+ '<=' { L _ (CmmT_Le) }
+ '==' { L _ (CmmT_Eq) }
+ '!=' { L _ (CmmT_Ne) }
+ '&&' { L _ (CmmT_BoolAnd) }
+ '||' { L _ (CmmT_BoolOr) }
+
+ 'True' { L _ (CmmT_True ) }
+ 'False' { L _ (CmmT_False) }
+ 'likely'{ L _ (CmmT_likely)}
+
+ 'CLOSURE' { L _ (CmmT_CLOSURE) }
+ 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) }
+ 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) }
+ 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) }
+ 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) }
+ 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) }
+ 'else' { L _ (CmmT_else) }
+ 'export' { L _ (CmmT_export) }
+ 'section' { L _ (CmmT_section) }
+ 'goto' { L _ (CmmT_goto) }
+ 'if' { L _ (CmmT_if) }
+ 'call' { L _ (CmmT_call) }
+ 'jump' { L _ (CmmT_jump) }
+ 'foreign' { L _ (CmmT_foreign) }
+ 'never' { L _ (CmmT_never) }
+ 'prim' { L _ (CmmT_prim) }
+ 'reserve' { L _ (CmmT_reserve) }
+ 'return' { L _ (CmmT_return) }
+ 'returns' { L _ (CmmT_returns) }
+ 'import' { L _ (CmmT_import) }
+ 'switch' { L _ (CmmT_switch) }
+ 'case' { L _ (CmmT_case) }
+ 'default' { L _ (CmmT_default) }
+ 'push' { L _ (CmmT_push) }
+ 'unwind' { L _ (CmmT_unwind) }
+ 'bits8' { L _ (CmmT_bits8) }
+ 'bits16' { L _ (CmmT_bits16) }
+ 'bits32' { L _ (CmmT_bits32) }
+ 'bits64' { L _ (CmmT_bits64) }
+ 'bits128' { L _ (CmmT_bits128) }
+ 'bits256' { L _ (CmmT_bits256) }
+ 'bits512' { L _ (CmmT_bits512) }
+ 'float32' { L _ (CmmT_float32) }
+ 'float64' { L _ (CmmT_float64) }
+ 'gcptr' { L _ (CmmT_gcptr) }
+
+ GLOBALREG { L _ (CmmT_GlobalReg $$) }
+ NAME { L _ (CmmT_Name $$) }
+ STRING { L _ (CmmT_String $$) }
+ INT { L _ (CmmT_Int $$) }
+ FLOAT { L _ (CmmT_Float $$) }
+
+%monad { PD } { >>= } { return }
+%lexer { cmmlex } { L _ CmmT_EOF }
+%name cmmParse cmm
+%tokentype { Located CmmToken }
+
+-- C-- operator precedences, taken from the C-- spec
+%right '||' -- non-std extension, called %disjoin in C--
+%right '&&' -- non-std extension, called %conjoin in C--
+%right '!'
+%nonassoc '>=' '>' '<=' '<' '!=' '=='
+%left '|'
+%left '^'
+%left '&'
+%left '>>' '<<'
+%left '-' '+'
+%left '/' '*' '%'
+%right '~'
+
+%%
+
+cmm :: { CmmParse () }
+ : {- empty -} { return () }
+ | cmmtop cmm { do $1; $2 }
+
+cmmtop :: { CmmParse () }
+ : cmmproc { $1 }
+ | cmmdata { $1 }
+ | decl { $1 }
+ | 'CLOSURE' '(' NAME ',' NAME lits ')' ';'
+ {% liftP . withThisPackage $ \pkg ->
+ do lits <- sequence $6;
+ staticClosure pkg $3 $5 (map getLit lits) }
+
+-- The only static closures in the RTS are dummy closures like
+-- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need
+-- to provide the full generality of static closures here.
+-- In particular:
+-- * CCS can always be CCS_DONT_CARE
+-- * closure is always extern
+-- * payload is always empty
+-- * we can derive closure and info table labels from a single NAME
+
+cmmdata :: { CmmParse () }
+ : 'section' STRING '{' data_label statics '}'
+ { do lbl <- $4;
+ ss <- sequence $5;
+ code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) }
+
+data_label :: { CmmParse CLabel }
+ : NAME ':'
+ {% liftP . withThisPackage $ \pkg ->
+ return (mkCmmDataLabel pkg $1) }
+
+statics :: { [CmmParse [CmmStatic]] }
+ : {- empty -} { [] }
+ | static statics { $1 : $2 }
+
+static :: { CmmParse [CmmStatic] }
+ : type expr ';' { do e <- $2;
+ return [CmmStaticLit (getLit e)] }
+ | type ';' { return [CmmUninitialised
+ (widthInBytes (typeWidth $1))] }
+ | 'bits8' '[' ']' STRING ';' { return [mkString $4] }
+ | 'bits8' '[' INT ']' ';' { return [CmmUninitialised
+ (fromIntegral $3)] }
+ | typenot8 '[' INT ']' ';' { return [CmmUninitialised
+ (widthInBytes (typeWidth $1) *
+ fromIntegral $3)] }
+ | 'CLOSURE' '(' NAME lits ')'
+ { do { lits <- sequence $4
+ ; dflags <- getDynFlags
+ ; return $ map CmmStaticLit $
+ mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData)
+ -- mkForeignLabel because these are only used
+ -- for CHARLIKE and INTLIKE closures in the RTS.
+ dontCareCCS (map getLit lits) [] [] [] } }
+ -- arrays of closures required for the CHARLIKE & INTLIKE arrays
+
+lits :: { [CmmParse CmmExpr] }
+ : {- empty -} { [] }
+ | ',' expr lits { $2 : $3 }
+
+cmmproc :: { CmmParse () }
+ : info maybe_conv maybe_formals maybe_body
+ { do ((entry_ret_label, info, stk_formals, formals), agraph) <-
+ getCodeScoped $ loopDecls $ do {
+ (entry_ret_label, info, stk_formals) <- $1;
+ dflags <- getDynFlags;
+ formals <- sequence (fromMaybe [] $3);
+ withName (showSDoc dflags (ppr entry_ret_label))
+ $4;
+ return (entry_ret_label, info, stk_formals, formals) }
+ let do_layout = isJust $3
+ code (emitProcWithStackFrame $2 info
+ entry_ret_label stk_formals formals agraph
+ do_layout ) }
+
+maybe_conv :: { Convention }
+ : {- empty -} { NativeNodeCall }
+ | 'return' { NativeReturn }
+
+maybe_body :: { CmmParse () }
+ : ';' { return () }
+ | '{' body '}' { withSourceNote $1 $3 $2 }
+
+info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
+ : NAME
+ {% liftP . withThisPackage $ \pkg ->
+ do newFunctionName $1 pkg
+ return (mkCmmCodeLabel pkg $1, Nothing, []) }
+
+
+ | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
+ -- ptrs, nptrs, closure type, description, type
+ {% liftP . withThisPackage $ \pkg ->
+ do dflags <- getDynFlags
+ let prof = profilingInfo dflags $11 $13
+ rep = mkRTSRep (fromIntegral $9) $
+ mkHeapRep dflags False (fromIntegral $5)
+ (fromIntegral $7) Thunk
+ -- not really Thunk, but that makes the info table
+ -- we want.
+ return (mkCmmEntryLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+ []) }
+
+ | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
+ -- ptrs, nptrs, closure type, description, type, fun type
+ {% liftP . withThisPackage $ \pkg ->
+ do dflags <- getDynFlags
+ let prof = profilingInfo dflags $11 $13
+ ty = Fun 0 (ArgSpec (fromIntegral $15))
+ -- Arity zero, arg_type $15
+ rep = mkRTSRep (fromIntegral $9) $
+ mkHeapRep dflags False (fromIntegral $5)
+ (fromIntegral $7) ty
+ return (mkCmmEntryLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+ []) }
+ -- we leave most of the fields zero here. This is only used
+ -- to generate the BCO info table in the RTS at the moment.
+
+ | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
+ -- ptrs, nptrs, tag, closure type, description, type
+ {% liftP . withThisPackage $ \pkg ->
+ do dflags <- getDynFlags
+ let prof = profilingInfo dflags $13 $15
+ ty = Constr (fromIntegral $9) -- Tag
+ (BS8.pack $13)
+ rep = mkRTSRep (fromIntegral $11) $
+ mkHeapRep dflags False (fromIntegral $5)
+ (fromIntegral $7) ty
+ return (mkCmmEntryLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing },
+ []) }
+
+    -- If profiling is on, this string gets duplicated,
+    -- but that's the way the old code did it; we can fix it some other time.
+
+ | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')'
+ -- selector, closure type, description, type
+ {% liftP . withThisPackage $ \pkg ->
+ do dflags <- getDynFlags
+ let prof = profilingInfo dflags $9 $11
+ ty = ThunkSelector (fromIntegral $5)
+ rep = mkRTSRep (fromIntegral $7) $
+ mkHeapRep dflags False 0 0 ty
+ return (mkCmmEntryLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+ []) }
+
+ | 'INFO_TABLE_RET' '(' NAME ',' INT ')'
+ -- closure type (no live regs)
+ {% liftP . withThisPackage $ \pkg ->
+ do let prof = NoProfilingInfo
+ rep = mkRTSRep (fromIntegral $5) $ mkStackRep []
+ return (mkCmmRetLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+ []) }
+
+ | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')'
+ -- closure type, live regs
+ {% liftP . withThisPackage $ \pkg ->
+ do dflags <- getDynFlags
+ live <- sequence $7
+ let prof = NoProfilingInfo
+ -- drop one for the info pointer
+ bitmap = mkLiveness dflags (drop 1 live)
+ rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap
+ return (mkCmmRetLabel pkg $3,
+ Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
+ , cit_rep = rep
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+ live) }
+
+body :: { CmmParse () }
+ : {- empty -} { return () }
+ | decl body { do $1; $2 }
+ | stmt body { do $1; $2 }
+
+decl :: { CmmParse () }
+ : type names ';' { mapM_ (newLocal $1) $2 }
+ | 'import' importNames ';' { mapM_ newImport $2 }
+ | 'export' names ';' { return () } -- ignore exports
+
+
+-- an imported function name, with optional packageId
+importNames
+ :: { [(FastString, CLabel)] }
+ : importName { [$1] }
+ | importName ',' importNames { $1 : $3 }
+
+importName
+ :: { (FastString, CLabel) }
+
+ -- A label imported without an explicit packageId.
+ -- These are taken to come from some foreign, unnamed package.
+ : NAME
+ { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) }
+
+ -- as previous 'NAME', but 'IsData'
+ | 'CLOSURE' NAME
+ { ($2, mkForeignLabel $2 Nothing ForeignLabelInExternalPackage IsData) }
+
+ -- A label imported with an explicit packageId.
+ | STRING NAME
+ { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) }
+
+
+names :: { [FastString] }
+ : NAME { [$1] }
+ | NAME ',' names { $1 : $3 }
+
+stmt :: { CmmParse () }
+ : ';' { return () }
+
+ | NAME ':'
+ { do l <- newLabel $1; emitLabel l }
+
+
+
+ | lreg '=' expr ';'
+ { do reg <- $1; e <- $3; withSourceNote $2 $4 (emitAssign reg e) }
+ | type '[' expr ']' '=' expr ';'
+ { withSourceNote $2 $7 (doStore $1 $3 $6) }
+
+ -- Gah! We really want to say "foreign_results" but that causes
+        -- a shift/reduce conflict with assignment. We can either
+        -- expand out the no-result and single-result cases or
+        -- tweak the syntax to avoid the conflict. The latter
+        -- option is taken here because the other way would require
+        -- multiple levels of expansion and get unwieldy.
+ | foreign_results 'foreign' STRING foreignLabel '(' cmm_hint_exprs0 ')' safety opt_never_returns ';'
+ {% foreignCall $3 $1 $4 $6 $8 $9 }
+ | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';'
+ {% primCall $1 $4 $6 }
+ -- stmt-level macros, stealing syntax from ordinary C-- function calls.
+ -- Perhaps we ought to use the %%-form?
+ | NAME '(' exprs0 ')' ';'
+ {% stmtMacro $1 $3 }
+ | 'switch' maybe_range expr '{' arms default '}'
+ { do as <- sequence $5; doSwitch $2 $3 as $6 }
+ | 'goto' NAME ';'
+ { do l <- lookupLabel $2; emit (mkBranch l) }
+ | 'return' '(' exprs0 ')' ';'
+ { doReturn $3 }
+ | 'jump' expr vols ';'
+ { doRawJump $2 $3 }
+ | 'jump' expr '(' exprs0 ')' ';'
+ { doJumpWithStack $2 [] $4 }
+ | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';'
+ { doJumpWithStack $2 $4 $7 }
+ | 'call' expr '(' exprs0 ')' ';'
+ { doCall $2 [] $4 }
+ | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';'
+ { doCall $6 $2 $8 }
+ | 'if' bool_expr cond_likely 'goto' NAME
+ { do l <- lookupLabel $5; cmmRawIf $2 l $3 }
+ | 'if' bool_expr cond_likely '{' body '}' else
+ { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 }
+ | 'push' '(' exprs0 ')' maybe_body
+ { pushStackFrame $3 $5 }
+ | 'reserve' expr '=' lreg maybe_body
+ { reserveStackFrame $2 $4 $5 }
+ | 'unwind' unwind_regs ';'
+ { $2 >>= code . emitUnwind }
+
+unwind_regs
+ :: { CmmParse [(GlobalReg, Maybe CmmExpr)] }
+ : GLOBALREG '=' expr_or_unknown ',' unwind_regs
+ { do e <- $3; rest <- $5; return (($1, e) : rest) }
+ | GLOBALREG '=' expr_or_unknown
+ { do e <- $3; return [($1, e)] }
+
+-- | Used by unwind to indicate unknown unwinding values.
+expr_or_unknown
+ :: { CmmParse (Maybe CmmExpr) }
+ : 'return'
+ { do return Nothing }
+ | expr
+ { do e <- $1; return (Just e) }
+
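+-- For example, "unwind Sp = Sp + 8, R1 = return;" records that Sp can be
+-- recovered as Sp + 8 at this point, while R1's unwinding value is unknown.
+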
+foreignLabel :: { CmmParse CmmExpr }
+ : NAME { return (CmmLit (CmmLabel (mkForeignLabel $1 Nothing ForeignLabelInThisPackage IsFunction))) }
+
+opt_never_returns :: { CmmReturnInfo }
+ : { CmmMayReturn }
+ | 'never' 'returns' { CmmNeverReturns }
+
+bool_expr :: { CmmParse BoolExpr }
+ : bool_op { $1 }
+ | expr { do e <- $1; return (BoolTest e) }
+
+bool_op :: { CmmParse BoolExpr }
+ : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3;
+ return (BoolAnd e1 e2) }
+ | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3;
+ return (BoolOr e1 e2) }
+ | '!' bool_expr { do e <- $2; return (BoolNot e) }
+ | '(' bool_op ')' { $2 }
+
+safety :: { Safety }
+ : {- empty -} { PlayRisky }
+ | STRING {% parseSafety $1 }
+
+vols :: { [GlobalReg] }
+ : '[' ']' { [] }
+ | '[' '*' ']' {% do df <- getDynFlags
+ ; return (realArgRegsCover df) }
+ -- All of them. See comment attached
+ -- to realArgRegsCover
+ | '[' globals ']' { $2 }
+
+globals :: { [GlobalReg] }
+ : GLOBALREG { [$1] }
+ | GLOBALREG ',' globals { $1 : $3 }
+
+maybe_range :: { Maybe (Integer,Integer) }
+ : '[' INT '..' INT ']' { Just ($2, $4) }
+ | {- empty -} { Nothing }
+
+arms :: { [CmmParse ([Integer],Either BlockId (CmmParse ()))] }
+ : {- empty -} { [] }
+ | arm arms { $1 : $2 }
+
+arm :: { CmmParse ([Integer],Either BlockId (CmmParse ())) }
+ : 'case' ints ':' arm_body { do b <- $4; return ($2, b) }
+
+arm_body :: { CmmParse (Either BlockId (CmmParse ())) }
+ : '{' body '}' { return (Right (withSourceNote $1 $3 $2)) }
+ | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) }
+
+ints :: { [Integer] }
+ : INT { [ $1 ] }
+ | INT ',' ints { $1 : $3 }
+
+default :: { Maybe (CmmParse ()) }
+ : 'default' ':' '{' body '}' { Just (withSourceNote $3 $5 $4) }
+ -- taking a few liberties with the C-- syntax here; C-- doesn't have
+ -- 'default' branches
+ | {- empty -} { Nothing }
+
+-- Note: OldCmm doesn't support a first class 'else' statement, though
+-- CmmNode does.
+else :: { CmmParse () }
+ : {- empty -} { return () }
+ | 'else' '{' body '}' { withSourceNote $2 $4 $3 }
+
+cond_likely :: { Maybe Bool }
+ : '(' 'likely' ':' 'True' ')' { Just True }
+ | '(' 'likely' ':' 'False' ')' { Just False }
+ | {- empty -} { Nothing }
+
+
+-- we have to write this out longhand so that Happy's precedence rules
+-- can kick in.
+expr :: { CmmParse CmmExpr }
+ : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] }
+ | expr '*' expr { mkMachOp MO_Mul [$1,$3] }
+ | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] }
+ | expr '-' expr { mkMachOp MO_Sub [$1,$3] }
+ | expr '+' expr { mkMachOp MO_Add [$1,$3] }
+ | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] }
+ | expr '<<' expr { mkMachOp MO_Shl [$1,$3] }
+ | expr '&' expr { mkMachOp MO_And [$1,$3] }
+ | expr '^' expr { mkMachOp MO_Xor [$1,$3] }
+ | expr '|' expr { mkMachOp MO_Or [$1,$3] }
+ | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] }
+ | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] }
+ | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] }
+ | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] }
+ | expr '!=' expr { mkMachOp MO_Ne [$1,$3] }
+ | expr '==' expr { mkMachOp MO_Eq [$1,$3] }
+ | '~' expr { mkMachOp MO_Not [$2] }
+ | '-' expr { mkMachOp MO_S_Neg [$2] }
+ | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ;
+ return (mkMachOp mo [$1,$5]) } }
+ | expr0 { $1 }
+
+expr0 :: { CmmParse CmmExpr }
+ : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) }
+ | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) }
+ | STRING { do s <- code (newStringCLit $1);
+ return (CmmLit s) }
+ | reg { $1 }
+ | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) }
+ | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 }
+ | '(' expr ')' { $2 }
+
+
+-- leaving out the type of a literal gives you the native word size in C--
+maybe_ty :: { CmmType }
+ : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags }
+ | '::' type { $2 }
+
+cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] }
+ : {- empty -} { [] }
+ | cmm_hint_exprs { $1 }
+
+cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] }
+ : cmm_hint_expr { [$1] }
+ | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 }
+
+cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) }
+ : expr { do e <- $1;
+ return (e, inferCmmHint e) }
+ | expr STRING {% do h <- parseCmmHint $2;
+ return $ do
+ e <- $1; return (e, h) }
+
+exprs0 :: { [CmmParse CmmExpr] }
+ : {- empty -} { [] }
+ | exprs { $1 }
+
+exprs :: { [CmmParse CmmExpr] }
+ : expr { [ $1 ] }
+ | expr ',' exprs { $1 : $3 }
+
+reg :: { CmmParse CmmExpr }
+ : NAME { lookupName $1 }
+ | GLOBALREG { return (CmmReg (CmmGlobal $1)) }
+
+foreign_results :: { [CmmParse (LocalReg, ForeignHint)] }
+ : {- empty -} { [] }
+ | '(' foreign_formals ')' '=' { $2 }
+
+foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] }
+ : foreign_formal { [$1] }
+ | foreign_formal ',' { [$1] }
+ | foreign_formal ',' foreign_formals { $1 : $3 }
+
+foreign_formal :: { CmmParse (LocalReg, ForeignHint) }
+ : local_lreg { do e <- $1; return (e, inferCmmHint (CmmReg (CmmLocal e))) }
+ | STRING local_lreg {% do h <- parseCmmHint $1;
+ return $ do
+ e <- $2; return (e,h) }
+
+local_lreg :: { CmmParse LocalReg }
+ : NAME { do e <- lookupName $1;
+ return $
+ case e of
+ CmmReg (CmmLocal r) -> r
+ other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") }
+
+lreg :: { CmmParse CmmReg }
+ : NAME { do e <- lookupName $1;
+ return $
+ case e of
+ CmmReg r -> r
+ other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") }
+ | GLOBALREG { return (CmmGlobal $1) }
+
+maybe_formals :: { Maybe [CmmParse LocalReg] }
+ : {- empty -} { Nothing }
+ | '(' formals0 ')' { Just $2 }
+
+formals0 :: { [CmmParse LocalReg] }
+ : {- empty -} { [] }
+ | formals { $1 }
+
+formals :: { [CmmParse LocalReg] }
+ : formal ',' { [$1] }
+ | formal { [$1] }
+ | formal ',' formals { $1 : $3 }
+
+formal :: { CmmParse LocalReg }
+ : type NAME { newLocal $1 $2 }
+
+type :: { CmmType }
+ : 'bits8' { b8 }
+ | typenot8 { $1 }
+
+typenot8 :: { CmmType }
+ : 'bits16' { b16 }
+ | 'bits32' { b32 }
+ | 'bits64' { b64 }
+ | 'bits128' { b128 }
+ | 'bits256' { b256 }
+ | 'bits512' { b512 }
+ | 'float32' { f32 }
+ | 'float64' { f64 }
+ | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags }
+
+{
+section :: String -> SectionType
+section "text" = Text
+section "data" = Data
+section "rodata" = ReadOnlyData
+section "relrodata" = RelocatableReadOnlyData
+section "bss" = UninitialisedData
+section s = OtherSection s
+
+mkString :: String -> CmmStatic
+mkString s = CmmString (BS8.pack s)
+
+-- |
+-- Given an info table, decide what the entry convention for the proc
+-- is. That is, for an INFO_TABLE_RET we want the return convention,
+-- otherwise it is a NativeNodeCall.
+--
+infoConv :: Maybe CmmInfoTable -> Convention
+infoConv Nothing = NativeNodeCall
+infoConv (Just info)
+ | isStackRep (cit_rep info) = NativeReturn
+ | otherwise = NativeNodeCall
+
+-- mkMachOp infers the type of the MachOp from the type of its first
+-- argument. We assume that this is correct: for MachOps that don't have
+-- symmetrical args (e.g. shift ops), the first arg determines the type of
+-- the op.
+mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr
+mkMachOp fn args = do
+ dflags <- getDynFlags
+ arg_exprs <- sequence args
+ return (CmmMachOp (fn (typeWidth (cmmExprType dflags (head arg_exprs)))) arg_exprs)
+
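+-- For example, parsing "x + y" produces mkMachOp MO_Add over the two
+-- argument parsers, so the addition is performed at the width of x.
+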
+getLit :: CmmExpr -> CmmLit
+getLit (CmmLit l) = l
+getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r
+getLit _ = panic "invalid literal" -- TODO messy failure
+
+nameToMachOp :: FastString -> PD (Width -> MachOp)
+nameToMachOp name =
+ case lookupUFM machOps name of
+ Nothing -> fail ("unknown primitive " ++ unpackFS name)
+ Just m -> return m
+
+exprOp :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse CmmExpr)
+exprOp name args_code = do
+ dflags <- getDynFlags
+ case lookupUFM (exprMacros dflags) name of
+ Just f -> return $ do
+ args <- sequence args_code
+ return (f args)
+ Nothing -> do
+ mo <- nameToMachOp name
+ return $ mkMachOp mo args_code
+
+exprMacros :: DynFlags -> UniqFM ([CmmExpr] -> CmmExpr)
+exprMacros dflags = listToUFM [
+ ( fsLit "ENTRY_CODE", \ [x] -> entryCode dflags x ),
+ ( fsLit "INFO_PTR", \ [x] -> closureInfoPtr dflags x ),
+ ( fsLit "STD_INFO", \ [x] -> infoTable dflags x ),
+ ( fsLit "FUN_INFO", \ [x] -> funInfoTable dflags x ),
+ ( fsLit "GET_ENTRY", \ [x] -> entryCode dflags (closureInfoPtr dflags x) ),
+ ( fsLit "GET_STD_INFO", \ [x] -> infoTable dflags (closureInfoPtr dflags x) ),
+ ( fsLit "GET_FUN_INFO", \ [x] -> funInfoTable dflags (closureInfoPtr dflags x) ),
+ ( fsLit "INFO_TYPE", \ [x] -> infoTableClosureType dflags x ),
+ ( fsLit "INFO_PTRS", \ [x] -> infoTablePtrs dflags x ),
+ ( fsLit "INFO_NPTRS", \ [x] -> infoTableNonPtrs dflags x )
+ ]
+
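+-- These macros are invoked with the %-syntax in expressions; for example,
+-- "%GET_ENTRY(R1)" expands to the entry code of R1's info pointer.
+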
+-- we understand a subset of C-- primitives:
+machOps = listToUFM $
+ map (\(x, y) -> (mkFastString x, y)) [
+ ( "add", MO_Add ),
+ ( "sub", MO_Sub ),
+ ( "eq", MO_Eq ),
+ ( "ne", MO_Ne ),
+ ( "mul", MO_Mul ),
+ ( "neg", MO_S_Neg ),
+ ( "quot", MO_S_Quot ),
+ ( "rem", MO_S_Rem ),
+ ( "divu", MO_U_Quot ),
+ ( "modu", MO_U_Rem ),
+
+ ( "ge", MO_S_Ge ),
+ ( "le", MO_S_Le ),
+ ( "gt", MO_S_Gt ),
+ ( "lt", MO_S_Lt ),
+
+ ( "geu", MO_U_Ge ),
+ ( "leu", MO_U_Le ),
+ ( "gtu", MO_U_Gt ),
+ ( "ltu", MO_U_Lt ),
+
+ ( "and", MO_And ),
+ ( "or", MO_Or ),
+ ( "xor", MO_Xor ),
+ ( "com", MO_Not ),
+ ( "shl", MO_Shl ),
+ ( "shrl", MO_U_Shr ),
+ ( "shra", MO_S_Shr ),
+
+ ( "fadd", MO_F_Add ),
+ ( "fsub", MO_F_Sub ),
+ ( "fneg", MO_F_Neg ),
+ ( "fmul", MO_F_Mul ),
+ ( "fquot", MO_F_Quot ),
+
+ ( "feq", MO_F_Eq ),
+ ( "fne", MO_F_Ne ),
+ ( "fge", MO_F_Ge ),
+ ( "fle", MO_F_Le ),
+ ( "fgt", MO_F_Gt ),
+ ( "flt", MO_F_Lt ),
+
+ ( "lobits8", flip MO_UU_Conv W8 ),
+ ( "lobits16", flip MO_UU_Conv W16 ),
+ ( "lobits32", flip MO_UU_Conv W32 ),
+ ( "lobits64", flip MO_UU_Conv W64 ),
+
+ ( "zx16", flip MO_UU_Conv W16 ),
+ ( "zx32", flip MO_UU_Conv W32 ),
+ ( "zx64", flip MO_UU_Conv W64 ),
+
+ ( "sx16", flip MO_SS_Conv W16 ),
+ ( "sx32", flip MO_SS_Conv W32 ),
+ ( "sx64", flip MO_SS_Conv W64 ),
+
+ ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode
+ ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode
+ ( "f2i8", flip MO_FS_Conv W8 ),
+ ( "f2i16", flip MO_FS_Conv W16 ),
+ ( "f2i32", flip MO_FS_Conv W32 ),
+ ( "f2i64", flip MO_FS_Conv W64 ),
+ ( "i2f32", flip MO_SF_Conv W32 ),
+ ( "i2f64", flip MO_SF_Conv W64 )
+ ]
+
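+-- These names are used with the backtick syntax; for example, "x `lt` y"
+-- parses as a signed comparison (MO_S_Lt) at the width of x.
+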
+callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr]))
+callishMachOps = listToUFM $
+ map (\(x, y) -> (mkFastString x, y)) [
+ ( "read_barrier", (MO_ReadBarrier,)),
+ ( "write_barrier", (MO_WriteBarrier,)),
+ ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
+ ( "memset", memcpyLikeTweakArgs MO_Memset ),
+ ( "memmove", memcpyLikeTweakArgs MO_Memmove ),
+ ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ),
+
+ ("prefetch0", (MO_Prefetch_Data 0,)),
+ ("prefetch1", (MO_Prefetch_Data 1,)),
+ ("prefetch2", (MO_Prefetch_Data 2,)),
+ ("prefetch3", (MO_Prefetch_Data 3,)),
+
+ ( "popcnt8", (MO_PopCnt W8,)),
+ ( "popcnt16", (MO_PopCnt W16,)),
+ ( "popcnt32", (MO_PopCnt W32,)),
+ ( "popcnt64", (MO_PopCnt W64,)),
+
+ ( "pdep8", (MO_Pdep W8,)),
+ ( "pdep16", (MO_Pdep W16,)),
+ ( "pdep32", (MO_Pdep W32,)),
+ ( "pdep64", (MO_Pdep W64,)),
+
+ ( "pext8", (MO_Pext W8,)),
+ ( "pext16", (MO_Pext W16,)),
+ ( "pext32", (MO_Pext W32,)),
+ ( "pext64", (MO_Pext W64,)),
+
+ ( "cmpxchg8", (MO_Cmpxchg W8,)),
+ ( "cmpxchg16", (MO_Cmpxchg W16,)),
+ ( "cmpxchg32", (MO_Cmpxchg W32,)),
+ ( "cmpxchg64", (MO_Cmpxchg W64,))
+
+ -- ToDo: the rest, maybe
+ -- edit: which rest?
+    -- also: how do we tell cmm-lint how to type check callish machops?
+ ]
+ where
+ memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr])
+ memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument"
+ memcpyLikeTweakArgs op args@(_:_) =
+ (op align, args')
+ where
+ args' = init args
+ align = case last args of
+ CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger
+ e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e)
+ -- The alignment of memcpy-ish operations must be a
+ -- compile-time constant. We verify this here, passing it around
+ -- in the MO_* constructor. In order to do this, however, we
+ -- must intercept the arguments in primCall.
+
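+-- For example, with the argument tweaking above, "prim %memcpy(dst, src, n, 8);"
+-- becomes MO_Memcpy 8 applied to the remaining arguments [dst, src, n].
+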
+parseSafety :: String -> PD Safety
+parseSafety "safe" = return PlaySafe
+parseSafety "unsafe" = return PlayRisky
+parseSafety "interruptible" = return PlayInterruptible
+parseSafety str = fail ("unrecognised safety: " ++ str)
+
+parseCmmHint :: String -> PD ForeignHint
+parseCmmHint "ptr" = return AddrHint
+parseCmmHint "signed" = return SignedHint
+parseCmmHint str = fail ("unrecognised hint: " ++ str)
+
+-- labels are always pointers, so we might as well infer the hint
+inferCmmHint :: CmmExpr -> ForeignHint
+inferCmmHint (CmmLit (CmmLabel _)) = AddrHint
+inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint
+inferCmmHint _ = NoHint
+
+isPtrGlobalReg Sp = True
+isPtrGlobalReg SpLim = True
+isPtrGlobalReg Hp = True
+isPtrGlobalReg HpLim = True
+isPtrGlobalReg CCCS = True
+isPtrGlobalReg CurrentTSO = True
+isPtrGlobalReg CurrentNursery = True
+isPtrGlobalReg (VanillaReg _ VGcPtr) = True
+isPtrGlobalReg _ = False
+
+happyError :: PD a
+happyError = PD $ \_ s -> unP srcParseFail s
+
+-- -----------------------------------------------------------------------------
+-- Statement-level macros
+
+stmtMacro :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse ())
+stmtMacro fun args_code = do
+ case lookupUFM stmtMacros fun of
+ Nothing -> fail ("unknown macro: " ++ unpackFS fun)
+ Just fcode -> return $ do
+ args <- sequence args_code
+ code (fcode args)
+
+stmtMacros :: UniqFM ([CmmExpr] -> FCode ())
+stmtMacros = listToUFM [
+ ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ),
+ ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ),
+
+ ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ),
+ ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ),
+
+ -- completely generic heap and stack checks, for use in high-level cmm.
+ ( fsLit "HP_CHK_GEN", \[bytes] ->
+ heapStackCheckGen Nothing (Just bytes) ),
+ ( fsLit "STK_CHK_GEN", \[] ->
+ heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ),
+
+ -- A stack check for a fixed amount of stack. Sounds a bit strange, but
+ -- we use the stack for a bit of temporary storage in a couple of primops
+ ( fsLit "STK_CHK_GEN_N", \[bytes] ->
+ heapStackCheckGen (Just bytes) Nothing ),
+
+ -- A stack check on entry to a thunk, where the argument is the thunk pointer.
+ ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())),
+
+ ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ),
+ ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ),
+
+ ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ),
+ ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ),
+
+ ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ),
+ ( fsLit "SET_HDR", \[ptr,info,ccs] ->
+ emitSetDynHdr ptr info ccs ),
+ ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] ->
+ tickyAllocPrim hdr goods slop ),
+ ( fsLit "TICK_ALLOC_PAP", \[goods,slop] ->
+ tickyAllocPAP goods slop ),
+ ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] ->
+ tickyAllocThunk goods slop ),
+ ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode reg )
+ ]
+
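+-- Statement macros are invoked with ordinary call syntax; for example,
+-- "SET_HDR(ptr, info, ccs);" in a .cmm file expands to emitSetDynHdr.
+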
+emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode ()
+emitPushUpdateFrame sp e = do
+ dflags <- getDynFlags
+ emitUpdateFrame dflags sp mkUpdInfoLabel e
+
+pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse ()
+pushStackFrame fields body = do
+ dflags <- getDynFlags
+ exprs <- sequence fields
+ updfr_off <- getUpdFrameOff
+ let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old
+ [] updfr_off exprs
+ emit g
+ withUpdFrameOff new_updfr_off body
+
+reserveStackFrame
+ :: CmmParse CmmExpr
+ -> CmmParse CmmReg
+ -> CmmParse ()
+ -> CmmParse ()
+reserveStackFrame psize preg body = do
+ dflags <- getDynFlags
+ old_updfr_off <- getUpdFrameOff
+ reg <- preg
+ esize <- psize
+ let size = case constantFoldExpr dflags esize of
+ CmmLit (CmmInt n _) -> n
+ _other -> pprPanic "CmmParse: not a compile-time integer: "
+ (ppr esize)
+ let frame = old_updfr_off + wORD_SIZE dflags * fromIntegral size
+ emitAssign reg (CmmStackSlot Old frame)
+ withUpdFrameOff frame body
+
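+-- For example, "reserve 4 = frame { ... }" (illustrative; frame must be a
+-- declared local register) reserves four words of stack, points frame at
+-- the reserved area, and runs the body with the update-frame offset moved
+-- past it.
+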
+profilingInfo dflags desc_str ty_str
+ = if not (gopt Opt_SccProfilingOn dflags)
+ then NoProfilingInfo
+ else ProfilingInfo (BS8.pack desc_str) (BS8.pack ty_str)
+
+staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse ()
+staticClosure pkg cl_label info payload
+ = do dflags <- getDynFlags
+ let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
+ code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits
+
+foreignCall
+ :: String
+ -> [CmmParse (LocalReg, ForeignHint)]
+ -> CmmParse CmmExpr
+ -> [CmmParse (CmmExpr, ForeignHint)]
+ -> Safety
+ -> CmmReturnInfo
+ -> PD (CmmParse ())
+foreignCall conv_string results_code expr_code args_code safety ret
+ = do conv <- case conv_string of
+ "C" -> return CCallConv
+ "stdcall" -> return StdCallConv
+ _ -> fail ("unknown calling convention: " ++ conv_string)
+ return $ do
+ dflags <- getDynFlags
+ results <- sequence results_code
+ expr <- expr_code
+ args <- sequence args_code
+ let
+ expr' = adjCallTarget dflags conv expr args
+ (arg_exprs, arg_hints) = unzip args
+ (res_regs, res_hints) = unzip results
+ fc = ForeignConvention conv arg_hints res_hints ret
+ target = ForeignTarget expr' fc
+ _ <- code $ emitForeignCall safety res_regs target arg_exprs
+ return ()
+
+
+doReturn :: [CmmParse CmmExpr] -> CmmParse ()
+doReturn exprs_code = do
+ dflags <- getDynFlags
+ exprs <- sequence exprs_code
+ updfr_off <- getUpdFrameOff
+ emit (mkReturnSimple dflags exprs updfr_off)
+
+mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturnSimple dflags actuals updfr_off =
+ mkReturn dflags e actuals updfr_off
+ where e = entryCode dflags (CmmLoad (CmmStackSlot Old updfr_off)
+ (gcWord dflags))
+
+doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse ()
+doRawJump expr_code vols = do
+ dflags <- getDynFlags
+ expr <- expr_code
+ updfr_off <- getUpdFrameOff
+ emit (mkRawJump dflags expr updfr_off vols)
+
+doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr]
+ -> [CmmParse CmmExpr] -> CmmParse ()
+doJumpWithStack expr_code stk_code args_code = do
+ dflags <- getDynFlags
+ expr <- expr_code
+ stk_args <- sequence stk_code
+ args <- sequence args_code
+ updfr_off <- getUpdFrameOff
+ emit (mkJumpExtra dflags NativeNodeCall expr args updfr_off stk_args)
+
+doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr]
+ -> CmmParse ()
+doCall expr_code res_code args_code = do
+ dflags <- getDynFlags
+ expr <- expr_code
+ args <- sequence args_code
+ ress <- sequence res_code
+ updfr_off <- getUpdFrameOff
+ c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off []
+ emit c
+
+adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ]
+ -> CmmExpr
+-- On Windows, we have to add the '@N' suffix to the label when making
+-- a call with the stdcall calling convention.
+adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args
+ | platformOS (targetPlatform dflags) == OSMinGW32
+ = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args))))
+ where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e)))
+ -- c.f. CgForeignCall.emitForeignCall
+adjCallTarget _ _ expr _
+ = expr
+
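+-- For example, on 32-bit Windows a stdcall call to f with two word-sized
+-- arguments is retargeted to the label "f@8".
+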
+primCall
+ :: [CmmParse (CmmFormal, ForeignHint)]
+ -> FastString
+ -> [CmmParse CmmExpr]
+ -> PD (CmmParse ())
+primCall results_code name args_code
+ = case lookupUFM callishMachOps name of
+ Nothing -> fail ("unknown primitive " ++ unpackFS name)
+ Just f -> return $ do
+ results <- sequence results_code
+ args <- sequence args_code
+ let (p, args') = f args
+ code (emitPrimCall (map fst results) p args')
+
+doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse ()
+doStore rep addr_code val_code
+ = do dflags <- getDynFlags
+ addr <- addr_code
+ val <- val_code
+ -- if the specified store type does not match the type of the expr
+ -- on the rhs, then we insert a coercion that will cause the type
+ -- mismatch to be flagged by cmm-lint. If we don't do this, then
+ -- the store will happen at the wrong type, and the error will not
+ -- be noticed.
+ let val_width = typeWidth (cmmExprType dflags val)
+ rep_width = typeWidth rep
+ let coerce_val
+ | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val]
+ | otherwise = val
+ emitStore addr coerce_val
+
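+-- For example, storing a word-sized value with "bits8[addr] = e;" inserts
+-- an MO_UU_Conv from the word width down to W8 before the store.
+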
+-- -----------------------------------------------------------------------------
+-- If-then-else and boolean expressions
+
+data BoolExpr
+ = BoolExpr `BoolAnd` BoolExpr
+ | BoolExpr `BoolOr` BoolExpr
+ | BoolNot BoolExpr
+ | BoolTest CmmExpr
+
+-- ToDo: smart constructors which simplify the boolean expression.
+
+cmmIfThenElse cond then_part else_part likely = do
+ then_id <- newBlockId
+ join_id <- newBlockId
+ c <- cond
+ emitCond c then_id likely
+ else_part
+ emit (mkBranch join_id)
+ emitLabel then_id
+ then_part
+ -- fall through to join
+ emitLabel join_id
+
+cmmRawIf cond then_id likely = do
+ c <- cond
+ emitCond c then_id likely
+
+-- 'emitCond cond true_id' emits code to test whether the cond is true,
+-- branching to true_id if so, and falling through otherwise.
+emitCond (BoolTest e) then_id likely = do
+ else_id <- newBlockId
+ emit (mkCbranch e then_id else_id likely)
+ emitLabel else_id
+emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely
+ | Just op' <- maybeInvertComparison op
+ = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely)
+emitCond (BoolNot e) then_id likely = do
+ else_id <- newBlockId
+ emitCond e else_id likely
+ emit (mkBranch then_id)
+ emitLabel else_id
+emitCond (e1 `BoolOr` e2) then_id likely = do
+ emitCond e1 then_id likely
+ emitCond e2 then_id likely
+emitCond (e1 `BoolAnd` e2) then_id likely = do
+ -- we'd like to invert one of the conditionals here to avoid an
+ -- extra branch instruction, but we can't use maybeInvertComparison
+ -- here because we can't look too closely at the expression since
+ -- we're in a loop.
+ and_id <- newBlockId
+ else_id <- newBlockId
+ emitCond e1 and_id likely
+ emit (mkBranch else_id)
+ emitLabel and_id
+ emitCond e2 then_id likely
+ emitLabel else_id
+
+-- -----------------------------------------------------------------------------
+-- Source code notes
+
+-- | Generate a source note spanning from "a" to "b" (inclusive), then
+-- proceed with parsing. This allows debugging tools to reason about
+-- locations in Cmm code.
+withSourceNote :: Located a -> Located b -> CmmParse c -> CmmParse c
+withSourceNote a b parse = do
+ name <- getName
+ case combineSrcSpans (getLoc a) (getLoc b) of
+ RealSrcSpan span -> code (emitTick (SourceNote span name)) >> parse
+ _other -> parse
+
+-- -----------------------------------------------------------------------------
+-- Table jumps
+
+-- We use a simplified form of C-- switch statements for now. A
+-- switch statement always compiles to a table jump. Each arm can
+-- specify a list of values (not ranges), and there can be a single
+-- default branch. The range of the table is given either by the
+-- optional range on the switch (e.g. switch [0..7] {...}), or by
+-- the minimum/maximum values from the branches.
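+--
+-- For example (an illustrative snippet, assuming the target labels exist):
+--
+--   switch [0..3] (R1 & 3) {
+--       case 0: { goto zero; }
+--       case 1, 2: { goto small; }
+--       default: { goto other; }
+--   }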
+
+doSwitch :: Maybe (Integer,Integer)
+ -> CmmParse CmmExpr
+ -> [([Integer],Either BlockId (CmmParse ()))]
+ -> Maybe (CmmParse ()) -> CmmParse ()
+doSwitch mb_range scrut arms deflt
+ = do
+ -- Compile code for the default branch
+ dflt_entry <-
+ case deflt of
+ Nothing -> return Nothing
+ Just e -> do b <- forkLabelledCode e; return (Just b)
+
+ -- Compile each case branch
+ table_entries <- mapM emitArm arms
+ let table = M.fromList (concat table_entries)
+
+ dflags <- getDynFlags
+ let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range
+
+ expr <- scrut
+ -- ToDo: check for out of range and jump to default if necessary
+ emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table)
+ where
+ emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)]
+ emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ]
+ emitArm (ints,Right code) = do
+ blockid <- forkLabelledCode code
+ return [ (i,blockid) | i <- ints ]
+
+forkLabelledCode :: CmmParse () -> CmmParse BlockId
+forkLabelledCode p = do
+ (_,ag) <- getCodeScoped p
+ l <- newBlockId
+ emitOutOfLine l ag
+ return l
+
+-- -----------------------------------------------------------------------------
+-- Putting it all together
+
+-- The initial environment: we define some constants that the compiler
+-- knows about here.
+initEnv :: DynFlags -> Env
+initEnv dflags = listToUFM [
+ ( fsLit "SIZEOF_StgHeader",
+ VarN (CmmLit (CmmInt (fromIntegral (fixedHdrSize dflags)) (wordWidth dflags)) )),
+ ( fsLit "SIZEOF_StgInfoTable",
+ VarN (CmmLit (CmmInt (fromIntegral (stdInfoTableSizeB dflags)) (wordWidth dflags)) ))
+ ]
+
+parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe CmmGroup)
+parseCmmFile dflags filename = withTiming dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ()) $ do
+ buf <- hGetStringBuffer filename
+ let
+ init_loc = mkRealSrcLoc (mkFastString filename) 1 1
+ init_state = (mkPState dflags buf init_loc) { lex_state = [0] }
+ -- reset the lex_state: the Lexer monad leaves some stuff
+ -- in there we don't want.
+ case unPD cmmParse dflags init_state of
+ PFailed pst ->
+ return (getMessages pst dflags, Nothing)
+ POk pst code -> do
+ st <- initC
+ let fcode = getCmm $ unEC code "global" (initEnv dflags) [] >> return ()
+ (cmm,_) = runC dflags no_module st fcode
+ let ms = getMessages pst dflags
+ if (errorsFound dflags ms)
+ then return (ms, Nothing)
+ else return (ms, Just cmm)
+ where
+ no_module = panic "parseCmmFile: no module"
+}
diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs
new file mode 100644
index 0000000000..6db9e23ee1
--- /dev/null
+++ b/compiler/GHC/Cmm/Pipeline.hs
@@ -0,0 +1,367 @@
+{-# LANGUAGE BangPatterns #-}
+
+module GHC.Cmm.Pipeline (
+ -- | Converts C-- with an implicit stack and native C-- calls into
+ -- optimized, CPS converted and native-call-less C--. The latter
+ -- C-- can be used to generate assembly.
+ cmmPipeline
+) where
+
+import GhcPrelude
+
+import GHC.Cmm
+import GHC.Cmm.Lint
+import GHC.Cmm.Info.Build
+import GHC.Cmm.CommonBlockElim
+import GHC.Cmm.Switch.Implement
+import GHC.Cmm.ProcPoint
+import GHC.Cmm.ContFlowOpt
+import GHC.Cmm.LayoutStack
+import GHC.Cmm.Sink
+import GHC.Cmm.Dataflow.Collections
+
+import UniqSupply
+import DynFlags
+import ErrUtils
+import HscTypes
+import Control.Monad
+import Outputable
+import GHC.Platform
+
+-----------------------------------------------------------------------------
+-- | Top level driver for C-- pipeline
+-----------------------------------------------------------------------------
+
+cmmPipeline
+ :: HscEnv -- Compilation env including
+ -- dynamic flags: -dcmm-lint -ddump-cmm-cps
+ -> ModuleSRTInfo -- Info about SRTs generated so far
+ -> CmmGroup -- Input C-- with Procedures
+ -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C--
+
+cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $
+ do let dflags = hsc_dflags hsc_env
+
+ tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog
+
+ (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops
+ dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms)
+
+ return (srtInfo, cmms)
+
+ where forceRes (info, group) =
+ info `seq` foldr (\decl r -> decl `seq` r) () group
+
+ dflags = hsc_dflags hsc_env
+
+cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
+cpsTop _ p@(CmmData {}) = return (mapEmpty, [p])
+cpsTop hsc_env proc =
+ do
+ ----------- Control-flow optimisations ----------------------------------
+
+ -- The first round of control-flow optimisation speeds up the
+ -- later passes by removing lots of empty blocks, so we do it
+ -- even when optimisation isn't turned on.
+ --
+ CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-}
+ return $ cmmCfgOptsProc splitting_proc_points proc
+ dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
+
+ let !TopInfo {stack_info=StackInfo { arg_space = entry_off
+ , do_layout = do_layout }} = h
+
+ ----------- Eliminate common blocks -------------------------------------
+ g <- {-# SCC "elimCommonBlocks" #-}
+ condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
+ Opt_D_dump_cmm_cbe "Post common block elimination"
+
+ -- Any work storing block Labels must be performed _after_
+ -- elimCommonBlocks
+
+ ----------- Implement switches ------------------------------------------
+ g <- {-# SCC "createSwitchPlans" #-}
+ runUniqSM $ cmmImplementSwitchPlans dflags g
+ dump Opt_D_dump_cmm_switch "Post switch plan" g
+
+ ----------- Proc points -------------------------------------------------
+ let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
+ proc_points <-
+ if splitting_proc_points
+ then do
+ pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $
+ minimalProcPointSet (targetPlatform dflags) call_pps g
+ dumpWith dflags Opt_D_dump_cmm_proc "Proc points"
+ FormatCMM (ppr l $$ ppr pp $$ ppr g)
+ return pp
+ else
+ return call_pps
+
+ ----------- Layout the stack and manifest Sp ----------------------------
+ (g, stackmaps) <-
+ {-# SCC "layoutStack" #-}
+ if do_layout
+ then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g
+ else return (g, mapEmpty)
+ dump Opt_D_dump_cmm_sp "Layout Stack" g
+
+ ----------- Sink and inline assignments --------------------------------
+ g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
+ condPass Opt_CmmSink (cmmSink dflags) g
+ Opt_D_dump_cmm_sink "Sink assignments"
+
+ ------------- CAF analysis ----------------------------------------------
+ let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g
+ dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv)
+
+ g <- if splitting_proc_points
+ then do
+ ------------- Split into separate procedures -----------------------
+ let pp_map = {-# SCC "procPointAnalysis" #-}
+ procPointAnalysis proc_points g
+ dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map"
+ FormatCMM (ppr pp_map)
+ g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
+ splitAtProcPoints dflags l call_pps proc_points pp_map
+ (CmmProc h l v g)
+ dumps Opt_D_dump_cmm_split "Post splitting" g
+ return g
+ else do
+ -- attach info tables to return points
+ return $ [attachContInfoTables call_pps (CmmProc h l v g)]
+
+ ------------- Populate info tables with stack info -----------------
+ g <- {-# SCC "setInfoTableStackMap" #-}
+ return $ map (setInfoTableStackMap dflags stackmaps) g
+ dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g
+
+ ----------- Control-flow optimisations -----------------------------
+ g <- {-# SCC "cmmCfgOpts(2)" #-}
+ return $ if optLevel dflags >= 1
+ then map (cmmCfgOptsProc splitting_proc_points) g
+ else g
+ g <- return (map removeUnreachableBlocksProc g)
+ -- See Note [unreachable blocks]
+ dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
+
+ return (cafEnv, g)
+
+ where dflags = hsc_dflags hsc_env
+ platform = targetPlatform dflags
+ dump = dumpGraph dflags
+
+ dumps flag name
+ = mapM_ (dumpWith dflags flag name FormatCMM . ppr)
+
+ condPass flag pass g dumpflag dumpname =
+ if gopt flag dflags
+ then do
+ g <- return $ pass g
+ dump dumpflag dumpname g
+ return g
+ else return g
+
+ -- we don't need to split proc points for the NCG, unless
+ -- tablesNextToCode is off. The latter is because we have no
+ -- label to put on info tables for basic blocks that are not
+ -- the entry point.
+ splitting_proc_points = hscTarget dflags /= HscAsm
+ || not (tablesNextToCode dflags)
+ || -- Note [inconsistent-pic-reg]
+ usingInconsistentPicReg
+ usingInconsistentPicReg
+ = case (platformArch platform, platformOS platform, positionIndependent dflags)
+ of (ArchX86, OSDarwin, pic) -> pic
+ _ -> False
+
+-- Note [Sinking after stack layout]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- In the past we considered running the sinking pass also before stack
+-- layout, but after making some measurements we realized that:
+--
+-- a) running sinking only before stack layout produces slower
+--    code than running sinking only after stack layout
+--
+-- b) running sinking both before and after stack layout produces
+-- code that has the same performance as when running sinking
+-- only after stack layout.
+--
+-- In other words, sinking before stack layout doesn't buy us anything.
+--
+-- An interesting question is "why is it better to run sinking after
+-- stack layout"? It seems that the major reason are stores and loads
+-- generated by stack layout. Consider this code before stack layout:
+--
+-- c1E:
+-- _c1C::P64 = R3;
+-- _c1B::P64 = R2;
+-- _c1A::P64 = R1;
+-- I64[(young<c1D> + 8)] = c1D;
+-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+-- c1D:
+-- R3 = _c1C::P64;
+-- R2 = _c1B::P64;
+-- R1 = _c1A::P64;
+-- call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- Stack layout pass will save all local variables live across a call
+-- (_c1C, _c1B and _c1A in this example) on the stack just before
+-- making a call and reload them from the stack after returning from a
+-- call:
+--
+-- c1E:
+-- _c1C::P64 = R3;
+-- _c1B::P64 = R2;
+-- _c1A::P64 = R1;
+-- I64[Sp - 32] = c1D;
+-- P64[Sp - 24] = _c1A::P64;
+-- P64[Sp - 16] = _c1B::P64;
+-- P64[Sp - 8] = _c1C::P64;
+-- Sp = Sp - 32;
+-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+-- c1D:
+-- _c1A::P64 = P64[Sp + 8];
+-- _c1B::P64 = P64[Sp + 16];
+-- _c1C::P64 = P64[Sp + 24];
+-- R3 = _c1C::P64;
+-- R2 = _c1B::P64;
+-- R1 = _c1A::P64;
+-- Sp = Sp + 32;
+-- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- If we don't run sinking pass after stack layout we are basically
+-- left with such code. However, running sinking on this code can lead
+-- to significant improvements:
+--
+-- c1E:
+-- I64[Sp - 32] = c1D;
+-- P64[Sp - 24] = R1;
+-- P64[Sp - 16] = R2;
+-- P64[Sp - 8] = R3;
+-- Sp = Sp - 32;
+-- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+-- c1D:
+-- R3 = P64[Sp + 24];
+-- R2 = P64[Sp + 16];
+-- R1 = P64[Sp + 8];
+-- Sp = Sp + 32;
+-- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- Now we only have 9 assignments instead of 15.
+--
+-- There is one case when running sinking before stack layout could
+-- be beneficial. Consider this:
+--
+-- L1:
+-- x = y
+-- call f() returns L2
+-- L2: ...x...y...
+--
+-- Since both x and y are live across a call to f, they will be stored
+-- on the stack during stack layout and restored after the call:
+--
+-- L1:
+-- x = y
+-- P64[Sp - 24] = L2
+-- P64[Sp - 16] = x
+-- P64[Sp - 8] = y
+-- Sp = Sp - 24
+-- call f() returns L2
+-- L2:
+-- y = P64[Sp + 16]
+-- x = P64[Sp + 8]
+-- Sp = Sp + 24
+-- ...x...y...
+--
+-- However, if we run sinking before stack layout we would propagate x
+-- to its usage place (both x and y must be local register for this to
+-- be possible - global registers cannot be floated past a call):
+--
+-- L1:
+-- x = y
+-- call f() returns L2
+-- L2: ...y...y...
+--
+-- Thus making x dead at the call to f(). If we ran stack layout now
+-- we would generate less stores and loads:
+--
+-- L1:
+-- x = y
+-- P64[Sp - 16] = L2
+-- P64[Sp - 8] = y
+-- Sp = Sp - 16
+-- call f() returns L2
+-- L2:
+-- y = P64[Sp + 8]
+-- Sp = Sp + 16
+-- ...y...y...
+--
+-- But since we don't see any benefits from running sinking before stack
+-- layout, this situation probably doesn't arise too often in practice.
+--
+
+{- Note [inconsistent-pic-reg]
+
+On x86/Darwin, PIC is implemented by inserting a sequence like
+
+ call 1f
+ 1: popl %reg
+
+at the proc entry point, and then referring to labels as offsets from
+%reg. If we don't split proc points, then we could have many entry
+points in a proc that would need this sequence, and each entry point
+would then get a different value for %reg. If there are any join
+points, then at the join point we don't have a consistent value for
+%reg, so we don't know how to refer to labels.
+
+Hence, on x86/Darwin, we have to split proc points, and then each proc
+point will get its own PIC initialisation sequence.
+
+This isn't an issue on x86/ELF, where the sequence is
+
+ call 1f
+ 1: popl %reg
+ addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg
+
+so %reg always has a consistent value: the address of
+_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
+
+-}
+
+{- Note [unreachable blocks]
+
+The control-flow optimiser sometimes leaves unreachable blocks behind
+containing junk code. These aren't necessarily a problem, but
+removing them is good because it might save time in the native code
+generator later.
+
+-}
+
+runUniqSM :: UniqSM a -> IO a
+runUniqSM m = do
+ us <- mkSplitUniqSupply 'u'
+ return (initUs_ us m)
+
+
+dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO ()
+dumpGraph dflags flag name g = do
+ when (gopt Opt_DoCmmLinting dflags) $ do_lint g
+ dumpWith dflags flag name FormatCMM (ppr g)
+ where
+ do_lint g = case cmmLintGraph dflags g of
+ Just err -> do { fatalErrorMsg dflags err
+ ; ghcExit dflags 1
+ }
+ Nothing -> return ()
+
+dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO ()
+dumpWith dflags flag txt fmt sdoc = do
+ dumpIfSet_dyn dflags flag txt fmt sdoc
+ when (not (dopt flag dflags)) $
+ -- If `-ddump-cmm-verbose -ddump-to-file` is specified,
+ -- dump each Cmm pipeline stage output to a separate file. #16930
+ when (dopt Opt_D_dump_cmm_verbose dflags)
+ $ dumpAction dflags (mkDumpStyle dflags alwaysQualify)
+ (dumpOptionsFromFlag flag) txt fmt sdoc
+ dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc
diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs
new file mode 100644
index 0000000000..891cbd9c6d
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr.hs
@@ -0,0 +1,309 @@
+{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts, FlexibleInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of Cmm as (a superset of) C--
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+--
+-- This is where we walk over CmmNode emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+-- 1) if a value has wordRep type, the type is not appended in the
+-- output.
+-- 2) MachOps that operate over wordRep type are printed in a
+-- C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+--
+-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs
+
+module GHC.Cmm.Ppr
+ ( module GHC.Cmm.Ppr.Decl
+ , module GHC.Cmm.Ppr.Expr
+ )
+where
+
+import GhcPrelude hiding (succ)
+
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import DynFlags
+import FastString
+import Outputable
+import GHC.Cmm.Ppr.Decl
+import GHC.Cmm.Ppr.Expr
+import Util
+
+import BasicTypes
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+
+-------------------------------------------------
+-- Outputable instances
+
+instance Outputable CmmStackInfo where
+ ppr = pprStackInfo
+
+instance Outputable CmmTopInfo where
+ ppr = pprTopInfo
+
+
+instance Outputable (CmmNode e x) where
+ ppr = pprNode
+
+instance Outputable Convention where
+ ppr = pprConvention
+
+instance Outputable ForeignConvention where
+ ppr = pprForeignConvention
+
+instance Outputable ForeignTarget where
+ ppr = pprForeignTarget
+
+instance Outputable CmmReturnInfo where
+ ppr = pprReturnInfo
+
+instance Outputable (Block CmmNode C C) where
+ ppr = pprBlock
+instance Outputable (Block CmmNode C O) where
+ ppr = pprBlock
+instance Outputable (Block CmmNode O C) where
+ ppr = pprBlock
+instance Outputable (Block CmmNode O O) where
+ ppr = pprBlock
+
+instance Outputable (Graph CmmNode e x) where
+ ppr = pprGraph
+
+instance Outputable CmmGraph where
+ ppr = pprCmmGraph
+
+----------------------------------------------------------
+-- Outputting types Cmm contains
+
+pprStackInfo :: CmmStackInfo -> SDoc
+pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) =
+ text "arg_space: " <> ppr arg_space <+>
+ text "updfr_space: " <> ppr updfr_space
+
+pprTopInfo :: CmmTopInfo -> SDoc
+pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) =
+ vcat [text "info_tbls: " <> ppr info_tbl,
+ text "stack_info: " <> ppr stack_info]
+
+----------------------------------------------------------
+-- Outputting blocks and graphs
+
+pprBlock :: IndexedCO x SDoc SDoc ~ SDoc
+ => Block CmmNode e x -> IndexedCO e SDoc SDoc
+pprBlock block
+ = foldBlockNodesB3 ( ($$) . ppr
+ , ($$) . (nest 4) . ppr
+ , ($$) . (nest 4) . ppr
+ )
+ block
+ empty
+
+pprGraph :: Graph CmmNode e x -> SDoc
+pprGraph GNil = empty
+pprGraph (GUnit block) = ppr block
+pprGraph (GMany entry body exit)
+ = text "{"
+ $$ nest 2 (pprMaybeO entry $$ (vcat $ map ppr $ bodyToBlockList body) $$ pprMaybeO exit)
+ $$ text "}"
+ where pprMaybeO :: Outputable (Block CmmNode e x)
+ => MaybeO ex (Block CmmNode e x) -> SDoc
+ pprMaybeO NothingO = empty
+ pprMaybeO (JustO block) = ppr block
+
+pprCmmGraph :: CmmGraph -> SDoc
+pprCmmGraph g
+ = text "{" <> text "offset"
+ $$ nest 2 (vcat $ map ppr blocks)
+ $$ text "}"
+ where blocks = revPostorder g
+ -- revPostorder has the side-effect of discarding unreachable code,
+ -- so pretty-printed Cmm will omit any unreachable blocks. This can
+ -- sometimes be confusing.
+
+---------------------------------------------
+-- Outputting CmmNode and types which it contains
+
+pprConvention :: Convention -> SDoc
+pprConvention (NativeNodeCall {}) = text "<native-node-call-convention>"
+pprConvention (NativeDirectCall {}) = text "<native-direct-call-convention>"
+pprConvention (NativeReturn {}) = text "<native-ret-convention>"
+pprConvention Slow = text "<slow-convention>"
+pprConvention GC = text "<gc-convention>"
+
+pprForeignConvention :: ForeignConvention -> SDoc
+pprForeignConvention (ForeignConvention c args res ret) =
+ doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret
+
+pprReturnInfo :: CmmReturnInfo -> SDoc
+pprReturnInfo CmmMayReturn = empty
+pprReturnInfo CmmNeverReturns = text "never returns"
+
+pprForeignTarget :: ForeignTarget -> SDoc
+pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn
+ where
+ ppr_target :: CmmExpr -> SDoc
+ ppr_target t@(CmmLit _) = ppr t
+ ppr_target fn' = parens (ppr fn')
+
+pprForeignTarget (PrimTarget op)
+    -- HACK: We're just using a ForeignLabel to get this printed; the label
+ -- might not really be foreign.
+ = ppr
+ (CmmLabel (mkForeignLabel
+ (mkFastString (show op))
+ Nothing ForeignLabelInThisPackage IsFunction))
+
+pprNode :: CmmNode e x -> SDoc
+pprNode node = pp_node <+> pp_debug
+ where
+ pp_node :: SDoc
+ pp_node = sdocWithDynFlags $ \dflags -> case node of
+ -- label:
+ CmmEntry id tscope -> lbl <> colon <+>
+ (sdocWithDynFlags $ \dflags ->
+ ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope))
+ where
+ lbl = if gopt Opt_SuppressUniques dflags
+ then text "_lbl_"
+ else ppr id
+
+ -- // text
+ CmmComment s -> text "//" <+> ftext s
+
+ -- //tick bla<...>
+ CmmTick t -> ppUnless (gopt Opt_SuppressTicks dflags) $
+ text "//tick" <+> ppr t
+
+ -- unwind reg = expr;
+ CmmUnwind regs ->
+ text "unwind "
+ <> commafy (map (\(r,e) -> ppr r <+> char '=' <+> ppr e) regs) <> semi
+
+ -- reg = expr;
+ CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi
+
+ -- rep[lv] = expr;
+ CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi
+ where
+ rep = sdocWithDynFlags $ \dflags ->
+ ppr ( cmmExprType dflags expr )
+
+ -- call "ccall" foo(x, y)[r1, r2];
+ -- ToDo ppr volatile
+ CmmUnsafeForeignCall target results args ->
+ hsep [ ppUnless (null results) $
+ parens (commafy $ map ppr results) <+> equals,
+ text "call",
+ ppr target <> parens (commafy $ map ppr args) <> semi]
+
+ -- goto label;
+ CmmBranch ident -> text "goto" <+> ppr ident <> semi
+
+ -- if (expr) goto t; else goto f;
+ CmmCondBranch expr t f l ->
+ hsep [ text "if"
+ , parens(ppr expr)
+ , case l of
+ Nothing -> empty
+ Just b -> parens (text "likely:" <+> ppr b)
+ , text "goto"
+ , ppr t <> semi
+ , text "else goto"
+ , ppr f <> semi
+ ]
+
+ CmmSwitch expr ids ->
+ hang (hsep [ text "switch"
+ , range
+ , if isTrivialCmmExpr expr
+ then ppr expr
+ else parens (ppr expr)
+ , text "{"
+ ])
+ 4 (vcat (map ppCase cases) $$ def) $$ rbrace
+ where
+ (cases, mbdef) = switchTargetsFallThrough ids
+ ppCase (is,l) = hsep
+ [ text "case"
+ , commafy $ map integer is
+ , text ": goto"
+ , ppr l <> semi
+ ]
+ def | Just l <- mbdef = hsep
+ [ text "default:"
+ , braces (text "goto" <+> ppr l <> semi)
+ ]
+ | otherwise = empty
+
+ range = brackets $ hsep [integer lo, text "..", integer hi]
+ where (lo,hi) = switchTargetsRange ids
+
+ CmmCall tgt k regs out res updfr_off ->
+ hcat [ text "call", space
+ , pprFun tgt, parens (interpp'SP regs), space
+ , returns <+>
+ text "args: " <> ppr out <> comma <+>
+ text "res: " <> ppr res <> comma <+>
+ text "upd: " <> ppr updfr_off
+ , semi ]
+ where pprFun f@(CmmLit _) = ppr f
+ pprFun f = parens (ppr f)
+
+ returns
+ | Just r <- k = text "returns to" <+> ppr r <> comma
+ | otherwise = empty
+
+ CmmForeignCall {tgt=t, res=rs, args=as, succ=s, ret_args=a, ret_off=u, intrbl=i} ->
+        hcat $ (if i then [text "interruptible", space] else []) ++
+ [ text "foreign call", space
+ , ppr t, text "(...)", space
+ , text "returns to" <+> ppr s
+ <+> text "args:" <+> parens (ppr as)
+ <+> text "ress:" <+> parens (ppr rs)
+ , text "ret_args:" <+> ppr a
+ , text "ret_off:" <+> ppr u
+ , semi ]
+
+ pp_debug :: SDoc
+ pp_debug =
+ if not debugIsOn then empty
+ else case node of
+ CmmEntry {} -> empty -- Looks terrible with text " // CmmEntry"
+ CmmComment {} -> empty -- Looks also terrible with text " // CmmComment"
+ CmmTick {} -> empty
+ CmmUnwind {} -> text " // CmmUnwind"
+ CmmAssign {} -> text " // CmmAssign"
+ CmmStore {} -> text " // CmmStore"
+ CmmUnsafeForeignCall {} -> text " // CmmUnsafeForeignCall"
+ CmmBranch {} -> text " // CmmBranch"
+ CmmCondBranch {} -> text " // CmmCondBranch"
+ CmmSwitch {} -> text " // CmmSwitch"
+ CmmCall {} -> text " // CmmCall"
+ CmmForeignCall {} -> text " // CmmForeignCall"
+
+ commafy :: [SDoc] -> SDoc
+ commafy xs = hsep $ punctuate comma xs
diff --git a/compiler/GHC/Cmm/Ppr/Decl.hs b/compiler/GHC/Cmm/Ppr/Decl.hs
new file mode 100644
index 0000000000..2544e6a0d3
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Decl.hs
@@ -0,0 +1,169 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+-- 1) if a value has wordRep type, the type is not appended in the
+-- output.
+-- 2) MachOps that operate over wordRep type are printed in a
+-- C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+--
+-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs
+--
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+module GHC.Cmm.Ppr.Decl
+ ( writeCmms, pprCmms, pprCmmGroup, pprSection, pprStatic
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Ppr.Expr
+import GHC.Cmm
+
+import DynFlags
+import Outputable
+import FastString
+
+import Data.List
+import System.IO
+
+import qualified Data.ByteString as BS
+
+
+pprCmms :: (Outputable info, Outputable g)
+ => [GenCmmGroup CmmStatics info g] -> SDoc
+pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
+ where
+ separator = space $$ text "-------------------" $$ space
+
+writeCmms :: (Outputable info, Outputable g)
+ => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO ()
+writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms)
+
+-----------------------------------------------------------------------------
+
+instance (Outputable d, Outputable info, Outputable i)
+ => Outputable (GenCmmDecl d info i) where
+ ppr t = pprTop t
+
+instance Outputable CmmStatics where
+ ppr = pprStatics
+
+instance Outputable CmmStatic where
+ ppr = pprStatic
+
+instance Outputable CmmInfoTable where
+ ppr = pprInfoTable
+
+
+-----------------------------------------------------------------------------
+
+pprCmmGroup :: (Outputable d, Outputable info, Outputable g)
+ => GenCmmGroup d info g -> SDoc
+pprCmmGroup tops
+ = vcat $ intersperse blankLine $ map pprTop tops
+
+-- --------------------------------------------------------------------------
+-- Top level `procedure' blocks.
+--
+pprTop :: (Outputable d, Outputable info, Outputable i)
+ => GenCmmDecl d info i -> SDoc
+
+pprTop (CmmProc info lbl live graph)
+
+ = vcat [ ppr lbl <> lparen <> rparen <+> lbrace <+> text "// " <+> ppr live
+ , nest 8 $ lbrace <+> ppr info $$ rbrace
+ , nest 4 $ ppr graph
+ , rbrace ]
+
+-- --------------------------------------------------------------------------
+-- We follow [1], 4.5
+--
+-- section "data" { ... }
+--
+pprTop (CmmData section ds) =
+ (hang (pprSection section <+> lbrace) 4 (ppr ds))
+ $$ rbrace
+
+-- --------------------------------------------------------------------------
+-- Info tables.
+
+pprInfoTable :: CmmInfoTable -> SDoc
+pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep
+ , cit_prof = prof_info
+ , cit_srt = srt })
+ = vcat [ text "label: " <> ppr lbl
+ , text "rep: " <> ppr rep
+ , case prof_info of
+ NoProfilingInfo -> empty
+ ProfilingInfo ct cd ->
+ vcat [ text "type: " <> text (show (BS.unpack ct))
+ , text "desc: " <> text (show (BS.unpack cd)) ]
+ , text "srt: " <> ppr srt ]
+
+instance Outputable ForeignHint where
+ ppr NoHint = empty
+ ppr SignedHint = quotes(text "signed")
+-- ppr AddrHint = quotes(text "address")
+-- Temp Jan08
+ ppr AddrHint = (text "PtrHint")
+
+-- --------------------------------------------------------------------------
+-- Static data.
+-- Strings are printed as C strings, and we print them as I8[],
+-- following C--
+--
+pprStatics :: CmmStatics -> SDoc
+pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
+
+pprStatic :: CmmStatic -> SDoc
+pprStatic s = case s of
+ CmmStaticLit lit -> nest 4 $ text "const" <+> pprLit lit <> semi
+ CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
+ CmmString s' -> nest 4 $ text "I8[]" <+> text (show s')
+
+-- --------------------------------------------------------------------------
+-- data sections
+--
+pprSection :: Section -> SDoc
+pprSection (Section t suffix) =
+ section <+> doubleQuotes (pprSectionType t <+> char '.' <+> ppr suffix)
+ where
+ section = text "section"
+
+pprSectionType :: SectionType -> SDoc
+pprSectionType s = doubleQuotes (ptext t)
+ where
+ t = case s of
+ Text -> sLit "text"
+ Data -> sLit "data"
+ ReadOnlyData -> sLit "readonly"
+ ReadOnlyData16 -> sLit "readonly16"
+ RelocatableReadOnlyData
+ -> sLit "relreadonly"
+ UninitialisedData -> sLit "uninitialised"
+ CString -> sLit "cstring"
+ OtherSection s' -> sLit s' -- Not actually a literal though.
diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs
new file mode 100644
index 0000000000..53a335e561
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Expr.hs
@@ -0,0 +1,286 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+-- 1) if a value has wordRep type, the type is not appended in the
+-- output.
+-- 2) MachOps that operate over wordRep type are printed in a
+-- C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+--
+-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs
+--
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+module GHC.Cmm.Ppr.Expr
+ ( pprExpr, pprLit
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Expr
+
+import Outputable
+import DynFlags
+
+import Data.Maybe
+import Numeric ( fromRat )
+
+-----------------------------------------------------------------------------
+
+instance Outputable CmmExpr where
+ ppr e = pprExpr e
+
+instance Outputable CmmReg where
+ ppr e = pprReg e
+
+instance Outputable CmmLit where
+ ppr l = pprLit l
+
+instance Outputable LocalReg where
+ ppr e = pprLocalReg e
+
+instance Outputable Area where
+ ppr e = pprArea e
+
+instance Outputable GlobalReg where
+ ppr e = pprGlobalReg e
+
+-- --------------------------------------------------------------------------
+-- Expressions
+--
+
+pprExpr :: CmmExpr -> SDoc
+pprExpr e
+ = sdocWithDynFlags $ \dflags ->
+ case e of
+ CmmRegOff reg i ->
+ pprExpr (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)])
+ where rep = typeWidth (cmmRegType dflags reg)
+ CmmLit lit -> pprLit lit
+ _other -> pprExpr1 e
+
+-- Here's the precedence table from GHC.Cmm.Parser:
+-- %nonassoc '>=' '>' '<=' '<' '!=' '=='
+-- %left '|'
+-- %left '^'
+-- %left '&'
+-- %left '>>' '<<'
+-- %left '-' '+'
+-- %left '/' '*' '%'
+-- %right '~'
+
+-- We just cope with the common operators for now, the rest will get
+-- a default conservative behaviour.
+
+-- %nonassoc '>=' '>' '<=' '<' '!=' '=='
+pprExpr1, pprExpr7, pprExpr8 :: CmmExpr -> SDoc
+pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op
+ = pprExpr7 x <+> doc <+> pprExpr7 y
+pprExpr1 e = pprExpr7 e
+
+infixMachOp1, infixMachOp7, infixMachOp8 :: MachOp -> Maybe SDoc
+
+infixMachOp1 (MO_Eq _) = Just (text "==")
+infixMachOp1 (MO_Ne _) = Just (text "!=")
+infixMachOp1 (MO_Shl _) = Just (text "<<")
+infixMachOp1 (MO_U_Shr _) = Just (text ">>")
+infixMachOp1 (MO_U_Ge _) = Just (text ">=")
+infixMachOp1 (MO_U_Le _) = Just (text "<=")
+infixMachOp1 (MO_U_Gt _) = Just (char '>')
+infixMachOp1 (MO_U_Lt _) = Just (char '<')
+infixMachOp1 _ = Nothing
+
+-- %left '-' '+'
+pprExpr7 (CmmMachOp (MO_Add rep1) [x, CmmLit (CmmInt i rep2)]) | i < 0
+ = pprExpr7 (CmmMachOp (MO_Sub rep1) [x, CmmLit (CmmInt (negate i) rep2)])
+pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op
+ = pprExpr7 x <+> doc <+> pprExpr8 y
+pprExpr7 e = pprExpr8 e
+
+infixMachOp7 (MO_Add _) = Just (char '+')
+infixMachOp7 (MO_Sub _) = Just (char '-')
+infixMachOp7 _ = Nothing
+
+-- %left '/' '*' '%'
+pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op
+ = pprExpr8 x <+> doc <+> pprExpr9 y
+pprExpr8 e = pprExpr9 e
+
+infixMachOp8 (MO_U_Quot _) = Just (char '/')
+infixMachOp8 (MO_Mul _) = Just (char '*')
+infixMachOp8 (MO_U_Rem _) = Just (char '%')
+infixMachOp8 _ = Nothing
+
+pprExpr9 :: CmmExpr -> SDoc
+pprExpr9 e =
+ case e of
+ CmmLit lit -> pprLit1 lit
+ CmmLoad expr rep -> ppr rep <> brackets (ppr expr)
+ CmmReg reg -> ppr reg
+ CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off)
+ CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off)
+ CmmMachOp mop args -> genMachOp mop args
+
+genMachOp :: MachOp -> [CmmExpr] -> SDoc
+genMachOp mop args
+ | Just doc <- infixMachOp mop = case args of
+ -- dyadic
+ [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y
+
+ -- unary
+ [x] -> doc <> pprExpr9 x
+
+ _ -> pprTrace "GHC.Cmm.Ppr.Expr.genMachOp: machop with strange number of args"
+ (pprMachOp mop <+>
+ parens (hcat $ punctuate comma (map pprExpr args)))
+ empty
+
+ | isJust (infixMachOp1 mop)
+ || isJust (infixMachOp7 mop)
+ || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args))
+
+ | otherwise = char '%' <> ppr_op <> parens (commafy (map pprExpr args))
+ where ppr_op = text (map (\c -> if c == ' ' then '_' else c)
+ (show mop))
+                -- Replace spaces in (show mop) with underscores.
+
+--
+-- Unsigned ops on the word size of the machine get nice symbols.
+-- All else get dumped in their ugly format.
+--
+infixMachOp :: MachOp -> Maybe SDoc
+infixMachOp mop
+ = case mop of
+ MO_And _ -> Just $ char '&'
+ MO_Or _ -> Just $ char '|'
+ MO_Xor _ -> Just $ char '^'
+ MO_Not _ -> Just $ char '~'
+ MO_S_Neg _ -> Just $ char '-' -- there is no unsigned neg :)
+ _ -> Nothing
+
+-- --------------------------------------------------------------------------
+-- Literals.
+-- To minimise line noise we adopt the convention that if the literal
+-- has the natural machine word size, we do not append the type
+--
+pprLit :: CmmLit -> SDoc
+pprLit lit = sdocWithDynFlags $ \dflags ->
+ case lit of
+ CmmInt i rep ->
+ hcat [ (if i < 0 then parens else id)(integer i)
+ , ppUnless (rep == wordWidth dflags) $
+ space <> dcolon <+> ppr rep ]
+
+ CmmFloat f rep -> hsep [ double (fromRat f), dcolon, ppr rep ]
+ CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>'
+ CmmLabel clbl -> ppr clbl
+ CmmLabelOff clbl i -> ppr clbl <> ppr_offset i
+ CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-'
+ <> ppr clbl2 <> ppr_offset i
+ CmmBlock id -> ppr id
+ CmmHighStackMark -> text "<highSp>"
+
+pprLit1 :: CmmLit -> SDoc
+pprLit1 lit@(CmmLabelOff {}) = parens (pprLit lit)
+pprLit1 lit = pprLit lit
+
+ppr_offset :: Int -> SDoc
+ppr_offset i
+ | i==0 = empty
+ | i>=0 = char '+' <> int i
+ | otherwise = char '-' <> int (-i)
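+
+-- For instance: ppr_offset 0 prints nothing, ppr_offset 5 prints
+-- "+5", and ppr_offset (-3) prints "-3", so a CmmLabelOff renders as
+-- "lbl", "lbl+5", or "lbl-3".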
+
+-- --------------------------------------------------------------------------
+-- Registers, whether local (temps) or global
+--
+pprReg :: CmmReg -> SDoc
+pprReg r
+ = case r of
+ CmmLocal local -> pprLocalReg local
+ CmmGlobal global -> pprGlobalReg global
+
+--
+-- We only print the type of the local reg if it isn't wordRep
+--
+pprLocalReg :: LocalReg -> SDoc
+pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags ->
+-- = ppr rep <> char '_' <> ppr uniq
+-- Temp Jan08
+ char '_' <> pprUnique dflags uniq <>
+       -- The old conditional on isWord32 here had two identical
+       -- branches (a "Temp Jan08" leftover), so we print the type
+       -- annotation unconditionally.
+       dcolon <> ptr <> ppr rep
+ where
+ pprUnique dflags unique =
+ if gopt Opt_SuppressUniques dflags
+ then text "_locVar_"
+ else ppr unique
+ ptr = empty
+ --if isGcPtrType rep
+ -- then doubleQuotes (text "ptr")
+ -- else empty
+
+-- Stack areas
+pprArea :: Area -> SDoc
+pprArea Old = text "old"
+pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]
+
+-- Needs to be kept in sync with GlobalReg in GHC.Cmm.Expr
+--
+pprGlobalReg :: GlobalReg -> SDoc
+pprGlobalReg gr
+ = case gr of
+ VanillaReg n _ -> char 'R' <> int n
+-- Temp Jan08
+-- VanillaReg n VNonGcPtr -> char 'R' <> int n
+-- VanillaReg n VGcPtr -> char 'P' <> int n
+ FloatReg n -> char 'F' <> int n
+ DoubleReg n -> char 'D' <> int n
+ LongReg n -> char 'L' <> int n
+ XmmReg n -> text "XMM" <> int n
+ YmmReg n -> text "YMM" <> int n
+ ZmmReg n -> text "ZMM" <> int n
+ Sp -> text "Sp"
+ SpLim -> text "SpLim"
+ Hp -> text "Hp"
+ HpLim -> text "HpLim"
+ MachSp -> text "MachSp"
+ UnwindReturnReg-> text "UnwindReturnReg"
+ CCCS -> text "CCCS"
+ CurrentTSO -> text "CurrentTSO"
+ CurrentNursery -> text "CurrentNursery"
+ HpAlloc -> text "HpAlloc"
+ EagerBlackholeInfo -> text "stg_EAGER_BLACKHOLE_info"
+ GCEnter1 -> text "stg_gc_enter_1"
+ GCFun -> text "stg_gc_fun"
+ BaseReg -> text "BaseReg"
+ PicBaseReg -> text "PicBaseReg"
+
+-----------------------------------------------------------------------------
+
+commafy :: [SDoc] -> SDoc
+commafy xs = fsep $ punctuate comma xs
diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs
new file mode 100644
index 0000000000..00a7a73d89
--- /dev/null
+++ b/compiler/GHC/Cmm/ProcPoint.hs
@@ -0,0 +1,496 @@
+{-# LANGUAGE GADTs, DisambiguateRecordFields, BangPatterns #-}
+
+module GHC.Cmm.ProcPoint
+ ( ProcPointSet, Status(..)
+ , callProcPoints, minimalProcPointSet
+ , splitAtProcPoints, procPointAnalysis
+ , attachContInfoTables
+ )
+where
+
+import GhcPrelude hiding (last, unzip, succ, zip)
+
+import DynFlags
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Cmm.Ppr () -- For Outputable instances
+import GHC.Cmm.Utils
+import GHC.Cmm.Info
+import GHC.Cmm.Liveness
+import GHC.Cmm.Switch
+import Data.List (sortBy)
+import Maybes
+import Control.Monad
+import Outputable
+import GHC.Platform
+import UniqSupply
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+
+-- Compute a minimal set of proc points for a control-flow graph.
+
+-- Determine a protocol for each proc point (which live variables will
+-- be passed as arguments and which will be on the stack).
+
+{-
+A proc point is a basic block that, after CPS transformation, will
+start a new function. The entry block of the original function is a
+proc point, as is the continuation of each function call.
+A third kind of proc point arises if we want to avoid copying code.
+Suppose we have code like the following:
+
+ f() {
+ if (...) { ..1..; call foo(); ..2..}
+ else { ..3..; call bar(); ..4..}
+ x = y + z;
+ return x;
+ }
+
+The statement 'x = y + z' can be reached from two different proc
+points: the continuations of foo() and bar(). We would prefer not to
+put a copy in each continuation; instead we would like 'x = y + z' to
+be the start of a new procedure to which the continuations can jump:
+
+ f_cps () {
+ if (...) { ..1..; push k_foo; jump foo_cps(); }
+ else { ..3..; push k_bar; jump bar_cps(); }
+ }
+ k_foo() { ..2..; jump k_join(y, z); }
+ k_bar() { ..4..; jump k_join(y, z); }
+ k_join(y, z) { x = y + z; return x; }
+
+You might think then that a criterion to make a node a proc point is
+that it is directly reached by two distinct proc points. (Note
+[Direct reachability].) But this criterion is a bit too simple; for
+example, 'return x' is also reached by two proc points, yet there is
+no point in pulling it out of k_join. A good criterion would be to
+say that a node should be made a proc point if it is reached by a set
+of proc points different from the set reaching its immediate dominator. NR
+believes this criterion can be shown to produce a minimum set of proc
+points, and given a dominator tree, the proc points can be chosen in
+time linear in the number of blocks. Lacking a dominator analysis,
+however, we turn instead to an iterative solution, starting with no
+proc points and adding them according to these rules:
+
+ 1. The entry block is a proc point.
+ 2. The continuation of a call is a proc point.
+ 3. A node is a proc point if it is directly reached by more proc
+ points than one of its predecessors.
+
+Because we don't understand the problem very well, we apply rule 3 at
+most once per iteration, then recompute the reachability information.
+(See Note [No simple dataflow].) The choice of the new proc point is
+arbitrary, and I don't know if the choice affects the final solution,
+so I don't know if the number of proc points chosen is the
+minimum---but the set will be minimal.
+
+
+
+Note [Proc-point analysis]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Given a specified set of proc-points (a set of block-ids), "proc-point
+analysis" figures out, for every block, which proc-point it belongs to.
+All the blocks belonging to proc-point P will constitute a single
+top-level C procedure.
+
+A non-proc-point block B "belongs to" a proc-point P iff B is
+reachable from P without going through another proc-point.
+
+Invariant: a block B should belong to at most one proc-point; if it
+belongs to two, that's a bug.
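+
+A small example: in a chain P1 -> A -> P2 -> B, where P1 and P2 are
+proc-points, block A belongs to P1 and block B belongs to P2, because
+B is reachable from P1 only through the intervening proc-point P2.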
+
+Note [Non-existing proc-points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On some architectures it might happen that the list of proc-points
+computed before the stack-layout pass is invalidated by stack layout
+itself. This happens if stack layout removes from the graph blocks
+that were determined to be proc-points. Later in the pipeline we use
+the list of proc-points to perform [Proc-point analysis], but if a
+proc-point no longer exists we would get a compiler panic. See #8205.
+-}
+
+type ProcPointSet = LabelSet
+
+data Status
+ = ReachedBy ProcPointSet -- set of proc points that directly reach the block
+ | ProcPoint -- this block is itself a proc point
+
+instance Outputable Status where
+ ppr (ReachedBy ps)
+ | setNull ps = text "<not-reached>"
+ | otherwise = text "reached by" <+>
+ (hsep $ punctuate comma $ map ppr $ setElems ps)
+ ppr ProcPoint = text "<procpt>"
+
+--------------------------------------------------
+-- Proc point analysis
+
+-- Once you know what the proc-points are, figure out
+-- what proc-points each block is reachable from
+-- See Note [Proc-point analysis]
+procPointAnalysis :: ProcPointSet -> CmmGraph -> LabelMap Status
+procPointAnalysis procPoints cmmGraph@(CmmGraph {g_graph = graph}) =
+ analyzeCmmFwd procPointLattice procPointTransfer cmmGraph initProcPoints
+ where
+ initProcPoints =
+ mkFactBase
+ procPointLattice
+ [ (id, ProcPoint)
+ | id <- setElems procPoints
+ -- See Note [Non-existing proc-points]
+ , id `setMember` labelsInGraph
+ ]
+ labelsInGraph = labelsDefined graph
+
+procPointTransfer :: TransferFun Status
+procPointTransfer block facts =
+ let label = entryLabel block
+ !fact = case getFact procPointLattice label facts of
+ ProcPoint -> ReachedBy $! setSingleton label
+ f -> f
+ result = map (\id -> (id, fact)) (successors block)
+ in mkFactBase procPointLattice result
+
+procPointLattice :: DataflowLattice Status
+procPointLattice = DataflowLattice unreached add_to
+ where
+ unreached = ReachedBy setEmpty
+ add_to (OldFact ProcPoint) _ = NotChanged ProcPoint
+ add_to _ (NewFact ProcPoint) = Changed ProcPoint -- because of previous case
+ add_to (OldFact (ReachedBy p)) (NewFact (ReachedBy p'))
+ | setSize union > setSize p = Changed (ReachedBy union)
+ | otherwise = NotChanged (ReachedBy p)
+ where
+ union = setUnion p' p
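+
+-- For example, joining an old fact ReachedBy {L1} with a new fact
+-- ReachedBy {L2} gives Changed (ReachedBy {L1,L2}), since the union
+-- grew; rejoining ReachedBy {L1} with ReachedBy {L1} gives NotChanged.
+-- ProcPoint acts as the top element: once a block is known to be a
+-- proc point, no ReachedBy fact can change that.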
+
+----------------------------------------------------------------------
+
+-- It is worth distinguishing two sets of proc points: those that are
+-- induced by calls in the original graph and those that are
+-- introduced because they're reachable from multiple proc points.
+--
+-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds].
+callProcPoints :: CmmGraph -> ProcPointSet
+callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g
+ where add :: LabelSet -> CmmBlock -> LabelSet
+ add set b = case lastNode b of
+ CmmCall {cml_cont = Just k} -> setInsert k set
+ CmmForeignCall {succ=k} -> setInsert k set
+ _ -> set
+
+minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph
+ -> UniqSM ProcPointSet
+-- Given the set of successors of calls (which must be proc-points)
+-- figure out the minimal set of necessary proc-points
+minimalProcPointSet platform callProcPoints g
+ = extendPPSet platform g (revPostorder g) callProcPoints
+
+extendPPSet
+ :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet
+extendPPSet platform g blocks procPoints =
+ let env = procPointAnalysis procPoints g
+ add pps block = let id = entryLabel block
+ in case mapLookup id env of
+ Just ProcPoint -> setInsert id pps
+ _ -> pps
+ procPoints' = foldlGraphBlocks add setEmpty g
+ newPoints = mapMaybe ppSuccessor blocks
+ newPoint = listToMaybe newPoints
+ ppSuccessor b =
+ let nreached id = case mapLookup id env `orElse`
+ pprPanic "no ppt" (ppr id <+> ppr b) of
+ ProcPoint -> 1
+ ReachedBy ps -> setSize ps
+ block_procpoints = nreached (entryLabel b)
+                       -- Look for a successor of b that is reached by
+                       -- more proc points than b and is not already a
+                       -- proc point. If found, it can become a proc point.
+ newId succ_id = not (setMember succ_id procPoints') &&
+ nreached succ_id > block_procpoints
+ in listToMaybe $ filter newId $ successors b
+
+ in case newPoint of
+ Just id ->
+ if setMember id procPoints'
+ then panic "added old proc pt"
+ else extendPPSet platform g blocks (setInsert id procPoints')
+ Nothing -> return procPoints'
+
+
+-- At this point, we have found a set of procpoints, each of which should be
+-- the entry point of a procedure.
+-- Now, we create the procedure for each proc point,
+-- which requires that we:
+-- 1. build a map from proc points to the blocks reachable from the proc point
+-- 2. turn each branch to a proc point into a jump
+-- 3. turn calls and returns into jumps
+-- 4. build info tables for the procedures -- and update the info table for
+-- the SRTs in the entry procedure as well.
+-- Input invariant: A block should only be reachable from a single ProcPoint.
+-- ToDo: use the _ret naming convention that the old code generator
+-- used. -- EZY
+splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet -> ProcPointSet -> LabelMap Status ->
+ CmmDecl -> UniqSM [CmmDecl]
+splitAtProcPoints dflags entry_label callPPs procPoints procMap
+ (CmmProc (TopInfo {info_tbls = info_tbls})
+ top_l _ g@(CmmGraph {g_entry=entry})) =
+ do -- Build a map from procpoints to the blocks they reach
+ let add_block
+ :: LabelMap (LabelMap CmmBlock)
+ -> CmmBlock
+ -> LabelMap (LabelMap CmmBlock)
+ add_block graphEnv b =
+ case mapLookup bid procMap of
+ Just ProcPoint -> add graphEnv bid bid b
+ Just (ReachedBy set) ->
+ case setElems set of
+ [] -> graphEnv
+ [id] -> add graphEnv id bid b
+ _ -> panic "Each block should be reachable from only one ProcPoint"
+ Nothing -> graphEnv
+ where bid = entryLabel b
+ add graphEnv procId bid b = mapInsert procId graph' graphEnv
+ where graph = mapLookup procId graphEnv `orElse` mapEmpty
+ graph' = mapInsert bid b graph
+
+ let liveness = cmmGlobalLiveness dflags g
+ let ppLiveness pp = filter isArgReg $
+ regSetToList $
+ expectJust "ppLiveness" $ mapLookup pp liveness
+
+ graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g
+
+ -- Build a map from proc point BlockId to pairs of:
+ -- * Labels for their new procedures
+ -- * Labels for the info tables of their new procedures (only if
+ -- the proc point is a callPP)
+ -- Due to common blockification, we may overestimate the set of procpoints.
+ let add_label map pp = mapInsert pp lbls map
+ where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls))
+ | otherwise = (block_lbl, guard (setMember pp callPPs) >>
+ Just info_table_lbl)
+ where block_lbl = blockLbl pp
+ info_table_lbl = infoTblLbl pp
+
+ procLabels :: LabelMap (CLabel, Maybe CLabel)
+ procLabels = foldl' add_label mapEmpty
+ (filter (flip mapMember (toBlockMap g)) (setElems procPoints))
+
+ -- In each new graph, add blocks jumping off to the new procedures,
+ -- and replace branches to procpoints with branches to the jump-off blocks
+ let add_jump_block
+ :: (LabelMap Label, [CmmBlock])
+ -> (Label, CLabel)
+ -> UniqSM (LabelMap Label, [CmmBlock])
+ add_jump_block (env, bs) (pp, l) =
+ do bid <- liftM mkBlockId getUniqueM
+ let b = blockJoin (CmmEntry bid GlobalScope) emptyBlock jump
+ live = ppLiveness pp
+ jump = CmmCall (CmmLit (CmmLabel l)) Nothing live 0 0 0
+ return (mapInsert pp bid env, b : bs)
+
+ add_jumps
+ :: LabelMap CmmGraph
+ -> (Label, LabelMap CmmBlock)
+ -> UniqSM (LabelMap CmmGraph)
+ add_jumps newGraphEnv (ppId, blockEnv) =
+ do let needed_jumps = -- find which procpoints we currently branch to
+ mapFoldr add_if_branch_to_pp [] blockEnv
+ add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)]
+ add_if_branch_to_pp block rst =
+ case lastNode block of
+ CmmBranch id -> add_if_pp id rst
+ CmmCondBranch _ ti fi _ -> add_if_pp ti (add_if_pp fi rst)
+ CmmSwitch _ ids -> foldr add_if_pp rst $ switchTargetsToList ids
+ _ -> rst
+
+ -- when jumping to a PP that has an info table, if
+ -- tablesNextToCode is off we must jump to the entry
+ -- label instead.
+ jump_label (Just info_lbl) _
+ | tablesNextToCode dflags = info_lbl
+ | otherwise = toEntryLbl info_lbl
+ jump_label Nothing block_lbl = block_lbl
+
+ add_if_pp id rst = case mapLookup id procLabels of
+ Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst
+ Nothing -> rst
+ (jumpEnv, jumpBlocks) <-
+ foldM add_jump_block (mapEmpty, []) needed_jumps
+ -- update the entry block
+ let b = expectJust "block in env" $ mapLookup ppId blockEnv
+ blockEnv' = mapInsert ppId b blockEnv
+ -- replace branches to procpoints with branches to jumps
+ blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv'
+ -- add the jump blocks to the graph
+ blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks
+ let g' = ofBlockMap ppId blockEnv'''
+ -- pprTrace "g' pre jumps" (ppr g') $ do
+ return (mapInsert ppId g' newGraphEnv)
+
+ graphEnv <- foldM add_jumps mapEmpty $ mapToList graphEnv
+
+ let to_proc (bid, g)
+ | bid == entry
+ = CmmProc (TopInfo {info_tbls = info_tbls,
+ stack_info = stack_info})
+ top_l live g'
+ | otherwise
+ = case expectJust "pp label" $ mapLookup bid procLabels of
+ (lbl, Just info_lbl)
+ -> CmmProc (TopInfo { info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl)
+ , stack_info=stack_info})
+ lbl live g'
+ (lbl, Nothing)
+ -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info})
+ lbl live g'
+ where
+ g' = replacePPIds g
+ live = ppLiveness (g_entry g')
+ stack_info = StackInfo { arg_space = 0
+ , updfr_space = Nothing
+ , do_layout = True }
+ -- cannot use panic, this is printed by -ddump-cmm
+
+ -- References to procpoint IDs can now be replaced with the
+ -- infotable's label
+ replacePPIds g = {-# SCC "replacePPIds" #-}
+ mapGraphNodes (id, mapExp repl, mapExp repl) g
+ where repl e@(CmmLit (CmmBlock bid)) =
+ case mapLookup bid procLabels of
+ Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl)
+ _ -> e
+ repl e = e
+
+ -- The C back end expects to see return continuations before the
+ -- call sites. Here, we sort them in reverse order -- it gets
+ -- reversed later.
+ let (_, block_order) =
+ foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int)
+ (revPostorder g)
+ add_block_num (i, map) block =
+ (i + 1, mapInsert (entryLabel block) i map)
+ sort_fn (bid, _) (bid', _) =
+ compare (expectJust "block_order" $ mapLookup bid block_order)
+ (expectJust "block_order" $ mapLookup bid' block_order)
+ procs <- return $ map to_proc $ sortBy sort_fn $ mapToList graphEnv
+ return -- pprTrace "procLabels" (ppr procLabels)
+ -- pprTrace "splitting graphs" (ppr procs)
+ procs
+splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t]
+
+-- Only called from GHC.Cmm.ProcPoint.splitAtProcPoints. NB. does a
+-- recursive lookup, see comment below.
+replaceBranches :: LabelMap BlockId -> CmmGraph -> CmmGraph
+replaceBranches env cmmg
+ = {-# SCC "replaceBranches" #-}
+ ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg
+ where
+ f block = replaceLastNode block $ last (lastNode block)
+
+ last :: CmmNode O C -> CmmNode O C
+ last (CmmBranch id) = CmmBranch (lookup id)
+ last (CmmCondBranch e ti fi l) = CmmCondBranch e (lookup ti) (lookup fi) l
+ last (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets lookup ids)
+ last l@(CmmCall {}) = l { cml_cont = Nothing }
+ -- NB. remove the continuation of a CmmCall, since this
+ -- label will now be in a different CmmProc. Not only
+ -- is this tidier, it stops CmmLint from complaining.
+ last l@(CmmForeignCall {}) = l
+ lookup id = fmap lookup (mapLookup id env) `orElse` id
+        -- XXX: this is a recursive lookup; it follows chains
+        -- until the lookup returns Nothing, at which point we
+        -- return the last BlockId.
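+        -- For example, with env = { A -> B, B -> C } and C not in the
+        -- map, lookup A follows A -> B -> C and returns C.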
+
+-- --------------------------------------------------------------
+-- Not splitting proc points: add info tables for continuations
+
+attachContInfoTables :: ProcPointSet -> CmmDecl -> CmmDecl
+attachContInfoTables call_proc_points (CmmProc top_info top_l live g)
+ = CmmProc top_info{info_tbls = info_tbls'} top_l live g
+ where
+ info_tbls' = mapUnion (info_tbls top_info) $
+ mapFromList [ (l, mkEmptyContInfoTable (infoTblLbl l))
+ | l <- setElems call_proc_points
+ , l /= g_entry g ]
+attachContInfoTables _ other_decl
+ = other_decl
+
+----------------------------------------------------------------
+
+{-
+Note [Direct reachability]
+
+Block B is directly reachable from proc point P iff control can flow
+from P to B without passing through an intervening proc point.
+-}
+
+----------------------------------------------------------------
+
+{-
+Note [No simple dataflow]
+
+Sadly, it seems impossible to compute the proc points using a single
+dataflow pass. One might attempt to use this simple lattice:
+
+ data Location = Unknown
+ | InProc BlockId -- node is in procedure headed by the named proc point
+ | ProcPoint -- node is itself a proc point
+
+At a join, a node in two different blocks becomes a proc point.
+The difficulty is that the change of information during iterative
+computation may promote a node prematurely. Here's a program that
+illustrates the difficulty:
+
+ f () {
+ entry:
+ ....
+ L1:
+ if (...) { ... }
+ else { ... }
+
+ L2: if (...) { g(); goto L1; }
+ return x + y;
+ }
+
+The only proc-point needed (besides the entry) is L1. But in an
+iterative analysis, consider what happens to L2. On the first pass
+through, it rises from Unknown to 'InProc entry', but when L1 is
+promoted to a proc point (because it's the successor of g()), L1's
+successors will be promoted to 'InProc L1'. The problem hits when the
+new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'.
+The join operation makes it a proc point when in fact it needn't be,
+because its immediate dominator L1 is already a proc point and there
+are no other proc points that directly reach L2.
+-}
+
+
+
+{- Note [Separate Adams optimization]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It may be worthwhile to attempt the Adams optimization by rewriting
+the graph before the assignment of proc-point protocols. Here are a
+couple of rules:
+
+ g() returns to k; g() returns to L;
+ k: CopyIn c ress; goto L:
+ ... ==> ...
+ L: // no CopyIn node here L: CopyIn c ress;
+
+
+And when c == c' and ress == ress', this also:
+
+ g() returns to k; g() returns to L;
+ k: CopyIn c ress; goto L:
+ ... ==> ...
+ L: CopyIn c' ress' L: CopyIn c' ress' ;
+
+In both cases the goal is to eliminate k.
+-}
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
new file mode 100644
index 0000000000..8e231df300
--- /dev/null
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -0,0 +1,854 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Sink (
+ cmmSink
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm
+import GHC.Cmm.Opt
+import GHC.Cmm.Liveness
+import GHC.Cmm.Utils
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Platform.Regs
+import GHC.Platform (isARM, platformArch)
+
+import DynFlags
+import Unique
+import UniqFM
+
+import qualified Data.IntSet as IntSet
+import Data.List (partition)
+import qualified Data.Set as Set
+import Data.Maybe
+
+-- Compact sets for membership tests of local variables.
+
+type LRegSet = IntSet.IntSet
+
+emptyLRegSet :: LRegSet
+emptyLRegSet = IntSet.empty
+
+nullLRegSet :: LRegSet -> Bool
+nullLRegSet = IntSet.null
+
+insertLRegSet :: LocalReg -> LRegSet -> LRegSet
+insertLRegSet l = IntSet.insert (getKey (getUnique l))
+
+elemLRegSet :: LocalReg -> LRegSet -> Bool
+elemLRegSet l = IntSet.member (getKey (getUnique l))
+
+-- -----------------------------------------------------------------------------
+-- Sinking and inlining
+
+-- This is an optimisation pass that
+-- (a) moves assignments closer to their uses, to reduce register pressure
+-- (b) pushes assignments into a single branch of a conditional if possible
+-- (c) inlines assignments to registers that are mentioned only once
+-- (d) discards dead assignments
+--
+-- This tightens up lots of register-heavy code. It is particularly
+-- helpful in the Cmm generated by the Stg->Cmm code generator, in
+-- which every function starts with a copyIn sequence like:
+--
+-- x1 = R1
+-- x2 = Sp[8]
+-- x3 = Sp[16]
+-- if (Sp - 32 < SpLim) then L1 else L2
+--
+-- we really want to push the x1..x3 assignments into the L2 branch.
+--
+-- Algorithm:
+--
+-- * Start by doing liveness analysis.
+--
+-- * Keep a list of assignments A; earlier ones may refer to later ones.
+-- Currently we only sink assignments to local registers, because we don't
+-- have liveness information about global registers.
+--
+-- * Walk forwards through the graph, look at each node N:
+--
+-- * If it is a dead assignment, i.e. assignment to a register that is
+-- not used after N, discard it.
+--
+-- * Try to inline based on current list of assignments
+-- * If any assignments in A (1) occur only once in N, and (2) are
+-- not live after N, inline the assignment and remove it
+-- from A.
+--
+-- * If an assignment in A is cheap (RHS is local register), then
+-- inline the assignment and keep it in A in case it is used afterwards.
+--
+-- * Otherwise don't inline.
+--
+-- * If N is assignment to a local register pick up the assignment
+-- and add it to A.
+--
+-- * If N is not an assignment to a local register:
+-- * remove any assignments from A that conflict with N, and
+-- place them before N in the current block. We call this
+-- "dropping" the assignments.
+--
+-- * An assignment conflicts with N if it:
+-- - assigns to a register mentioned in N
+-- - mentions a register assigned by N
+-- - reads from memory written by N
+-- * do this recursively, dropping dependent assignments
+--
+-- * At an exit node:
+-- * drop any assignments that are live on more than one successor
+-- and are not trivial
+-- * if any successor has more than one predecessor (a join-point),
+-- drop everything live in that successor. Since we only propagate
+-- assignments that are not dead at the successor, we will therefore
+-- eliminate all assignments dead at this point. Thus analysis of a
+-- join-point will always begin with an empty list of assignments.
+--
+--
+-- As a result of above algorithm, sinking deletes some dead assignments
+-- (transitively, even). This isn't as good as removeDeadAssignments,
+-- but it's much cheaper.
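+--
+-- For the copyIn example above, assuming x1..x3 are dead in the L1
+-- (check-failure) branch, the intended net effect is roughly:
+--
+--      if (Sp - 32 < SpLim) then L1 else L2
+--      L2: x1 = R1
+--          x2 = Sp[8]
+--          x3 = Sp[16]
+--          ...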
+
+-- -----------------------------------------------------------------------------
+-- things that we aren't optimising very well yet.
+--
+-- -----------
+-- (1) From GHC's FastString.hashStr:
+--
+-- s2ay:
+-- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
+-- c2gn:
+-- R1 = _s2au::I64;
+-- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
+-- c2gp:
+-- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
+-- 4091);
+-- _s2an::I64 = _s2an::I64 + 1;
+-- _s2au::I64 = _s2cO::I64;
+-- goto s2ay;
+--
+-- a nice loop, but we didn't eliminate the silly assignment at the end.
+-- See Note [dependent assignments], which would probably fix this.
+-- This is #8336.
+--
+-- -----------
+-- (2) From stg_atomically_frame in PrimOps.cmm
+--
+-- We have a diamond control flow:
+--
+-- x = ...
+-- |
+-- / \
+-- A B
+-- \ /
+-- |
+-- use of x
+--
+-- Now x won't be sunk down to its use, because we won't push it into
+-- both branches of the conditional. We certainly do have to check
+-- that we can sink it past all the code in both A and B, but having
+-- discovered that, we could sink it to its use.
+--
+
+-- -----------------------------------------------------------------------------
+
+type Assignment = (LocalReg, CmmExpr, AbsMem)
+ -- Assignment caches AbsMem, an abstraction of the memory read by
+ -- the RHS of the assignment.
+
+type Assignments = [Assignment]
+ -- A sequence of assignments; kept in *reverse* order
+ -- So the list [ x=e1, y=e2 ] means the sequence of assignments
+ -- y = e2
+ -- x = e1
+
+cmmSink :: DynFlags -> CmmGraph -> CmmGraph
+cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
+ where
+ liveness = cmmLocalLiveness dflags graph
+ getLive l = mapFindWithDefault Set.empty l liveness
+
+ blocks = revPostorder graph
+
+ join_pts = findJoinPoints blocks
+
+ sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ -- pprTrace "sink" (ppr lbl) $
+ blockJoin first final_middle final_last : sink sunk' bs
+ where
+ lbl = entryLabel b
+ (first, middle, last) = blockSplit b
+
+ succs = successors last
+
+ -- Annotate the middle nodes with the registers live *after*
+ -- the node. This will help us decide whether we can inline
+ -- an assignment in the current node or not.
+ live = Set.unions (map getLive succs)
+ live_middle = gen_kill dflags last live
+ ann_middles = annotate dflags live_middle (blockToList middle)
+
+ -- Now sink and inline in this block
+ (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
+ fold_last = constantFoldNode dflags last
+ (final_last, assigs') = tryToInline dflags live fold_last assigs
+
+ -- We cannot sink into join points (successors with more than
+ -- one predecessor), so identify the join points and the set
+ -- of registers live in them.
+ (joins, nonjoins) = partition (`mapMember` join_pts) succs
+ live_in_joins = Set.unions (map getLive joins)
+
+ -- We do not want to sink an assignment into multiple branches,
+ -- so identify the set of registers live in multiple successors.
+ -- This is made more complicated because when we sink an assignment
+ -- into one branch, this might change the set of registers that are
+ -- now live in multiple branches.
+ init_live_sets = map getLive nonjoins
+ live_in_multi live_sets r =
+ case filter (Set.member r) live_sets of
+ (_one:_two:_) -> True
+ _ -> False
+
+ -- Now, drop any assignments that we will not sink any further.
+ (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'
+
+ drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
+ where
+ should_drop = conflicts dflags a final_last
+ || not (isTrivial dflags rhs) && live_in_multi live_sets r
+ || r `Set.member` live_in_joins
+
+ live_sets' | should_drop = live_sets
+ | otherwise = map upd live_sets
+
+ upd set | r `Set.member` set = set `Set.union` live_rhs
+ | otherwise = set
+
+ live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs
+
+ final_middle = foldl' blockSnoc middle' dropped_last
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
+ | l <- succs ]
+
+{- TODO: enable this later, when we have some good tests in place to
+ measure the effect and tune it.
+
+-- small: an expression we don't mind duplicating
+isSmall :: CmmExpr -> Bool
+isSmall (CmmReg (CmmLocal _)) = True --
+isSmall (CmmLit _) = True
+isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
+isSmall (CmmRegOff (CmmLocal _) _) = True
+isSmall _ = False
+-}
+
+--
+-- We allow duplication of trivial expressions: registers (both local and
+-- global) and literals.
+--
+isTrivial :: DynFlags -> CmmExpr -> Bool
+isTrivial _ (CmmReg (CmmLocal _)) = True
+isTrivial dflags (CmmReg (CmmGlobal r)) = -- see Note [Inline GlobalRegs?]
+ if isARM (platformArch (targetPlatform dflags))
+ then True -- CodeGen.Platform.ARM does not have globalRegMaybe
+ else isJust (globalRegMaybe (targetPlatform dflags) r)
+ -- GlobalRegs that are loads from BaseReg are not trivial
+isTrivial _ (CmmLit _) = True
+isTrivial _ _ = False
+
+--
+-- annotate each node with the set of registers live *after* the node
+--
+annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
+annotate dflags live nodes = snd $ foldr ann (live,[]) nodes
+ where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes)
+
+--
+-- Find the blocks that have multiple successors (join points)
+--
+findJoinPoints :: [CmmBlock] -> LabelMap Int
+findJoinPoints blocks = mapFilter (>1) succ_counts
+ where
+ all_succs = concatMap successors blocks
+
+ succ_counts :: LabelMap Int
+ succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs
+
+--
+-- filter the list of assignments to remove any assignments that
+-- are not live in a continuation.
+--
+filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
+filterAssignments dflags live assigs = reverse (go assigs [])
+ where go [] kept = kept
+ go (a@(r,_,_):as) kept | needed = go as (a:kept)
+ | otherwise = go as kept
+ where
+ needed = r `Set.member` live
+ || any (conflicts dflags a) (map toNode kept)
+ -- Note that we must keep assignments that are
+ -- referred to by other assignments we have
+ -- already kept.
+
+-- -----------------------------------------------------------------------------
+-- Walk through the nodes of a block, sinking and inlining assignments
+-- as we go.
+--
+-- On input we pass in a:
+-- * list of nodes in the block
+-- * a list of assignments that appeared *before* this block and
+-- that are being sunk.
+--
+-- On output we get:
+-- * a new block
+-- * a list of assignments that will be placed *after* that block.
+--
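+--
+-- A sketch of a single step: with pending assignment [ x = R1 ] and
+-- the node (y = x + 1), where x is dead afterwards, tryToInline
+-- rewrites the node to (y = R1 + 1) and discards the assignment. If
+-- the node were instead (R1 = 0), the assignment conflicts with it
+-- and is dropped into the block just before that node.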
+
+walk :: DynFlags
+ -> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with
+ -- the set of registers live *after*
+ -- this node.
+
+ -> Assignments -- The current list of
+ -- assignments we are sinking.
+ -- Earlier assignments may refer
+ -- to later ones.
+
+ -> ( Block CmmNode O O -- The new block
+ , Assignments -- Assignments to sink further
+ )
+
+walk dflags nodes assigs = go nodes emptyBlock assigs
+ where
+ go [] block as = (block, as)
+ go ((live,node):ns) block as
+ | shouldDiscard node live = go ns block as
+ -- discard dead assignment
+ | Just a <- shouldSink dflags node2 = go ns block (a : as1)
+ | otherwise = go ns block' as'
+ where
+ node1 = constantFoldNode dflags node
+
+ (node2, as1) = tryToInline dflags live node1 as
+
+ (dropped, as') = dropAssignmentsSimple dflags
+ (\a -> conflicts dflags a node2) as1
+
+ block' = foldl' blockSnoc block dropped `blockSnoc` node2
+
+
+--
+-- Heuristic to decide whether to pick up and sink an assignment
+-- Currently we pick up all assignments to local registers. It might
+-- be profitable to sink assignments to global regs too, but the
+-- liveness analysis doesn't track those (yet) so we can't.
+--
+shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
+shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
+ where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
+shouldSink _ _other = Nothing
+
+--
+-- discard dead assignments. This doesn't do as good a job as
+-- removeDeadAssignments, because it would need multiple passes
+-- to get all the dead code, but it catches the common case of
+-- superfluous reloads from the stack that the stack allocator
+-- leaves behind.
+--
+-- Also we catch "r = r" here. You might think it would fall
+-- out of inlining, but the inliner will see that r is live
+-- after the instruction and choose not to inline r in the rhs.
+--
+shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
+shouldDiscard node live
+ = case node of
+ CmmAssign r (CmmReg r') | r == r' -> True
+ CmmAssign (CmmLocal r) _ -> not (r `Set.member` live)
+ _otherwise -> False
+
+
+toNode :: Assignment -> CmmNode O O
+toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs
+
+dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
+ -> ([CmmNode O O], Assignments)
+dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()
+
+dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
+ -> ([CmmNode O O], Assignments)
+dropAssignments dflags should_drop state assigs
+ = (dropped, reverse kept)
+ where
+ (dropped,kept) = go state assigs [] []
+
+ go _ [] dropped kept = (dropped, kept)
+ go state (assig : rest) dropped kept
+ | conflict = go state' rest (toNode assig : dropped) kept
+ | otherwise = go state' rest dropped (assig:kept)
+ where
+ (dropit, state') = should_drop assig state
+ conflict = dropit || any (conflicts dflags assig) dropped
+
+
+-- -----------------------------------------------------------------------------
+-- Try to inline assignments into a node.
+-- This also does constant folding for primpops, since
+-- inlining opens up opportunities for doing so.
+
+tryToInline
+ :: DynFlags
+ -> LocalRegSet -- set of registers live after this
+ -- node. We cannot inline anything
+ -- that is live after the node, unless
+ -- it is small enough to duplicate.
+ -> CmmNode O x -- The node to inline into
+ -> Assignments -- Assignments to inline
+ -> (
+ CmmNode O x -- New node
+ , Assignments -- Remaining assignments
+ )
+
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
+ where
+ usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
+ usages = foldLocalRegsUsed dflags addUsage emptyUFM node
+
+ go _usages node _skipped [] = (node, [])
+
+ go usages node skipped (a@(l,rhs,_) : rest)
+ | cannot_inline = dont_inline
+ | occurs_none = discard -- Note [discard during inlining]
+ | occurs_once = inline_and_discard
+ | isTrivial dflags rhs = inline_and_keep
+ | otherwise = dont_inline
+ where
+ inline_and_discard = go usages' inl_node skipped rest
+ where usages' = foldLocalRegsUsed dflags addUsage usages rhs
+
+ discard = go usages node skipped rest
+
+ dont_inline = keep node -- don't inline the assignment, keep it
+ inline_and_keep = keep inl_node -- inline the assignment, keep it
+
+ keep node' = (final_node, a : rest')
+ where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
+ usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
+ usages rhs
+ -- we must not inline anything that is mentioned in the RHS
+ -- of a binding that we have already skipped, so we set the
+ -- usages of the regs on the RHS to 2.
+
+ cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
+ || l `elemLRegSet` skipped
+ || not (okToInline dflags rhs node)
+
+ l_usages = lookupUFM usages l
+ l_live = l `elemRegSet` live
+
+ occurs_once = not l_live && l_usages == Just 1
+ occurs_none = not l_live && l_usages == Nothing
+
+ inl_node = improveConditional (mapExpDeep inl_exp node)
+
+ inl_exp :: CmmExpr -> CmmExpr
+ -- inl_exp is where the inlining actually takes place!
+ inl_exp (CmmReg (CmmLocal l')) | l == l' = rhs
+ inl_exp (CmmRegOff (CmmLocal l') off) | l == l'
+ = cmmOffset dflags rhs off
+ -- re-constant fold after inlining
+ inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args
+ inl_exp other = other
+
+
+{- Note [improveConditional]
+
+cmmMachOpFold tries to simplify conditionals to turn things like
+ (a == b) != 1
+into
+ (a != b)
+but there's one case it can't handle: when the comparison is over
+floating-point values, we can't invert it, because floating-point
+comparisons aren't invertible (because of NaNs).
+
+But we *can* optimise this conditional by swapping the true and false
+branches. Given
+ CmmCondBranch ((a >## b) != 1) t f
+we can turn it into
+ CmmCondBranch (a >## b) f t
+
+So here we catch conditionals that weren't optimised by cmmMachOpFold,
+and apply above transformation to eliminate the comparison against 1.
+
+It's tempting to just turn every != into == and then let cmmMachOpFold
+do its thing, but that risks changing a nice fall-through conditional
+into one that requires two jumps. (see swapcond_last in
+GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases where
+we can eliminate a comparison.
+-}
+improveConditional :: CmmNode O x -> CmmNode O x
+improveConditional
+ (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l)
+ | neLike mop, isComparisonExpr x
+ = CmmCondBranch x f t (fmap not l)
+ where
+ neLike (MO_Ne _) = True
+ neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1
+ neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1
+ neLike _ = False
+improveConditional other = other
+
+-- Note [dependent assignments]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- If our assignment list looks like
+--
+-- [ y = e, x = ... y ... ]
+--
+-- We cannot inline x. Remember this list is really in reverse order,
+-- so it means x = ... y ...; y = e
+--
+-- Hence if we inline x, the outer assignment to y will capture the
+-- reference in x's right hand side.
+--
+-- In this case we should rename the y in x's right-hand side,
+-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
+-- Now we can go ahead and inline x.
+--
+-- For now we do nothing, because this would require putting
+-- everything inside UniqSM.
+--
+-- One more variant of this (#7366):
+--
+-- [ y = e, y = z ]
+--
+-- If we don't want to inline y = e, because y is used many times, we
+-- might still be tempted to inline y = z (because we always inline
+-- trivial rhs's). But of course we can't, because y is equal to e,
+-- not z.
+
+-- Note [discard during inlining]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Opportunities to discard assignments sometimes appear after we've
+-- done some inlining. Here's an example:
+--
+-- x = R1;
+-- y = P64[x + 7];
+-- z = P64[x + 15];
+-- /* z is dead */
+-- R1 = y & (-8);
+--
+-- The x assignment is trivial, so we inline it in the RHS of y, and
+-- keep both x and y. z gets dropped because it is dead, then we
+-- inline y, and we have a dead assignment to x. If we don't notice
+-- that x is dead in tryToInline, we end up retaining it.
+
+addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
+addUsage m r = addToUFM_C (+) m r 1
+
+regsUsedIn :: LRegSet -> CmmExpr -> Bool
+regsUsedIn ls _ | nullLRegSet ls = False
+regsUsedIn ls e = wrapRecExpf f e False
+ where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True
+ f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True
+ f _ z = z
+
+-- we don't inline into CmmUnsafeForeignCall if the expression refers
+-- to global registers. This is a HACK to avoid global registers
+-- clashing with C argument-passing registers, really the back-end
+-- ought to be able to handle it properly, but currently neither PprC
+-- nor the NCG can do it. See Note [Register parameter passing]
+-- See also GHC.StgToCmm.Foreign.load_args_into_temps.
+okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+okToInline dflags expr node@(CmmUnsafeForeignCall{}) =
+ not (globalRegistersConflict dflags expr node)
+okToInline _ _ _ = True
+
+-- -----------------------------------------------------------------------------
+
+-- | @conflicts (r,e) node@ is @False@ if and only if the assignment
+-- @r = e@ can be safely commuted past statement @node@.
+conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
+conflicts dflags (r, rhs, addr) node
+
+ -- (1) node defines registers used by rhs of assignment. This catches
+ -- assignments and all three kinds of calls. See Note [Sinking and calls]
+ | globalRegistersConflict dflags rhs node = True
+ | localRegistersConflict dflags rhs node = True
+
+ -- (2) node uses register defined by assignment
+ | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True
+
+ -- (3) a store to an address conflicts with a read of the same memory
+ | CmmStore addr' e <- node
+ , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True
+
+ -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
+ | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True
+ | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
+ | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
+
+ -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
+ | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True
+
+ -- (6) native calls clobber any memory
+ | CmmCall{} <- node, memConflicts addr AnyMem = True
+
+ -- (7) otherwise, no conflict
+ | otherwise = False
+
+-- Returns True if node defines any global registers that are used in the
+-- Cmm expression
+globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+globalRegistersConflict dflags expr node =
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
+ False node
+
+-- Returns True if node defines any local registers that are used in the
+-- Cmm expression
+localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+localRegistersConflict dflags expr node =
+ foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
+ False node
+
+-- Note [Sinking and calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
+-- and unsafe foreign (CmmUnsafeForeignCall). We perform the sinking pass
+-- after stack layout (see Note [Sinking after stack layout]), which leads
+-- to two invariants related to calls:
+--
+-- a) during stack layout phase all safe foreign calls are turned into
+-- unsafe foreign calls (see Note [Lower safe foreign calls]). This
+-- means that we will never encounter CmmForeignCall node when running
+-- sinking after stack layout
+--
+-- b) stack layout saves all variables live across a call on the stack
+-- just before making a call (remember we are not sinking assignments to
+-- stack):
+--
+-- L1:
+-- x = R1
+-- P64[Sp - 16] = L2
+-- P64[Sp - 8] = x
+-- Sp = Sp - 16
+-- call f() returns L2
+-- L2:
+--
+-- We will attempt to sink { x = R1 } but we will detect conflict with
+-- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
+-- checking whether it conflicts with { call f() }. In this way we will
+-- never need to check any assignment conflicts with CmmCall. Remember
+-- that we still need to check for potential memory conflicts.
+--
+-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
+-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
+-- This assumption holds only when we do sinking after stack layout. If we run
+-- it before stack layout we need to check for possible conflicts with all three
+-- kinds of calls. Our `conflicts` function does that by using the generic
+-- foldRegsDefd and foldRegsUsed functions defined in the DefinerOfRegs and
+-- UserOfRegs typeclasses.
+--
+
+-- An abstraction of memory read or written.
+data AbsMem
+ = NoMem -- no memory accessed
+ | AnyMem -- arbitrary memory
+ | HeapMem -- definitely heap memory
+ | StackMem -- definitely stack memory
+  | SpMem               -- <size>[Sp+n]
+      {-# UNPACK #-} !Int   -- n: the offset from Sp, in bytes
+      {-# UNPACK #-} !Int   -- the width of the access, in bytes
+
+-- Having SpMem is important because it lets us float loads from Sp
+-- past stores to Sp as long as they don't overlap, and this helps to
+-- unravel some long sequences of
+-- x1 = [Sp + 8]
+-- x2 = [Sp + 16]
+-- ...
+-- [Sp + 8] = xi
+-- [Sp + 16] = xj
+--
+-- Note that SpMem is invalidated if Sp is changed, but the definition
+-- of 'conflicts' above handles that.
+
+-- ToDo: this won't currently fix the following commonly occurring code:
+-- x1 = [R1 + 8]
+-- x2 = [R1 + 16]
+-- ..
+-- [Hp - 8] = x1
+-- [Hp - 16] = x2
+-- ..
+
+-- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
+-- assignments to [Hp + n] do not conflict with any other heap memory,
+-- but this is tricky to nail down. What if we had
+--
+-- x = Hp + n
+-- [x] = ...
+--
+-- the store to [x] should be "new heap", not "old heap".
+-- Furthermore, you could imagine that if we started inlining
+-- functions in Cmm then there might well be reads of heap memory
+-- that was written in the same basic block. To take advantage of
+-- non-aliasing of heap memory we will have to be more clever.
+
+-- Note [Foreign calls clobber heap]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- It is tempting to say that foreign calls clobber only
+-- non-heap/stack memory, but unfortunately we break this invariant in
+-- the RTS. For example, in stg_catch_retry_frame we call
+-- stmCommitNestedTransaction() which modifies the contents of the
+-- TRec it is passed (this actually caused incorrect code to be
+-- generated).
+--
+-- Since the invariant is true for the majority of foreign calls,
+-- perhaps we ought to have a special annotation for calls that can
+-- modify heap/stack memory. For now we just use the conservative
+-- definition here.
+--
+-- Some CallishMachOps imply a memory barrier (e.g. AtomicRMW), and
+-- therefore we should never float any memory operations across one of
+-- these calls.
+
+
+bothMems :: AbsMem -> AbsMem -> AbsMem
+bothMems NoMem x = x
+bothMems x NoMem = x
+bothMems HeapMem HeapMem = HeapMem
+bothMems StackMem StackMem = StackMem
+bothMems (SpMem o1 w1) (SpMem o2 w2)
+ | o1 == o2 = SpMem o1 (max w1 w2)
+ | otherwise = StackMem
+bothMems SpMem{} StackMem = StackMem
+bothMems StackMem SpMem{} = StackMem
+bothMems _ _ = AnyMem
+
+memConflicts :: AbsMem -> AbsMem -> Bool
+memConflicts NoMem _ = False
+memConflicts _ NoMem = False
+memConflicts HeapMem StackMem = False
+memConflicts StackMem HeapMem = False
+memConflicts SpMem{} HeapMem = False
+memConflicts HeapMem SpMem{} = False
+memConflicts (SpMem o1 w1) (SpMem o2 w2)
+ | o1 < o2 = o1 + w1 > o2
+ | otherwise = o2 + w2 > o1
+memConflicts _ _ = True
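+
+-- For example: SpMem 8 8 and SpMem 16 8 do not conflict (the slots
+-- are disjoint, since 8 + 8 <= 16), whereas SpMem 8 16 and SpMem 16 8
+-- overlap and do conflict. Heap and stack accesses never conflict
+-- with each other.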
+
+exprMem :: DynFlags -> CmmExpr -> AbsMem
+exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
+exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es)
+exprMem _ _ = NoMem
+
+loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
+loadAddr dflags e w =
+ case e of
+ CmmReg r -> regAddr dflags r 0 w
+ CmmRegOff r i -> regAddr dflags r i w
+ _other | regUsedIn dflags spReg e -> StackMem
+ | otherwise -> AnyMem
+
+regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
+regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w)
+regAddr _ (CmmGlobal Hp) _ _ = HeapMem
+regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
+regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
+regAddr _ _ _ _ = AnyMem
+
+{-
+Note [Inline GlobalRegs?]
+
+Should we freely inline GlobalRegs?
+
+Actually it doesn't make a huge amount of difference either way, so we
+*do* currently treat GlobalRegs as "trivial" and inline them
+everywhere, but for what it's worth, here is what I discovered when I
+(SimonM) looked into this:
+
+Common sense says we should not inline GlobalRegs, because when we
+have
+
+ x = R1
+
+the register allocator will coalesce this assignment, generating no
+code, and simply record the fact that x is bound to $rbx (or
+whatever). Furthermore, if we were to sink this assignment, then the
+range of code over which R1 is live increases, and the range of code
+over which x is live decreases. All things being equal, it is better
+for x to be live than R1, because R1 is a fixed register whereas x can
+live in any register. So we should neither sink nor inline 'x = R1'.
+
+However, not inlining GlobalRegs can have surprising
+consequences. e.g. (cgrun020)
+
+ c3EN:
+ _s3DB::P64 = R1;
+ _c3ES::P64 = _s3DB::P64 & 7;
+ if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV;
+ c3EU:
+ _s3DD::P64 = P64[_s3DB::P64 + 6];
+ _s3DE::P64 = P64[_s3DB::P64 + 14];
+ I64[Sp - 8] = c3F0;
+ R1 = _s3DE::P64;
+ P64[Sp] = _s3DD::P64;
+
+inlining the GlobalReg gives:
+
+ c3EN:
+ if (R1 & 7 >= 2) goto c3EU; else goto c3EV;
+ c3EU:
+ I64[Sp - 8] = c3F0;
+ _s3DD::P64 = P64[R1 + 6];
+ R1 = P64[R1 + 14];
+ P64[Sp] = _s3DD::P64;
+
+but if we don't inline the GlobalReg, instead we get:
+
+ _s3DB::P64 = R1;
+ if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV;
+ c3EU:
+ I64[Sp - 8] = c3F0;
+ R1 = P64[_s3DB::P64 + 14];
+ P64[Sp] = P64[_s3DB::P64 + 6];
+
+This looks better - we managed to inline _s3DD - but in fact it
+generates an extra reg-reg move:
+
+.Lc3EU:
+ movq $c3F0_info,-8(%rbp)
+ movq %rbx,%rax
+ movq 14(%rbx),%rbx
+ movq 6(%rax),%rax
+ movq %rax,(%rbp)
+
+because _s3DB is now live across the R1 assignment, we lost the
+benefit of coalescing.
+
+Who is at fault here? Perhaps if we knew that _s3DB was an alias for
+R1, then we would not sink a reference to _s3DB past the R1
+assignment. Or perhaps we *should* do that - we might gain by sinking
+it, despite losing the coalescing opportunity.
+
+Sometimes not inlining global registers wins by virtue of the rule
+about not inlining into arguments of a foreign call, e.g. (T7163) this
+is what happens when we inlined F1:
+
+ _s3L2::F32 = F1;
+ _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32);
+ (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(_c3O3::F32);
+
+but if we don't inline F1:
+
+ (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(%MO_F_Mul_W32(_s3L2::F32,
+ 10.0 :: W32));
+-}
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
new file mode 100644
index 0000000000..e89fadfd2e
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -0,0 +1,502 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch (
+ SwitchTargets,
+ mkSwitchTargets,
+ switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
+ mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
+ switchTargetsToList, eqSwitchTargetWith,
+
+ SwitchPlan(..),
+ targetSupportsSwitch,
+ createSwitchPlan,
+ ) where
+
+import GhcPrelude
+
+import Outputable
+import DynFlags
+import GHC.Cmm.Dataflow.Label (Label)
+
+import Data.Maybe
+import Data.List (groupBy)
+import Data.Function (on)
+import qualified Data.Map as M
+
+-- Note [Cmm Switches, the general plan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Compiling a high-level switch statement, as it comes out of an STG case
+-- expression, for example, allows for a surprising number of design decisions.
+-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
+-- well as from the actual code generation.
+--
+-- The overall plan is:
+-- * The Stg → Cmm transformation creates a single `SwitchTargets` in
+-- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
+-- At this stage, they are unsuitable for code generation.
+-- * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
+-- switch statements with code that is suitable for code generation, i.e.
+--   a nice balanced tree of decisions with dense jump tables in the leaves.
+-- The actual planning of this tree is performed in pure code in createSwitchPlan
+-- in this module. See Note [createSwitchPlan].
+-- * The actual code generation will not do any further processing and
+--   implements each CmmSwitch with a jump table.
+--
+-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
+-- statements alone, as we can turn a SwitchTargets value into a nice
+-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
+--
+-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] for why the two
+-- modules are separated.
+
+-----------------------------------------------------------------------------
+-- Note [Magic Constants in GHC.Cmm.Switch]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- There are a lot of heuristics here that depend on magic values where it is
+-- hard to determine the "best" value (for whatever that means). These are the
+-- magic values:
+
+-- | Number of consecutive default values allowed in a jump table. If there are
+-- more of them, the jump table is split.
+--
+-- Currently 7, as it costs 7 words of additional code when a jump table is
+-- split (at least on x64, determined experimentally).
+maxJumpTableHole :: Integer
+maxJumpTableHole = 7
+
+-- | Minimum size of a jump table. If the number of entries is smaller, the
+-- switch is implemented using conditionals.
+-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
+minJumpTableSize :: Int
+minJumpTableSize = 5
+
+-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
+minJumpTableOffset :: Integer
+minJumpTableOffset = 2
+
+
+-----------------------------------------------------------------------------
+-- Switch Targets
+
+-- Note [SwitchTargets]
+-- ~~~~~~~~~~~~~~~~~~~~
+--
+-- The branches of a switch are stored in a SwitchTargets, which consists of an
+-- (optional) default jump target, and a map from values to jump targets.
+--
+-- If the default jump target is absent, the behaviour of the switch outside the
+-- values of the map is undefined.
+--
+-- We use an Integer for the keys of the map so that it can be used for
+-- switches on unsigned as well as signed integers.
+--
+-- The map may be empty (we prune out-of-range branches here, so the pruning
+-- itself may empty it).
+--
+-- Before code generation, the table needs to be brought into a form where all
+-- entries are non-negative, so that it can be compiled into a jump table.
+-- See switchTargetsToTable.
+
+
+-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
+-- value, and knows whether the value is signed, the possible range, an
+-- optional default value and a map from values to jump labels.
+data SwitchTargets =
+ SwitchTargets
+ Bool -- Signed values
+ (Integer, Integer) -- Range
+ (Maybe Label) -- Default value
+ (M.Map Integer Label) -- The branches
+ deriving (Show, Eq)
+
+-- | The smart constructor mkSwitchTargets normalises the map a bit:
+-- * No entries outside the range
+-- * No entries equal to the default
+-- * No default if all elements have explicit values
+mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
+mkSwitchTargets signed range@(lo,hi) mbdef ids
+ = SwitchTargets signed range mbdef' ids'
+ where
+ ids' = dropDefault $ restrict ids
+ mbdef' | defaultNeeded = mbdef
+ | otherwise = Nothing
+
+ -- Drop entries outside the range, if there is a range
+ restrict = restrictMap (lo,hi)
+
+ -- Drop entries that equal the default, if there is a default
+ dropDefault | Just l <- mbdef = M.filter (/= l)
+ | otherwise = id
+
+ -- Check if the default is still needed
+ defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
+
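+-- For example (a sketch; l1 and ldef stand for two hypothetical labels):
+-- given the range (0,7), the default ldef, and the map
+-- M.fromList [(0,l1), (5,ldef), (9,l1)], the entry at 9 is dropped (out of
+-- range), the entry at 5 is dropped (it jumps to the default anyway), and
+-- the default is kept, because the remaining map does not cover the range:
+--
+--   mkSwitchTargets False (0,7) (Just ldef) m
+--     == SwitchTargets False (0,7) (Just ldef) (M.fromList [(0,l1)])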
+
+-- | Changes all labels mentioned in the SwitchTargets value
+mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
+mapSwitchTargets f (SwitchTargets signed range mbdef branches)
+ = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
+
+-- | Returns the list of non-default branches of the SwitchTargets value
+switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
+switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
+
+-- | Return the default label of the SwitchTargets value
+switchTargetsDefault :: SwitchTargets -> Maybe Label
+switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
+
+-- | Return the range of the SwitchTargets value
+switchTargetsRange :: SwitchTargets -> (Integer, Integer)
+switchTargetsRange (SwitchTargets _ range _ _) = range
+
+-- | Return whether this is used for a signed value
+switchTargetsSigned :: SwitchTargets -> Bool
+switchTargetsSigned (SwitchTargets signed _ _ _) = signed
+
+-- | switchTargetsToTable creates a dense jump table, usable for code generation.
+--
+-- Also returns an offset to add to the value; the list is 0-based on the
+-- result of that addition.
+--
+-- The conversion from Integer to Int is a bit of a wart, as the actual
+-- scrutinee might be an unsigned word, but it just works, due to wrap-around
+-- arithmetic (as verified by the CmmSwitchTest test case).
+switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
+switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
+ = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
+ where
+ labelFor i = case M.lookup i branches of Just l -> Just l
+ Nothing -> mbdef
+ start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset]
+ | otherwise = lo
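+
+-- For example (a sketch; l1, l3, and ldef are hypothetical labels): for a
+-- switch over the range (1,4) with branches {1 -> l1, 3 -> l3} and default
+-- ldef, the start is rounded down to 0 (see Note [Jump Table Offset] below),
+-- so no subtraction is needed and slot 0 simply holds the default:
+--
+--   switchTargetsToTable (SwitchTargets False (1,4) (Just ldef) m)
+--     == (0, [Just ldef, Just l1, Just ldef, Just l3, Just ldef])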
+
+-- Note [Jump Table Offset]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Usually, the code for a jump table starting at x will first subtract x from
+-- the value, to avoid a large number of empty entries. But if x is very small,
+-- the extra entries are no worse than the subtraction in terms of code size, and
+-- not having to do the subtraction is quicker.
+--
+-- I.e. instead of
+-- _u20N:
+-- leaq -1(%r14),%rax
+-- jmp *_n20R(,%rax,8)
+-- _n20R:
+-- .quad _c20p
+-- .quad _c20q
+-- do
+-- _u20N:
+-- jmp *_n20Q(,%r14,8)
+--
+-- _n20Q:
+-- .quad 0
+-- .quad _c20p
+-- .quad _c20q
+-- .quad _c20r
+
+-- | The list of all labels occurring in the SwitchTargets value.
+switchTargetsToList :: SwitchTargets -> [Label]
+switchTargetsToList (SwitchTargets _ _ mbdef branches)
+ = maybeToList mbdef ++ M.elems branches
+
+-- | Groups cases with equal targets, suitable for pretty-printing to a
+-- c-like switch statement with fall-through semantics.
+switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
+switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
+ where
+ groups = map (\xs -> (map fst xs, snd (head xs))) $
+ groupBy ((==) `on` snd) $
+ M.toList branches
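+
+-- For example (with l1, l2, and ldef hypothetical labels):
+--
+--   switchTargetsFallThrough
+--       (SwitchTargets False (1,3) (Just ldef)
+--                      (M.fromList [(1,l1), (2,l1), (3,l2)]))
+--     == ([([1,2], l1), ([3], l2)], Just ldef)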
+
+-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim"
+eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
+eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
+ signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
+ where
+ goMB Nothing Nothing = True
+ goMB (Just l1) (Just l2) = l1 `eq` l2
+ goMB _ _ = False
+ goList [] [] = True
+ goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
+ goList _ _ = False
+
+-----------------------------------------------------------------------------
+-- Code generation for Switches
+
+
+-- | A SwitchPlan abstractly describes how a Switch statement ought to be
+-- implemented. See Note [createSwitchPlan]
+data SwitchPlan
+ = Unconditionally Label
+ | IfEqual Integer Label SwitchPlan
+ | IfLT Bool Integer SwitchPlan SwitchPlan
+ | JumpTable SwitchTargets
+ deriving Show
+--
+-- Note [createSwitchPlan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- A SwitchPlan describes how a Switch statement is to be broken down into
+-- smaller pieces suitable for code generation.
+--
+-- createSwitchPlan creates such a switch plan, in these steps:
+-- 1. It splits the switch statement at segments of non-default values that
+-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch]
+-- 2. Too small jump tables should be avoided, so we break up smaller pieces
+-- in breakTooSmall.
+-- 3. We fill in the segments between those pieces with a jump to the default
+-- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
+-- 4. We find and replace two less-than branches by a single equal-to-test in
+-- findSingleValues
+-- 5. The thus collected pieces are assembled to a balanced binary tree.
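+--
+-- As a small worked example (a sketch; l1, l2, l3, and ldef are hypothetical
+-- labels): for branches {1 -> l1, 2 -> l2, 3 -> l3} over the range (0,10)
+-- with default ldef, three entries are too few for a jump table, so they are
+-- broken into singletons (step 2), interleaved with jumps to the default
+-- (step 3), and assembled into a balanced tree (step 5), giving:
+--
+--   IfLT False 3
+--     (IfLT False 2 (IfLT False 1 (Unconditionally ldef) (Unconditionally l1))
+--                   (Unconditionally l2))
+--     (IfLT False 4 (Unconditionally l3) (Unconditionally ldef))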
+
+{-
+ Note [Two alts + default]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Discussion and a bit more info at #14644
+
+When dealing with a switch of the form:
+switch(e) {
+ case 1: goto l1;
+ case 3000: goto l2;
+ default: goto ldef;
+}
+
+If we treat it as a sparse jump table we would generate:
+
+if (e > 3000) //Check if value is outside of the jump table.
+ goto ldef;
+else {
+ if (e < 3000) { //Compare to upper value
+ if(e != 1) //Compare to remaining value
+ goto ldef;
+ else
+      goto l1;
+ }
+ else
+    goto l2;
+}
+
+Instead we special-case this to:
+
+if (e==1) goto l1;
+else if (e==3000) goto l2;
+else goto ldef;
+
+This means we have:
+* Fewer comparisons for 1 and values <3000
+* The same number for 3000
+* One more for values >3000
+
+This improves code in a few ways:
+* One comparison fewer means smaller code, which helps with the cache.
+* It exchanges a taken jump for two jumps not taken in the >range case.
+  Jumps not taken are cheaper (see Agner's guides), making this about as fast.
+* For all other cases the first range check is removed making it faster.
+
+The end result is that the change is not measurably slower for the case
+>3000 and faster for the other cases.
+
+This makes running this kind of match in an inner loop cheaper by 10-20%
+depending on the data.
+In nofib this improves wheel-sieve1 by 4-9% depending on problem
+size.
+
+We could also add a second conditional jump after the comparison to
+keep the range check like this:
+ cmp 3000, rArgument
+ jg <default>
+ je <branch 2>
+While this is fairly cheap it made no big difference for the >3000 case
+and slowed down all other cases making it not worthwhile.
+-}
+
+
+-- | Does the target support switch out of the box? Then leave this to the
+-- target!
+targetSupportsSwitch :: HscTarget -> Bool
+targetSupportsSwitch HscC = True
+targetSupportsSwitch HscLlvm = True
+targetSupportsSwitch _ = False
+
+-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
+-- down into smaller pieces suitable for code generation.
+createSwitchPlan :: SwitchTargets -> SwitchPlan
+-- Let's do the common case of a singleton map quickly and efficiently (#10677)
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+ | [(x, l)] <- M.toList m
+ = IfEqual x l (Unconditionally defLabel)
+-- And another common case, matching "booleans"
+createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
+ | [(x1, l1), (_x2,l2)] <- M.toAscList m
+  -- Checking that |range| = 2 is enough when we have two unique literals
+ , hi - lo == 1
+ = IfEqual x1 l1 (Unconditionally l2)
+-- See Note [Two alts + default]
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+ | [(x1, l1), (x2,l2)] <- M.toAscList m
+ = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
+createSwitchPlan (SwitchTargets signed range mbdef m) =
+ -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
+ plan
+ where
+ pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
+ flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
+ plan = buildTree signed $ flatPlan
+
+
+---
+--- Step 1: Splitting at large holes
+---
+splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
+splitAtHoles _ m | M.null m = []
+splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
+ where
+ holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
+ nonHoles = reassocTuples lo holes hi
+
+ (lo,_) = M.findMin m
+ (hi,_) = M.findMax m
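+
+-- For example, with a maximal hole size of 7 and hypothetical values a, b, c,
+-- the gap between 2 and 20 is a hole, while the gap between 1 and 2 is not:
+--
+--   splitAtHoles 7 (M.fromList [(1,a), (2,b), (20,c)])
+--     == [M.fromList [(1,a), (2,b)], M.fromList [(20,c)]]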
+
+---
+--- Step 2: Avoid small jump tables
+---
+-- We do not want jump tables below a certain size. This breaks them up
+-- (into singleton maps, for now).
+breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
+breakTooSmall m
+ | M.size m > minJumpTableSize = [m]
+ | otherwise = [M.singleton k v | (k,v) <- M.toList m]
+
+---
+--- Step 3: Fill in the blanks
+---
+
+-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer in between
+-- every two entries, dividing the range.
+-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
+-- the expression is < n, and plan2 otherwise.
+
+type FlatSwitchPlan = SeparatedList Integer SwitchPlan
+
+mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
+
+-- If we have no default (i.e. undefined where there is no entry), we can
+-- branch at the minimum of each map
+mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
+mkFlatSwitchPlan signed Nothing _ (m:ms)
+ = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
+
+-- If we have a default, we have to interleave segments that jump
+-- to the default between the maps
+mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
+ where
+ go (lo,hi) []
+ | lo > hi = []
+ | otherwise = [(lo, Unconditionally l)]
+ go (lo,hi) (m:ms)
+ | lo < min
+ = (lo, Unconditionally l) : go (min,hi) (m:ms)
+ | lo == min
+ = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
+ | otherwise
+ = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
+ where
+ min = fst (M.findMin m)
+ max = fst (M.findMax m)
+
+
+mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
+mkLeafPlan signed mbdef m
+ | [(_,l)] <- M.toList m -- singleton map
+ = Unconditionally l
+ | otherwise
+ = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
+ where
+ min = fst (M.findMin m)
+ max = fst (M.findMax m)
+
+---
+--- Step 4: Reduce the number of branches using ==
+---
+
+-- A sequence of three unconditional jumps, where the outer two point to the
+-- same label and the bounds are off by exactly one, can be improved
+findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
+findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
+ | l == l3 && i + 1 == i'
+ = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
+findSingleValues (p, (i,p'):xs)
+ = (p,i) `consSL` findSingleValues (p', xs)
+findSingleValues (p, [])
+ = (p, [])
+
+---
+--- Step 5: Actually build the tree
+---
+
+-- Build a balanced tree from a separated list
+buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
+buildTree _ (p,[]) = p
+buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
+ where
+ (sl1, m, sl2) = divideSL sl
+
+
+
+--
+-- Utility data type: Non-empty lists with extra markers in between each
+-- element:
+--
+
+type SeparatedList b a = (a, [(b,a)])
+
+consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
+consSL (a, b) (a', xs) = (a, (b,a'):xs)
+
+divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
+divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
+divideSL (p,xs) = ((p, xs1), m, (p', xs2))
+ where
+ (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
+
+--
+-- Other Utilities
+--
+
+restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
+restrictMap (lo,hi) m = mid
+ where (_, mid_hi) = M.split (lo-1) m
+ (mid, _) = M.split (hi+1) mid_hi
+
+-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
+reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
+reassocTuples initial [] last
+ = [(initial,last)]
+reassocTuples initial ((a,b):tuples) last
+ = (initial,a) : reassocTuples b tuples last
+
+-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- I (Joachim) separated the two somewhat closely related modules
+--
+-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy
+-- for implementing a Cmm switch (createSwitchPlan), and
+-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification,
+--
+-- for these reasons:
+--
+-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. it does not depend
+--   on any GHC-specific modules at all (with the exception of Outputable and
+--   GHC.Cmm.Dataflow.Label).
+-- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
+-- the dependency tree.
+-- * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but
+-- used in GHC.Cmm.Node.
+-- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
+-- for more parallelism when building GHC.
+-- * The interaction between the modules is very explicit and easy to
+-- understand, due to the small and simple interface.
diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs
new file mode 100644
index 0000000000..dfac116764
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch/Implement.hs
@@ -0,0 +1,116 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch.Implement
+ ( cmmImplementSwitchPlans
+ )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import UniqSupply
+import DynFlags
+
+--
+-- This module replaces Switch statements, as generated by the Stg -> Cmm
+-- transformation, which might be huge and sparse and hence unsuitable for
+-- assembly code, with proper constructs (if-then-else trees, dense jump tables).
+--
+-- The actual, abstract strategy is determined by createSwitchPlan in
+-- GHC.Cmm.Switch and returned as a SwitchPlan; here we merely implement it in
+-- terms of Cmm code. See Note [Cmm Switches, the general plan] in GHC.Cmm.Switch.
+--
+-- This division into different modules is partly to clearly separate concerns,
+-- and partly because createSwitchPlan needs access to the constructors of
+-- SwitchTargets, a data type that GHC.Cmm.Switch exports only abstractly.
+--
+
+-- | Traverses the 'CmmGraph', making sure that 'CmmSwitch' nodes are suitable
+-- for code generation.
+cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph
+cmmImplementSwitchPlans dflags g
+ -- Switch generation done by backend (LLVM/C)
+ | targetSupportsSwitch (hscTarget dflags) = return g
+ | otherwise = do
+ blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g)
+ return $ ofBlockList (g_entry g) blocks'
+
+visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock]
+visitSwitches dflags block
+ | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block
+ = do
+ let plan = createSwitchPlan ids
+ -- See Note [Floating switch expressions]
+ (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr
+
+ (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan
+
+ let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail
+
+ return $ block' : newBlocks
+
+ | otherwise
+ = return [block]
+
+-- Note [Floating switch expressions]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-- When we translate a sparse switch into a search tree we would like
+-- to compute the value we compare against only once.
+
+-- For this purpose we assign the switch expression to a local register
+-- and then use this register when constructing the actual binary tree.
+
+-- This is important as the expression could contain expensive code like
+-- memory loads or divisions which we REALLY don't want to duplicate.
+
+-- This happened in parts of the handwritten RTS Cmm code. See also #16933
+
+-- See Note [Floating switch expressions]
+floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr)
+floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg)
+floatSwitchExpr dflags expr = do
+ (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM
+ return (BMiddle assign, expr')
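+
+-- For example (a sketch in Cmm pseudo-syntax): a switch scrutinising a
+-- memory load, such as
+--
+--   switch [0 .. 10] I64[Sp] { ... }
+--
+-- becomes, roughly,
+--
+--   _u::I64 = I64[Sp];
+--   switch [0 .. 10] _u::I64 { ... }
+--
+-- so that every branch of the resulting search tree compares against the
+-- cheap local register _u instead of repeating the load.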
+
+
+-- Implementing a switch plan (returning a tail block)
+implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock])
+implementSwitchPlan dflags scope expr = go
+ where
+ go (Unconditionally l)
+ = return (emptyBlock `blockJoinTail` CmmBranch l, [])
+ go (JumpTable ids)
+ = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, [])
+ go (IfLT signed i ids1 ids2)
+ = do
+ (bid1, newBlocks1) <- go' ids1
+ (bid2, newBlocks2) <- go' ids2
+
+ let lt | signed = cmmSLtWord
+ | otherwise = cmmULtWord
+ scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i
+ lastNode = CmmCondBranch scrut bid1 bid2 Nothing
+ lastBlock = emptyBlock `blockJoinTail` lastNode
+ return (lastBlock, newBlocks1++newBlocks2)
+ go (IfEqual i l ids2)
+ = do
+ (bid2, newBlocks2) <- go' ids2
+
+ let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i
+ lastNode = CmmCondBranch scrut bid2 l Nothing
+ lastBlock = emptyBlock `blockJoinTail` lastNode
+ return (lastBlock, newBlocks2)
+
+ -- Same but returning a label to branch to
+ go' (Unconditionally l)
+ = return (l, [])
+ go' p
+ = do
+ bid <- mkBlockId `fmap` getUniqueM
+ (last, newBlocks) <- go p
+ let block = CmmEntry bid scope `blockJoinHead` last
+ return (bid, block: newBlocks)
diff --git a/compiler/GHC/Cmm/Type.hs b/compiler/GHC/Cmm/Type.hs
new file mode 100644
index 0000000000..867a260078
--- /dev/null
+++ b/compiler/GHC/Cmm/Type.hs
@@ -0,0 +1,432 @@
+module GHC.Cmm.Type
+ ( CmmType -- Abstract
+ , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
+ , cInt
+ , cmmBits, cmmFloat
+ , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
+ , isFloatType, isGcPtrType, isBitsType
+ , isWord32, isWord64, isFloat64, isFloat32
+
+ , Width(..)
+ , widthInBits, widthInBytes, widthInLog, widthFromBytes
+ , wordWidth, halfWordWidth, cIntWidth
+ , halfWordMask
+ , narrowU, narrowS
+ , rEP_CostCentreStack_mem_alloc
+ , rEP_CostCentreStack_scc_count
+ , rEP_StgEntCounter_allocs
+ , rEP_StgEntCounter_allocd
+
+ , ForeignHint(..)
+
+ , Length
+ , vec, vec2, vec4, vec8, vec16
+ , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
+ , cmmVec
+ , vecLength, vecElemType
+ , isVecType
+ )
+where
+
+
+import GhcPrelude
+
+import DynFlags
+import FastString
+import Outputable
+
+import Data.Word
+import Data.Int
+
+-----------------------------------------------------------------------------
+-- CmmType
+-----------------------------------------------------------------------------
+
+ -- NOTE: CmmType is an abstract type, not exported from this
+ -- module so you can easily change its representation
+ --
+ -- However Width is exported in a concrete way,
+ -- and is used extensively in pattern-matching
+
+data CmmType -- The important one!
+ = CmmType CmmCat Width
+
+data CmmCat -- "Category" (not exported)
+ = GcPtrCat -- GC pointer
+ | BitsCat -- Non-pointer
+ | FloatCat -- Float
+ | VecCat Length CmmCat -- Vector
+ deriving( Eq )
+ -- See Note [Signed vs unsigned] at the end
+
+instance Outputable CmmType where
+ ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
+
+instance Outputable CmmCat where
+ ppr FloatCat = text "F"
+ ppr GcPtrCat = text "P"
+ ppr BitsCat = text "I"
+ ppr (VecCat n cat) = ppr cat <> text "x" <> ppr n <> text "V"
+
+-- Why is CmmType stratified? For native code generation,
+-- most of the time you just want to know what sort of register
+-- to put the thing in, and for this you need to know how
+-- many bits the thing has, and whether it goes in a floating-point
+-- register. By contrast, the distinction between GcPtr and
+-- GcNonPtr is of interest to only a few parts of the code generator.
+
+-------- Equality on CmmType --------------
+-- CmmType is *not* an instance of Eq; sometimes we care about the
+-- Gc/NonGc distinction, and sometimes we don't
+-- So we use an explicit function to force you to think about it
+cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
+cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2
+
+cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
+ -- This equality is temporary; used in CmmLint
+ -- but the RTS files are not yet well-typed wrt pointers
+cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
+ = c1 `weak_eq` c2 && w1==w2
+ where
+ weak_eq :: CmmCat -> CmmCat -> Bool
+ FloatCat `weak_eq` FloatCat = True
+ FloatCat `weak_eq` _other = False
+ _other `weak_eq` FloatCat = False
+ (VecCat l1 cat1) `weak_eq` (VecCat l2 cat2) = l1 == l2
+ && cat1 `weak_eq` cat2
+ (VecCat {}) `weak_eq` _other = False
+ _other `weak_eq` (VecCat {}) = False
+ _word1 `weak_eq` _word2 = True -- Ignores GcPtr
+
+--- Simple operations on CmmType -----
+typeWidth :: CmmType -> Width
+typeWidth (CmmType _ w) = w
+
+cmmBits, cmmFloat :: Width -> CmmType
+cmmBits = CmmType BitsCat
+cmmFloat = CmmType FloatCat
+
+-------- Common CmmTypes ------------
+-- Floats and words of specific widths
+b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType
+b8 = cmmBits W8
+b16 = cmmBits W16
+b32 = cmmBits W32
+b64 = cmmBits W64
+b128 = cmmBits W128
+b256 = cmmBits W256
+b512 = cmmBits W512
+f32 = cmmFloat W32
+f64 = cmmFloat W64
+
+-- CmmTypes of native word widths
+bWord :: DynFlags -> CmmType
+bWord dflags = cmmBits (wordWidth dflags)
+
+bHalfWord :: DynFlags -> CmmType
+bHalfWord dflags = cmmBits (halfWordWidth dflags)
+
+gcWord :: DynFlags -> CmmType
+gcWord dflags = CmmType GcPtrCat (wordWidth dflags)
+
+cInt :: DynFlags -> CmmType
+cInt dflags = cmmBits (cIntWidth dflags)
+
+------------ Predicates ----------------
+isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
+isFloatType (CmmType FloatCat _) = True
+isFloatType _other = False
+
+isGcPtrType (CmmType GcPtrCat _) = True
+isGcPtrType _other = False
+
+isBitsType (CmmType BitsCat _) = True
+isBitsType _ = False
+
+isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
+-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
+-- isFloat32 and 64 are obvious
+
+isWord64 (CmmType BitsCat W64) = True
+isWord64 (CmmType GcPtrCat W64) = True
+isWord64 _other = False
+
+isWord32 (CmmType BitsCat W32) = True
+isWord32 (CmmType GcPtrCat W32) = True
+isWord32 _other = False
+
+isFloat32 (CmmType FloatCat W32) = True
+isFloat32 _other = False
+
+isFloat64 (CmmType FloatCat W64) = True
+isFloat64 _other = False
+
+-----------------------------------------------------------------------------
+-- Width
+-----------------------------------------------------------------------------
+
+data Width = W8 | W16 | W32 | W64
+ | W128
+ | W256
+ | W512
+ deriving (Eq, Ord, Show)
+
+instance Outputable Width where
+ ppr rep = ptext (mrStr rep)
+
+mrStr :: Width -> PtrString
+mrStr = sLit . show
+
+
+-------- Common Widths ------------
+wordWidth :: DynFlags -> Width
+wordWidth dflags
+ | wORD_SIZE dflags == 4 = W32
+ | wORD_SIZE dflags == 8 = W64
+ | otherwise = panic "MachOp.wordRep: Unknown word size"
+
+halfWordWidth :: DynFlags -> Width
+halfWordWidth dflags
+ | wORD_SIZE dflags == 4 = W16
+ | wORD_SIZE dflags == 8 = W32
+ | otherwise = panic "MachOp.halfWordRep: Unknown word size"
+
+halfWordMask :: DynFlags -> Integer
+halfWordMask dflags
+ | wORD_SIZE dflags == 4 = 0xFFFF
+ | wORD_SIZE dflags == 8 = 0xFFFFFFFF
+ | otherwise = panic "MachOp.halfWordMask: Unknown word size"
+
+-- cIntWidth is the Width of a C-language 'int'
+cIntWidth :: DynFlags -> Width
+cIntWidth dflags = case cINT_SIZE dflags of
+ 4 -> W32
+ 8 -> W64
+ s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
+
+widthInBits :: Width -> Int
+widthInBits W8 = 8
+widthInBits W16 = 16
+widthInBits W32 = 32
+widthInBits W64 = 64
+widthInBits W128 = 128
+widthInBits W256 = 256
+widthInBits W512 = 512
+
+
+widthInBytes :: Width -> Int
+widthInBytes W8 = 1
+widthInBytes W16 = 2
+widthInBytes W32 = 4
+widthInBytes W64 = 8
+widthInBytes W128 = 16
+widthInBytes W256 = 32
+widthInBytes W512 = 64
+
+
+widthFromBytes :: Int -> Width
+widthFromBytes 1 = W8
+widthFromBytes 2 = W16
+widthFromBytes 4 = W32
+widthFromBytes 8 = W64
+widthFromBytes 16 = W128
+widthFromBytes 32 = W256
+widthFromBytes 64 = W512
+
+widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
+
+-- log_2 of the width in bytes, useful for generating shifts.
+widthInLog :: Width -> Int
+widthInLog W8 = 0
+widthInLog W16 = 1
+widthInLog W32 = 2
+widthInLog W64 = 3
+widthInLog W128 = 4
+widthInLog W256 = 5
+widthInLog W512 = 6
+
+
+-- widening / narrowing
+
+narrowU :: Width -> Integer -> Integer
+narrowU W8 x = fromIntegral (fromIntegral x :: Word8)
+narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
+narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
+narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
+narrowU _ _ = panic "narrowU"
+
+narrowS :: Width -> Integer -> Integer
+narrowS W8 x = fromIntegral (fromIntegral x :: Int8)
+narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
+narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
+narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
+narrowS _ _ = panic "narrowS"
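+
+-- For example:
+--
+--   narrowU W8 260     ==  4      -- 260 modulo 2^8
+--   narrowS W8 128     == -128    -- wraps around into the negative range
+--   narrowS W16 (-1)   == -1      -- already fits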
+
+-----------------------------------------------------------------------------
+-- SIMD
+-----------------------------------------------------------------------------
+
+type Length = Int
+
+vec :: Length -> CmmType -> CmmType
+vec l (CmmType cat w) = CmmType (VecCat l cat) vecw
+ where
+ vecw :: Width
+ vecw = widthFromBytes (l*widthInBytes w)
+
+vec2, vec4, vec8, vec16 :: CmmType -> CmmType
+vec2 = vec 2
+vec4 = vec 4
+vec8 = vec 8
+vec16 = vec 16
+
+vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
+vec2f64 = vec 2 f64
+vec2b64 = vec 2 b64
+vec4f32 = vec 4 f32
+vec4b32 = vec 4 b32
+vec8b16 = vec 8 b16
+vec16b8 = vec 16 b8
+
+cmmVec :: Int -> CmmType -> CmmType
+cmmVec n (CmmType cat w) =
+ CmmType (VecCat n cat) (widthFromBytes (n*widthInBytes w))
+
+vecLength :: CmmType -> Length
+vecLength (CmmType (VecCat l _) _) = l
+vecLength _ = panic "vecLength: not a vector"
+
+vecElemType :: CmmType -> CmmType
+vecElemType (CmmType (VecCat l cat) w) = CmmType cat scalw
+ where
+ scalw :: Width
+ scalw = widthFromBytes (widthInBytes w `div` l)
+vecElemType _ = panic "vecElemType: not a vector"
+
+isVecType :: CmmType -> Bool
+isVecType (CmmType (VecCat {}) _) = True
+isVecType _ = False
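+
+-- For example, vec4 f32 is a 128-bit vector of four 32-bit floats:
+--
+--   typeWidth   (vec4 f32)                  == W128
+--   vecLength   (vec4 f32)                  == 4
+--   vecElemType (vec4 f32) `cmmEqType` f32  -- True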
+
+-------------------------------------------------------------------------
+-- Hints
+
+-- Hints are extra type information we attach to the arguments and
+-- results of a foreign call, where more type information is sometimes
+-- needed by the ABI to make the correct kind of call.
+
+data ForeignHint
+ = NoHint | AddrHint | SignedHint
+ deriving( Eq )
+ -- Used to give extra per-argument or per-result
+ -- information needed by foreign calling conventions
+
+-------------------------------------------------------------------------
+
+-- These don't really belong here, but I don't know where it is best to
+-- put them.
+
+rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
+rEP_CostCentreStack_mem_alloc dflags
+ = cmmBits (widthFromBytes (pc_REP_CostCentreStack_mem_alloc pc))
+ where pc = platformConstants dflags
+
+rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
+rEP_CostCentreStack_scc_count dflags
+ = cmmBits (widthFromBytes (pc_REP_CostCentreStack_scc_count pc))
+ where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocs :: DynFlags -> CmmType
+rEP_StgEntCounter_allocs dflags
+ = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc))
+ where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocd :: DynFlags -> CmmType
+rEP_StgEntCounter_allocd dflags
+ = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc))
+ where pc = platformConstants dflags
+
+-------------------------------------------------------------------------
+{- Note [Signed vs unsigned]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+Should a CmmType include a signed vs. unsigned distinction?
+
+This is very much like a "hint" in C-- terminology: it isn't necessary
+in order to generate correct code, but it might be useful in that the
+compiler can generate better code if it has access to higher-level
+hints about data. This is important at call boundaries, because the
+definition of a function is not visible at all of its call sites, so
+the compiler cannot infer the hints.
+
+Here in Cmm, we're taking a slightly different approach. We include
+the int vs. float hint in the CmmType, because (a) the majority of
+platforms have a strong distinction between float and int registers,
+and (b) we don't want to do any heavyweight hint-inference in the
+native code backend in order to get good code. We're treating the
+hint more like a type: our Cmm is always completely consistent with
+respect to hints. All coercions between float and int are explicit.
+
+What about the signed vs. unsigned hint? This information might be
+useful if we want to keep sub-word-sized values in word-size
+registers, which we must do if we only have word-sized registers.
+
+On such a system, there are two straightforward conventions for
+representing sub-word-sized values:
+
+(a) Leave the upper bits undefined. Comparison operations must
+ sign- or zero-extend both operands before comparing them,
+ depending on whether the comparison is signed or unsigned.
+
+(b) Always keep the values sign- or zero-extended as appropriate.
+ Arithmetic operations must narrow the result to the appropriate
+ size.
+
+A clever compiler might not use either (a) or (b) exclusively, instead
+it would attempt to minimize the coercions by analysis: the same kind
+of analysis that propagates hints around. In Cmm we don't want to
+have to do this, so we plump for having richer types and keeping the
+type information consistent.
+
+If signed/unsigned hints are missing from CmmType, then the only
+choice we have is (a), because we don't know whether the result of an
+operation should be sign- or zero-extended.
+
+Many architectures have extending load operations, which work well
+with (b). To make use of them with (a), you need to know whether the
+value is going to be sign- or zero-extended by an enclosing comparison
+(for example), which involves knowing about the context. This is
+doable but more complex.
+
+Further complicating the issue is foreign calls: a foreign calling
+convention can specify that signed 8-bit quantities are passed as
+sign-extended 32 bit quantities, for example (this is the case on the
+PowerPC). So we *do* need sign information on foreign call arguments.
+
+Pros for adding signed vs. unsigned to CmmType:
+
+ - It would let us use convention (b) above, and get easier
+ code generation for extending loads.
+
+ - Less information required on foreign calls.
+
+ - MachOp type would be simpler
+
+Cons:
+
+ - More complexity
+
+ - What is the CmmType for a VanillaReg? Currently it is
+ always wordRep, but now we have to decide whether it is
+ signed or unsigned. The same VanillaReg can thus have
+ different CmmType in different parts of the program.
+
+ - Extra coercions cluttering up expressions.
+
+Currently for GHC, the foreign call point is moot, because we do our
+own promotion of sub-word-sized values to word-sized values. The Int8
+type is represented by an Int# which is kept sign-extended at all times
+(this is slightly naughty, because we're making assumptions about the
+C calling convention rather early on in the compiler). However, given
+this, the cons outweigh the pros.
+
+-}
+
diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs
new file mode 100644
index 0000000000..d879c7b82f
--- /dev/null
+++ b/compiler/GHC/Cmm/Utils.hs
@@ -0,0 +1,607 @@
+{-# LANGUAGE GADTs, RankNTypes #-}
+{-# LANGUAGE BangPatterns #-}
+
+-----------------------------------------------------------------------------
+--
+-- Cmm utilities.
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.Cmm.Utils(
+ -- CmmType
+ primRepCmmType, slotCmmType, slotForeignHint,
+ typeCmmType, typeForeignHint, primRepForeignHint,
+
+ -- CmmLit
+ zeroCLit, mkIntCLit,
+ mkWordCLit, packHalfWordsCLit,
+ mkByteStringCLit,
+ mkDataLits, mkRODataLits,
+ mkStgWordCLit,
+
+ -- CmmExpr
+ mkIntExpr, zeroExpr,
+ mkLblExpr,
+ cmmRegOff, cmmOffset, cmmLabelOff, cmmOffsetLit, cmmOffsetExpr,
+ cmmRegOffB, cmmOffsetB, cmmLabelOffB, cmmOffsetLitB, cmmOffsetExprB,
+ cmmRegOffW, cmmOffsetW, cmmLabelOffW, cmmOffsetLitW, cmmOffsetExprW,
+ cmmIndex, cmmIndexExpr, cmmLoadIndex, cmmLoadIndexW,
+ cmmNegate,
+ cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord,
+ cmmSLtWord,
+ cmmNeWord, cmmEqWord,
+ cmmOrWord, cmmAndWord,
+ cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord,
+ cmmToWord,
+
+ cmmMkAssign,
+
+ isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr,
+
+ baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr,
+ currentTSOExpr, currentNurseryExpr, cccsExpr,
+
+ -- Statics
+ blankWord,
+
+ -- Tagging
+ cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
+ cmmConstrTag1,
+
+ -- Overlap and usage
+ regsOverlap, regUsedIn,
+
+ -- Liveness and bitmaps
+ mkLiveness,
+
+ -- * Operations that probably don't belong here
+ modifyGraph,
+
+ ofBlockMap, toBlockMap,
+ ofBlockList, toBlockList, bodyToBlockList,
+ toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough,
+ foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1,
+
+ -- * Ticks
+ blockTicks
+ ) where
+
+import GhcPrelude
+
+import TyCon ( PrimRep(..), PrimElemRep(..) )
+import GHC.Types.RepType ( UnaryType, SlotTy (..), typePrimRep1 )
+
+import GHC.Runtime.Layout
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import Outputable
+import DynFlags
+import Unique
+import GHC.Platform.Regs
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS
+import Data.Bits
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+
+---------------------------------------------------
+--
+-- CmmTypes
+--
+---------------------------------------------------
+
+primRepCmmType :: DynFlags -> PrimRep -> CmmType
+primRepCmmType _ VoidRep = panic "primRepCmmType:VoidRep"
+primRepCmmType dflags LiftedRep = gcWord dflags
+primRepCmmType dflags UnliftedRep = gcWord dflags
+primRepCmmType dflags IntRep = bWord dflags
+primRepCmmType dflags WordRep = bWord dflags
+primRepCmmType _ Int8Rep = b8
+primRepCmmType _ Word8Rep = b8
+primRepCmmType _ Int16Rep = b16
+primRepCmmType _ Word16Rep = b16
+primRepCmmType _ Int32Rep = b32
+primRepCmmType _ Word32Rep = b32
+primRepCmmType _ Int64Rep = b64
+primRepCmmType _ Word64Rep = b64
+primRepCmmType dflags AddrRep = bWord dflags
+primRepCmmType _ FloatRep = f32
+primRepCmmType _ DoubleRep = f64
+primRepCmmType _ (VecRep len rep) = vec len (primElemRepCmmType rep)
+
+slotCmmType :: DynFlags -> SlotTy -> CmmType
+slotCmmType dflags PtrSlot = gcWord dflags
+slotCmmType dflags WordSlot = bWord dflags
+slotCmmType _ Word64Slot = b64
+slotCmmType _ FloatSlot = f32
+slotCmmType _ DoubleSlot = f64
+
+primElemRepCmmType :: PrimElemRep -> CmmType
+primElemRepCmmType Int8ElemRep = b8
+primElemRepCmmType Int16ElemRep = b16
+primElemRepCmmType Int32ElemRep = b32
+primElemRepCmmType Int64ElemRep = b64
+primElemRepCmmType Word8ElemRep = b8
+primElemRepCmmType Word16ElemRep = b16
+primElemRepCmmType Word32ElemRep = b32
+primElemRepCmmType Word64ElemRep = b64
+primElemRepCmmType FloatElemRep = f32
+primElemRepCmmType DoubleElemRep = f64
+
+typeCmmType :: DynFlags -> UnaryType -> CmmType
+typeCmmType dflags ty = primRepCmmType dflags (typePrimRep1 ty)
+
+primRepForeignHint :: PrimRep -> ForeignHint
+primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep"
+primRepForeignHint LiftedRep = AddrHint
+primRepForeignHint UnliftedRep = AddrHint
+primRepForeignHint IntRep = SignedHint
+primRepForeignHint Int8Rep = SignedHint
+primRepForeignHint Int16Rep = SignedHint
+primRepForeignHint Int32Rep = SignedHint
+primRepForeignHint Int64Rep = SignedHint
+primRepForeignHint WordRep = NoHint
+primRepForeignHint Word8Rep = NoHint
+primRepForeignHint Word16Rep = NoHint
+primRepForeignHint Word32Rep = NoHint
+primRepForeignHint Word64Rep = NoHint
+primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg
+primRepForeignHint FloatRep = NoHint
+primRepForeignHint DoubleRep = NoHint
+primRepForeignHint (VecRep {}) = NoHint
+
+slotForeignHint :: SlotTy -> ForeignHint
+slotForeignHint PtrSlot = AddrHint
+slotForeignHint WordSlot = NoHint
+slotForeignHint Word64Slot = NoHint
+slotForeignHint FloatSlot = NoHint
+slotForeignHint DoubleSlot = NoHint
+
+typeForeignHint :: UnaryType -> ForeignHint
+typeForeignHint = primRepForeignHint . typePrimRep1
+
+---------------------------------------------------
+--
+-- CmmLit
+--
+---------------------------------------------------
+
+-- XXX: should really be Integer, since Int doesn't necessarily cover
+-- the full range of target Ints.
+mkIntCLit :: DynFlags -> Int -> CmmLit
+mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags)
+
+mkIntExpr :: DynFlags -> Int -> CmmExpr
+mkIntExpr dflags i = CmmLit $! mkIntCLit dflags i
+
+zeroCLit :: DynFlags -> CmmLit
+zeroCLit dflags = CmmInt 0 (wordWidth dflags)
+
+zeroExpr :: DynFlags -> CmmExpr
+zeroExpr dflags = CmmLit (zeroCLit dflags)
+
+mkWordCLit :: DynFlags -> Integer -> CmmLit
+mkWordCLit dflags wd = CmmInt wd (wordWidth dflags)
+
+mkByteStringCLit
+ :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt)
+-- We have to make a top-level decl for the string,
+-- and return a literal pointing to it
+mkByteStringCLit lbl bytes
+ = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
+ where
+  -- This cannot happen for String literals (there, \NUL is replaced by
+  -- C0 80). However, it can happen with Addr# literals.
+ sec = if 0 `BS.elem` bytes then ReadOnlyData else CString
+
+mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
+-- Build a data-segment data block
+mkDataLits section lbl lits
+ = CmmData section (Statics lbl $ map CmmStaticLit lits)
+
+mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
+-- Build a read-only data block
+mkRODataLits lbl lits
+ = mkDataLits section lbl lits
+ where
+ section | any needsRelocation lits = Section RelocatableReadOnlyData lbl
+ | otherwise = Section ReadOnlyData lbl
+ needsRelocation (CmmLabel _) = True
+ needsRelocation (CmmLabelOff _ _) = True
+ needsRelocation _ = False
+
+mkStgWordCLit :: DynFlags -> StgWord -> CmmLit
+mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags)
+
+packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit
+-- Make a single word literal in which the lower_half_word is
+-- at the lower address, and the upper_half_word is at the
+-- higher address
+-- ToDo: consider using half-word lits instead
+-- but be careful: that's vulnerable when reversed
+packHalfWordsCLit dflags lower_half_word upper_half_word
+ = if wORDS_BIGENDIAN dflags
+ then mkWordCLit dflags ((l `shiftL` halfWordSizeInBits dflags) .|. u)
+ else mkWordCLit dflags (l .|. (u `shiftL` halfWordSizeInBits dflags))
+ where l = fromStgHalfWord lower_half_word
+ u = fromStgHalfWord upper_half_word
+
+---------------------------------------------------
+--
+-- CmmExpr
+--
+---------------------------------------------------
+
+mkLblExpr :: CLabel -> CmmExpr
+mkLblExpr lbl = CmmLit (CmmLabel lbl)
+
+cmmOffsetExpr :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr
+-- assumes base and offset have the same CmmType
+cmmOffsetExpr dflags e (CmmLit (CmmInt n _)) = cmmOffset dflags e (fromInteger n)
+cmmOffsetExpr dflags e byte_off = CmmMachOp (MO_Add (cmmExprWidth dflags e)) [e, byte_off]
+
+cmmOffset :: DynFlags -> CmmExpr -> Int -> CmmExpr
+cmmOffset _ e 0 = e
+cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off
+cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off)
+cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off)
+cmmOffset _ (CmmStackSlot area off) byte_off
+ = CmmStackSlot area (off - byte_off)
+ -- note stack area offsets increase towards lower addresses
+cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2
+ = CmmMachOp (MO_Add rep)
+ [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)]
+cmmOffset dflags expr byte_off
+ = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)]
+ where
+ width = cmmExprWidth dflags expr
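+
+-- For example (with reg and w standing for an arbitrary register and width),
+-- offsets fold into registers, literals, and existing additions rather than
+-- stacking up MO_Add nodes:
+--
+--   cmmOffset dflags (CmmRegOff reg 4) 4      ==  CmmRegOff reg 8
+--   cmmOffset dflags (CmmLit (CmmInt 5 w)) 3  ==  CmmLit (CmmInt 8 w)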
+
+-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above.
+cmmRegOff :: CmmReg -> Int -> CmmExpr
+cmmRegOff reg 0 = CmmReg reg
+cmmRegOff reg byte_off = CmmRegOff reg byte_off
+
+cmmOffsetLit :: CmmLit -> Int -> CmmLit
+cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off
+cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off)
+cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off
+ = CmmLabelDiffOff l1 l2 (m+byte_off) w
+cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep
+cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off)
+
+cmmLabelOff :: CLabel -> Int -> CmmLit
+-- Smart constructor for CmmLabelOff
+cmmLabelOff lbl 0 = CmmLabel lbl
+cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off
+
+-- | Useful for creating an index into an array, with a statically known offset.
+-- The Width is the element width; it is used for making the multiplier
+cmmIndex :: DynFlags
+ -> Width -- Width w
+ -> CmmExpr -- Address of vector of items of width w
+ -> Int -- Which element of the vector (0 based)
+ -> CmmExpr -- Address of i'th element
+cmmIndex dflags width base idx = cmmOffset dflags base (idx * widthInBytes width)
+
+-- | Useful for creating an index into an array, with an unknown offset.
+cmmIndexExpr :: DynFlags
+ -> Width -- Width w
+ -> CmmExpr -- Address of vector of items of width w
+ -> CmmExpr -- Which element of the vector (0 based)
+ -> CmmExpr -- Address of i'th element
+cmmIndexExpr dflags width base (CmmLit (CmmInt n _)) = cmmIndex dflags width base (fromInteger n)
+cmmIndexExpr dflags width base idx =
+ cmmOffsetExpr dflags base byte_off
+ where
+ idx_w = cmmExprWidth dflags idx
+ byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr dflags (widthInLog width)]
+
+cmmLoadIndex :: DynFlags -> CmmType -> CmmExpr -> Int -> CmmExpr
+cmmLoadIndex dflags ty expr ix = CmmLoad (cmmIndex dflags (typeWidth ty) expr ix) ty
+
+-- The "B" variants take byte offsets
+cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr
+cmmRegOffB = cmmRegOff
+
+cmmOffsetB :: DynFlags -> CmmExpr -> ByteOff -> CmmExpr
+cmmOffsetB = cmmOffset
+
+cmmOffsetExprB :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr
+cmmOffsetExprB = cmmOffsetExpr
+
+cmmLabelOffB :: CLabel -> ByteOff -> CmmLit
+cmmLabelOffB = cmmLabelOff
+
+cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit
+cmmOffsetLitB = cmmOffsetLit
+
+-----------------------
+-- The "W" variants take word offsets
+
+cmmOffsetExprW :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr
+-- The second arg is a *word* offset; need to change it to bytes
+cmmOffsetExprW dflags e (CmmLit (CmmInt n _)) = cmmOffsetW dflags e (fromInteger n)
+cmmOffsetExprW dflags e wd_off = cmmIndexExpr dflags (wordWidth dflags) e wd_off
+
+cmmOffsetW :: DynFlags -> CmmExpr -> WordOff -> CmmExpr
+cmmOffsetW dflags e n = cmmOffsetB dflags e (wordsToBytes dflags n)
+
+cmmRegOffW :: DynFlags -> CmmReg -> WordOff -> CmmExpr
+cmmRegOffW dflags reg wd_off = cmmRegOffB reg (wordsToBytes dflags wd_off)
+
+cmmOffsetLitW :: DynFlags -> CmmLit -> WordOff -> CmmLit
+cmmOffsetLitW dflags lit wd_off = cmmOffsetLitB lit (wordsToBytes dflags wd_off)
+
+cmmLabelOffW :: DynFlags -> CLabel -> WordOff -> CmmLit
+cmmLabelOffW dflags lbl wd_off = cmmLabelOffB lbl (wordsToBytes dflags wd_off)
+
+cmmLoadIndexW :: DynFlags -> CmmExpr -> Int -> CmmType -> CmmExpr
+cmmLoadIndexW dflags base off ty = CmmLoad (cmmOffsetW dflags base off) ty
+
+-----------------------
+cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord,
+ cmmSLtWord,
+ cmmNeWord, cmmEqWord,
+ cmmOrWord, cmmAndWord,
+ cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord
+ :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr
+cmmOrWord dflags e1 e2 = CmmMachOp (mo_wordOr dflags) [e1, e2]
+cmmAndWord dflags e1 e2 = CmmMachOp (mo_wordAnd dflags) [e1, e2]
+cmmNeWord dflags e1 e2 = CmmMachOp (mo_wordNe dflags) [e1, e2]
+cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2]
+cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2]
+cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2]
+cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2]
+cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2]
+cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2]
+cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2]
+cmmSubWord dflags e1 e2 = CmmMachOp (mo_wordSub dflags) [e1, e2]
+cmmMulWord dflags e1 e2 = CmmMachOp (mo_wordMul dflags) [e1, e2]
+cmmQuotWord dflags e1 e2 = CmmMachOp (mo_wordUQuot dflags) [e1, e2]
+
+cmmNegate :: DynFlags -> CmmExpr -> CmmExpr
+cmmNegate _ (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep)
+cmmNegate dflags e = CmmMachOp (MO_S_Neg (cmmExprWidth dflags e)) [e]
+
+blankWord :: DynFlags -> CmmStatic
+blankWord dflags = CmmUninitialised (wORD_SIZE dflags)
+
+cmmToWord :: DynFlags -> CmmExpr -> CmmExpr
+cmmToWord dflags e
+ | w == word = e
+ | otherwise = CmmMachOp (MO_UU_Conv w word) [e]
+ where
+ w = cmmExprWidth dflags e
+ word = wordWidth dflags
+
+cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr)
+cmmMkAssign dflags expr uq =
+ let !ty = cmmExprType dflags expr
+ reg = (CmmLocal (LocalReg uq ty))
+ in (CmmAssign reg expr, CmmReg reg)
+
+
+---------------------------------------------------
+--
+-- CmmExpr predicates
+--
+---------------------------------------------------
+
+isTrivialCmmExpr :: CmmExpr -> Bool
+isTrivialCmmExpr (CmmLoad _ _) = False
+isTrivialCmmExpr (CmmMachOp _ _) = False
+isTrivialCmmExpr (CmmLit _) = True
+isTrivialCmmExpr (CmmReg _) = True
+isTrivialCmmExpr (CmmRegOff _ _) = True
+isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot"
+
+hasNoGlobalRegs :: CmmExpr -> Bool
+hasNoGlobalRegs (CmmLoad e _) = hasNoGlobalRegs e
+hasNoGlobalRegs (CmmMachOp _ es) = all hasNoGlobalRegs es
+hasNoGlobalRegs (CmmLit _) = True
+hasNoGlobalRegs (CmmReg (CmmLocal _)) = True
+hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True
+hasNoGlobalRegs _ = False
+
+isLit :: CmmExpr -> Bool
+isLit (CmmLit _) = True
+isLit _ = False
+
+isComparisonExpr :: CmmExpr -> Bool
+isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
+isComparisonExpr _ = False
+
+---------------------------------------------------
+--
+-- Tagging
+--
+---------------------------------------------------
+
+-- Tag bits mask
+cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr
+cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags)
+cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags))
+
+-- Used to untag a possibly tagged pointer
+-- A static label need not be untagged
+cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
+cmmUntag _ e@(CmmLit (CmmLabel _)) = e
+-- Default case
+cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags)
+
+-- Test if a closure pointer is untagged
+cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags)
+
+-- Get the constructor tag, but one-based.
+cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
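+
+-- For example, on a platform where tAG_MASK is 7 (64 bits, 3 tag bits),
+-- these correspond to (in Cmm pseudo-syntax, for the default cases):
+--
+--   cmmUntag dflags e       ~  e & ~7
+--   cmmIsTagged dflags e    ~  (e & 7) != 0
+--   cmmConstrTag1 dflags e  ~  e & 7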
+
+
+-----------------------------------------------------------------------------
+-- Overlap and usage
+
+-- | Returns True if the two STG registers overlap on the specified
+-- platform, in the sense that writing to one will clobber the
+-- other. This includes the case that the two registers are the same
+-- STG register. See Note [Overlapping global registers] for details.
+regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
+regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
+ | Just real <- globalRegMaybe (targetPlatform dflags) g,
+ Just real' <- globalRegMaybe (targetPlatform dflags) g',
+ real == real'
+ = True
+regsOverlap _ reg reg' = reg == reg'
+
+-- | Returns True if the STG register is used by the expression, in
+-- the sense that a store to the register might affect the value of
+-- the expression.
+--
+-- We must check for overlapping registers and not just equal
+-- registers here, otherwise CmmSink may incorrectly reorder
+-- assignments that conflict due to overlap. See #10521 and Note
+-- [Overlapping global registers].
+regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool
+regUsedIn dflags = regUsedIn_ where
+ _ `regUsedIn_` CmmLit _ = False
+ reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e
+ reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg'
+ reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg'
+ reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es
+ _ `regUsedIn_` CmmStackSlot _ _ = False
+
+--------------------------------------------
+--
+-- mkLiveness
+--
+---------------------------------------------
+
+mkLiveness :: DynFlags -> [LocalReg] -> Liveness
+mkLiveness _ [] = []
+mkLiveness dflags (reg:regs)
+ = bits ++ mkLiveness dflags regs
+ where
+ sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1)
+ `quot` wORD_SIZE dflags
+ -- number of words, rounded up
+ bits = replicate sizeW is_non_ptr -- True <=> Non Ptr
+
+ is_non_ptr = not $ isGcPtrType (localRegType reg)
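+
+-- For example, on a 64-bit target a gcWord register contributes [False]
+-- (a pointer word) and a b64 register contributes [True] (a non-pointer
+-- word), so for hypothetical local registers p :: gcWord and x :: b64:
+--
+--   mkLiveness dflags [p, x] == [False, True]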
+
+
+-- ============================================== -
+-- ============================================== -
+-- ============================================== -
+
+---------------------------------------------------
+--
+-- Manipulating CmmGraphs
+--
+---------------------------------------------------
+
+modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'
+modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)}
+
+toBlockMap :: CmmGraph -> LabelMap CmmBlock
+toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
+
+ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph
+ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO}
+
+toBlockList :: CmmGraph -> [CmmBlock]
+toBlockList g = mapElems $ toBlockMap g
+
+-- | Like 'toBlockList', but the entry block always comes first
+toBlockListEntryFirst :: CmmGraph -> [CmmBlock]
+toBlockListEntryFirst g
+ | mapNull m = []
+ | otherwise = entry_block : others
+ where
+ m = toBlockMap g
+ entry_id = g_entry g
+ Just entry_block = mapLookup entry_id m
+ others = filter ((/= entry_id) . entryLabel) (mapElems m)
+
+-- | Like 'toBlockListEntryFirst', but we strive to ensure that we order blocks
+-- so that the false case of a conditional jumps to the next block in the output
+-- list of blocks. This matches the way OldCmm blocks were output since in
+-- OldCmm the false case was a fallthrough, whereas in Cmm conditional branches
+-- have both true and false successors. Block ordering can make a big difference
+-- in performance in the LLVM backend. Note that we rely crucially on the order
+-- of successors returned for CmmCondBranch by the NonLocal instance for CmmNode
+-- defined in GHC.Cmm.Node. -GBM
+toBlockListEntryFirstFalseFallthrough :: CmmGraph -> [CmmBlock]
+toBlockListEntryFirstFalseFallthrough g
+ | mapNull m = []
+ | otherwise = dfs setEmpty [entry_block]
+ where
+ m = toBlockMap g
+ entry_id = g_entry g
+ Just entry_block = mapLookup entry_id m
+
+ dfs :: LabelSet -> [CmmBlock] -> [CmmBlock]
+ dfs _ [] = []
+ dfs visited (block:bs)
+ | id `setMember` visited = dfs visited bs
+ | otherwise = block : dfs (setInsert id visited) bs'
+ where id = entryLabel block
+ bs' = foldr add_id bs (successors block)
+ add_id id bs = case mapLookup id m of
+ Just b -> b : bs
+ Nothing -> bs
+
+ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph
+ofBlockList entry blocks = CmmGraph { g_entry = entry
+ , g_graph = GMany NothingO body NothingO }
+ where body = foldr addBlock emptyBody blocks
+
+bodyToBlockList :: Body CmmNode -> [CmmBlock]
+bodyToBlockList body = mapElems body
+
+mapGraphNodes :: ( CmmNode C O -> CmmNode C O
+ , CmmNode O O -> CmmNode O O
+ , CmmNode O C -> CmmNode O C)
+ -> CmmGraph -> CmmGraph
+mapGraphNodes funs@(mf,_,_) g =
+ ofBlockMap (entryLabel $ mf $ CmmEntry (g_entry g) GlobalScope) $
+ mapMap (mapBlock3' funs) $ toBlockMap g
+
+mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph
+mapGraphNodes1 f = modifyGraph (mapGraph f)
+
+
+foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a
+foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g
+
+revPostorder :: CmmGraph -> [CmmBlock]
+revPostorder g = {-# SCC "revPostorder" #-}
+ revPostorderFrom (toBlockMap g) (g_entry g)
+
+-------------------------------------------------
+-- Tick utilities
+
+-- | Extract all tick annotations from the given block
+blockTicks :: Block CmmNode C C -> [CmmTickish]
+blockTicks b = reverse $ foldBlockNodesF goStmt b []
+ where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish]
+ goStmt (CmmTick t) ts = t:ts
+ goStmt _other ts = ts
+
+
+-- -----------------------------------------------------------------------------
+-- Access to common global registers
+
+baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr,
+ spLimExpr, hpLimExpr, cccsExpr :: CmmExpr
+baseExpr = CmmReg baseReg
+spExpr = CmmReg spReg
+spLimExpr = CmmReg spLimReg
+hpExpr = CmmReg hpReg
+hpLimExpr = CmmReg hpLimReg
+currentTSOExpr = CmmReg currentTSOReg
+currentNurseryExpr = CmmReg currentNurseryReg
+cccsExpr = CmmReg cccsReg
diff --git a/compiler/GHC/Cmm/cmm-notes b/compiler/GHC/Cmm/cmm-notes
new file mode 100644
index 0000000000..d664a195b7
--- /dev/null
+++ b/compiler/GHC/Cmm/cmm-notes
@@ -0,0 +1,184 @@
+More notes (Aug 11)
+~~~~~~~~~~~~~~~~~~~
+* CmmInfo.cmmToRawCmm expands info tables to their representations
+ (needed for .cmm files as well as the code generators)
+
+* Why is FCode a lazy monad? That makes it inefficient.
+ We want laziness to get code out one procedure at a time,
+ but not at the instruction level.
+ UPDATE (31/5/2016): FCode is strict since 09afcc9b.
+
+Things we did
+ * Remove CmmCvt.graphToZgraph (Conversion from old to new Cmm reps)
+ * Remove HscMain.optionallyConvertAndOrCPS (converted old Cmm to
+ new, ran pipeline, and converted back)
+ * Remove CmmDecl. Put its types in Cmm. Import Cmm into OldCmm
+ so it can get those types.
+
+
+More notes (June 11)
+~~~~~~~~~~~~~~~~~~~~
+
+* In CmmContFlowOpt.branchChainElim, can a single block be the
+ successor of two calls?
+
+* Check in ClosureInfo:
+ -- NB: Results here should line up with the results of SMRep.rtsClosureType
+
+More notes (May 11)
+~~~~~~~~~~~~~~~~~~~
+In CmmNode, consider splitting CmmCall into two: call and jump
+
+Notes on new codegen (Aug 10)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Things to do:
+ - Proc points pass all arguments on the stack, adding more code and
+   slowing things down a lot. We either need to fix this or, better
+   still, get rid of proc points entirely.
+
+ - Sort out Label, LabelMap, LabelSet versus BlockId, BlockEnv, BlockSet
+ dichotomy. Mostly this means global replace, but we also need to make
+ Label an instance of Outputable (probably in the Outputable module).
+
+ EZY: We should use Label, since that's the terminology Hoopl uses.
+
+ - AsmCodeGen has a generic Cmm optimiser; move this into the new pipeline.
+   EZY (2011-04-16): The mini-inliner has been generalized and ported,
+   but the constant folding and other optimizations still need to be
+   ported.
+
+ - AsmCodeGen has post-native-cg branch eliminator (shortCutBranches);
+ we ultimately want to share this with the Cmm branch eliminator.
+
+ - At the moment, references to global registers like Hp are "lowered"
+ late (in CgUtils.fixStgRegisters). We should do this early, in the
+ new native codegen, much in the way that we lower calling conventions.
+ Might need to be a bit sophisticated about aliasing.
+
+ - Move to new Cmm rep:
+ * Make native CG consume New Cmm;
+ * Convert Old Cmm->New Cmm to keep old path alive
+ * Produce New Cmm when reading in .cmm files
+
+ - Top-level SRT threading is a bit ugly
+
+ - See "CAFs" below; we want to totally refactor the way SRTs are calculated
+
+ - Garbage-collect https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cps
+ moving good stuff into
+ https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/new-code-gen-pipeline
+
+ - Currently AsmCodeGen top level calls AsmCodeGen.cmmToCmm, which is a small
+ C-- optimiser. It has quite a lot of boilerplate folding code in AsmCodeGen
+ (cmmBlockConFold, cmmStmtConFold, cmmExprConFold), before calling out to
+ CmmOpt. ToDo: see what optimisations are being done; and do them before
+ AsmCodeGen.
+
+ - If we stick CAF and stack liveness info on a LastCall node (not LastRet/Jump)
+   then all CAF and stack liveness stuff can be completed before we split
+   into separate C procedures.
+
+ Short term:
+ compute and attach liveness into LastCall
+ right at end, split, cvt to old rep
+ [must split before cvt, because old rep is not expressive enough]
+
+ Longer term:
+ when old rep disappears,
+ move the whole splitting game into the C back end *only*
+ (guided by the procpoint set)
+
+----------------------------------------------------
+ Proc-points
+----------------------------------------------------
+
+Consider this program, which has a diamond control flow,
+with a call on one branch
+ fn(p,x) {
+ h()
+ if b then { ... f(x) ...; q=5; goto J }
+ else { ...; q=7; goto J }
+ J: ..p...q...
+ }
+then the join point J is a "proc-point". So, is 'p' passed to J
+as a parameter? Or, if 'p' was saved on the stack anyway, perhaps
+to keep it alive across the call to h(), maybe 'p' gets communicated
+to J that way. This is an awkward choice. (We think that we currently
+never pass variables to join points via arguments.)
+
+Furthermore, there is *no way* to pass q to J in a register (other
+than a parameter register).
+
+What we want is to do register allocation across the whole caboodle.
+Then we could drop all the code that deals with the above awkward
+decisions about spilling variables across proc-points.
+
+Note that J doesn't need an info table.
+
+What we really want is for each LastCall (not LastJump/Ret)
+to have an info table. Note that ProcPoints that are not successors
+of calls don't need an info table.
+
+Figuring out proc-points
+~~~~~~~~~~~~~~~~~~~~~~~~
+Proc-points are identified by
+GHC.Cmm.ProcPoint.minimalProcPointSet/extendPPSet. Although there isn't
+that much code, JD thinks that it could be done much more nicely using
+a dominator analysis implemented with the Dataflow Engine.
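+
+As a rough illustration only (not GHC's actual code: the names
+minimalProcPoints and Lbl, and the plain successor-map representation,
+are invented for this note), the fixpoint involved looks like this:
+
+  import qualified Data.Map as M
+  import qualified Data.Set as S
+
+  type Lbl = String
+
+  -- Start from the "obvious" proc-points (entry block and call
+  -- continuations); any block reached by two different proc-points
+  -- without crossing a third must itself become a proc-point.
+  minimalProcPoints :: M.Map Lbl [Lbl] -> S.Set Lbl -> S.Set Lbl
+  minimalProcPoints succs initial = go initial
+    where
+      succOf l = M.findWithDefault [] l succs
+
+      go pps
+        | S.null new = pps
+        | otherwise  = go (pps `S.union` new)
+        where
+          new = S.fromList [ b | (b, os) <- M.toList (reaching pps)
+                               , S.size os > 1 ]
+
+      -- For each non-proc-point block, the set of proc-points that
+      -- reach it without passing through another proc-point.
+      reaching pps = foldl visit M.empty
+                       [ (p, s) | p <- S.toList pps, s <- succOf p ]
+        where
+          visit m (owner, b)
+            | b `S.member` pps = m          -- stop at proc-points
+            | owner `S.member` M.findWithDefault S.empty b m = m
+            | otherwise =
+                let m' = M.insertWith S.union b (S.singleton owner) m
+                in  foldl visit m' [ (owner, s) | s <- succOf b ]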
+
+----------------------------------------------------
+ CAFs
+----------------------------------------------------
+
+* The code for a procedure f may refer to either the *closure*
+ or the *entry point* of another top-level procedure g.
+ If f is live, then so is g. f's SRT must include g's closure.
+
+* The CLabel for the entry-point/closure reveals whether g is
+ a CAF (or refers to CAFs). See the IdLabel constructor of CLabel.
+
+* The CAF-ness of the original top-level definitions is figured out
+ (by GHC.Iface.Tidy) before we generate C--. This CafInfo is only set for
+ top-level Ids; nested bindings stay with MayHaveCafRefs.
+
+* Currently an SRT contains (only) pointers to (top-level) closures.
+
+* Consider this Core code
+ f = \x -> let g = \y -> ...x...y...h1...
+ in ...h2...g...
+ and suppose that h1, h2 have IdInfo of MayHaveCafRefs.
+  Therefore, so will f; but g will not (since it's nested).
+
+ This generates C-- roughly like this:
+ f_closure: .word f_entry
+ f_entry() [info-tbl-for-f] { ...jump g_entry...jump h2... }
+ g_entry() [info-tbl-for-g] { ...jump h1... }
+
+ Note that there is no top-level closure for g (only an info table).
+  This fact (whether or not there is a top-level closure) is recorded
+  in the InfoTable attached to the CmmProc for f and g.
+ INVARIANT:
+     Any out-of-Group reference to an IdLabel goes to
+ a Proc whose InfoTable says "I have a top-level closure".
+ Equivalently:
+ A CmmProc whose InfoTable says "I do not have a top-level
+ closure" is referred to only from its own Group.
+
+* So: info-tbl-for-f must have an SRT that keeps h1,h2 alive
+ info-tbl-for-g must have an SRT that keeps h1 (only) alive
+
+ But if we just look for the free CAF refs, we get:
+ f h2 (only)
+ g h1
+
+  So we need to take a transitive closure to flesh out
+  f's keep-alive refs to include h1.
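+
+  A sketch of that transitive closure (illustrative only; keepAlive,
+  the String labels, and the hasClosure set are invented here, and
+  "direct" includes refs to closure-less procs such as g):
+
+    import qualified Data.Map as M
+    import qualified Data.Set as S
+
+    type Lbl = String
+
+    -- Follow refs through procs that lack a top-level closure,
+    -- collecting the closure-having labels ultimately reachable.
+    keepAlive :: M.Map Lbl (S.Set Lbl) -> S.Set Lbl -> Lbl -> S.Set Lbl
+    keepAlive direct hasClosure f =
+        go S.empty (S.toList (M.findWithDefault S.empty f direct))
+      where
+        go seen [] = S.filter (`S.member` hasClosure) seen
+        go seen (r:rs)
+          | r `S.member` seen       = go seen rs
+          | r `S.member` hasClosure = go (S.insert r seen) rs
+          | otherwise =
+              go (S.insert r seen)
+                 (S.toList (M.findWithDefault S.empty r direct) ++ rs)
+
+    -- With direct = [("f", {"g","h2"}), ("g", {"h1"})] and
+    -- hasClosure = {"f","h1","h2"} (g has no top-level closure):
+    --   keepAlive direct hasClosure "f" == {"h1","h2"}
+    --   keepAlive direct hasClosure "g" == {"h1"}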
+
+* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a
+ CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does
+ the attaching, right at the end of the pipeline. The C_SRT part
+ gives offsets within a single, shared table of closure pointers.
+
+* DECIDED: we can generate SRTs based on the final Cmm program
+ without knowledge of how it is generated.