Diffstat (limited to 'compiler/GHC/Cmm')
36 files changed, 16609 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/BlockId.hs b/compiler/GHC/Cmm/BlockId.hs new file mode 100644 index 0000000000..f7f369551b --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +{- BlockId module should probably go away completely, being superseded by Label -} +module GHC.Cmm.BlockId + ( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet + , newBlockId + , blockLbl, infoTblLbl + ) where + +import GhcPrelude + +import GHC.Cmm.CLabel +import IdInfo +import Name +import Unique +import UniqSupply + +import GHC.Cmm.Dataflow.Label (Label, mkHooplLabel) + +---------------------------------------------------------------- +--- Block Ids, their environments, and their sets + +{- Note [Unique BlockId] +~~~~~~~~~~~~~~~~~~~~~~~~ +Although a 'BlockId' is a local label, for reasons of implementation, +'BlockId's must be unique within an entire compilation unit. The reason +is that each local label is mapped to an assembly-language label, and in +most assembly languages a label is visible throughout the entire +compilation unit in which it appears. +-} + +type BlockId = Label + +mkBlockId :: Unique -> BlockId +mkBlockId unique = mkHooplLabel $ getKey unique + +newBlockId :: MonadUnique m => m BlockId +newBlockId = mkBlockId <$> getUniqueM + +blockLbl :: BlockId -> CLabel +blockLbl label = mkLocalBlockLabel (getUnique label) + +infoTblLbl :: BlockId -> CLabel +infoTblLbl label + = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs diff --git a/compiler/GHC/Cmm/BlockId.hs-boot b/compiler/GHC/Cmm/BlockId.hs-boot new file mode 100644 index 0000000000..76fd6180a9 --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs-boot @@ -0,0 +1,8 @@ +module GHC.Cmm.BlockId (BlockId, mkBlockId) where + +import GHC.Cmm.Dataflow.Label (Label) +import Unique (Unique) + +type BlockId = Label + +mkBlockId :: Unique -> BlockId diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs new file mode 100644 index 0000000000..e84278bf65 --- /dev/null +++ b/compiler/GHC/Cmm/CLabel.hs @@ -0,0 +1,1571 @@ +----------------------------------------------------------------------------- +-- +-- Object-file symbols (called CLabel for histerical raisins).
+-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# LANGUAGE CPP #-} + +module GHC.Cmm.CLabel ( + CLabel, -- abstract type + ForeignLabelSource(..), + pprDebugCLabel, + + mkClosureLabel, + mkSRTLabel, + mkInfoTableLabel, + mkEntryLabel, + mkRednCountsLabel, + mkConInfoTableLabel, + mkApEntryLabel, + mkApInfoTableLabel, + mkClosureTableLabel, + mkBytesLabel, + + mkLocalBlockLabel, + mkLocalClosureLabel, + mkLocalInfoTableLabel, + mkLocalClosureTableLabel, + + mkBlockInfoTableLabel, + + mkBitmapLabel, + mkStringLitLabel, + + mkAsmTempLabel, + mkAsmTempDerivedLabel, + mkAsmTempEndLabel, + mkAsmTempDieLabel, + + mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, + mkIndStaticInfoLabel, + mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, + mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, + mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, + mkBadAlignmentLabel, + mkArrWords_infoLabel, + mkSRTInfoLabel, + + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkRtsPrimOpLabel, + mkRtsSlowFastTickyCtrLabel, + + mkSelectorInfoLabel, + mkSelectorEntryLabel, + + mkCmmInfoLabel, + mkCmmEntryLabel, + mkCmmRetInfoLabel, + mkCmmRetLabel, + mkCmmCodeLabel, + mkCmmDataLabel, + mkCmmClosureLabel, + + mkRtsApFastLabel, + + mkPrimCallLabel, + + mkForeignLabel, + addLabelSize, + + foreignLabelStdcallInfo, + isBytesLabel, + isForeignLabel, + isSomeRODataLabel, + isStaticClosureLabel, + mkCCLabel, mkCCSLabel, + + DynamicLinkerLabelInfo(..), + mkDynamicLinkerLabel, + dynamicLinkerLabelInfo, + + mkPicBaseLabel, + mkDeadStripPreventer, + + mkHpcTicksLabel, + + -- * Predicates + hasCAF, + needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, + isMathFun, + isCFunctionLabel, isGcPtrLabel, labelDynamic, + isLocalCLabel, mayRedirectTo, + + -- * Conversions + toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, + + pprCLabel, + isInfoTableLabel, + isConInfoTableLabel + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import IdInfo +import BasicTypes +import {-# SOURCE #-} GHC.Cmm.BlockId (BlockId, mkBlockId) +import Packages +import Module +import Name +import Unique +import PrimOp +import CostCentre +import Outputable +import FastString +import DynFlags +import GHC.Platform +import UniqSet +import Util +import PprCore ( {- instances -} ) + +-- ----------------------------------------------------------------------------- +-- The CLabel type + +{- | + 'CLabel' is an abstract type that supports the following operations: + + - Pretty printing + + - In a C file, does it need to be declared before use? (i.e. is it + guaranteed to be already in scope in the places we need to refer to it?) + + - If it needs to be declared, what type (code or data) should it be + declared to have? + + - Is it visible outside this object file or not? + + - Is it "dynamic" (see details below) + + - Eq and Ord, so that we can make sets of CLabels (currently only + used in outputting C as far as I can tell, to avoid generating + more than one declaration for any given label). + + - Converting an info table label into an entry label. 
+ + CLabel usage is a bit messy in GHC as they are used in a number of different + contexts: + + - By the C-- AST to identify labels + + - By the unregisterised C code generator ("PprC") for naming functions (hence + the name 'CLabel') + + - By the native and LLVM code generators to identify labels + + For extra fun, each of these uses a slightly different subset of constructors + (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and + LLVM backends). + + In general, we use 'IdLabel' to represent Haskell things early in the + pipeline. However, later optimization passes will often represent blocks they + create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the + label. +-} + +data CLabel + = -- | A label related to the definition of a particular Id or Con in a .hs file. + IdLabel + Name + CafInfo + IdLabelInfo -- encodes the suffix of the label + + -- | A label from a .cmm file that is not associated with a .hs level Id. + | CmmLabel + UnitId -- what package the label belongs to. + FastString -- identifier giving the prefix of the label + CmmLabelInfo -- encodes the suffix of the label + + -- | A label with a baked-in \/ algorithmically generated name that definitely + -- comes from the RTS. The code for it must compile into libHSrts.a \/ libHSrts.so + -- If it doesn't have an algorithmically generated name then use a CmmLabel + -- instead and give it an appropriate UnitId argument. + | RtsLabel + RtsLabelInfo + + -- | A label associated with a block. These aren't visible outside of the + -- compilation unit in which they are defined. These are generally used to + -- name blocks produced by Cmm-to-Cmm passes and the native code generator, + -- where we don't have a 'Name' to associate the label to and therefore can't + -- use 'IdLabel'. + | LocalBlockLabel + {-# UNPACK #-} !Unique + + -- | A 'C' (or otherwise foreign) label. + -- + | ForeignLabel + FastString -- name of the imported label. + + (Maybe Int) -- possible '@n' suffix for stdcall functions + -- When generating C, the '@n' suffix is omitted, but when + -- generating assembler we must add it to the label. + + ForeignLabelSource -- what package the foreign label is in. + + FunctionOrData + + -- | Local temporary label used for native (or LLVM) code generation; must not + -- appear outside of these contexts. Use primarily for debug information + | AsmTempLabel + {-# UNPACK #-} !Unique + + -- | A label \"derived\" from another 'CLabel' by the addition of a suffix. + -- Must not occur outside of the NCG or LLVM code generators. + | AsmTempDerivedLabel + CLabel + FastString -- suffix + + | StringLitLabel + {-# UNPACK #-} !Unique + + | CC_Label CostCentre + | CCS_Label CostCentreStack + + + -- | These labels are generated and used inside the NCG only. + -- They are special variants of a label used for dynamic linking + -- see module PositionIndependentCode for details. + | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel + + -- | This label is generated and used inside the NCG only. + -- It is used as a base for PIC calculations on some platforms. + -- It takes the form of a local numeric assembler label '1'; and + -- is pretty-printed as 1b, referring to the previous definition + -- of 1: in the assembler source file. 
+ | PicBaseLabel + + -- | A label before an info table to prevent excessive dead-stripping on darwin + | DeadStripPreventer CLabel + + + -- | Per-module table of tick locations + | HpcTicksLabel Module + + -- | Static reference table + | SRTLabel + {-# UNPACK #-} !Unique + + -- | A bitmap (function or case return) + | LargeBitmapLabel + {-# UNPACK #-} !Unique + + deriving Eq + +-- This is laborious, but necessary. We can't derive Ord because +-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the +-- implementation. See Note [No Ord for Unique] +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +instance Ord CLabel where + compare (IdLabel a1 b1 c1) (IdLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (CmmLabel a1 b1 c1) (CmmLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 + compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 + compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 `thenCmp` + compare d1 d2 + compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 + compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare (StringLitLabel u1) (StringLitLabel u2) = + nonDetCmpUnique u1 u2 + compare (CC_Label a1) (CC_Label a2) = + compare a1 a2 + compare (CCS_Label a1) (CCS_Label a2) = + compare a1 a2 + compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare PicBaseLabel PicBaseLabel = EQ + compare (DeadStripPreventer a1) (DeadStripPreventer a2) = + compare a1 a2 + compare (HpcTicksLabel a1) (HpcTicksLabel a2) = + compare a1 a2 + compare (SRTLabel u1) (SRTLabel u2) = + nonDetCmpUnique u1 u2 + compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) = + nonDetCmpUnique u1 u2 + compare IdLabel{} _ = LT + compare _ IdLabel{} = GT + compare CmmLabel{} _ = LT + compare _ CmmLabel{} = GT + compare RtsLabel{} _ = LT + compare _ RtsLabel{} = GT + compare LocalBlockLabel{} _ = LT + compare _ LocalBlockLabel{} = GT + compare ForeignLabel{} _ = LT + compare _ ForeignLabel{} = GT + compare AsmTempLabel{} _ = LT + compare _ AsmTempLabel{} = GT + compare AsmTempDerivedLabel{} _ = LT + compare _ AsmTempDerivedLabel{} = GT + compare StringLitLabel{} _ = LT + compare _ StringLitLabel{} = GT + compare CC_Label{} _ = LT + compare _ CC_Label{} = GT + compare CCS_Label{} _ = LT + compare _ CCS_Label{} = GT + compare DynamicLinkerLabel{} _ = LT + compare _ DynamicLinkerLabel{} = GT + compare PicBaseLabel{} _ = LT + compare _ PicBaseLabel{} = GT + compare DeadStripPreventer{} _ = LT + compare _ DeadStripPreventer{} = GT + compare HpcTicksLabel{} _ = LT + compare _ HpcTicksLabel{} = GT + compare SRTLabel{} _ = LT + compare _ SRTLabel{} = GT + +-- | Record where a foreign label is stored. +data ForeignLabelSource + + -- | Label is in a named package + = ForeignLabelInPackage UnitId + + -- | Label is in some external, system package that doesn't also + -- contain compiled Haskell code, and is not associated with any .hi files. + -- We don't have to worry about Haskell code being inlined from + -- external packages. It is safe to treat the RTS package as "external". 
+ | ForeignLabelInExternalPackage + + -- | Label is in the package currently being compiled. + -- This is only used for creating hacky tmp labels during code generation. + -- Don't use it in any code that might be inlined across a package boundary + -- (ie, core code) else the information will be wrong relative to the + -- destination module. + | ForeignLabelInThisPackage + + deriving (Eq, Ord) + + +-- | For debugging problems with the CLabel representation. +-- We can't make a Show instance for CLabel because lots of its components don't have instances. +-- The regular Outputable instance only shows the label name, and not its other info. +-- +pprDebugCLabel :: CLabel -> SDoc +pprDebugCLabel lbl + = case lbl of + IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel" + <> whenPprDebug (text ":" <> text (show info))) + CmmLabel pkg _name _info + -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg) + + RtsLabel{} -> ppr lbl <> (parens $ text "RtsLabel") + + ForeignLabel _name mSuffix src funOrData + -> ppr lbl <> (parens $ text "ForeignLabel" + <+> ppr mSuffix + <+> ppr src + <+> ppr funOrData) + + _ -> ppr lbl <> (parens $ text "other CLabel") + + +data IdLabelInfo + = Closure -- ^ Label for closure + | InfoTable -- ^ Info tables for closures; always read-only + | Entry -- ^ Entry point + | Slow -- ^ Slow entry point + + | LocalInfoTable -- ^ Like InfoTable but not externally visible + | LocalEntry -- ^ Like Entry but not externally visible + + | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id + + | ConEntry -- ^ Constructor entry point + | ConInfoTable -- ^ Corresponding info table + + | ClosureTable -- ^ Table of closures for Enum tycons + + | Bytes -- ^ Content of a string literal. See + -- Note [Bytes label]. + | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block + -- instead of a closure entry-point. + -- See Note [Proc-point local block entry-point]. + + deriving (Eq, Ord, Show) + + +data RtsLabelInfo + = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- ^ Selector thunks + | RtsSelectorEntry Bool{-updatable-} Int{-offset-} + + | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks + | RtsApEntry Bool{-updatable-} Int{-arity-} + + | RtsPrimOp PrimOp + | RtsApFast FastString -- ^ _fast versions of generic apply + | RtsSlowFastTickyCtr String + + deriving (Eq, Ord) + -- NOTE: Eq on PtrString compares the pointer only, so this isn't + -- a real equality. + + +-- | What type of Cmm label we're dealing with. +-- Determines the suffix appended to the name when a CLabel.CmmLabel +-- is pretty printed. 
+data CmmLabelInfo + = CmmInfo -- ^ misc rts info tables, suffix _info + | CmmEntry -- ^ misc rts entry points, suffix _entry + | CmmRetInfo -- ^ misc rts ret info tables, suffix _info + | CmmRet -- ^ misc rts return points, suffix _ret + | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure + | CmmCode -- ^ misc rts code + | CmmClosure -- ^ closures eg CHARLIKE_closure + | CmmPrimCall -- ^ a prim call to some hand written Cmm code + deriving (Eq, Ord) + +data DynamicLinkerLabelInfo + = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt + | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo + | GotSymbolPtr -- ELF: foo@got + | GotSymbolOffset -- ELF: foo@gotoff + + deriving (Eq, Ord) + + +-- ----------------------------------------------------------------------------- +-- Constructing CLabels +-- ----------------------------------------------------------------------------- + +-- Constructing IdLabels +-- These are always local: + +mkSRTLabel :: Unique -> CLabel +mkSRTLabel u = SRTLabel u + +mkRednCountsLabel :: Name -> CLabel +mkRednCountsLabel name = + IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] + +-- These have local & (possibly) external variants: +mkLocalClosureLabel :: Name -> CafInfo -> CLabel +mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureLabel name c = IdLabel name c Closure +mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable +mkLocalClosureTableLabel name c = IdLabel name c ClosureTable + +mkClosureLabel :: Name -> CafInfo -> CLabel +mkInfoTableLabel :: Name -> CafInfo -> CLabel +mkEntryLabel :: Name -> CafInfo -> CLabel +mkClosureTableLabel :: Name -> CafInfo -> CLabel +mkConInfoTableLabel :: Name -> CafInfo -> CLabel +mkBytesLabel :: Name -> CLabel +mkClosureLabel name c = IdLabel name c Closure +mkInfoTableLabel name c = IdLabel name c InfoTable +mkEntryLabel name c = IdLabel name c Entry +mkClosureTableLabel name c = IdLabel name c ClosureTable +mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkBytesLabel name = IdLabel name NoCafRefs Bytes + +mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel +mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable + -- See Note [Proc-point local block entry-point]. 
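The IdLabel smart constructors above all pair a Name with an IdLabelInfo flavour, and that flavour alone decides the suffix the symbol gets when printed (see ppIdFlavor and the printing conventions later in this file). Below is a minimal, self-contained toy model of that suffixing scheme: it uses plain Strings instead of GHC's Name and SDoc, omits the Local*/BlockInfoTable flavours, and every name in it is illustrative rather than GHC's.

-- Toy mirror of IdLabelInfo; the suffix table matches ppIdFlavor below.
data Flavour = Closure | InfoTable | Entry | Slow | RednCounts
             | ConEntry | ConInfoTable | ClosureTable | Bytes
  deriving Show

suffix :: Flavour -> String
suffix Closure      = "closure"
suffix InfoTable    = "info"
suffix Entry        = "entry"
suffix Slow         = "slow"
suffix RednCounts   = "ct"
suffix ConEntry     = "con_entry"
suffix ConInfoTable = "con_info"
suffix ClosureTable = "closure_tbl"
suffix Bytes        = "bytes"

-- <Module>_<name>_<suffix>, per the printing conventions described below.
renderIdLabel :: String -> Flavour -> String
renderIdLabel name flav = name ++ "_" ++ suffix flav

main :: IO ()
main = mapM_ (putStrLn . renderIdLabel "Main_foo") [Closure, InfoTable, Entry]
-- prints: Main_foo_closure, Main_foo_info, Main_foo_entry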
+ +-- Constructing Cmm Labels +mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkArrWords_infoLabel, + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel +mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction +mkNonmovingWriteBarrierEnabledLabel + = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData +mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo +mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo +mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo +mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData +mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo +mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData +mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo +mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo +mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo +mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry + +mkSRTInfoLabel :: Int -> CLabel +mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo + where + lbl = + case n of + 1 -> fsLit "stg_SRT_1" + 2 -> fsLit "stg_SRT_2" + 3 -> fsLit "stg_SRT_3" + 4 -> fsLit "stg_SRT_4" + 5 -> fsLit "stg_SRT_5" + 6 -> fsLit "stg_SRT_6" + 7 -> fsLit "stg_SRT_7" + 8 -> fsLit "stg_SRT_8" + 9 -> fsLit "stg_SRT_9" + 10 -> fsLit "stg_SRT_10" + 11 -> fsLit "stg_SRT_11" + 12 -> fsLit "stg_SRT_12" + 13 -> fsLit "stg_SRT_13" + 14 -> fsLit "stg_SRT_14" + 15 -> fsLit "stg_SRT_15" + 16 -> fsLit "stg_SRT_16" + _ -> panic "mkSRTInfoLabel" + +----- +mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, + mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel + :: UnitId -> FastString -> CLabel + +mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo +mkCmmEntryLabel pkg str = CmmLabel pkg str CmmEntry +mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo +mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet +mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode +mkCmmDataLabel pkg str = CmmLabel pkg str CmmData +mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure + +mkLocalBlockLabel :: Unique -> CLabel +mkLocalBlockLabel u = LocalBlockLabel u + +-- Constructing RtsLabels +mkRtsPrimOpLabel :: PrimOp -> CLabel +mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) + +mkSelectorInfoLabel :: Bool -> Int -> CLabel +mkSelectorEntryLabel :: Bool -> Int -> CLabel +mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) +mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) + +mkApInfoTableLabel :: Bool -> Int -> CLabel +mkApEntryLabel :: Bool -> Int -> CLabel +mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd 
off) +mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off) + + +-- A call to some primitive hand written Cmm code +mkPrimCallLabel :: PrimCall -> CLabel +mkPrimCallLabel (PrimCall str pkg) + = CmmLabel pkg str CmmPrimCall + + +-- Constructing ForeignLabels + +-- | Make a foreign label +mkForeignLabel + :: FastString -- name + -> Maybe Int -- size prefix + -> ForeignLabelSource -- what package it's in + -> FunctionOrData + -> CLabel + +mkForeignLabel = ForeignLabel + + +-- | Update the label size field in a ForeignLabel +addLabelSize :: CLabel -> Int -> CLabel +addLabelSize (ForeignLabel str _ src fod) sz + = ForeignLabel str (Just sz) src fod +addLabelSize label _ + = label + +-- | Whether label is a top-level string literal +isBytesLabel :: CLabel -> Bool +isBytesLabel (IdLabel _ _ Bytes) = True +isBytesLabel _lbl = False + +-- | Whether label is a non-haskell label (defined in C code) +isForeignLabel :: CLabel -> Bool +isForeignLabel (ForeignLabel _ _ _ _) = True +isForeignLabel _lbl = False + +-- | Whether label is a static closure label (can come from haskell or cmm) +isStaticClosureLabel :: CLabel -> Bool +-- Closure defined in haskell (.hs) +isStaticClosureLabel (IdLabel _ _ Closure) = True +-- Closure defined in cmm +isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True +isStaticClosureLabel _lbl = False + +-- | Whether label is a .rodata label +isSomeRODataLabel :: CLabel -> Bool +-- info table defined in haskell (.hs) +isSomeRODataLabel (IdLabel _ _ ClosureTable) = True +isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True +isSomeRODataLabel (IdLabel _ _ InfoTable) = True +isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True +isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True +-- info table defined in cmm (.cmm) +isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True +isSomeRODataLabel _lbl = False + +-- | Whether the label points to some kind of info table +isInfoTableLabel :: CLabel -> Bool +isInfoTableLabel (IdLabel _ _ InfoTable) = True +isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True +isInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True +isInfoTableLabel _ = False + +-- | Whether the label points to a constructor info table +isConInfoTableLabel :: CLabel -> Bool +isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isConInfoTableLabel _ = False + +-- | Get the label size field from a ForeignLabel +foreignLabelStdcallInfo :: CLabel -> Maybe Int +foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info +foreignLabelStdcallInfo _lbl = Nothing + + +-- Constructing Large*Labels +mkBitmapLabel :: Unique -> CLabel +mkBitmapLabel uniq = LargeBitmapLabel uniq + +-- Constructing Cost Center Labels +mkCCLabel :: CostCentre -> CLabel +mkCCSLabel :: CostCentreStack -> CLabel +mkCCLabel cc = CC_Label cc +mkCCSLabel ccs = CCS_Label ccs + +mkRtsApFastLabel :: FastString -> CLabel +mkRtsApFastLabel str = RtsLabel (RtsApFast str) + +mkRtsSlowFastTickyCtrLabel :: String -> CLabel +mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) + + +-- Constructing Code Coverage Labels +mkHpcTicksLabel :: Module -> CLabel +mkHpcTicksLabel = HpcTicksLabel + + +-- Constructing labels used for dynamic linking +mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel +mkDynamicLinkerLabel = DynamicLinkerLabel + +dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel) +dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl) +dynamicLinkerLabelInfo _ = Nothing + +mkPicBaseLabel :: CLabel
+mkPicBaseLabel = PicBaseLabel + + +-- Constructing miscellaneous other labels +mkDeadStripPreventer :: CLabel -> CLabel +mkDeadStripPreventer lbl = DeadStripPreventer lbl + +mkStringLitLabel :: Unique -> CLabel +mkStringLitLabel = StringLitLabel + +mkAsmTempLabel :: Uniquable a => a -> CLabel +mkAsmTempLabel a = AsmTempLabel (getUnique a) + +mkAsmTempDerivedLabel :: CLabel -> FastString -> CLabel +mkAsmTempDerivedLabel = AsmTempDerivedLabel + +mkAsmTempEndLabel :: CLabel -> CLabel +mkAsmTempEndLabel l = mkAsmTempDerivedLabel l (fsLit "_end") + +-- | Construct a label for a DWARF Debug Information Entity (DIE) +-- describing another symbol. +mkAsmTempDieLabel :: CLabel -> CLabel +mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die") + +-- ----------------------------------------------------------------------------- +-- Convert between different kinds of label + +toClosureLbl :: CLabel -> CLabel +toClosureLbl (IdLabel n c _) = IdLabel n c Closure +toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure +toClosureLbl l = pprPanic "toClosureLbl" (ppr l) + +toSlowEntryLbl :: CLabel -> CLabel +toSlowEntryLbl (IdLabel n _ BlockInfoTable) + = pprPanic "toSlowEntryLbl" (ppr n) +toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow +toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) + +toEntryLbl :: CLabel -> CLabel +toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry +toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n) + -- See Note [Proc-point local block entry-point]. +toEntryLbl (IdLabel n c _) = IdLabel n c Entry +toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry +toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet +toEntryLbl l = pprPanic "toEntryLbl" (ppr l) + +toInfoLbl :: CLabel -> CLabel +toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable +toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable +toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable +toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo +toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo +toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) + +hasHaskellName :: CLabel -> Maybe Name +hasHaskellName (IdLabel n _ _) = Just n +hasHaskellName _ = Nothing + +-- ----------------------------------------------------------------------------- +-- Does a CLabel's referent itself refer to a CAF? +hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] +hasCAF (IdLabel _ MayHaveCafRefs _) = True +hasCAF _ = False + +-- Note [ticky for LNE] +-- ~~~~~~~~~~~~~~~~~~~~~ + +-- Until 14 Feb 2013, every ticky counter was associated with a +-- closure. Thus, ticky labels used IdLabel. It is odd that +-- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label +-- reason to add the name to the CAFEnv (and thus eventually the SRT), +-- but it was harmless because the ticky was only used if the closure +-- was also. +-- +-- Since we now have ticky counters for LNEs, it is no longer the case +-- that every ticky counter has an actual closure. So I changed the +-- generation of ticky counters' CLabels to not result in their +-- associated id ending up in the SRT. +-- +-- NB IdLabel is still appropriate for ticky ids (as opposed to +-- CmmLabel) because the LNE's counter is still related to an .hs Id, +-- that Id just isn't for a proper closure. 
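The toEntryLbl/toInfoLbl conversions above act as near-inverses on an IdLabel's flavour; BlockInfoTable is the deliberate exception, since its entry label is a plain LocalBlockLabel (see Note [Proc-point local block entry-points]). A small self-contained sketch of that round trip, with a simplified hypothetical flavour type standing in for IdLabelInfo:

-- Simplified flavours; BlockInfoTable is omitted because, as toEntryLbl
-- shows, its "entry" form is a LocalBlockLabel rather than another IdLabel.
data Flav = LocalInfo | LocalEntry | ConInfo | ConEntry | Info | Entry
  deriving (Eq, Show)

toEntryFlav :: Flav -> Flav
toEntryFlav LocalInfo = LocalEntry
toEntryFlav ConInfo   = ConEntry
toEntryFlav _         = Entry   -- catch-all, like toEntryLbl's last IdLabel case

toInfoFlav :: Flav -> Flav
toInfoFlav LocalEntry = LocalInfo
toInfoFlav ConEntry   = ConInfo
toInfoFlav _          = Info    -- catch-all, like toInfoLbl's last IdLabel case

-- Round-tripping an info flavour through its entry flavour is the identity:
main :: IO ()
main = print [ (f, toInfoFlav (toEntryFlav f)) | f <- [LocalInfo, ConInfo, Info] ]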
+ +-- ----------------------------------------------------------------------------- +-- Does a CLabel need declaring before use or not? +-- +-- See wiki:commentary/compiler/backends/ppr-c#prototypes + +needsCDecl :: CLabel -> Bool + -- False <=> it's pre-declared; don't bother + -- don't bother declaring Bitmap labels, we always make sure + -- they are defined before use. +needsCDecl (SRTLabel _) = True +needsCDecl (LargeBitmapLabel _) = False +needsCDecl (IdLabel _ _ _) = True +needsCDecl (LocalBlockLabel _) = True + +needsCDecl (StringLitLabel _) = False +needsCDecl (AsmTempLabel _) = False +needsCDecl (AsmTempDerivedLabel _ _) = False +needsCDecl (RtsLabel _) = False + +needsCDecl (CmmLabel pkgId _ _) + -- Prototypes for labels defined in the runtime system are imported + -- into HC files via includes/Stg.h. + | pkgId == rtsUnitId = False + + -- For other labels we inline one into the HC file directly. + | otherwise = True + +needsCDecl l@(ForeignLabel{}) = not (isMathFun l) +needsCDecl (CC_Label _) = True +needsCDecl (CCS_Label _) = True +needsCDecl (HpcTicksLabel _) = True +needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel" +needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel" +needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer" + +-- | If a label is a local block label then return just its 'BlockId', otherwise +-- 'Nothing'. +maybeLocalBlockLabel :: CLabel -> Maybe BlockId +maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq +maybeLocalBlockLabel _ = Nothing + + +-- | Check whether a label corresponds to a C function that has +-- a prototype in a system header somewhere, or is built-in +-- to the C compiler. For these labels we avoid generating our +-- own C prototypes. +isMathFun :: CLabel -> Bool +isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs +isMathFun _ = False + +math_funs :: UniqSet FastString +math_funs = mkUniqSet [ + -- _ISOC99_SOURCE + (fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"), + (fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"), + (fsLit "asin"), (fsLit "asinf"), (fsLit "asinl"), + (fsLit "asinh"), (fsLit "asinhf"), (fsLit "asinhl"), + (fsLit "atan"), (fsLit "atanf"), (fsLit "atanl"), + (fsLit "atan2"), (fsLit "atan2f"), (fsLit "atan2l"), + (fsLit "atanh"), (fsLit "atanhf"), (fsLit "atanhl"), + (fsLit "cbrt"), (fsLit "cbrtf"), (fsLit "cbrtl"), + (fsLit "ceil"), (fsLit "ceilf"), (fsLit "ceill"), + (fsLit "copysign"), (fsLit "copysignf"), (fsLit "copysignl"), + (fsLit "cos"), (fsLit "cosf"), (fsLit "cosl"), + (fsLit "cosh"), (fsLit "coshf"), (fsLit "coshl"), + (fsLit "erf"), (fsLit "erff"), (fsLit "erfl"), + (fsLit "erfc"), (fsLit "erfcf"), (fsLit "erfcl"), + (fsLit "exp"), (fsLit "expf"), (fsLit "expl"), + (fsLit "exp2"), (fsLit "exp2f"), (fsLit "exp2l"), + (fsLit "expm1"), (fsLit "expm1f"), (fsLit "expm1l"), + (fsLit "fabs"), (fsLit "fabsf"), (fsLit "fabsl"), + (fsLit "fdim"), (fsLit "fdimf"), (fsLit "fdiml"), + (fsLit "floor"), (fsLit "floorf"), (fsLit "floorl"), + (fsLit "fma"), (fsLit "fmaf"), (fsLit "fmal"), + (fsLit "fmax"), (fsLit "fmaxf"), (fsLit "fmaxl"), + (fsLit "fmin"), (fsLit "fminf"), (fsLit "fminl"), + (fsLit "fmod"), (fsLit "fmodf"), (fsLit "fmodl"), + (fsLit "frexp"), (fsLit "frexpf"), (fsLit "frexpl"), + (fsLit "hypot"), (fsLit "hypotf"), (fsLit "hypotl"), + (fsLit "ilogb"), (fsLit "ilogbf"), (fsLit "ilogbl"), + (fsLit "ldexp"), (fsLit "ldexpf"), (fsLit "ldexpl"), + (fsLit "lgamma"), (fsLit "lgammaf"), (fsLit "lgammal"), + (fsLit "llrint"), (fsLit "llrintf"), 
(fsLit "llrintl"), + (fsLit "llround"), (fsLit "llroundf"), (fsLit "llroundl"), + (fsLit "log"), (fsLit "logf"), (fsLit "logl"), + (fsLit "log10l"), (fsLit "log10"), (fsLit "log10f"), + (fsLit "log1pl"), (fsLit "log1p"), (fsLit "log1pf"), + (fsLit "log2"), (fsLit "log2f"), (fsLit "log2l"), + (fsLit "logb"), (fsLit "logbf"), (fsLit "logbl"), + (fsLit "lrint"), (fsLit "lrintf"), (fsLit "lrintl"), + (fsLit "lround"), (fsLit "lroundf"), (fsLit "lroundl"), + (fsLit "modf"), (fsLit "modff"), (fsLit "modfl"), + (fsLit "nan"), (fsLit "nanf"), (fsLit "nanl"), + (fsLit "nearbyint"), (fsLit "nearbyintf"), (fsLit "nearbyintl"), + (fsLit "nextafter"), (fsLit "nextafterf"), (fsLit "nextafterl"), + (fsLit "nexttoward"), (fsLit "nexttowardf"), (fsLit "nexttowardl"), + (fsLit "pow"), (fsLit "powf"), (fsLit "powl"), + (fsLit "remainder"), (fsLit "remainderf"), (fsLit "remainderl"), + (fsLit "remquo"), (fsLit "remquof"), (fsLit "remquol"), + (fsLit "rint"), (fsLit "rintf"), (fsLit "rintl"), + (fsLit "round"), (fsLit "roundf"), (fsLit "roundl"), + (fsLit "scalbln"), (fsLit "scalblnf"), (fsLit "scalblnl"), + (fsLit "scalbn"), (fsLit "scalbnf"), (fsLit "scalbnl"), + (fsLit "sin"), (fsLit "sinf"), (fsLit "sinl"), + (fsLit "sinh"), (fsLit "sinhf"), (fsLit "sinhl"), + (fsLit "sqrt"), (fsLit "sqrtf"), (fsLit "sqrtl"), + (fsLit "tan"), (fsLit "tanf"), (fsLit "tanl"), + (fsLit "tanh"), (fsLit "tanhf"), (fsLit "tanhl"), + (fsLit "tgamma"), (fsLit "tgammaf"), (fsLit "tgammal"), + (fsLit "trunc"), (fsLit "truncf"), (fsLit "truncl"), + -- ISO C 99 also defines these function-like macros in math.h: + -- fpclassify, isfinite, isinf, isnormal, signbit, isgreater, + -- isgreaterequal, isless, islessequal, islessgreater, isunordered + + -- additional symbols from _BSD_SOURCE + (fsLit "drem"), (fsLit "dremf"), (fsLit "dreml"), + (fsLit "finite"), (fsLit "finitef"), (fsLit "finitel"), + (fsLit "gamma"), (fsLit "gammaf"), (fsLit "gammal"), + (fsLit "isinf"), (fsLit "isinff"), (fsLit "isinfl"), + (fsLit "isnan"), (fsLit "isnanf"), (fsLit "isnanl"), + (fsLit "j0"), (fsLit "j0f"), (fsLit "j0l"), + (fsLit "j1"), (fsLit "j1f"), (fsLit "j1l"), + (fsLit "jn"), (fsLit "jnf"), (fsLit "jnl"), + (fsLit "lgamma_r"), (fsLit "lgammaf_r"), (fsLit "lgammal_r"), + (fsLit "scalb"), (fsLit "scalbf"), (fsLit "scalbl"), + (fsLit "significand"), (fsLit "significandf"), (fsLit "significandl"), + (fsLit "y0"), (fsLit "y0f"), (fsLit "y0l"), + (fsLit "y1"), (fsLit "y1f"), (fsLit "y1l"), + (fsLit "yn"), (fsLit "ynf"), (fsLit "ynl"), + + -- These functions are described in IEEE Std 754-2008 - + -- Standard for Floating-Point Arithmetic and ISO/IEC TS 18661 + (fsLit "nextup"), (fsLit "nextupf"), (fsLit "nextupl"), + (fsLit "nextdown"), (fsLit "nextdownf"), (fsLit "nextdownl") + ] + +-- ----------------------------------------------------------------------------- +-- | Is a CLabel visible outside this object file or not? +-- From the point of view of the code generator, a name is +-- externally visible if it has to be declared as exported +-- in the .o file's symbol table; that is, made non-static. 
+externallyVisibleCLabel :: CLabel -> Bool -- not C "static" +externallyVisibleCLabel (StringLitLabel _) = False +externallyVisibleCLabel (AsmTempLabel _) = False +externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False +externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (LocalBlockLabel _) = False +externallyVisibleCLabel (CmmLabel _ _ _) = True +externallyVisibleCLabel (ForeignLabel{}) = True +externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info +externallyVisibleCLabel (CC_Label _) = True +externallyVisibleCLabel (CCS_Label _) = True +externallyVisibleCLabel (DynamicLinkerLabel _ _) = False +externallyVisibleCLabel (HpcTicksLabel _) = True +externallyVisibleCLabel (LargeBitmapLabel _) = False +externallyVisibleCLabel (SRTLabel _) = False +externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel" +externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer" + +externallyVisibleIdLabel :: IdLabelInfo -> Bool +externallyVisibleIdLabel LocalInfoTable = False +externallyVisibleIdLabel LocalEntry = False +externallyVisibleIdLabel BlockInfoTable = False +externallyVisibleIdLabel _ = True + +-- ----------------------------------------------------------------------------- +-- Finding the "type" of a CLabel + +-- For generating correct types in label declarations: + +data CLabelType + = CodeLabel -- Address of some executable instructions + | DataLabel -- Address of data, not a GC ptr + | GcPtrLabel -- Address of a (presumably static) GC object + +isCFunctionLabel :: CLabel -> Bool +isCFunctionLabel lbl = case labelType lbl of + CodeLabel -> True + _other -> False + +isGcPtrLabel :: CLabel -> Bool +isGcPtrLabel lbl = case labelType lbl of + GcPtrLabel -> True + _other -> False + + +-- | Work out the general type of data at the address of this label +-- whether it be code, data, or static GC object. +labelType :: CLabel -> CLabelType +labelType (IdLabel _ _ info) = idInfoLabelType info +labelType (CmmLabel _ _ CmmData) = DataLabel +labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel +labelType (CmmLabel _ _ CmmCode) = CodeLabel +labelType (CmmLabel _ _ CmmInfo) = DataLabel +labelType (CmmLabel _ _ CmmEntry) = CodeLabel +labelType (CmmLabel _ _ CmmPrimCall) = CodeLabel +labelType (CmmLabel _ _ CmmRetInfo) = DataLabel +labelType (CmmLabel _ _ CmmRet) = CodeLabel +labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApFast _)) = CodeLabel +labelType (RtsLabel _) = DataLabel +labelType (LocalBlockLabel _) = CodeLabel +labelType (SRTLabel _) = DataLabel +labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel +labelType (ForeignLabel _ _ _ IsData) = DataLabel +labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)" +labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)" +labelType (StringLitLabel _) = DataLabel +labelType (CC_Label _) = DataLabel +labelType (CCS_Label _) = DataLabel +labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right? 
+labelType PicBaseLabel = DataLabel +labelType (DeadStripPreventer _) = DataLabel +labelType (HpcTicksLabel _) = DataLabel +labelType (LargeBitmapLabel _) = DataLabel + +idInfoLabelType :: IdLabelInfo -> CLabelType +idInfoLabelType info = + case info of + InfoTable -> DataLabel + LocalInfoTable -> DataLabel + BlockInfoTable -> DataLabel + Closure -> GcPtrLabel + ConInfoTable -> DataLabel + ClosureTable -> DataLabel + RednCounts -> DataLabel + Bytes -> DataLabel + _ -> CodeLabel + + +-- ----------------------------------------------------------------------------- + +-- | Is a 'CLabel' defined in the current module being compiled? +-- +-- Sometimes we can optimise references within a compilation unit in ways that +-- we couldn't for inter-module references. This provides a conservative +-- estimate of whether a 'CLabel' lives in the current module. +isLocalCLabel :: Module -> CLabel -> Bool +isLocalCLabel this_mod lbl = + case lbl of + IdLabel name _ _ + | isInternalName name -> True + | otherwise -> nameModule name == this_mod + LocalBlockLabel _ -> True + _ -> False + +-- ----------------------------------------------------------------------------- + +-- | Does a 'CLabel' need dynamic linkage? +-- +-- When referring to data in code, we need to know whether +-- that data resides in a DLL or not. [Win32 only.] +-- @labelDynamic@ returns @True@ if the label is located +-- in a DLL, be it a data reference or not. +labelDynamic :: DynFlags -> Module -> CLabel -> Bool +labelDynamic dflags this_mod lbl = + case lbl of + -- is the RTS in a DLL or not? + RtsLabel _ -> + externalDynamicRefs && (this_pkg /= rtsUnitId) + + IdLabel n _ _ -> + isDllName dflags this_mod n + + -- When compiling in the "dyn" way, each package is to be linked into + -- its own shared library. + CmmLabel pkg _ _ + | os == OSMinGW32 -> + externalDynamicRefs && (this_pkg /= pkg) + | otherwise -> + gopt Opt_ExternalDynamicRefs dflags + + LocalBlockLabel _ -> False + + ForeignLabel _ _ source _ -> + if os == OSMinGW32 + then case source of + -- Foreign label is in some un-named foreign package (or DLL). + ForeignLabelInExternalPackage -> True + + -- Foreign label is linked into the same package as the + -- source file currently being compiled. + ForeignLabelInThisPackage -> False + + -- Foreign label is in some named package. + -- When compiling in the "dyn" way, each package is to be + -- linked into its own DLL. + ForeignLabelInPackage pkgId -> + externalDynamicRefs && (this_pkg /= pkgId) + + else -- On Mac OS X and on ELF platforms, false positives are OK, + -- so we claim that all foreign imports come from dynamic + -- libraries + True + + CC_Label cc -> + externalDynamicRefs && not (ccFromThisModule cc this_mod) + + -- CCS_Label always contains a CostCentre defined in the current module + CCS_Label _ -> False + + HpcTicksLabel m -> + externalDynamicRefs && this_mod /= m + + -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. + _ -> False + where + externalDynamicRefs = gopt Opt_ExternalDynamicRefs dflags + os = platformOS (targetPlatform dflags) + this_pkg = moduleUnitId this_mod + + +----------------------------------------------------------------------------- +-- Printing out CLabels. + +{- +Convention: + + <name>_<type> + +where <name> is <Module>_<name> for external names and <unique> for +internal names. 
<type> is one of the following: + + info Info table + srt Static reference table + entry Entry code (function, closure) + slow Slow entry code (if any) + ret Direct return address + vtbl Vector table + <n>_alt Case alternative (tag n) + dflt Default case alternative + btm Large bitmap vector + closure Static closure + con_entry Dynamic Constructor entry code + con_info Dynamic Constructor info table + static_entry Static Constructor entry code + static_info Static Constructor info table + sel_info Selector info table + sel_entry Selector entry code + cc Cost centre + ccs Cost centre stack + +Many of these distinctions are only for documentation reasons. For +example, _ret is only distinguished from _entry to make it easy to +tell whether a code fragment is a return point or a closure/function +entry. + +Note [Closure and info labels] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a function 'foo, we have: + foo_info : Points to the info table describing foo's closure + (and entry code for foo with tables next to code) + foo_closure : Static (no-free-var) closure only: + points to the statically-allocated closure + +For a data constructor (such as Just or Nothing), we have: + Just_con_info: Info table for the data constructor itself + the first word of a heap-allocated Just + Just_info: Info table for the *worker function*, an + ordinary Haskell function of arity 1 that + allocates a (Just x) box: + Just = \x -> Just x + Just_closure: The closure for this worker + + Nothing_closure: a statically allocated closure for Nothing + Nothing_static_info: info table for Nothing_closure + +All these must be exported symbol, EXCEPT Just_info. We don't need to +export this because in other modules we either have + * A reference to 'Just'; use Just_closure + * A saturated call 'Just x'; allocate using Just_con_info +Not exporting these Just_info labels reduces the number of symbols +somewhat. + +Note [Bytes label] +~~~~~~~~~~~~~~~~~~ +For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which +points to a static data block containing the content of the literal. + +Note [Proc-point local block entry-points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A label for a proc-point local block entry-point has no "_entry" suffix. With +`infoTblLbl` we derive an info table label from a proc-point block ID. If +we convert such an info table label into an entry label we must produce +the label without an "_entry" suffix. So an info table label records +the fact that it was derived from a block ID in `IdLabelInfo` as +`BlockInfoTable`. + +The info table label and the local block label are both local labels +and are not externally visible. 
+-} + +instance Outputable CLabel where + ppr c = sdocWithDynFlags $ \dynFlags -> pprCLabel dynFlags c + +pprCLabel :: DynFlags -> CLabel -> SDoc + +pprCLabel _ (LocalBlockLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempLabel u) + | not (platformUnregisterised $ targetPlatform dynFlags) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempDerivedLabel l suf) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = ptext (asmTempLabelPrefix $ targetPlatform dynFlags) + <> case l of AsmTempLabel u -> pprUniqueAlways u + LocalBlockLabel u -> pprUniqueAlways u + _other -> pprCLabel dynFlags l + <> ftext suf + +pprCLabel dynFlags (DynamicLinkerLabel info lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprDynamicLinkerAsmLabel (targetPlatform dynFlags) info lbl + +pprCLabel dynFlags PicBaseLabel + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = text "1b" + +pprCLabel dynFlags (DeadStripPreventer lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = + {- + `lbl` can be temp one but we need to ensure that dsp label will stay + in the final binary so we prepend non-temp prefix ("dsp_") and + optional `_` (underscore) because this is how you mark non-temp symbols + on some platforms (Darwin) + -} + maybe_underscore dynFlags $ text "dsp_" + <> pprCLabel dynFlags lbl <> text "_dsp" + +pprCLabel dynFlags (StringLitLabel u) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprUniqueAlways u <> ptext (sLit "_str") + +pprCLabel dynFlags lbl + = getPprStyle $ \ sty -> + if platformMisc_ghcWithNativeCodeGen (platformMisc dynFlags) && asmStyle sty + then maybe_underscore dynFlags $ pprAsmCLbl (targetPlatform dynFlags) lbl + else pprCLbl lbl + +maybe_underscore :: DynFlags -> SDoc -> SDoc +maybe_underscore dynFlags doc = + if platformMisc_leadingUnderscore $ platformMisc dynFlags + then pp_cSEP <> doc + else doc + +pprAsmCLbl :: Platform -> CLabel -> SDoc +pprAsmCLbl platform (ForeignLabel fs (Just sz) _ _) + | platformOS platform == OSMinGW32 + -- In asm mode, we need to put the suffix on a stdcall ForeignLabel. + -- (The C compiler does this itself). + = ftext fs <> char '@' <> int sz +pprAsmCLbl _ lbl + = pprCLbl lbl + +pprCLbl :: CLabel -> SDoc +pprCLbl (StringLitLabel u) + = pprUniqueAlways u <> text "_str" + +pprCLbl (SRTLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt" + +pprCLbl (LargeBitmapLabel u) = + tempLabelPrefixOrUnderscore + <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" +-- Some bitsmaps for tuple constructors have a numeric tag (e.g. '7') +-- until that gets resolved we'll just force them to start +-- with a letter so the label will be legal assembly code. 
+ + +pprCLbl (CmmLabel _ str CmmCode) = ftext str +pprCLbl (CmmLabel _ str CmmData) = ftext str +pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str + +pprCLbl (LocalBlockLabel u) = + tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u + +pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast" + +pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsApEntry upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (CmmLabel _ fs CmmInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmEntry) + = ftext fs <> text "_entry" + +pprCLbl (CmmLabel _ fs CmmRetInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmRet) + = ftext fs <> text "_ret" + +pprCLbl (CmmLabel _ fs CmmClosure) + = ftext fs <> text "_closure" + +pprCLbl (RtsLabel (RtsPrimOp primop)) + = text "stg_" <> ppr primop + +pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) + = text "SLOW_CALL_fast_" <> text pat <> ptext (sLit "_ctr") + +pprCLbl (ForeignLabel str _ _ _) + = ftext str + +pprCLbl (IdLabel name _cafs flavor) = + internalNamePrefix name <> ppr name <> ppIdFlavor flavor + +pprCLbl (CC_Label cc) = ppr cc +pprCLbl (CCS_Label ccs) = ppr ccs + +pprCLbl (HpcTicksLabel mod) + = text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc") + +pprCLbl (AsmTempLabel {}) = panic "pprCLbl AsmTempLabel" +pprCLbl (AsmTempDerivedLabel {})= panic "pprCLbl AsmTempDerivedLabel" +pprCLbl (DynamicLinkerLabel {}) = panic "pprCLbl DynamicLinkerLabel" +pprCLbl (PicBaseLabel {}) = panic "pprCLbl PicBaseLabel" +pprCLbl (DeadStripPreventer {}) = panic "pprCLbl DeadStripPreventer" + +ppIdFlavor :: IdLabelInfo -> SDoc +ppIdFlavor x = pp_cSEP <> text + (case x of + Closure -> "closure" + InfoTable -> "info" + LocalInfoTable -> "info" + Entry -> "entry" + LocalEntry -> "entry" + Slow -> "slow" + RednCounts -> "ct" + ConEntry -> "con_entry" + ConInfoTable -> "con_info" + ClosureTable -> "closure_tbl" + Bytes -> "bytes" + BlockInfoTable -> "info" + ) + + +pp_cSEP :: SDoc +pp_cSEP = char '_' + + +instance Outputable ForeignLabelSource where + ppr fs + = case fs of + ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId + ForeignLabelInThisPackage -> parens $ text "this package" + ForeignLabelInExternalPackage -> parens $ text "external package" + +internalNamePrefix :: Name -> SDoc +internalNamePrefix name = getPprStyle $ \ sty -> + if asmStyle sty && isRandomGenerated then + sdocWithPlatform $ \platform -> + ptext (asmTempLabelPrefix platform) + else + empty + where + isRandomGenerated = not $ isExternalName name + +tempLabelPrefixOrUnderscore 
:: SDoc +tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform -> + getPprStyle $ \ sty -> + if asmStyle sty then + ptext (asmTempLabelPrefix platform) + else + char '_' + +-- ----------------------------------------------------------------------------- +-- Machine-dependent knowledge about labels. + +asmTempLabelPrefix :: Platform -> PtrString -- for formatting labels +asmTempLabelPrefix platform = case platformOS platform of + OSDarwin -> sLit "L" + OSAIX -> sLit "__L" -- follow IBM XL C's convention + _ -> sLit ".L" + +pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc +pprDynamicLinkerAsmLabel platform dllInfo lbl = + case platformOS platform of + OSDarwin + | platformArch platform == ArchX86_64 -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" + GotSymbolOffset -> ppr lbl + | otherwise -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + _ -> panic "pprDynamicLinkerAsmLabel" + + OSAIX -> + case dllInfo of + SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention + _ -> panic "pprDynamicLinkerAsmLabel" + + _ | osElfTarget (platformOS platform) -> elfLabel + + OSMinGW32 -> + case dllInfo of + SymbolPtr -> text "__imp_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + _ -> panic "pprDynamicLinkerAsmLabel" + where + elfLabel + | platformArch platform == ArchPPC + = case dllInfo of + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | platformArch platform == ArchX86_64 + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + GotSymbolPtr -> ppr lbl <> text "@gotpcrel" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + + | platformArch platform == ArchPPC_64 ELF_V1 + || platformArch platform == ArchPPC_64 ELF_V2 + = case dllInfo of + GotSymbolPtr -> text ".LC_" <> ppr lbl + <> text "@toc" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | otherwise + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + GotSymbolPtr -> ppr lbl <> text "@got" + GotSymbolOffset -> ppr lbl <> text "@gotoff" + +-- Figure out whether `symbol` may serve as an alias +-- to `target` within one compilation unit. +-- +-- This is true if any of these holds: +-- * `target` is a module-internal haskell name. +-- * `target` is an exported name, but comes from the same +-- module as `symbol` +-- +-- These are sufficient conditions for establishing e.g. a +-- GNU assembly alias ('.equiv' directive). Sadly, there is +-- no such thing as an alias to an imported symbol (conf. +-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/) +-- See note [emit-time elimination of static indirections]. +-- +-- Precondition is that both labels represent the +-- same semantic value. 
+ +mayRedirectTo :: CLabel -> CLabel -> Bool +mayRedirectTo symbol target + | Just nam <- haskellName + , staticClosureLabel + , isExternalName nam + , Just mod <- nameModule_maybe nam + , Just anam <- hasHaskellName symbol + , Just amod <- nameModule_maybe anam + = amod == mod + + | Just nam <- haskellName + , staticClosureLabel + , isInternalName nam + = True + + | otherwise = False + where staticClosureLabel = isStaticClosureLabel target + haskellName = hasHaskellName target + + +{- +Note [emit-time elimination of static indirections] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As described in #15155, certain static values are representationally +equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers). + + newtype A = A Int + {-# NOINLINE a #-} + a = A 42 + +a1_rYB :: Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] +a1_rYB = GHC.Types.I# 42# + +a [InlPrag=NOINLINE] :: A +[GblId, Unf=OtherCon []] +a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A) + +Formerly we created static indirections for these (IND_STATIC), which +consist of a statically allocated forwarding closure that contains +the (possibly tagged) indirectee. (See CMM/assembly below.) +This approach is suboptimal for two reasons: + (a) they occupy extra space, + (b) they need to be entered in order to obtain the indirectee, + thus they cannot be tagged. + +Fortunately there is a common case where static indirections can be +eliminated while emitting assembly (native or LLVM), viz. when the +indirectee is in the same module (object file) as the symbol that +points to it. In this case an assembly-level identification can +be created ('.equiv' directive), and as such the same object will +be assigned two names in the symbol table. Any of the identified +symbols can be referenced by a tagged pointer. + +Currently the 'mayRedirectTo' predicate will +give a clue whether a label can be equated with another, already +emitted, label (which can in turn be an alias). The general mechanics +is that we identify data (IND_STATIC closures) that are amenable +to aliasing while pretty-printing of assembly output, and emit the +'.equiv' directive instead of static data in such a case. + +Here is a sketch how the output is massaged: + + Consider +newtype A = A Int +{-# NOINLINE a #-} +a = A 42 -- I# 42# is the indirectee + -- 'a' is exported + + results in STG + +a1_rXq :: GHC.Types.Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] = + CCS_DONT_CARE GHC.Types.I#! [42#]; + +T15155.a [InlPrag=NOINLINE] :: T15155.A +[GblId, Unf=OtherCon []] = + CAF_ccs \ u [] a1_rXq; + + and CMM + +[section ""data" . a1_rXq_closure" { + a1_rXq_closure: + const GHC.Types.I#_con_info; + const 42; + }] + +[section ""data" . T15155.a_closure" { + T15155.a_closure: + const stg_IND_STATIC_info; + const a1_rXq_closure+1; + const 0; + const 0; + }] + +The emitted assembly is + +#### INDIRECTEE +a1_rXq_closure: -- module local haskell value + .quad GHC.Types.I#_con_info -- an Int + .quad 42 + +#### BEFORE +.globl T15155.a_closure -- exported newtype wrapped value +T15155.a_closure: + .quad stg_IND_STATIC_info -- the closure info + .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag) + .quad 0 + .quad 0 + +#### AFTER +.globl T15155.a_closure -- exported newtype wrapped value +.equiv a1_rXq_closure,T15155.a_closure -- both are shared + +The transformation is performed because + T15155.a_closure `mayRedirectTo` a1_rXq_closure+1 +returns True. 
+-} diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs new file mode 100644 index 0000000000..9200daec57 --- /dev/null +++ b/compiler/GHC/Cmm/CallConv.hs @@ -0,0 +1,212 @@ +module GHC.Cmm.CallConv ( + ParamLocation(..), + assignArgumentsPos, + assignStack, + realArgRegsCover +) where + +import GhcPrelude + +import GHC.Cmm.Expr +import GHC.Runtime.Layout +import GHC.Cmm (Convention(..)) +import GHC.Cmm.Ppr () -- For Outputable instances + +import DynFlags +import GHC.Platform +import Outputable + +-- Calculate the 'GlobalReg' or stack locations for function call +-- parameters as used by the Cmm calling convention. + +data ParamLocation + = RegisterParam GlobalReg + | StackParam ByteOff + +instance Outputable ParamLocation where + ppr (RegisterParam g) = ppr g + ppr (StackParam p) = ppr p + +-- | +-- Given a list of arguments, and a function that tells their types, +-- return a list showing where each argument is passed +-- +assignArgumentsPos :: DynFlags + -> ByteOff -- stack offset to start with + -> Convention + -> (a -> CmmType) -- how to get a type from an arg + -> [a] -- args + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) + +assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) + where + regs = case (reps, conv) of + (_, NativeNodeCall) -> getRegsWithNode dflags + (_, NativeDirectCall) -> getRegsWithoutNode dflags + ([_], NativeReturn) -> allRegs dflags + (_, NativeReturn) -> getRegsWithNode dflags + -- GC calling convention *must* put values in registers + (_, GC) -> allRegs dflags + (_, Slow) -> nodeOnly + -- The calling conventions first assign arguments to registers, + -- then switch to the stack when we first run out of registers + -- (even if there are still available registers for args of a + -- different type). When returning an unboxed tuple, we also + -- separate the stack arguments by pointerhood. 
+ (reg_assts, stk_args) = assign_regs [] reps regs + (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args + assignments = reg_assts ++ stk_assts + + assign_regs assts [] _ = (assts, []) + assign_regs assts (r:rs) regs | isVecType ty = vec + | isFloatType ty = float + | otherwise = int + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + float = case (w, regs) of + (W32, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) + (W32, (vs, f:fs, ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss)) + (W64, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) + (W64, (vs, fs, d:ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + int = case (w, regs) of + (W128, _) -> panic "W128 unsupported register type" + (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags) + -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss)) + (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) + -> k (RegisterParam l, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' + ty = arg_ty r + w = typeWidth ty + gcp | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr + passFloatInXmm = passFloatArgsInXmm dflags + +passFloatArgsInXmm :: DynFlags -> Bool +passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of + ArchX86_64 -> True + ArchX86 -> False + _ -> False + +-- We used to spill vector registers to the stack since the LLVM backend didn't +-- support vector registers in its calling convention. However, this has now +-- been fixed. This function remains only as a convenient way to re-enable +-- spilling when debugging code generation. +passVectorInReg :: Width -> DynFlags -> Bool +passVectorInReg _ _ = True + +assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a] + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) +assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) + where + assign_stk offset assts [] = (offset, assts) + assign_stk offset assts (r:rs) + = assign_stk off' ((r, StackParam off') : assts) rs + where w = typeWidth (arg_ty r) + off' = offset + size + -- Stack arguments always take a whole number of words, we never + -- pack them unlike constructor fields. + size = roundUpToWords dflags (widthInBytes w) + +----------------------------------------------------------------------------- +-- Local information about the registers available + +type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. + , [GlobalReg] -- floats + , [GlobalReg] -- doubles + , [GlobalReg] -- longs (int64 and word64) + , [Int] -- XMM (floats and doubles) + ) + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies. +-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. 
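
For intuition, a worked example of the assignStack function above (an
editor's sketch; 'a' and 'b' stand for two W64 arguments on a 64-bit
target, so each occupies one 8-byte word). The argument list is reversed
first, and the offset is bumped before each argument is placed, so the
first argument ends up at the deepest offset:

    -- assignStack dflags 0 typeOf [a, b]
    --   == (16, [(a, StackParam 16), (b, StackParam 8)])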
+ +getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs +getRegsWithoutNode dflags = + ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags) + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +-- getRegsWithNode uses R1/node even if it isn't a register +getRegsWithNode dflags = + ( if null (realVanillaRegs dflags) + then [VanillaReg 1] + else realVanillaRegs dflags + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg] +allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +allXmmRegs :: DynFlags -> [Int] + +allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags) +allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags) +allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags) +allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags) +allXmmRegs dflags = regList (mAX_XMM_REG dflags) + +realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg] +realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +realXmmRegNos :: DynFlags -> [Int] + +realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags) +realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags) +realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags) +realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags) + +realXmmRegNos dflags + | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags) + | otherwise = [] + +regList :: Int -> [Int] +regList n = [1 .. n] + +allRegs :: DynFlags -> AvailRegs +allRegs dflags = (allVanillaRegs dflags, + allFloatRegs dflags, + allDoubleRegs dflags, + allLongRegs dflags, + allXmmRegs dflags) + +nodeOnly :: AvailRegs +nodeOnly = ([VanillaReg 1], [], [], [], []) + +-- This returns the set of global registers that *cover* the machine registers +-- used for argument passing. On platforms where registers can overlap---right +-- now just x86-64, where Float and Double registers overlap---passing this set +-- of registers is guaranteed to preserve the contents of all live registers. We +-- only use this functionality in hand-written C-- code in the RTS. 
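
Concretely (an editor's illustration of the two equations below): when
floats are passed in xmm registers (passFloatArgsInXmm, i.e. x86-64),
the Float and Double sets are omitted, because the F- and D-registers
are backed by the same xmm machine registers; listing the XmmReg set
once covers them all.

    -- passFloatArgsInXmm:  V1..Vn, L1..Ln, XMM1..XMMn
    -- otherwise:           V1..Vn, F1..Fn, D1..Dn, L1..Ln, XMM1..XMMn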
+realArgRegsCover :: DynFlags -> [GlobalReg] +realArgRegsCover dflags + | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) + | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs new file mode 100644 index 0000000000..86ea0e94e2 --- /dev/null +++ b/compiler/GHC/Cmm/CommonBlockElim.hs @@ -0,0 +1,320 @@ +{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} + +module GHC.Cmm.CommonBlockElim + ( elimCommonBlocks + ) +where + + +import GhcPrelude hiding (iterate, succ, unzip, zip) + +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch (eqSwitchTargetWith) +import GHC.Cmm.ContFlowOpt + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import Data.Bits +import Data.Maybe (mapMaybe) +import qualified Data.List as List +import Data.Word +import qualified Data.Map as M +import Outputable +import qualified TrieMap as TM +import UniqFM +import Unique +import Control.Arrow (first, second) + +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks + +-- If two blocks are identical except for the label on the first node, +-- then we can eliminate one of the blocks. To ensure that the semantics +-- of the program are preserved, we have to rewrite each predecessor of the +-- eliminated block to proceed with the block we keep. + +-- The algorithm iterates over the blocks in the graph, +-- checking whether it has seen another block that is equal modulo labels. +-- If so, then it adds an entry in a map indicating that the new block +-- is made redundant by the old block. +-- Otherwise, it is added to the useful blocks. + +-- To avoid comparing every block with every other block repeatedly, we group +-- them by +-- * a hash of the block, ignoring labels (explained below) +-- * the list of outgoing labels +-- The hash is invariant under relabeling, so we only ever compare within +-- the same group of blocks. +-- +-- The list of outgoing labels is updated as we merge blocks (that is why they +-- are not included in the hash, which we want to calculate only once). +-- +-- All in all, two blocks should never be compared if they have different +-- hashes, and at most once otherwise. Previously, we were slower, and people +-- rightfully complained: #10397 + +-- TODO: Use optimization fuel +elimCommonBlocks :: CmmGraph -> CmmGraph +elimCommonBlocks g = replaceLabels env $ copyTicks env g + where + env = iterate mapEmpty blocks_with_key + -- The order of blocks doesn't matter here. While we could use + -- revPostorder which drops unreachable blocks this is done in + -- ContFlowOpt already which runs before this pass. So we use + -- toBlockList since it is faster. + groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] + blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] + +-- Invariant: The blocks in the list are pairwise distinct +-- (so avoid comparing them again) +type DistinctBlocks = [CmmBlock] +type Key = [Label] +type Subst = LabelMap BlockId + +-- The outer list groups by hash. We retain this grouping throughout. 
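
As a small worked example (an editor's sketch with made-up labels):
suppose L2 and L3 have equal bodies and both end in 'goto L5'. They land
in the same hash bucket and share the key [L5], so the merge records
L3 -> L2 in the substitution, and the final replaceLabels pass retargets
every jump:

    -- before                      -- after elimCommonBlocks
    L1: goto L3                    L1: goto L2
    L2: x = x + 1; goto L5         L2: x = x + 1; goto L5
    L3: x = x + 1; goto L5         -- L3 is dropped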
+iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst +iterate subst blocks + | mapNull new_substs = subst + | otherwise = iterate subst' updated_blocks + where + grouped_blocks :: [[(Key, [DistinctBlocks])]] + grouped_blocks = map groupByLabel blocks + + merged_blocks :: [[(Key, DistinctBlocks)]] + (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks + where + go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db)) + where + (new_subst2, db) = mergeBlockList subst dbs + + subst' = subst `mapUnion` new_substs + updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks + +-- Combine two lists of blocks. +-- While they are internally distinct they can still share common blocks. +mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) +mergeBlocks subst existing new = go new + where + go [] = (mapEmpty, existing) + go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of + -- This block is a duplicate. Drop it, and add it to the substitution + Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs + -- This block is not a duplicate, keep it. + Nothing -> second (b:) $ go bs + +mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks) +mergeBlockList _ [] = pprPanic "mergeBlockList" empty +mergeBlockList subst (b:bs) = go mapEmpty b bs + where + go !new_subst1 b [] = (new_subst1, b) + go !new_subst1 b1 (b2:bs) = go new_subst b bs + where + (new_subst2, b) = mergeBlocks subst b1 b2 + new_subst = new_subst1 `mapUnion` new_subst2 + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping. + +-- To speed up comparisons, we hash each basic block modulo jump labels. +-- The hashing is a bit arbitrary (the numbers are completely arbitrary), +-- but it should be fast and good enough. + +-- We want to get as many small buckets as possible, as comparing blocks is +-- expensive. So include as much as possible in the hash. Ideally everything +-- that is compared with (==) in eqBlockBodyWith. + +type HashCode = Int + +hash_block :: CmmBlock -> HashCode +hash_block block = + fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) + -- UniqFM doesn't like negative Ints + where hash_fst _ h = h + hash_mid m h = hash_node m + h `shiftL` 1 + hash_lst m h = hash_node m + h `shiftL` 1 + + hash_node :: CmmNode O x -> Word32 + hash_node n | dont_care n = 0 -- don't care + hash_node (CmmAssign r e) = hash_reg r + hash_e e + hash_node (CmmStore e e') = hash_e e + hash_e e' + hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as + hash_node (CmmBranch _) = 23 -- NB. ignore the label + hash_node (CmmCondBranch p _ _ _) = hash_e p + hash_node (CmmCall e _ _ _ _ _) = hash_e e + hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t + hash_node (CmmSwitch e _) = hash_e e + hash_node _ = error "hash_node: unknown Cmm node!" 
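
(Illustration: 'goto L1' and 'goto L2' hash identically, since
hash_node (CmmBranch _) ignores the target; that is precisely what makes
the hash invariant under relabeling. The targets are compared later, via
the Key of outgoing labels and eqLastWith.)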
+ + hash_reg :: CmmReg -> Word32 + hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397 + hash_reg (CmmGlobal _) = 19 + + hash_e :: CmmExpr -> Word32 + hash_e (CmmLit l) = hash_lit l + hash_e (CmmLoad e _) = 67 + hash_e e + hash_e (CmmReg r) = hash_reg r + hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check + hash_e (CmmRegOff r i) = hash_reg r + cvt i + hash_e (CmmStackSlot _ _) = 13 + + hash_lit :: CmmLit -> Word32 + hash_lit (CmmInt i _) = fromInteger i + hash_lit (CmmFloat r _) = truncate r + hash_lit (CmmVec ls) = hash_list hash_lit ls + hash_lit (CmmLabel _) = 119 -- ugh + hash_lit (CmmLabelOff _ i) = cvt $ 199 + i + hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i + hash_lit (CmmBlock _) = 191 -- ugh + hash_lit (CmmHighStackMark) = cvt 313 + + hash_tgt (ForeignTarget e _) = hash_e e + hash_tgt (PrimTarget _) = 31 -- lots of these + + hash_list f = foldl' (\z x -> f x + z) (0::Word32) + + cvt = fromInteger . toInteger + + hash_unique :: Uniquable a => a -> Word32 + hash_unique = cvt . getKey . getUnique + +-- | Ignore these node types for equality +dont_care :: CmmNode O x -> Bool +dont_care CmmComment {} = True +dont_care CmmTick {} = True +dont_care CmmUnwind {} = True +dont_care _other = False + +-- Utilities: equality and substitution on the graph. + +-- Given a map ``subst'' from BlockID -> BlockID, we define equality. +eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool +eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' +lookupBid :: LabelMap BlockId -> BlockId -> BlockId +lookupBid subst bid = case mapLookup bid subst of + Just bid -> lookupBid subst bid + Nothing -> bid + +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. +-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) + -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) + = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) + = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) + (CmmUnsafeForeignCall t2 r2 a2) + = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) + -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where + CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 + CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 + CmmReg r1 `eq` CmmReg r2 = r1==r2 + CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 + CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 + _e1 `eq` _e2 = False + + xs `eqs` ys = eqListWith eq xs ys + + eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 + eqLit l1 l2 = l1 == l2 + + eqArea Old Old = True + eqArea (Young id1) (Young id2) = eqBid id1 id2 + eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs. +eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool +eqBlockBodyWith eqBid block block' + {- + | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True + | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False + -} + = equal + where (_,m,l) = blockSplit block + nodes = filter (not . dont_care) (blockToList m) + (_,m',l') = blockSplit block' + nodes' = filter (not . 
dont_care) (blockToList m') + + equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && + eqLastWith eqBid l l' + + +eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool +eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 +eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) = + c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2 +eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = + t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 +eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) = + e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2 +eqLastWith _ _ _ = False + +eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool +eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' +eqMaybeWith _ Nothing Nothing = True +eqMaybeWith _ _ _ = False + +eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool +eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs +eqListWith _ [] [] = True +eqListWith _ _ _ = False + +-- | Given a block map, ensure that all "target" blocks are covered by +-- the same ticks as the respective "source" blocks. This not only +-- means copying ticks, but also adjusting tick scopes where +-- necessary. +copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph +copyTicks env g + | mapNull env = g + | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap + where -- Reverse block merge map + blockMap = toBlockMap g + revEnv = mapFoldlWithKey insertRev M.empty env + insertRev m k x = M.insertWith (const (k:)) x [k] m + -- Copy ticks and scopes into the given block + copyTo block = case M.lookup (entryLabel block) revEnv of + Nothing -> block + Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls + copy from to = + let ticks = blockTicks from + CmmEntry _ scp0 = firstNode from + (CmmEntry lbl scp1, code) = blockSplitHead to + in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead` + foldr blockCons code (map CmmTick ticks) + +-- Group by [Label] +-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. +groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] +groupByLabel = + go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) + where + go !m [] = TM.foldTM (:) m [] + go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries + where --k' = map (getKey . getUnique) k + adjust Nothing = Just (k,[v]) + adjust (Just (_,vs)) = Just (k,v:vs) + +groupByInt :: (a -> Int) -> [a] -> [[a]] +groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs + -- See Note [Unique Determinism and code generation] + where + go m x = alterUFM addEntry m (f x) + where + addEntry xs = Just $! 
maybe [x] (x:) xs
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
new file mode 100644
index 0000000000..7765972d02
--- /dev/null
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -0,0 +1,451 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+module GHC.Cmm.ContFlowOpt
+    ( cmmCfgOpts
+    , cmmCfgOptsProc
+    , removeUnreachableBlocksProc
+    , replaceLabels
+    )
+where
+
+import GhcPrelude hiding (succ, unzip, zip)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
+import Maybes
+import Panic
+import Util
+
+import Control.Monad
+
+
+-- Note [What is shortcutting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this Cmm code:
+--
+-- L1: ...
+--     goto L2;
+-- L2: goto L3;
+-- L3: ...
+--
+-- Here L2 is an empty block and contains only an unconditional branch
+-- to L3. In this situation any block that jumps to L2 can jump
+-- directly to L3:
+--
+-- L1: ...
+--     goto L3;
+-- L2: goto L3;
+-- L3: ...
+--
+-- In this situation we say that we shortcut L2 to L3. One of the
+-- consequences of shortcutting is that some blocks of code may become
+-- unreachable (in the example above this is true for L2).
+
+
+-- Note [Control-flow optimisations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- This optimisation does three things:
+--
+--   - If a block finishes in an unconditional branch to another block
+--     and that is the only jump to that block, we concatenate the
+--     destination block at the end of the current one.
+--
+--   - If a block finishes in a call whose continuation block is a
+--     goto, then we can shortcut the destination, making the
+--     continuation block the destination of the goto - but see Note
+--     [Shortcut call returns].
+--
+--   - For any block that is not a call we try to shortcut the
+--     destination(s). Additionally, if a block ends with a
+--     conditional branch we try to invert the condition.
+--
+-- Blocks are processed using postorder DFS traversal. A side effect
+-- of determining traversal order with a graph search is elimination
+-- of any blocks that are unreachable.
+--
+-- Transformations are improved by working from the end of the graph
+-- towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- Note [Shortcut call returns]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
+-- we go, and also a mapping from BlockId to BlockId, representing
+-- continuation labels that we have renamed. This latter mapping is
+-- important because we might shortcut a CmmCall continuation. For
+-- example:
+--
+--    Sp[0] = L
+--    call g returns to L
+--    L: goto M
+--    M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L):
+--
+--    Sp[0] = M
+--    call g returns to M
+--    M: ...
+--
+-- So we keep track of which labels we have renamed and apply the mapping
+-- at the end with replaceLabels.
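
A minimal sketch of how these pieces compose (an editor's illustration;
the labels and map contents are hypothetical):

    -- blockConcat decided to shortcut l2 to l3:
    shortcut_map = mapFromList [(l2, l3)] :: LabelMap BlockId

    -- one replaceLabels pass rewrites the call continuation, the
    -- 'Sp[0] = l2' assignment (a CmmBlock literal) and any branches:
    g' = replaceLabels shortcut_map g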
+
+
+-- Note [Shortcut call returns and proc-points]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this code that you might get from a recursive
+-- let-no-escape:
+--
+--       goto L1
+--      L1:
+--       if (Hp > HpLim) then L2 else L3
+--      L2:
+--       call stg_gc_noregs returns to L4
+--      L4:
+--       goto L1
+--      L3:
+--       ...
+--       goto L1
+--
+-- Then the control-flow optimiser shortcuts L4. But that turns L1
+-- into the call-return proc point, and every iteration of the loop
+-- has to shuffle variables to and from the stack. So we must *not*
+-- shortcut L4.
+--
+-- Moreover, not shortcutting call returns is probably fine. If L4 can
+-- concat with its branch target then it will still do so. And we
+-- save some compile time because we don't have to traverse all the
+-- code in replaceLabels.
+--
+-- However, we probably do want to do this if we are splitting proc
+-- points, because L1 will be a proc-point anyway, so merging it with
+-- L4 reduces the number of proc points. Unfortunately recursive
+-- let-no-escapes won't generate very good code with proc-point
+-- splitting on - we should probably compile them to explicitly use
+-- the native calling convention instead.
+
+cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
+cmmCfgOpts split g = fst (blockConcat split g)
+
+cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
+cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
+  where (g', env) = blockConcat split g
+        info' = info{ info_tbls = new_info_tbls }
+        new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
+
+        -- If we changed any labels, then we have to update the info tables
+        -- too, except for the top-level info table because that might be
+        -- referred to by other procs.
+        upd_info (k,info)
+          | Just k' <- mapLookup k env
+          = (k', if k' == g_entry g'
+                 then info
+                 else info{ cit_lbl = infoTblLbl k' })
+          | otherwise
+          = (k,info)
+cmmCfgOptsProc _ top = top
+
+
+blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
+blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
+  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
+  where
+    -- We might be able to shortcut the entry BlockId itself.
+    -- Remember to update the shortcut_map, since we also have to
+    -- update the info_tbls mapping now.
+    (new_entry, shortcut_map')
+      | Just entry_blk <- mapLookup entry_id new_blocks
+      , Just dest <- canShortcut entry_blk
+      = (dest, mapInsert entry_id dest shortcut_map)
+      | otherwise
+      = (entry_id, shortcut_map)
+
+    -- The blocks are sorted in reverse postorder, but we want to go from
+    -- the exit towards the beginning, so we use foldr below.
+    blocks = revPostorder g
+    blockmap = foldl' (flip addBlock) emptyBody blocks
+
+    -- Accumulator contains three components:
+    --  * map of blocks in a graph
+    --  * map of shortcut labels. See Note [Shortcut call returns]
+    --  * map containing number of predecessors for each block. We discard
+    --    it after we process all blocks.
+    (new_blocks, shortcut_map, _) =
+      foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
+
+    -- Map of predecessors for the initial graph. We increase the number
+    -- of predecessors for the entry block by one to denote that it is a
+    -- target of a jump, even if no block in the current graph jumps
+    -- to it.
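
For instance (illustrative): if exactly one block in the graph jumped
back to the entry block, predMap alone would report a single predecessor
and maybe_concat could splice the entry block into that jumper; bumping
the count records the implicit jump into the procedure from outside and
prevents that.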
+    initialBackEdges = incPreds entry_id (predMap blocks)
+
+    maybe_concat :: CmmBlock
+                 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+                 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+    maybe_concat block (!blocks, !shortcut_map, !backEdges)
+        -- If:
+        --   (1) the current block ends with an unconditional branch to b' and
+        --   (2) b' has exactly one predecessor (namely, the current block)
+        --
+        -- Then:
+        --   (1) append block b' at the end of the current block
+        --   (2) remove b' from the map of blocks
+        --   (3) remove information about b' from the predecessors map
+        --
+        -- Since we know that the block has only one predecessor we call
+        -- mapDelete directly instead of calling decPreds.
+        --
+        -- Note that we always maintain an up-to-date list of predecessors, so
+        -- we can ignore the contents of shortcut_map
+      | CmmBranch b' <- last
+      , hasOnePredecessor b'
+      , Just blk' <- mapLookup b' blocks
+      = let bid' = entryLabel blk'
+        in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
+           , shortcut_map
+           , mapDelete b' backEdges )
+
+        -- If:
+        --   (1) we are splitting proc points (see Note
+        --       [Shortcut call returns and proc-points]) and
+        --   (2) the current block is a CmmCall or CmmForeignCall with
+        --       continuation b' and
+        --   (3) we can shortcut that continuation to dest
+        -- Then:
+        --   (1) we change the continuation to point to dest
+        --   (2) create a mapping from b' to dest
+        --   (3) increase the number of predecessors of dest by 1
+        --   (4) decrease the number of predecessors of b' by 1
+        --
+        -- Later we will use replaceLabels to substitute all occurrences of b'
+        -- with dest.
+      | splitting_procs
+      , Just b' <- callContinuation_maybe last
+      , Just blk' <- mapLookup b' blocks
+      , Just dest <- canShortcut blk'
+      = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
+        , mapInsert b' dest shortcut_map
+        , decPreds b' $ incPreds dest backEdges )
+
+        -- If:
+        --   (1) the block does not end with a call
+        -- Then:
+        --   (1) if it ends with a conditional branch, attempt to invert the
+        --       condition
+        --   (2) attempt to shortcut all destination blocks
+        --   (3) if the new successors of the block differ from the old ones,
+        --       update the map of predecessors accordingly
+        --
+        -- A special case of this is a situation when a block ends with an
+        -- unconditional jump to a block that can be shortcut.
+      | Nothing <- callContinuation_maybe last
+      = let oldSuccs = successors last
+            newSuccs = successors rewrite_last
+        in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
+           , shortcut_map
+           , if oldSuccs == newSuccs
+             then backEdges
+             else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
+
+        -- Otherwise don't do anything
+      | otherwise
+      = ( blocks, shortcut_map, backEdges )
+      where
+        (head, last) = blockSplitTail block
+        bid = entryLabel block
+
+        -- Changes the continuation of a call to the specified label
+        update_cont dest =
+            case last of
+              CmmCall{}        -> last { cml_cont = Just dest }
+              CmmForeignCall{} -> last { succ = dest }
+              _                -> panic "Can't shortcut continuation."
+
+        -- Attempts to shortcut the successors of the last node
+        shortcut_last = mapSuccessors shortcut last
+          where
+            shortcut l =
+                case mapLookup l blocks of
+                  Just b | Just dest <- canShortcut b -> dest
+                  _otherwise -> l
+
+        rewrite_last
+            -- Sometimes we can get rid of the conditional completely.
+          | CmmCondBranch _cond t f _l <- shortcut_last
+          , t == f
+          = CmmBranch t
+
+            -- See Note [Invert Cmm conditionals]
+          | CmmCondBranch cond t f l <- shortcut_last
+          , hasOnePredecessor t -- inverting will make t a fallthrough
+          , likelyTrue l || (numPreds f > 1)
+          , Just cond' <- maybeInvertCmmExpr cond
+          = CmmCondBranch cond' f t (invertLikeliness l)
+
+            -- If all jump destinations of a switch go to the same
+            -- target, eliminate the switch.
+          | CmmSwitch _expr targets <- shortcut_last
+          , (t:ts) <- switchTargetsToList targets
+          , all (== t) ts
+          = CmmBranch t
+
+          | otherwise
+          = shortcut_last
+
+        likelyTrue (Just True) = True
+        likelyTrue _           = False
+
+        invertLikeliness :: Maybe Bool -> Maybe Bool
+        invertLikeliness = fmap not
+
+        -- Number of predecessors for a block
+        numPreds bid = mapLookup bid backEdges `orElse` 0
+
+        hasOnePredecessor b = numPreds b == 1
+
+{-
+  Note [Invert Cmm conditionals]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  The native code generator always produces jumps to the true branch.
+  Falling through to the false branch is however faster. So we try to
+  arrange for that to happen.
+  This means we invert the condition if:
+  * The likely path will become a fallthrough.
+  * We can't guarantee a fallthrough for the false branch, but we can
+    for the true branch.
+
+  In some cases it's faster to avoid inverting when the false branch is likely.
+  However, determining when that is the case is neither easy nor cheap, so for
+  now we always invert, as this produces smaller binaries and code that is
+  equally fast on average. (On an i7-6700K)
+
+  TODO:
+  There is also the edge case when both branches have multiple predecessors.
+  In this case we could assume that we will end up with a jump for BOTH
+  branches. In this case it might be best to put the likely path in the true
+  branch, especially if there are large numbers of predecessors, as this saves
+  us the jump that's not taken. However, I haven't tested this, and as of early
+  2018 we almost never generate cmm where this would apply.
+-}
+
+-- Functions for incrementing and decrementing the number of predecessors. If
+-- decrementing would set the predecessor count to 0, we remove the entry
+-- from the map.
+-- Invariant: if a block has no predecessors it should be dropped from the
+-- graph because it is unreachable. maybe_concat is constructed to maintain
+-- that invariant, but calling replaceLabels may introduce unreachable blocks.
+-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
+-- blocks.
+incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
+incPreds bid edges = mapInsertWith (+) bid 1 edges
+decPreds bid edges = case mapLookup bid edges of
+                       Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
+                       Just _                 -> mapDelete bid edges
+                       _                      -> edges
+
+
+-- Checks if a block consists only of "goto dest". If it does, then we
+-- return the destination as "Just dest". See Note [What is shortcutting]
+canShortcut :: CmmBlock -> Maybe BlockId
+canShortcut block
+    | (_, middle, CmmBranch dest) <- blockSplit block
+    , all dont_care $ blockToList middle
+    = Just dest
+    | otherwise
+    = Nothing
+    where dont_care CmmComment{} = True
+          dont_care CmmTick{}    = True
+          dont_care _other       = False
+
+-- Concatenates two blocks. The first one is assumed to be open on exit, the
+-- second is assumed to be closed on entry (i.e. it has a label attached to
+-- it, which the splice function removes by calling snd on the result of
+-- blockSplitHead).
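
A sketch of splice's effect (an editor's illustration in Cmm-style
pseudocode), as used by maybe_concat after it has split off A's final
'goto B':

    A: <mid1>        B: <mid2>; goto C
        ==>          A: <mid1>; <mid2>; goto C

A keeps its label, and the result carries the combined tick scopes of
both blocks.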
+splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
+splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
+  where (CmmEntry lbl sc0, code0) = blockSplitHead head
+        (CmmEntry _ sc1, code1)  = blockSplitHead rest
+        entry = CmmEntry lbl (combineTickScopes sc0 sc1)
+
+-- If the node is a call with a continuation, return Just the label of
+-- that continuation. Otherwise return Nothing.
+callContinuation_maybe :: CmmNode O C -> Maybe BlockId
+callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
+callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
+callContinuation_maybe _                               = Nothing
+
+
+-- Map over the CmmGraph, replacing each label with its mapping in the
+-- supplied LabelMap.
+replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
+replaceLabels env g
+  | mapNull env = g
+  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
+   where
+     replace_eid g = g {g_entry = lookup (g_entry g)}
+     lookup id = mapLookup id env `orElse` id
+
+     txnode :: CmmNode e x -> CmmNode e x
+     txnode (CmmBranch bid) = CmmBranch (lookup bid)
+     txnode (CmmCondBranch p t f l) =
+       mkCmmCondBranch (exp p) (lookup t) (lookup f) l
+     txnode (CmmSwitch e ids) =
+       CmmSwitch (exp e) (mapSwitchTargets lookup ids)
+     txnode (CmmCall t k rg a res r) =
+       CmmCall (exp t) (liftM lookup k) rg a res r
+     txnode fc@CmmForeignCall{} =
+       fc{ args = map exp (args fc), succ = lookup (succ fc) }
+     txnode other = mapExpDeep exp other
+
+     exp :: CmmExpr -> CmmExpr
+     exp (CmmLit (CmmBlock bid))     = CmmLit (CmmBlock (lookup bid))
+     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
+     exp e                           = e
+
+mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
+mkCmmCondBranch p t f l =
+  if t == f then CmmBranch t else CmmCondBranch p t f l
+
+-- Build a map from a block to its number of predecessors.
+predMap :: [CmmBlock] -> LabelMap Int
+predMap blocks = foldr add_preds mapEmpty blocks
+  where
+    add_preds block env = foldr add env (successors block)
+      where add lbl env = mapInsertWith (+) lbl 1 env
+
+-- Removing unreachable blocks
+removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
+removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
+   | used_blocks `lengthLessThan` mapSize (toBlockMap g)
+   = CmmProc info' lbl live g'
+   | otherwise
+   = proc
+   where
+     g'    = ofBlockList (g_entry g) used_blocks
+     info' = info { info_tbls = keep_used (info_tbls info) }
+             -- Remove any info_tbls for unreachable blocks
+
+     keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
+     keep_used bs = mapFoldlWithKey keep mapEmpty bs
+
+     keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
+     keep env l i | l `setMember` used_lbls = mapInsert l i env
+                  | otherwise               = env
+
+     used_blocks :: [CmmBlock]
+     used_blocks = revPostorder g
+
+     used_lbls :: LabelSet
+     used_lbls = setFromList $ map entryLabel used_blocks
diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs
new file mode 100644
index 0000000000..fcabb1df0f
--- /dev/null
+++ b/compiler/GHC/Cmm/Dataflow.hs
@@ -0,0 +1,441 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+
+--
+-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones,
+-- and Norman Ramsey
+--
+-- Modifications copyright (c) The University of Glasgow 2012
+--
+-- This module is a specialised and optimised version of
+-- Compiler.Hoopl.Dataflow in the hoopl package. In particular it is
+-- specialised to the UniqSM monad.
+--
+
+module GHC.Cmm.Dataflow
+  ( C, O, Block
+  , lastNode, entryLabel
+  , foldNodesBwdOO
+  , foldRewriteNodesBwdOO
+  , DataflowLattice(..), OldFact(..), NewFact(..), JoinedFact(..)
+  , TransferFun, RewriteFun
+  , Fact, FactBase
+  , getFact, mkFactBase
+  , analyzeCmmFwd, analyzeCmmBwd
+  , rewriteCmmBwd
+  , changedIf
+  , joinOutFacts
+  , joinFacts
+  )
+where
+
+import GhcPrelude
+
+import GHC.Cmm
+import UniqSupply
+
+import Data.Array
+import Data.Maybe
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+
+type family Fact (x :: Extensibility) f :: *
+type instance Fact C f = FactBase f
+type instance Fact O f = f
+
+newtype OldFact a = OldFact a
+
+newtype NewFact a = NewFact a
+
+-- | The result of joining OldFact and NewFact.
+data JoinedFact a
+  = Changed !a     -- ^ Result is different from OldFact.
+  | NotChanged !a  -- ^ Result is the same as OldFact.
+
+getJoined :: JoinedFact a -> a
+getJoined (Changed a)    = a
+getJoined (NotChanged a) = a
+
+changedIf :: Bool -> a -> JoinedFact a
+changedIf True  = Changed
+changedIf False = NotChanged
+
+type JoinFun a = OldFact a -> NewFact a -> JoinedFact a
+
+data DataflowLattice a = DataflowLattice
+  { fact_bot  :: a
+  , fact_join :: JoinFun a
+  }
+
+data Direction = Fwd | Bwd
+
+type TransferFun f = CmmBlock -> FactBase f -> FactBase f
+
+-- | Function for rewriting and analysis combined. To be used with
+-- @rewriteCmm@.
+--
+-- Currently set to work with the @UniqSM@ monad, but we could probably
+-- abstract that away (if we do that, we might want to specialize the
+-- fixpoint algorithms to the particular monads through SPECIALIZE).
+type RewriteFun f = CmmBlock -> FactBase f -> UniqSM (CmmBlock, FactBase f)
+
+analyzeCmmBwd, analyzeCmmFwd
+    :: DataflowLattice f
+    -> TransferFun f
+    -> CmmGraph
+    -> FactBase f
+    -> FactBase f
+analyzeCmmBwd = analyzeCmm Bwd
+analyzeCmmFwd = analyzeCmm Fwd
+
+analyzeCmm
+    :: Direction
+    -> DataflowLattice f
+    -> TransferFun f
+    -> CmmGraph
+    -> FactBase f
+    -> FactBase f
+analyzeCmm dir lattice transfer cmmGraph initFact =
+    {-# SCC analyzeCmm #-}
+    let entry = g_entry cmmGraph
+        hooplGraph = g_graph cmmGraph
+        blockMap =
+            case hooplGraph of
+                GMany NothingO bm NothingO -> bm
+    in fixpointAnalysis dir lattice transfer entry blockMap initFact
+
+-- Fixpoint algorithm.
+fixpointAnalysis
+    :: forall f.
+       Direction
+    -> DataflowLattice f
+    -> TransferFun f
+    -> Label
+    -> LabelMap CmmBlock
+    -> FactBase f
+    -> FactBase f
+fixpointAnalysis direction lattice do_block entry blockmap = loop start
+  where
+    -- Sorting the blocks helps to minimize the number of times we need to
+    -- process blocks. For instance, for forward analysis we want to look at
+    -- blocks in reverse postorder. Also, see comments for sortBlocks.
+    blocks     = sortBlocks direction entry blockmap
+    num_blocks = length blocks
+    block_arr  = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
+    start      = {-# SCC "start" #-} IntSet.fromDistinctAscList
+                 [0 .. num_blocks - 1]
+    dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+    join       = fact_join lattice
+
+    loop
+        :: IntHeap     -- ^ Worklist, i.e., blocks to process
+        -> FactBase f  -- ^ Current result (increases monotonically)
+        -> FactBase f
+    loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo =
+        let block     = block_arr ! index
+            out_facts = {-# SCC "do_block" #-} do_block block fbase1
+            -- For each of the outgoing edges, we join it with the current
+            -- information in fbase1 and (if something changed) we update it
+            -- and add the affected blocks to the worklist.
+            (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-}
+                mapFoldlWithKey
+                    (updateFact join dep_blocks) (todo1, fbase1) out_facts
+        in loop todo2 fbase2
+    loop _ !fbase1 = fbase1
+
+rewriteCmmBwd
+    :: DataflowLattice f
+    -> RewriteFun f
+    -> CmmGraph
+    -> FactBase f
+    -> UniqSM (CmmGraph, FactBase f)
+rewriteCmmBwd = rewriteCmm Bwd
+
+rewriteCmm
+    :: Direction
+    -> DataflowLattice f
+    -> RewriteFun f
+    -> CmmGraph
+    -> FactBase f
+    -> UniqSM (CmmGraph, FactBase f)
+rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do
+    let entry = g_entry cmmGraph
+        hooplGraph = g_graph cmmGraph
+        blockMap1 =
+            case hooplGraph of
+                GMany NothingO bm NothingO -> bm
+    (blockMap2, facts) <-
+        fixpointRewrite dir lattice rwFun entry blockMap1 initFact
+    return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts)
+
+fixpointRewrite
+    :: forall f.
+       Direction
+    -> DataflowLattice f
+    -> RewriteFun f
+    -> Label
+    -> LabelMap CmmBlock
+    -> FactBase f
+    -> UniqSM (LabelMap CmmBlock, FactBase f)
+fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap
+  where
+    -- Sorting the blocks helps to minimize the number of times we need to
+    -- process blocks. For instance, for forward analysis we want to look at
+    -- blocks in reverse postorder. Also, see comments for sortBlocks.
+    blocks     = sortBlocks dir entry blockmap
+    num_blocks = length blocks
+    block_arr  = {-# SCC "block_arr_rewrite" #-}
+                 listArray (0, num_blocks - 1) blocks
+    start      = {-# SCC "start_rewrite" #-}
+                 IntSet.fromDistinctAscList [0 .. num_blocks - 1]
+    dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks
+    join       = fact_join lattice
+
+    loop
+        :: IntHeap            -- ^ Worklist, i.e., blocks to process
+        -> LabelMap CmmBlock  -- ^ Rewritten blocks.
+        -> FactBase f         -- ^ Current facts.
+        -> UniqSM (LabelMap CmmBlock, FactBase f)
+    loop todo !blocks1 !fbase1
+      | Just (index, todo1) <- IntSet.minView todo = do
+        -- Note that we use the *original* block here. This is important.
+        -- We're optimistically rewriting blocks even before reaching the fixed
+        -- point, which means that the rewrite might be incorrect. So if the
+        -- facts change, we need to rewrite the original block again (taking
+        -- into account the new facts).
+        let block = block_arr ! index
+        (new_block, out_facts) <- {-# SCC "do_block_rewrite" #-}
+            do_block block fbase1
+        let blocks2 = mapInsert (entryLabel new_block) new_block blocks1
+            (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-}
+                mapFoldlWithKey
+                    (updateFact join dep_blocks) (todo1, fbase1) out_facts
+        loop todo2 blocks2 fbase2
+    loop _ !blocks1 !fbase1 = return (blocks1, fbase1)
+
+
+{-
+Note [Unreachable blocks]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A block that is not in the domain of tfb_fbase is "currently unreachable".
+A currently-unreachable block is not even analyzed. Reason: consider
+constant prop and this graph, with entry point L1:
+  L1: x:=3; goto L4
+  L2: x:=4; goto L4
+  L4: if x>3 goto L2 else goto L5
+Here L2 is actually unreachable, but if we process it with bottom input fact,
+we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
+
+* If a currently-unreachable block is not analyzed, then its rewritten
+  graph will not be accumulated in tfb_rg. And that is good:
+  unreachable blocks simply do not appear in the output.
+
+* Note that clients must be careful to provide a fact (even if bottom)
+  for each entry point. Otherwise useful blocks may be garbage collected.
+
+* Note that updateFact must set the change-flag if a label goes from
+  not-in-fbase to in-fbase, even if its fact is bottom. In effect the
+  real fact lattice is
+       UNR
+       bottom
+       the points above bottom
+
+* Even if the fact is going from UNR to bottom, we still call the
+  client's fact_join function because it might give the client
+  some useful debugging information.
+
+* All of this only applies for *forward* fixpoints. For the backward
+  case we must treat every block as reachable; it might finish with a
+  'return', and therefore have no successors, for example.
+-}
+
+
+-----------------------------------------------------------------------------
+--  Pieces that are shared by fixpoint and fixpoint_anal
+-----------------------------------------------------------------------------
+
+-- | Sort the blocks into the right order for analysis. This means reverse
+-- postorder for a forward analysis. For the backward one, we simply reverse
+-- that (see Note [Backward vs forward analysis]).
+sortBlocks
+    :: NonLocal n
+    => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C]
+sortBlocks direction entry blockmap =
+    case direction of
+        Fwd -> fwd
+        Bwd -> reverse fwd
+  where
+    fwd = revPostorderFrom blockmap entry
+
+-- Note [Backward vs forward analysis]
+--
+-- The forward and backward cases are not dual. In the forward case, the entry
+-- points are known, and one simply traverses the body blocks from those
+-- points. In the backward case, something is known about the exit points, but
+-- a backward analysis must also include reachable blocks that don't reach the
+-- exit, as in a procedure that loops forever and has side effects.
+-- For instance, let E be the entry and X the exit blocks (arrows indicate
+-- control flow)
+--   E -> X
+--   E -> B
+--   B -> C
+--   C -> B
+-- We do need to include B and C even though they're unreachable in the
+-- *reverse* graph (that we could use for backward analysis):
+--   E <- X
+--   E <- B
+--   B <- C
+--   C <- B
+-- So when sorting the blocks for the backward analysis, we simply take the
+-- reverse of what is used for the forward one.
+
+
+-- | Construct a mapping from a @Label@ to the block indexes that should be
+-- re-analyzed if the facts at that @Label@ change.
+--
+-- Note that we're considering here the entry point of the block, so if the
+-- facts change at the entry:
+-- * for a backward analysis we need to re-analyze all the predecessors, but
+-- * for a forward analysis, we only need to re-analyze the current block
+--   (and that will in turn propagate facts into its successors).
+mkDepBlocks :: Direction -> [CmmBlock] -> LabelMap IntSet
+mkDepBlocks Fwd blocks = go blocks 0 mapEmpty
+  where
+    go []     !_ !dep_map = dep_map
+    go (b:bs) !n !dep_map =
+        go bs (n + 1) $ mapInsert (entryLabel b) (IntSet.singleton n) dep_map
+mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
+  where
+    go []     !_ !dep_map = dep_map
+    go (b:bs) !n !dep_map =
+        let insert m l = mapInsertWith IntSet.union l (IntSet.singleton n) m
+        in go bs (n + 1) $ foldl' insert dep_map (successors b)
+
+-- | After some new facts have been generated by analysing a block, we
+-- fold this function over them to generate (a) a list of block
+-- indices to (re-)analyse, and (b) the new FactBase.
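
A small worked case for updateFact below (a hypothetical lattice of sets
with union as the join):

    -- old fact at L = {x},   out fact = {x,y}
    --   => Changed {x,y}: insert it into the FactBase and push every
    --      block index that dep_blocks associates with L onto the worklist
    -- old fact at L = {x,y}, out fact = {x}
    --   => NotChanged: the worklist and the FactBase are left untouched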
+updateFact + :: JoinFun f + -> LabelMap IntSet + -> (IntHeap, FactBase f) + -> Label + -> f -- out fact + -> (IntHeap, FactBase f) +updateFact fact_join dep_blocks (todo, fbase) lbl new_fact + = case lookupFact lbl fbase of + Nothing -> + -- Note [No old fact] + let !z = mapInsert lbl new_fact fbase in (changed, z) + Just old_fact -> + case fact_join (OldFact old_fact) (NewFact new_fact) of + (NotChanged _) -> (todo, fbase) + (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z) + where + changed = todo `IntSet.union` + mapFindWithDefault IntSet.empty lbl dep_blocks + +{- +Note [No old fact] + +We know that the new_fact is >= _|_, so we don't need to join. However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change. So there's a tradeoff here. It turns +out that always recording a change is faster. +-} + +---------------------------------------------------------------- +-- Utilities +---------------------------------------------------------------- + +-- Fact lookup: the fact `orelse` bottom +getFact :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just f -> f + Nothing -> fact_bot lat + +-- | Returns the result of joining the facts from all the successors of the +-- provided node or block. +joinOutFacts :: (NonLocal n) => DataflowLattice f -> n e C -> FactBase f -> f +joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + facts = + [ fromJust fact + | s <- successors nonLocal + , let fact = lookupFact s fact_base + , isJust fact + ] + +joinFacts :: DataflowLattice f -> [f] -> f +joinFacts lattice facts = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + +-- | Returns the joined facts for each label. +mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f +mkFactBase lattice = foldl' add mapEmpty + where + join = fact_join lattice + + add result (l, f1) = + let !newFact = + case mapLookup l result of + Nothing -> f1 + Just f2 -> getJoined $ join (OldFact f1) (NewFact f2) + in mapInsert l newFact result + +-- | Folds backward over all nodes of an open-open block. +-- Strict in the accumulator. +foldNodesBwdOO :: (CmmNode O O -> f -> f) -> Block CmmNode O O -> f -> f +foldNodesBwdOO funOO = go + where + go (BCat b1 b2) f = go b1 $! go b2 f + go (BSnoc h n) f = go h $! funOO n f + go (BCons n t) f = funOO n $! go t f + go (BMiddle n) f = funOO n f + go BNil f = f +{-# INLINABLE foldNodesBwdOO #-} + +-- | Folds backward over all the nodes of an open-open block and allows +-- rewriting them. The accumulator is both the block of nodes and @f@ (usually +-- dataflow facts). +-- Strict in both accumulated parts. +foldRewriteNodesBwdOO + :: forall f. 
+ (CmmNode O O -> f -> UniqSM (Block CmmNode O O, f)) + -> Block CmmNode O O + -> f + -> UniqSM (Block CmmNode O O, f) +foldRewriteNodesBwdOO rewriteOO initBlock initFacts = go initBlock initFacts + where + go (BCons node1 block1) !fact1 = (rewriteOO node1 `comp` go block1) fact1 + go (BSnoc block1 node1) !fact1 = (go block1 `comp` rewriteOO node1) fact1 + go (BCat blockA1 blockB1) !fact1 = (go blockA1 `comp` go blockB1) fact1 + go (BMiddle node) !fact1 = rewriteOO node fact1 + go BNil !fact = return (BNil, fact) + + comp rew1 rew2 = \f1 -> do + (b, f2) <- rew2 f1 + (a, !f3) <- rew1 f2 + let !c = joinBlocksOO a b + return (c, f3) + {-# INLINE comp #-} +{-# INLINABLE foldRewriteNodesBwdOO #-} + +joinBlocksOO :: Block n O O -> Block n O O -> Block n O O +joinBlocksOO BNil b = b +joinBlocksOO b BNil = b +joinBlocksOO (BMiddle n) b = blockCons n b +joinBlocksOO b (BMiddle n) = blockSnoc b n +joinBlocksOO b1 b2 = BCat b1 b2 + +type IntHeap = IntSet diff --git a/compiler/GHC/Cmm/Dataflow/Block.hs b/compiler/GHC/Cmm/Dataflow/Block.hs new file mode 100644 index 0000000000..d2e52a8904 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Block.hs @@ -0,0 +1,329 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Block + ( Extensibility (..) + , O + , C + , MaybeO(..) + , IndexedCO + , Block(..) + , blockAppend + , blockCons + , blockFromList + , blockJoin + , blockJoinHead + , blockJoinTail + , blockSnoc + , blockSplit + , blockSplitHead + , blockSplitTail + , blockToList + , emptyBlock + , firstNode + , foldBlockNodesB + , foldBlockNodesB3 + , foldBlockNodesF + , isEmptyBlock + , lastNode + , mapBlock + , mapBlock' + , mapBlock3' + , replaceFirstNode + , replaceLastNode + ) where + +import GhcPrelude + +-- ----------------------------------------------------------------------------- +-- Shapes: Open and Closed + +-- | Used at the type level to indicate "open" vs "closed" structure. +data Extensibility + -- | An "open" structure with a unique, unnamed control-flow edge flowing in + -- or out. "Fallthrough" and concatenation are permitted at an open point. + = Open + -- | A "closed" structure which supports control transfer only through the use + -- of named labels---no "fallthrough" is permitted. The number of control-flow + -- edges is unconstrained. + | Closed + +type O = 'Open +type C = 'Closed + +-- | Either type indexed by closed/open using type families +type family IndexedCO (ex :: Extensibility) (a :: k) (b :: k) :: k +type instance IndexedCO C a _b = a +type instance IndexedCO O _a b = b + +-- | Maybe type indexed by open/closed +data MaybeO ex t where + JustO :: t -> MaybeO O t + NothingO :: MaybeO C t + +-- | Maybe type indexed by closed/open +data MaybeC ex t where + JustC :: t -> MaybeC C t + NothingC :: MaybeC O t + +deriving instance Functor (MaybeO ex) +deriving instance Functor (MaybeC ex) + +-- ----------------------------------------------------------------------------- +-- The Block type + +-- | A sequence of nodes. May be any of four shapes (O/O, O/C, C/O, C/C). +-- Open at the entry means single entry, mutatis mutandis for exit. +-- A closed/closed block is a /basic/ block and can't be extended further. +-- Clients should avoid manipulating blocks and should stick to either nodes +-- or graphs. 
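
As a concrete reading of the four closed/open shapes (an editor's
illustration in pseudocode; constructor arguments abbreviated): the basic
block 'L: x = 1; goto M' is closed/closed, and removing its boundary
nodes changes its shape:

    BlockCC entry middles exit    -- C/C: a complete basic block
    BlockOC       middles exit    -- O/C: entry label removed
    BlockCO entry middles         -- C/O: final control transfer removed
    middles                       -- O/O: just a sequence of middle nodes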
+data Block n e x where + BlockCO :: n C O -> Block n O O -> Block n C O + BlockCC :: n C O -> Block n O O -> n O C -> Block n C C + BlockOC :: Block n O O -> n O C -> Block n O C + + BNil :: Block n O O + BMiddle :: n O O -> Block n O O + BCat :: Block n O O -> Block n O O -> Block n O O + BSnoc :: Block n O O -> n O O -> Block n O O + BCons :: n O O -> Block n O O -> Block n O O + + +-- ----------------------------------------------------------------------------- +-- Simple operations on Blocks + +-- Predicates + +isEmptyBlock :: Block n e x -> Bool +isEmptyBlock BNil = True +isEmptyBlock (BCat l r) = isEmptyBlock l && isEmptyBlock r +isEmptyBlock _ = False + + +-- Building + +emptyBlock :: Block n O O +emptyBlock = BNil + +blockCons :: n O O -> Block n O x -> Block n O x +blockCons n b = case b of + BlockOC b l -> (BlockOC $! (n `blockCons` b)) l + BNil{} -> BMiddle n + BMiddle{} -> n `BCons` b + BCat{} -> n `BCons` b + BSnoc{} -> n `BCons` b + BCons{} -> n `BCons` b + +blockSnoc :: Block n e O -> n O O -> Block n e O +blockSnoc b n = case b of + BlockCO f b -> BlockCO f $! (b `blockSnoc` n) + BNil{} -> BMiddle n + BMiddle{} -> b `BSnoc` n + BCat{} -> b `BSnoc` n + BSnoc{} -> b `BSnoc` n + BCons{} -> b `BSnoc` n + +blockJoinHead :: n C O -> Block n O x -> Block n C x +blockJoinHead f (BlockOC b l) = BlockCC f b l +blockJoinHead f b = BlockCO f BNil `cat` b + +blockJoinTail :: Block n e O -> n O C -> Block n e C +blockJoinTail (BlockCO f b) t = BlockCC f b t +blockJoinTail b t = b `cat` BlockOC BNil t + +blockJoin :: n C O -> Block n O O -> n O C -> Block n C C +blockJoin f b t = BlockCC f b t + +blockAppend :: Block n e O -> Block n O x -> Block n e x +blockAppend = cat + + +-- Taking apart + +firstNode :: Block n C x -> n C O +firstNode (BlockCO n _) = n +firstNode (BlockCC n _ _) = n + +lastNode :: Block n x C -> n O C +lastNode (BlockOC _ n) = n +lastNode (BlockCC _ _ n) = n + +blockSplitHead :: Block n C x -> (n C O, Block n O x) +blockSplitHead (BlockCO n b) = (n, b) +blockSplitHead (BlockCC n b t) = (n, BlockOC b t) + +blockSplitTail :: Block n e C -> (Block n e O, n O C) +blockSplitTail (BlockOC b n) = (b, n) +blockSplitTail (BlockCC f b t) = (BlockCO f b, t) + +-- | Split a closed block into its entry node, open middle block, and +-- exit node. +blockSplit :: Block n C C -> (n C O, Block n O O, n O C) +blockSplit (BlockCC f b t) = (f, b, t) + +blockToList :: Block n O O -> [n O O] +blockToList b = go b [] + where go :: Block n O O -> [n O O] -> [n O O] + go BNil r = r + go (BMiddle n) r = n : r + go (BCat b1 b2) r = go b1 $! go b2 r + go (BSnoc b1 n) r = go b1 (n:r) + go (BCons n b1) r = n : go b1 r + +blockFromList :: [n O O] -> Block n O O +blockFromList = foldr BCons BNil + +-- Modifying + +replaceFirstNode :: Block n C x -> n C O -> Block n C x +replaceFirstNode (BlockCO _ b) f = BlockCO f b +replaceFirstNode (BlockCC _ b n) f = BlockCC f b n + +replaceLastNode :: Block n x C -> n O C -> Block n x C +replaceLastNode (BlockOC b _) n = BlockOC b n +replaceLastNode (BlockCC l b _) n = BlockCC l b n + +-- ----------------------------------------------------------------------------- +-- General concatenation + +cat :: Block n e O -> Block n O x -> Block n e x +cat x y = case x of + BNil -> y + + BlockCO l b1 -> case y of + BlockOC b2 n -> (BlockCC l $! (b1 `cat` b2)) n + BNil -> x + BMiddle _ -> BlockCO l $! (b1 `cat` y) + BCat{} -> BlockCO l $! (b1 `cat` y) + BSnoc{} -> BlockCO l $! (b1 `cat` y) + BCons{} -> BlockCO l $! 
(b1 `cat` y) + + BMiddle n -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle{} -> BCons n y + BCat{} -> BCons n y + BSnoc{} -> BCons n y + BCons{} -> BCons n y + + BCat{} -> case y of + BlockOC b3 n2 -> (BlockOC $! (x `cat` b3)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + BSnoc{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + + BCons{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + +-- ----------------------------------------------------------------------------- +-- Mapping + +-- | map a function over the nodes of a 'Block' +mapBlock :: (forall e x. n e x -> n' e x) -> Block n e x -> Block n' e x +mapBlock f (BlockCO n b ) = BlockCO (f n) (mapBlock f b) +mapBlock f (BlockOC b n) = BlockOC (mapBlock f b) (f n) +mapBlock f (BlockCC n b m) = BlockCC (f n) (mapBlock f b) (f m) +mapBlock _ BNil = BNil +mapBlock f (BMiddle n) = BMiddle (f n) +mapBlock f (BCat b1 b2) = BCat (mapBlock f b1) (mapBlock f b2) +mapBlock f (BSnoc b n) = BSnoc (mapBlock f b) (f n) +mapBlock f (BCons n b) = BCons (f n) (mapBlock f b) + +-- | A strict 'mapBlock' +mapBlock' :: (forall e x. n e x -> n' e x) -> (Block n e x -> Block n' e x) +mapBlock' f = mapBlock3' (f, f, f) + +-- | map over a block, with different functions to apply to first nodes, +-- middle nodes and last nodes respectively. The map is strict. +-- +mapBlock3' :: forall n n' e x . + ( n C O -> n' C O + , n O O -> n' O O, + n O C -> n' O C) + -> Block n e x -> Block n' e x +mapBlock3' (f, m, l) b = go b + where go :: forall e x . Block n e x -> Block n' e x + go (BlockOC b y) = (BlockOC $! go b) $! l y + go (BlockCO x b) = (BlockCO $! f x) $! (go b) + go (BlockCC x b y) = ((BlockCC $! f x) $! go b) $! (l y) + go BNil = BNil + go (BMiddle n) = BMiddle $! m n + go (BCat x y) = (BCat $! go x) $! (go y) + go (BSnoc x n) = (BSnoc $! go x) $! (m n) + go (BCons n x) = (BCons $! m n) $! (go x) + +-- ----------------------------------------------------------------------------- +-- Folding + + +-- | Fold a function over every node in a block, forward or backward. +-- The fold function must be polymorphic in the shape of the nodes. +foldBlockNodesF3 :: forall n a b c . + ( n C O -> a -> b + , n O O -> b -> b + , n O C -> b -> c) + -> (forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b) +foldBlockNodesF :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO e a a -> IndexedCO x a a) +foldBlockNodesB3 :: forall n a b c . + ( n C O -> b -> c + , n O O -> b -> b + , n O C -> a -> b) + -> (forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b) +foldBlockNodesB :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO x a a -> IndexedCO e a a) + +foldBlockNodesF3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. 
(a -> b) -> (b -> c) -> a -> c + cat f f' = f' . f + +foldBlockNodesF f = foldBlockNodesF3 (f, f, f) + +foldBlockNodesB3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. (b -> c) -> (a -> b) -> a -> c + cat f f' = f . f' + +foldBlockNodesB f = foldBlockNodesB3 (f, f, f) + diff --git a/compiler/GHC/Cmm/Dataflow/Collections.hs b/compiler/GHC/Cmm/Dataflow/Collections.hs new file mode 100644 index 0000000000..f131f17cc1 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Collections.hs @@ -0,0 +1,177 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Collections + ( IsSet(..) + , setInsertList, setDeleteList, setUnions + , IsMap(..) + , mapInsertList, mapDeleteList, mapUnions + , UniqueMap, UniqueSet + ) where + +import GhcPrelude + +import qualified Data.IntMap.Strict as M +import qualified Data.IntSet as S + +import Data.List (foldl1') + +class IsSet set where + type ElemOf set + + setNull :: set -> Bool + setSize :: set -> Int + setMember :: ElemOf set -> set -> Bool + + setEmpty :: set + setSingleton :: ElemOf set -> set + setInsert :: ElemOf set -> set -> set + setDelete :: ElemOf set -> set -> set + + setUnion :: set -> set -> set + setDifference :: set -> set -> set + setIntersection :: set -> set -> set + setIsSubsetOf :: set -> set -> Bool + setFilter :: (ElemOf set -> Bool) -> set -> set + + setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b + setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b + + setElems :: set -> [ElemOf set] + setFromList :: [ElemOf set] -> set + +-- Helper functions for IsSet class +setInsertList :: IsSet set => [ElemOf set] -> set -> set +setInsertList keys set = foldl' (flip setInsert) set keys + +setDeleteList :: IsSet set => [ElemOf set] -> set -> set +setDeleteList keys set = foldl' (flip setDelete) set keys + +setUnions :: IsSet set => [set] -> set +setUnions [] = setEmpty +setUnions sets = foldl1' setUnion sets + + +class IsMap map where + type KeyOf map + + mapNull :: map a -> Bool + mapSize :: map a -> Int + mapMember :: KeyOf map -> map a -> Bool + mapLookup :: KeyOf map -> map a -> Maybe a + mapFindWithDefault :: a -> KeyOf map -> map a -> a + + mapEmpty :: map a + mapSingleton :: KeyOf map -> a -> map a + mapInsert :: KeyOf map -> a -> map a -> map a + mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a + mapDelete :: KeyOf map -> map a -> map a + mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a + mapAdjust :: (a -> a) -> KeyOf map -> map a -> map a + + mapUnion :: map a -> map a -> map a + mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a + mapDifference :: map a -> map a -> map a + mapIntersection :: map a -> map a -> map a + mapIsSubmapOf :: Eq a => map a -> map a -> Bool + + mapMap :: (a -> b) -> map a -> map b + mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b + mapFoldl :: (b -> a -> b) -> b -> map a -> b + mapFoldr :: (a -> b -> b) -> b -> map a -> b + mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b + 
mapFoldMapWithKey :: Monoid m => (KeyOf map -> a -> m) -> map a -> m + mapFilter :: (a -> Bool) -> map a -> map a + mapFilterWithKey :: (KeyOf map -> a -> Bool) -> map a -> map a + + + mapElems :: map a -> [a] + mapKeys :: map a -> [KeyOf map] + mapToList :: map a -> [(KeyOf map, a)] + mapFromList :: [(KeyOf map, a)] -> map a + mapFromListWith :: (a -> a -> a) -> [(KeyOf map,a)] -> map a + +-- Helper functions for IsMap class +mapInsertList :: IsMap map => [(KeyOf map, a)] -> map a -> map a +mapInsertList assocs map = foldl' (flip (uncurry mapInsert)) map assocs + +mapDeleteList :: IsMap map => [KeyOf map] -> map a -> map a +mapDeleteList keys map = foldl' (flip mapDelete) map keys + +mapUnions :: IsMap map => [map a] -> map a +mapUnions [] = mapEmpty +mapUnions maps = foldl1' mapUnion maps + +----------------------------------------------------------------------------- +-- Basic instances +----------------------------------------------------------------------------- + +newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show, Semigroup, Monoid) + +instance IsSet UniqueSet where + type ElemOf UniqueSet = Int + + setNull (US s) = S.null s + setSize (US s) = S.size s + setMember k (US s) = S.member k s + + setEmpty = US S.empty + setSingleton k = US (S.singleton k) + setInsert k (US s) = US (S.insert k s) + setDelete k (US s) = US (S.delete k s) + + setUnion (US x) (US y) = US (S.union x y) + setDifference (US x) (US y) = US (S.difference x y) + setIntersection (US x) (US y) = US (S.intersection x y) + setIsSubsetOf (US x) (US y) = S.isSubsetOf x y + setFilter f (US s) = US (S.filter f s) + + setFoldl k z (US s) = S.foldl' k z s + setFoldr k z (US s) = S.foldr k z s + + setElems (US s) = S.elems s + setFromList ks = US (S.fromList ks) + +newtype UniqueMap v = UM (M.IntMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap UniqueMap where + type KeyOf UniqueMap = Int + + mapNull (UM m) = M.null m + mapSize (UM m) = M.size m + mapMember k (UM m) = M.member k m + mapLookup k (UM m) = M.lookup k m + mapFindWithDefault def k (UM m) = M.findWithDefault def k m + + mapEmpty = UM M.empty + mapSingleton k v = UM (M.singleton k v) + mapInsert k v (UM m) = UM (M.insert k v m) + mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) + mapDelete k (UM m) = UM (M.delete k m) + mapAlter f k (UM m) = UM (M.alter f k m) + mapAdjust f k (UM m) = UM (M.adjust f k m) + + mapUnion (UM x) (UM y) = UM (M.union x y) + mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) + mapDifference (UM x) (UM y) = UM (M.difference x y) + mapIntersection (UM x) (UM y) = UM (M.intersection x y) + mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y + + mapMap f (UM m) = UM (M.map f m) + mapMapWithKey f (UM m) = UM (M.mapWithKey f m) + mapFoldl k z (UM m) = M.foldl' k z m + mapFoldr k z (UM m) = M.foldr k z m + mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m + mapFoldMapWithKey f (UM m) = M.foldMapWithKey f m + mapFilter f (UM m) = UM (M.filter f m) + mapFilterWithKey f (UM m) = UM (M.filterWithKey f m) + + mapElems (UM m) = M.elems m + mapKeys (UM m) = M.keys m + mapToList (UM m) = M.toList m + mapFromList assocs = UM (M.fromList assocs) + mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/GHC/Cmm/Dataflow/Graph.hs b/compiler/GHC/Cmm/Dataflow/Graph.hs new file mode 100644 index 0000000000..3f361de0fb --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Graph.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# 
LANGUAGE GADTs #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Graph + ( Body + , Graph + , Graph'(..) + , NonLocal(..) + , addBlock + , bodyList + , emptyBody + , labelsDefined + , mapGraph + , mapGraphBlocks + , revPostorderFrom + ) where + + +import GhcPrelude +import Util + +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +-- | A (possibly empty) collection of closed/closed blocks +type Body n = LabelMap (Block n C C) + +-- | @Body@ abstracted over @block@ +type Body' block (n :: Extensibility -> Extensibility -> *) = LabelMap (block n C C) + +------------------------------- +-- | Gives access to the anchor points for +-- nonlocal edges as well as the edges themselves +class NonLocal thing where + entryLabel :: thing C x -> Label -- ^ The label of a first node or block + successors :: thing e C -> [Label] -- ^ Gives control-flow successors + +instance NonLocal n => NonLocal (Block n) where + entryLabel (BlockCO f _) = entryLabel f + entryLabel (BlockCC f _ _) = entryLabel f + + successors (BlockOC _ n) = successors n + successors (BlockCC _ _ n) = successors n + + +emptyBody :: Body' block n +emptyBody = mapEmpty + +bodyList :: Body' block n -> [(Label,block n C C)] +bodyList body = mapToList body + +addBlock + :: (NonLocal block, HasDebugCallStack) + => block C C -> LabelMap (block C C) -> LabelMap (block C C) +addBlock block body = mapAlter add lbl body + where + lbl = entryLabel block + add Nothing = Just block + add _ = error $ "duplicate label " ++ show lbl ++ " in graph" + + +-- --------------------------------------------------------------------------- +-- Graph + +-- | A control-flow graph, which may take any of four shapes (O/O, +-- O/C, C/O, C/C). A graph open at the entry has a single, +-- distinguished, anonymous entry point; if a graph is closed at the +-- entry, its entry point(s) are supplied by a context. +type Graph = Graph' Block + +-- | @Graph'@ is abstracted over the block type, so that we can build +-- graphs of annotated blocks for example (Compiler.Hoopl.Dataflow +-- needs this). +data Graph' block (n :: Extensibility -> Extensibility -> *) e x where + GNil :: Graph' block n O O + GUnit :: block n O O -> Graph' block n O O + GMany :: MaybeO e (block n O C) + -> Body' block n + -> MaybeO x (block n C O) + -> Graph' block n e x + + +-- ----------------------------------------------------------------------------- +-- Mapping over graphs + +-- | Maps over all nodes in a graph. +mapGraph :: (forall e x. n e x -> n' e x) -> Graph n e x -> Graph n' e x +mapGraph f = mapGraphBlocks (mapBlock f) + +-- | Function 'mapGraphBlocks' enables a change of representation of blocks, +-- nodes, or both. It lifts a polymorphic block transform into a polymorphic +-- graph transform. When the block representation stabilizes, a similar +-- function should be provided for blocks. +mapGraphBlocks :: forall block n block' n' e x . + (forall e x . block n e x -> block' n' e x) + -> (Graph' block n e x -> Graph' block' n' e x) + +mapGraphBlocks f = map + where map :: Graph' block n e x -> Graph' block' n' e x + map GNil = GNil + map (GUnit b) = GUnit (f b) + map (GMany e b x) = GMany (fmap f e) (mapMap f b) (fmap f x) + +-- ----------------------------------------------------------------------------- +-- Extracting Labels from graphs + +labelsDefined :: forall block n e x . 
NonLocal (block n) => Graph' block n e x + -> LabelSet +labelsDefined GNil = setEmpty +labelsDefined (GUnit{}) = setEmpty +labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body + where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet + addEntry labels label _ = setInsert label labels + exitLabel :: MaybeO x (block n C O) -> LabelSet + exitLabel NothingO = setEmpty + exitLabel (JustO b) = setSingleton (entryLabel b) + + +---------------------------------------------------------------- + +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops +-- unreachable code and puts blocks in an order that is good for solving forward +-- dataflow problems quickly. The reverse order is good for solving backward +-- dataflow problems quickly. The forward order is also reasonably good for +-- emitting instructions, except that it will not usually exploit Forrest +-- Baskett's trick of eliminating the unconditional branch from a loop. For +-- that you would need a more serious analysis, probably based on dominators, to +-- identify loop headers. +-- +-- For forward analyses we want reverse postorder visitation, consider: +-- @ +-- A -> [B,C] +-- B -> D +-- C -> D +-- @ +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! 
" ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/GHC/Cmm/Dataflow/Label.hs b/compiler/GHC/Cmm/Dataflow/Label.hs new file mode 100644 index 0000000000..c571cedb48 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Label.hs @@ -0,0 +1,142 @@ +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Label + ( Label + , LabelMap + , LabelSet + , FactBase + , lookupFact + , mkHooplLabel + ) where + +import GhcPrelude + +import Outputable + +-- TODO: This should really just use GHC's Unique and Uniq{Set,FM} +import GHC.Cmm.Dataflow.Collections + +import Unique (Uniquable(..)) +import TrieMap + + +----------------------------------------------------------------------------- +-- Label +----------------------------------------------------------------------------- + +newtype Label = Label { lblToUnique :: Int } + deriving (Eq, Ord) + +mkHooplLabel :: Int -> Label +mkHooplLabel = Label + +instance Show Label where + show (Label n) = "L" ++ show n + +instance Uniquable Label where + getUnique label = getUnique (lblToUnique label) + +instance Outputable Label where + ppr label = ppr (getUnique label) + +----------------------------------------------------------------------------- +-- LabelSet + +newtype LabelSet = LS UniqueSet deriving (Eq, Ord, Show, Monoid, Semigroup) + +instance IsSet LabelSet where + type ElemOf LabelSet = Label + + setNull (LS s) = setNull s + setSize (LS s) = setSize s + setMember (Label k) (LS s) = setMember k s + + setEmpty = LS setEmpty + setSingleton (Label k) = LS (setSingleton k) + setInsert (Label k) (LS s) = LS (setInsert k s) + setDelete (Label k) (LS s) = LS (setDelete k s) + + setUnion (LS x) (LS y) = LS (setUnion x y) + setDifference (LS x) (LS y) = LS (setDifference x y) + setIntersection (LS x) (LS y) = LS (setIntersection x y) + setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y + setFilter f (LS s) = LS (setFilter (f . mkHooplLabel) s) + setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s + setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s + + setElems (LS s) = map mkHooplLabel (setElems s) + setFromList ks = LS (setFromList (map lblToUnique ks)) + +----------------------------------------------------------------------------- +-- LabelMap + +newtype LabelMap v = LM (UniqueMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap LabelMap where + type KeyOf LabelMap = Label + + mapNull (LM m) = mapNull m + mapSize (LM m) = mapSize m + mapMember (Label k) (LM m) = mapMember k m + mapLookup (Label k) (LM m) = mapLookup k m + mapFindWithDefault def (Label k) (LM m) = mapFindWithDefault def k m + + mapEmpty = LM mapEmpty + mapSingleton (Label k) v = LM (mapSingleton k v) + mapInsert (Label k) v (LM m) = LM (mapInsert k v m) + mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) + mapDelete (Label k) (LM m) = LM (mapDelete k m) + mapAlter f (Label k) (LM m) = LM (mapAlter f k m) + mapAdjust f (Label k) (LM m) = LM (mapAdjust f k m) + + mapUnion (LM x) (LM y) = LM (mapUnion x y) + mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) + mapDifference (LM x) (LM y) = LM (mapDifference x y) + mapIntersection (LM x) (LM y) = LM (mapIntersection x y) + mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y + + mapMap f (LM m) = LM (mapMap f m) + mapMapWithKey f (LM m) = LM (mapMapWithKey (f . 
mkHooplLabel) m) + mapFoldl k z (LM m) = mapFoldl k z m + mapFoldr k z (LM m) = mapFoldr k z m + mapFoldlWithKey k z (LM m) = + mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m + mapFoldMapWithKey f (LM m) = mapFoldMapWithKey (\k v -> f (mkHooplLabel k) v) m + mapFilter f (LM m) = LM (mapFilter f m) + mapFilterWithKey f (LM m) = LM (mapFilterWithKey (f . mkHooplLabel) m) + + mapElems (LM m) = mapElems m + mapKeys (LM m) = map mkHooplLabel (mapKeys m) + mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m] + mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs]) + mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs]) + +----------------------------------------------------------------------------- +-- Instances + +instance Outputable LabelSet where + ppr = ppr . setElems + +instance Outputable a => Outputable (LabelMap a) where + ppr = ppr . mapToList + +instance TrieMap LabelMap where + type Key LabelMap = Label + emptyTM = mapEmpty + lookupTM k m = mapLookup k m + alterTM k f m = mapAlter f k m + foldTM k m z = mapFoldr k z m + mapTM f m = mapMap f m + +----------------------------------------------------------------------------- +-- FactBase + +type FactBase f = LabelMap f + +lookupFact :: Label -> FactBase f -> Maybe f +lookupFact = mapLookup diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs new file mode 100644 index 0000000000..70fc08ee94 --- /dev/null +++ b/compiler/GHC/Cmm/DebugBlock.hs @@ -0,0 +1,546 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiWayIf #-} + +----------------------------------------------------------------------------- +-- +-- Debugging data +-- +-- Association of debug data on the Cmm level, with methods to encode it in +-- event log format for later inclusion in profiling event logs. +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.DebugBlock ( + + DebugBlock(..), + cmmDebugGen, + cmmDebugLabels, + cmmDebugLink, + debugToMap, + + -- * Unwinding information + UnwindTable, UnwindPoint(..), + UnwindExpr(..), toUnwindExpr + ) where + +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import CoreSyn +import FastString ( nilFS, mkFastString ) +import Module +import Outputable +import GHC.Cmm.Ppr.Expr ( pprExpr ) +import SrcLoc +import Util ( seqList ) + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +import Data.Maybe +import Data.List ( minimumBy, nubBy ) +import Data.Ord ( comparing ) +import qualified Data.Map as Map +import Data.Either ( partitionEithers ) + +-- | Debug information about a block of code. Ticks scope over nested +-- blocks. +data DebugBlock = + DebugBlock + { dblProcedure :: !Label -- ^ Entry label of containing proc + , dblLabel :: !Label -- ^ Hoopl label + , dblCLabel :: !CLabel -- ^ Output label + , dblHasInfoTbl :: !Bool -- ^ Has an info table? + , dblParent :: !(Maybe DebugBlock) + -- ^ The parent of this proc. See Note [Splitting DebugBlocks] + , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block + , dblSourceTick :: !(Maybe CmmTickish) -- ^ Best source tick covering block + , dblPosition :: !(Maybe Int) -- ^ Output position relative to + -- other blocks. 
@Nothing@ means
+                                          -- the block was optimized out
+   , dblUnwind     :: [UnwindPoint]
+   , dblBlocks     :: ![DebugBlock]     -- ^ Nested blocks
+   }
+
+instance Outputable DebugBlock where
+  ppr blk = (if | dblProcedure blk == dblLabel blk
+                  -> text "proc"
+                | dblHasInfoTbl blk
+                  -> text "pp-blk"
+                | otherwise
+                  -> text "blk") <+>
+            ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+>
+            (maybe empty ppr (dblSourceTick blk)) <+>
+            (maybe (text "removed") ((text "pos " <>) . ppr)
+                   (dblPosition blk)) <+>
+            (ppr (dblUnwind blk)) $+$
+            (if null (dblBlocks blk) then empty else nest 4 (ppr (dblBlocks blk)))
+
+-- | Intermediate data structure holding debug-relevant context information
+-- about a block.
+type BlockContext = (CmmBlock, RawCmmDecl)
+
+-- | Extract debug data from a group of procedures. We will prefer
+-- source notes that come from the given module (presumably the module
+-- that we are currently compiling).
+cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock]
+cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
+  where
+      blockCtxs :: Map.Map CmmTickScope [BlockContext]
+      blockCtxs = blockContexts decls
+
+      -- Analyse tick scope structure: Each one is either a top-level
+      -- tick scope, or the child of another.
+      (topScopes, childScopes)
+         = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
+      findP tsc GlobalScope = Left tsc -- top scope
+      findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc)
+                    | otherwise                   = findP tsc scp'
+        where -- Note that we only follow the left parent of
+              -- combined scopes. This loses us ticks, which we will
+              -- recover by copying ticks below.
+              scp' | SubScope _ scp' <- scp      = scp'
+                   | CombinedScope scp' _ <- scp = scp'
+                   | otherwise                   = panic "findP impossible"
+
+      scopeMap = foldr (uncurry insertMulti) Map.empty childScopes
+
+      -- This allows us to recover ticks that we lost by flattening
+      -- the graph. Basically, if the parent is A but the child is
+      -- CBA, we know that there is no BA, because it would have taken
+      -- priority - but there might be a B scope, with ticks that
+      -- would not be associated with our child anymore. Note however
+      -- that there might be other children (DB), which we have to
+      -- filter out.
+      --
+      -- We expect this to be called rarely, which is why we are not
+      -- trying too hard to be efficient here. In many cases we won't
+      -- have to construct blockCtxs in the first place.
+      ticksToCopy :: CmmTickScope -> [CmmTickish]
+      ticksToCopy (CombinedScope scp s) = go s
+        where go s | scp `isTickSubScope` s   = [] -- done
+                   | SubScope _ s' <- s       = ticks ++ go s'
+                   | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2
+                   | otherwise                = panic "ticksToCopy impossible"
+                where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs
+      ticksToCopy _ = []
+      bCtxsTicks = concatMap (blockTicks . fst)
+
+      -- Finding the "best" source tick is somewhat arbitrary -- we
+      -- select the first source span, while preferring source ticks
+      -- from the same source file. Furthermore, dumps take priority
+      -- (if we generated one, we probably want debug information to
+      -- refer to it).
+      bestSrcTick = minimumBy (comparing rangeRating)
+      rangeRating (SourceNote span _)
+        | srcSpanFile span == thisFile = 1
+        | otherwise                    = 2 :: Int
+      rangeRating note = pprPanic "rangeRating" (ppr note)
+      thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
+
+      -- Returns block tree for this scope as well as all nested
+      -- scopes. 
Note that if there are multiple blocks in the (exact) + -- same scope we elect one as the "branch" node and add the rest + -- as children. + blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock + blocksForScope cstick scope = mkBlock True (head bctxs) + where bctxs = fromJust $ Map.lookup scope blockCtxs + nested = fromMaybe [] $ Map.lookup scope scopeMap + childs = map (mkBlock False) (tail bctxs) ++ + map (blocksForScope stick) nested + + mkBlock :: Bool -> BlockContext -> DebugBlock + mkBlock top (block, prc) + = DebugBlock { dblProcedure = g_entry graph + , dblLabel = label + , dblCLabel = case info of + Just (Statics infoLbl _) -> infoLbl + Nothing + | g_entry graph == label -> entryLbl + | otherwise -> blockLbl label + , dblHasInfoTbl = isJust info + , dblParent = Nothing + , dblTicks = ticks + , dblPosition = Nothing -- see cmmDebugLink + , dblSourceTick = stick + , dblBlocks = blocks + , dblUnwind = [] + } + where (CmmProc infos entryLbl _ graph) = prc + label = entryLabel block + info = mapLookup label infos + blocks | top = seqList childs childs + | otherwise = [] + + -- A source tick scopes over all nested blocks. However + -- their source ticks might take priority. + isSourceTick SourceNote {} = True + isSourceTick _ = False + -- Collect ticks from all blocks inside the tick scope. + -- We attempt to filter out duplicates while we're at it. + ticks = nubBy (flip tickishContains) $ + bCtxsTicks bctxs ++ ticksToCopy scope + stick = case filter isSourceTick ticks of + [] -> cstick + sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick) + +-- | Build a map of blocks sorted by their tick scopes +-- +-- This involves a pre-order traversal, as we want blocks in rough +-- control flow order (so ticks have a chance to be sorted in the +-- right order). +blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext] +blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls + where walkProc :: RawCmmDecl + -> Map.Map CmmTickScope [BlockContext] + -> Map.Map CmmTickScope [BlockContext] + walkProc CmmData{} m = m + walkProc prc@(CmmProc _ _ _ graph) m + | mapNull blocks = m + | otherwise = snd $ walkBlock prc entry (emptyLbls, m) + where blocks = toBlockMap graph + entry = [mapFind (g_entry graph) blocks] + emptyLbls = setEmpty :: LabelSet + + walkBlock :: RawCmmDecl -> [Block CmmNode C C] + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + walkBlock _ [] c = c + walkBlock prc (block:blocks) (visited, m) + | lbl `setMember` visited + = walkBlock prc blocks (visited, m) + | otherwise + = walkBlock prc blocks $ + walkBlock prc succs + (lbl `setInsert` visited, + insertMulti scope (block, prc) m) + where CmmEntry lbl scope = firstNode block + (CmmProc _ _ _ graph) = prc + succs = map (flip mapFind (toBlockMap graph)) + (successors (lastNode block)) + mapFind = mapFindWithDefault (error "contextTree: block not found!") + +insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a] +insertMulti k v = Map.insertWith (const (v:)) k [v] + +cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label] +cmmDebugLabels isMeta nats = seqList lbls lbls + where -- Find order in which procedures will be generated by the + -- back-end (that actually matters for DWARF generation). 
+ -- + -- Note that we might encounter blocks that are missing or only + -- consist of meta instructions -- we will declare them missing, + -- which will skip debug data generation without messing up the + -- block hierarchy. + lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats + getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs + getBlocks _other = [] + allMeta (BasicBlock _ instrs) = all isMeta instrs + +-- | Sets position and unwind table fields in the debug block tree according to +-- native generated code. +cmmDebugLink :: [Label] -> LabelMap [UnwindPoint] + -> [DebugBlock] -> [DebugBlock] +cmmDebugLink labels unwindPts blocks = map link blocks + where blockPos :: LabelMap Int + blockPos = mapFromList $ flip zip [0..] labels + link block = block { dblPosition = mapLookup (dblLabel block) blockPos + , dblBlocks = map link (dblBlocks block) + , dblUnwind = fromMaybe mempty + $ mapLookup (dblLabel block) unwindPts + } + +-- | Converts debug blocks into a label map for easier lookups +debugToMap :: [DebugBlock] -> LabelMap DebugBlock +debugToMap = mapUnions . map go + where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b) + +{- +Note [What is this unwinding business?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unwinding tables are a variety of debugging information used by debugging tools +to reconstruct the execution history of a program at runtime. These tables +consist of sets of "instructions", one set for every instruction in the program, +which describe how to reconstruct the state of the machine at the point where +the current procedure was called. For instance, consider the following annotated +pseudo-code, + + a_fun: + add rsp, 8 -- unwind: rsp = rsp - 8 + mov rax, 1 -- unwind: rax = unknown + call another_block + sub rsp, 8 -- unwind: rsp = rsp + +We see that attached to each instruction there is an "unwind" annotation, which +provides a relationship between each updated register and its value at the +time of entry to a_fun. This is the sort of information that allows gdb to give +you a stack backtrace given the execution state of your program. This +unwinding information is captured in various ways by various debug information +formats; in the case of DWARF (the only format supported by GHC) it is known as +Call Frame Information (CFI) and can be found in the .debug.frames section of +your object files. + +Currently we only bother to produce unwinding information for registers which +are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp +(which is the STG stack pointer) and $rsp (the C stack pointer). + +Let's consider how GHC would annotate a C-- program with unwinding information +with a typical C-- procedure as would come from the STG-to-Cmm code generator, + + entry() + { c2fe: + v :: P64 = R2; + if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; + c2ff: + R2 = v :: P64; + R1 = test_closure; + call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; + c2fg: + I64[Sp - 8] = c2dD; + R1 = v :: P64; + Sp = Sp - 8; // Sp updated here + if (R1 & 7 != 0) goto c2dD; else goto c2dE; + c2dE: + call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; + c2dD: + w :: P64 = R1; + Hp = Hp + 48; + if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; + ... + }, + +Let's consider how this procedure will be decorated with unwind information +(largely by GHC.Cmm.LayoutStack). Naturally, when we enter the procedure `entry` the +value of Sp is no different from what it was at its call site. 
Therefore we will +add an `unwind` statement saying this at the beginning of its unwind-annotated +code, + + entry() + { c2fe: + unwind Sp = Just Sp + 0; + v :: P64 = R2; + if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; + +After c2fe we may pass to either c2ff or c2fg; let's first consider the +former. In this case there is nothing in particular that we need to do other +than reiterate what we already know about Sp, + + c2ff: + unwind Sp = Just Sp + 0; + R2 = v :: P64; + R1 = test_closure; + call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; + +In contrast, c2fg updates Sp midway through its body. To ensure that unwinding +can happen correctly after this point we must include an unwind statement there, +in addition to the usual beginning-of-block statement, + + c2fg: + unwind Sp = Just Sp + 0; + I64[Sp - 8] = c2dD; + R1 = v :: P64; + Sp = Sp - 8; + unwind Sp = Just Sp + 8; + if (R1 & 7 != 0) goto c2dD; else goto c2dE; + +The remaining blocks are simple, + + c2dE: + unwind Sp = Just Sp + 8; + call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8; + c2dD: + unwind Sp = Just Sp + 8; + w :: P64 = R1; + Hp = Hp + 48; + if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi; + ... + }, + + +The flow of unwinding information through the compiler is a bit convoluted: + + * C-- begins life in StgToCmm without any unwind information. This is because we + haven't actually done any register assignment or stack layout yet, so there + is no need for unwind information. + + * GHC.Cmm.LayoutStack figures out how to layout each procedure's stack, and produces + appropriate unwinding nodes for each adjustment of the STG Sp register. + + * The unwind nodes are carried through the sinking pass. Currently this is + guaranteed not to invalidate unwind information since it won't touch stores + to Sp, but this will need revisiting if CmmSink gets smarter in the future. + + * Eventually we make it to the native code generator backend which can then + preserve the unwind nodes in its machine-specific instructions. In so doing + the backend can also modify or add unwinding information; this is necessary, + for instance, in the case of x86-64, where adjustment of $rsp may be + necessary during calls to native foreign code due to the native calling + convention. + + * The NCG then retrieves the final unwinding table for each block from the + backend with extractUnwindPoints. + + * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen + + * These DebugBlocks are then converted to, e.g., DWARF unwinding tables + (by the Dwarf module) and emitted in the final object. + +See also: + Note [Unwinding information in the NCG] in AsmCodeGen, + Note [Unwind pseudo-instruction in Cmm], + Note [Debugging DWARF unwinding info]. + + +Note [Debugging DWARF unwinding info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For debugging generated unwinding info I've found it most useful to dump the +disassembled binary with objdump -D and dump the debug info with +readelf --debug-dump=frames-interp. 
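+
+For example, for some object file Example.o (the file name here is
+illustrative), that means running:
+
+    $ objdump -D Example.o
+    $ readelf --debug-dump=frames-interp Example.o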
+ +You should get something like this: + + 0000000000000010 <stg_catch_frame_info>: + 10: 48 83 c5 18 add $0x18,%rbp + 14: ff 65 00 jmpq *0x0(%rbp) + +and: + + Contents of the .debug_frame section: + + 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16 + LOC CFA rbp rsp ra + 0000000000000000 rbp+0 v+0 s c+0 + + 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017 + LOC CFA rbp rsp ra + 000000000000000f rbp+0 v+0 s c+0 + 000000000000000f rbp+24 v+0 s c+0 + +To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in +Appendix 5 (page 101 of the pdf) and more details in the relevant section. + +The key thing to keep in mind is that the value at LOC is the value from +*before* the instruction at LOC executes. In other words it answers the +question: if my $rip is at LOC, how do I get the relevant values given the +values obtained through unwinding so far. + +If the readelf --debug-dump=frames-interp output looks wrong, it may also be +useful to look at readelf --debug-dump=frames, which is closer to the +information that GHC generated. + +It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm +-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm] +explains how to interpret it. + +Inside gdb there are a couple useful commands for inspecting frames. +For example: + + gdb> info frame <num> + +It shows the values of registers obtained through unwinding. + +Another useful thing to try when debugging the DWARF unwinding is to enable +extra debugging output in GDB: + + gdb> set debug frame 1 + +This makes GDB produce a trace of its internal workings. Having gone this far, +it's just a tiny step to run GDB in GDB. Make sure you install debugging +symbols for gdb if you obtain it through a package manager. + +Keep in mind that the current release of GDB has an instruction pointer handling +heuristic that works well for C-like languages, but doesn't always work for +Haskell. See Note [Info Offset] in Dwarf.Types for more details. + +Note [Unwind pseudo-instruction in Cmm] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't +generate any assembly, but controls what DWARF unwinding information gets +generated. + +It's important to understand what ranges of code the unwind pseudo-instruction +refers to. +For a sequence of CmmNodes like: + + A // starts at addr X and ends at addr Y-1 + unwind Sp = Just Sp + 16; + B // starts at addr Y and ends at addr Z + +the unwind statement reflects the state after A has executed, but before B +has executed. If you consult the Note [Debugging DWARF unwinding info], the +LOC this information will end up in is Y. +-} + +-- | A label associated with an 'UnwindTable' +data UnwindPoint = UnwindPoint !CLabel !UnwindTable + +instance Outputable UnwindPoint where + ppr (UnwindPoint lbl uws) = + braces $ ppr lbl<>colon + <+> hsep (punctuate comma $ map pprUw $ Map.toList uws) + where + pprUw (g, expr) = ppr g <> char '=' <> ppr expr + +-- | Maps registers to expressions that yield their "old" values +-- further up the stack. Most interesting for the stack pointer @Sp@, +-- but might be useful to document saved registers, too. Note that a +-- register's value will be 'Nothing' when the register's previous +-- value cannot be reconstructed. 
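+--
+-- As an illustrative example (using the constructors defined just below):
+-- after an adjustment @Sp = Sp - 8@, the caller's Sp is the current Sp
+-- plus 8, so the table would map @Sp@ to @Just (UwReg Sp 8)@.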
+type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr) + +-- | Expressions, used for unwind information +data UnwindExpr = UwConst !Int -- ^ literal value + | UwReg !GlobalReg !Int -- ^ register plus offset + | UwDeref UnwindExpr -- ^ pointer dereferencing + | UwLabel CLabel + | UwPlus UnwindExpr UnwindExpr + | UwMinus UnwindExpr UnwindExpr + | UwTimes UnwindExpr UnwindExpr + deriving (Eq) + +instance Outputable UnwindExpr where + pprPrec _ (UwConst i) = ppr i + pprPrec _ (UwReg g 0) = ppr g + pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x)) + pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e + pprPrec _ (UwLabel l) = pprPrec 3 l + pprPrec p (UwPlus e0 e1) | p <= 0 + = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1 + pprPrec p (UwMinus e0 e1) | p <= 0 + = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1 + pprPrec p (UwTimes e0 e1) | p <= 1 + = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1 + pprPrec _ other = parens (pprPrec 0 other) + +-- | Conversion of Cmm expressions to unwind expressions. We check for +-- unsupported operator usages and simplify the expression as far as +-- possible. +toUnwindExpr :: CmmExpr -> UnwindExpr +toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i) +toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l +toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i +toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0 +toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e) +toUnwindExpr e@(CmmMachOp op [e1, e2]) = + case (op, toUnwindExpr e1, toUnwindExpr e2) of + (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y) + (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y) + (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y) + (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y) + (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y) + (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y) + (MO_Add{}, u1, u2 ) -> UwPlus u1 u2 + (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2 + (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2 + _otherwise -> pprPanic "Unsupported operator in unwind expression!" + (pprExpr e) +toUnwindExpr e + = pprPanic "Unsupported unwind expression!" (ppr e) diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs new file mode 100644 index 0000000000..3b4f0156a0 --- /dev/null +++ b/compiler/GHC/Cmm/Expr.hs @@ -0,0 +1,619 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE UndecidableInstances #-} + +module GHC.Cmm.Expr + ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr + , CmmReg(..), cmmRegType, cmmRegWidth + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), isArgReg, globalRegType + , spReg, hpReg, spLimReg, hpLimReg, nodeReg + , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg + , node, baseReg + , VGcPtr(..) + + , DefinerOfRegs, UserOfRegs + , foldRegsDefd, foldRegsUsed + , foldLocalRegsDefd, foldLocalRegsUsed + + , RegSet, LocalRegSet, GlobalRegSet + , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet + , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet + , regSetToList + + , Area(..) 
+    , module GHC.Cmm.MachOp
+    , module GHC.Cmm.Type
+    )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.MachOp
+import GHC.Cmm.Type
+import DynFlags
+import Outputable (panic)
+import Unique
+
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import BasicTypes (Alignment, mkAlignment, alignmentOf)
+
+-----------------------------------------------------------------------------
+-- CmmExpr
+-- An expression. Expressions have no side effects.
+-----------------------------------------------------------------------------
+
+data CmmExpr
+  = CmmLit CmmLit               -- Literal
+  | CmmLoad !CmmExpr !CmmType   -- Read memory location
+  | CmmReg !CmmReg              -- Contents of register
+  | CmmMachOp MachOp [CmmExpr]  -- Machine operation (+, -, *, etc.)
+  | CmmStackSlot Area {-# UNPACK #-} !Int
+                                -- addressing expression of a stack slot
+                                -- See Note [CmmStackSlot aliasing]
+  | CmmRegOff !CmmReg Int
+        -- CmmRegOff reg i
+        --        ** is shorthand only, meaning **
+        -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+        --      where rep = typeWidth (cmmRegType reg)
+
+instance Eq CmmExpr where       -- Equality ignores the types
+  CmmLit l1          == CmmLit l2          = l1==l2
+  CmmLoad e1 _       == CmmLoad e2 _       = e1==e2
+  CmmReg r1          == CmmReg r2          = r1==r2
+  CmmRegOff r1 i1    == CmmRegOff r2 i2    = r1==r2 && i1==i2
+  CmmMachOp op1 es1  == CmmMachOp op2 es2  = op1==op2 && es1==es2
+  CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
+  _e1                == _e2                = False
+
+data CmmReg
+  = CmmLocal  {-# UNPACK #-} !LocalReg
+  | CmmGlobal GlobalReg
+  deriving( Eq, Ord )
+
+-- | A stack area is either the stack slot where a variable is spilled
+-- or the stack space where function arguments and results are passed.
+data Area
+  = Old            -- See Note [Old Area]
+  | Young {-# UNPACK #-} !BlockId  -- Invariant: must be a continuation BlockId
+                   -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+  deriving (Eq, Ord)
+
+{- Note [Old Area]
+~~~~~~~~~~~~~~~~~~
+There is a single call area 'Old', allocated at the extreme old
+end of the stack frame (i.e. just younger than the return address)
+which holds:
+  * incoming (overflow) parameters,
+  * outgoing (overflow) parameters to tail calls,
+  * outgoing (overflow) result values
+  * the update frame (if any)
+
+Its size is the max of all these requirements. On entry, the stack
+pointer will point to the youngest incoming parameter, which is not
+necessarily at the young end of the Old area.
+
+End of note -}
+
+
+{- Note [CmmStackSlot aliasing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When do two CmmStackSlots alias?
+
+ - T[old+N] aliases with U[young(L)+M] for all T, U, L, N and M
+ - T[old+N] aliases with U[old+M] only if the areas actually overlap
+
+Or more informally, different Areas may overlap with each other.
+
+An alternative semantics, that we previously had, was that different
+Areas do not overlap. The problem that led to redefining the
+semantics of stack areas is described below.
+
+e.g. if we had
+
+    x = Sp[old + 8]
+    y = Sp[old + 16]
+
+    Sp[young(L) + 8]  = L
+    Sp[young(L) + 16] = y
+    Sp[young(L) + 24] = x
+    call f() returns to L
+
+if areas semantically do not overlap, then we might optimise this to
+
+    Sp[young(L) + 8]  = L
+    Sp[young(L) + 16] = Sp[old + 8]
+    Sp[young(L) + 24] = Sp[old + 16]
+    call f() returns to L
+
+and now young(L) cannot be allocated at the same place as old, and we
+are doomed to use more stack. 
+ + - old+8 conflicts with young(L)+8 + - old+16 conflicts with young(L)+16 and young(L)+8 + +so young(L)+8 == old+24 and we get + + Sp[-8] = L + Sp[-16] = Sp[8] + Sp[-24] = Sp[0] + Sp -= 24 + call f() returns to L + +However, if areas are defined to be "possibly overlapping" in the +semantics, then we cannot commute any loads/stores of old with +young(L), and we will be able to re-use both old+8 and old+16 for +young(L). + + x = Sp[8] + y = Sp[0] + + Sp[8] = L + Sp[0] = y + Sp[-8] = x + Sp = Sp - 8 + call f() returns to L + +Now, the assignments of y go away, + + x = Sp[8] + Sp[8] = L + Sp[-8] = x + Sp = Sp - 8 + call f() returns to L +-} + +data CmmLit + = CmmInt !Integer Width + -- Interpretation: the 2's complement representation of the value + -- is truncated to the specified size. This is easier than trying + -- to keep the value within range, because we don't know whether + -- it will be used as a signed or unsigned value (the CmmType doesn't + -- distinguish between signed & unsigned). + | CmmFloat Rational Width + | CmmVec [CmmLit] -- Vector literal + | CmmLabel CLabel -- Address of label + | CmmLabelOff CLabel Int -- Address of label + byte offset + + -- Due to limitations in the C backend, the following + -- MUST ONLY be used inside the info table indicated by label2 + -- (label2 must be the info label), and label1 must be an + -- SRT, a slow entrypoint or a large bitmap (see the Mangler) + -- Don't use it at all unless tablesNextToCode. + -- It is also used inside the NCG during when generating + -- position-independent code. + | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset + -- In an expression, the width just has the effect of MO_SS_Conv + -- from wordWidth to the desired width. + -- + -- In a static literal, the supported Widths depend on the + -- architecture: wordWidth is supported on all + -- architectures. Additionally W32 is supported on x86_64 when + -- using the small memory model. + + | CmmBlock {-# UNPACK #-} !BlockId -- Code label + -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in GHC.Cmm.Node. + + | CmmHighStackMark -- A late-bound constant that stands for the max + -- #bytes of stack space used during a procedure. 
+ -- During the stack-layout pass, CmmHighStackMark + -- is replaced by a CmmInt for the actual number + -- of bytes used + deriving Eq + +cmmExprType :: DynFlags -> CmmExpr -> CmmType +cmmExprType dflags (CmmLit lit) = cmmLitType dflags lit +cmmExprType _ (CmmLoad _ rep) = rep +cmmExprType dflags (CmmReg reg) = cmmRegType dflags reg +cmmExprType dflags (CmmMachOp op args) = machOpResultType dflags op (map (cmmExprType dflags) args) +cmmExprType dflags (CmmRegOff reg _) = cmmRegType dflags reg +cmmExprType dflags (CmmStackSlot _ _) = bWord dflags -- an address +-- Careful though: what is stored at the stack slot may be bigger than +-- an address + +cmmLitType :: DynFlags -> CmmLit -> CmmType +cmmLitType _ (CmmInt _ width) = cmmBits width +cmmLitType _ (CmmFloat _ width) = cmmFloat width +cmmLitType _ (CmmVec []) = panic "cmmLitType: CmmVec []" +cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l + in if all (`cmmEqType` ty) (map (cmmLitType cflags) ls) + then cmmVec (1+length ls) ty + else panic "cmmLitType: CmmVec" +cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl +cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl +cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width +cmmLitType dflags (CmmBlock _) = bWord dflags +cmmLitType dflags (CmmHighStackMark) = bWord dflags + +cmmLabelType :: DynFlags -> CLabel -> CmmType +cmmLabelType dflags lbl + | isGcPtrLabel lbl = gcWord dflags + | otherwise = bWord dflags + +cmmExprWidth :: DynFlags -> CmmExpr -> Width +cmmExprWidth dflags e = typeWidth (cmmExprType dflags e) + +-- | Returns an alignment in bytes of a CmmExpr when it's a statically +-- known integer constant, otherwise returns an alignment of 1 byte. +-- The caller is responsible for using with a sensible CmmExpr +-- argument. +cmmExprAlignment :: CmmExpr -> Alignment +cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff) +cmmExprAlignment _ = mkAlignment 1 +-------- +--- Negation for conditional branches + +maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr +maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op + return (CmmMachOp op' args) +maybeInvertCmmExpr _ = Nothing + +----------------------------------------------------------------------------- +-- Local registers +----------------------------------------------------------------------------- + +data LocalReg + = LocalReg {-# UNPACK #-} !Unique CmmType + -- ^ Parameters: + -- 1. Identifier + -- 2. Type + +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +-- See Note [No Ord for Unique] +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = nonDetCmpUnique u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: DynFlags -> CmmReg -> CmmType +cmmRegType _ (CmmLocal reg) = localRegType reg +cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg + +cmmRegWidth :: DynFlags -> CmmReg -> Width +cmmRegWidth dflags = typeWidth . 
cmmRegType dflags + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of registers + +-- These are used for dataflow facts, and a common operation is taking +-- the union of two RegSets and then asking whether the union is the +-- same as one of the inputs. UniqSet isn't good here, because +-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary +-- Sets. + +type RegSet r = Set r +type LocalRegSet = RegSet LocalReg +type GlobalRegSet = RegSet GlobalReg + +emptyRegSet :: RegSet r +nullRegSet :: RegSet r -> Bool +elemRegSet :: Ord r => r -> RegSet r -> Bool +extendRegSet :: Ord r => RegSet r -> r -> RegSet r +deleteFromRegSet :: Ord r => RegSet r -> r -> RegSet r +mkRegSet :: Ord r => [r] -> RegSet r +minusRegSet, plusRegSet, timesRegSet :: Ord r => RegSet r -> RegSet r -> RegSet r +sizeRegSet :: RegSet r -> Int +regSetToList :: RegSet r -> [r] + +emptyRegSet = Set.empty +nullRegSet = Set.null +elemRegSet = Set.member +extendRegSet = flip Set.insert +deleteFromRegSet = flip Set.delete +mkRegSet = Set.fromList +minusRegSet = Set.difference +plusRegSet = Set.union +timesRegSet = Set.intersection +sizeRegSet = Set.size +regSetToList = Set.toList + +class Ord r => UserOfRegs r a where + foldRegsUsed :: DynFlags -> (b -> r -> b) -> b -> a -> b + +foldLocalRegsUsed :: UserOfRegs LocalReg a + => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b +foldLocalRegsUsed = foldRegsUsed + +class Ord r => DefinerOfRegs r a where + foldRegsDefd :: DynFlags -> (b -> r -> b) -> b -> a -> b + +foldLocalRegsDefd :: DefinerOfRegs LocalReg a + => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b +foldLocalRegsDefd = foldRegsDefd + +instance UserOfRegs LocalReg CmmReg where + foldRegsUsed _ f z (CmmLocal reg) = f z reg + foldRegsUsed _ _ z (CmmGlobal _) = z + +instance DefinerOfRegs LocalReg CmmReg where + foldRegsDefd _ f z (CmmLocal reg) = f z reg + foldRegsDefd _ _ z (CmmGlobal _) = z + +instance UserOfRegs GlobalReg CmmReg where + foldRegsUsed _ _ z (CmmLocal _) = z + foldRegsUsed _ f z (CmmGlobal reg) = f z reg + +instance DefinerOfRegs GlobalReg CmmReg where + foldRegsDefd _ _ z (CmmLocal _) = z + foldRegsDefd _ f z (CmmGlobal reg) = f z reg + +instance Ord r => UserOfRegs r r where + foldRegsUsed _ f z r = f z r + +instance Ord r => DefinerOfRegs r r where + foldRegsDefd _ f z r = f z r + +instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where + -- The (Ord r) in the context is necessary here + -- See Note [Recursive superclasses] in TcInstDcls + foldRegsUsed dflags f !z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad addr _) = foldRegsUsed dflags f z addr + expr z (CmmReg r) = foldRegsUsed dflags f z r + expr z (CmmMachOp _ exprs) = foldRegsUsed dflags f z exprs + expr z (CmmRegOff r _) = foldRegsUsed dflags f z r + expr z (CmmStackSlot _ _) = z + +instance UserOfRegs r a => UserOfRegs r [a] where + foldRegsUsed dflags f set as = foldl' (foldRegsUsed dflags f) set as + {-# INLINABLE foldRegsUsed #-} + +instance DefinerOfRegs r a => DefinerOfRegs r [a] where + foldRegsDefd dflags f set as = foldl' (foldRegsDefd dflags f) set as + {-# INLINABLE foldRegsDefd #-} + +----------------------------------------------------------------------------- +-- Global STG registers 
+-----------------------------------------------------------------------------
+
+data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
+
+-----------------------------------------------------------------------------
+-- Global STG registers
+-----------------------------------------------------------------------------
+{-
+Note [Overlapping global registers]
+
+The backend might not faithfully implement the abstraction of the STG
+machine with independent registers for different values of type
+GlobalReg. Specifically, certain pairs of registers (r1, r2) may
+overlap in the sense that a store to r1 invalidates the value in r2,
+and vice versa.
+
+Currently this occurs only on the x86_64 architecture where FloatReg n
+and DoubleReg n are assigned the same microarchitectural register, in
+order to allow functions to receive more Float# or Double# arguments
+in registers (as opposed to on the stack).
+
+There are no specific rules about which registers might overlap with
+which other registers, but presumably it's safe to assume that nothing
+will overlap with special registers like Sp or BaseReg.
+
+Use GHC.Cmm.Utils.regsOverlap to determine whether two GlobalRegs overlap
+on a particular platform. The instance Eq GlobalReg is syntactic
+equality of STG registers and does not take overlap into
+account. However it is still used in UserOfRegs/DefinerOfRegs and
+there are likely still bugs there, beware!
+-}
+
+data GlobalReg
+  -- Argument and return registers
+  = VanillaReg          -- pointers, unboxed ints and chars
+        {-# UNPACK #-} !Int     -- its number
+        VGcPtr
+
+  | FloatReg            -- single-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | DoubleReg           -- double-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | LongReg             -- long int registers (64-bit, really)
+        {-# UNPACK #-} !Int     -- its number
+
+  | XmmReg              -- 128-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | YmmReg              -- 256-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | ZmmReg              -- 512-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  -- STG registers
+  | Sp                  -- Stack ptr; points to last occupied stack location.
+  | SpLim               -- Stack limit
+  | Hp                  -- Heap ptr; points to last occupied heap location.
+  | HpLim               -- Heap limit register
+  | CCCS                -- Current cost-centre stack
+  | CurrentTSO          -- pointer to current thread's TSO
+  | CurrentNursery      -- pointer to allocation area
+  | HpAlloc             -- allocation count for heap check failure
+
+  -- We keep the address of some commonly-called
+  -- functions in the register table, to keep code
+  -- size down:
+  | EagerBlackholeInfo  -- stg_EAGER_BLACKHOLE_info
+  | GCEnter1            -- stg_gc_enter_1
+  | GCFun               -- stg_gc_fun
+
+  -- Base offset for the register table, used for accessing registers
+  -- which do not have real registers assigned to them. This register
+  -- will only appear after we have expanded GlobalReg into memory accesses
+  -- (where necessary) in the native code generator.
+  | BaseReg
+
+  -- The register used by the platform for the C stack pointer. This is
+  -- a break in the STG abstraction used exclusively to set up stack unwinding
+  -- information.
+  | MachSp
+
+  -- This is a dummy register used to indicate to the stack unwinder where
+  -- a routine would return to.
+  | UnwindReturnReg
+
+  -- Base Register for PIC (position-independent code) calculations
+  -- Only used inside the native code generator. Its exact meaning differs
+  -- from platform to platform (see module PositionIndependentCode). 
+ | PicBaseReg + + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + -- NOTE: XMM, YMM, ZMM registers actually are the same registers + -- at least with respect to store at YMM i and then read from XMM i + -- and similarly for ZMM etc. + XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j + ZmmReg i == ZmmReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CCCS == CCCS = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + EagerBlackholeInfo == EagerBlackholeInfo = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + MachSp == MachSp = True + UnwindReturnReg == UnwindReturnReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j + compare (ZmmReg i) (ZmmReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CCCS CCCS = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare EagerBlackholeInfo EagerBlackholeInfo = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare MachSp MachSp = EQ + compare UnwindReturnReg UnwindReturnReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare (XmmReg _) _ = LT + compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT + compare (ZmmReg _) _ = LT + compare _ (ZmmReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CCCS _ = LT + compare _ CCCS = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT + compare MachSp _ = LT + compare _ MachSp = GT + compare UnwindReturnReg _ = LT + compare _ UnwindReturnReg = GT + compare EagerBlackholeInfo _ = LT + compare _ EagerBlackholeInfo = GT + +-- convenient aliases +baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg, + currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg +baseReg = CmmGlobal BaseReg +spReg = CmmGlobal Sp +hpReg = CmmGlobal Hp +hpLimReg = CmmGlobal HpLim +spLimReg = CmmGlobal SpLim +nodeReg = CmmGlobal node +currentTSOReg = CmmGlobal CurrentTSO +currentNurseryReg = CmmGlobal CurrentNursery +hpAllocReg = CmmGlobal HpAlloc +cccsReg = CmmGlobal CCCS + +node :: GlobalReg +node = VanillaReg 1 VGcPtr + +globalRegType :: DynFlags -> GlobalReg -> 
CmmType
+globalRegType dflags (VanillaReg _ VGcPtr) = gcWord dflags
+globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
+globalRegType _ (FloatReg _) = cmmFloat W32
+globalRegType _ (DoubleReg _) = cmmFloat W64
+globalRegType _ (LongReg _) = cmmBits W64
+-- TODO: improve the internal model of SIMD/vectorized registers
+-- the right design SHOULD improve handling of float and double code too.
+-- see remarks in "NOTE [SIMD Design for the future]" in GHC.StgToCmm.Prim
+globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
+globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
+
+globalRegType dflags Hp = gcWord dflags
+ -- The initialiser for all
+ -- dynamically allocated closures
+globalRegType dflags _ = bWord dflags
+
+isArgReg :: GlobalReg -> Bool
+isArgReg (VanillaReg {}) = True
+isArgReg (FloatReg {}) = True
+isArgReg (DoubleReg {}) = True
+isArgReg (LongReg {}) = True
+isArgReg (XmmReg {}) = True
+isArgReg (YmmReg {}) = True
+isArgReg (ZmmReg {}) = True
+isArgReg _ = False
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
new file mode 100644
index 0000000000..8d19e7fdb9
--- /dev/null
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -0,0 +1,484 @@
+{-# LANGUAGE BangPatterns, GADTs #-}
+
+module GHC.Cmm.Graph
+ ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
+ , (<*>), catAGraphs
+ , mkLabel, mkMiddle, mkLast, outOfLine
+ , lgraphOfAGraph, labelAGraph
+
+ , stackStubExpr
+ , mkNop, mkAssign, mkStore
+ , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJumpReturnsTo
+ , mkJump, mkJumpExtra
+ , mkRawJump
+ , mkCbranch, mkSwitch
+ , mkReturn, mkComment, mkCallEntry, mkBranch
+ , mkUnwind
+ , copyInOflow, copyOutOflow
+ , noExtraStack
+ , toCall, Transfer(..)
+ )
+where
+
+import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.CallConv
+import GHC.Cmm.Switch (SwitchTargets)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import DynFlags
+import FastString
+import ForeignCall
+import OrdList
+import GHC.Runtime.Layout (ByteOff)
+import UniqSupply
+import Util
+import Panic
+
+
+-----------------------------------------------------------------------------
+-- Building Graphs
+
+
+-- | CmmAGraph is a chunk of code consisting of:
+--
+-- * ordinary statements (assignments, stores etc.)
+-- * jumps
+-- * labels
+-- * out-of-line labelled blocks
+--
+-- The semantics is that control falls through labels and out-of-line
+-- blocks. Everything after a jump up to the next label is by
+-- definition unreachable code, and will be discarded.
+--
+-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
+-- control flows from the first to the second.
+--
+-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
+-- by providing a label for the entry point and a tick scope; see
+-- 'labelAGraph'.
+type CmmAGraph = OrdList CgStmt
+-- | Unlabeled graph with tick scope
+type CmmAGraphScoped = (CmmAGraph, CmmTickScope)
+
+data CgStmt
+ = CgLabel BlockId CmmTickScope
+ | CgStmt (CmmNode O O)
+ | CgLast (CmmNode O C)
+ | CgFork BlockId CmmAGraph CmmTickScope
+
+flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
+flattenCmmAGraph id (stmts_t, tscope) =
+ CmmGraph { g_entry = id,
+ g_graph = GMany NothingO body NothingO }
+ where
+ body = foldr addBlock emptyBody $ flatten id stmts_t tscope []
+
+ --
+ -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
+ --
+ -- NB.
avoid the quadratic-append trap by passing in the tail of the + -- list. This is important for Very Long Functions (e.g. in T783). + -- + flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C] + -> [Block CmmNode C C] + flatten id g tscope blocks + = flatten1 (fromOL g) block' blocks + where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock + -- + -- flatten0: we are outside a block at this point: any code before + -- the first label is unreachable, so just drop it. + -- + flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C] + flatten0 [] blocks = blocks + + flatten0 (CgLabel id tscope : stmts) blocks + = flatten1 stmts block blocks + where !block = blockJoinHead (CmmEntry id tscope) emptyBlock + + flatten0 (CgFork fork_id stmts_t tscope : rest) blocks + = flatten fork_id stmts_t tscope $ flatten0 rest blocks + + flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks + flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks + + -- + -- flatten1: we have a partial block, collect statements until the + -- next last node to make a block, then call flatten0 to get the rest + -- of the blocks + -- + flatten1 :: [CgStmt] -> Block CmmNode C O + -> [Block CmmNode C C] -> [Block CmmNode C C] + + -- The current block falls through to the end of a function or fork: + -- this code should not be reachable, but it may be referenced by + -- other code that is not reachable. We'll remove it later with + -- dead-code analysis, but for now we have to keep the graph + -- well-formed, so we terminate the block with a branch to the + -- beginning of the current block. + flatten1 [] block blocks + = blockJoinTail block (CmmBranch (entryLabel block)) : blocks + + flatten1 (CgLast stmt : stmts) block blocks + = block' : flatten0 stmts blocks + where !block' = blockJoinTail block stmt + + flatten1 (CgStmt stmt : stmts) block blocks + = flatten1 stmts block' blocks + where !block' = blockSnoc block stmt + + flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks + = flatten fork_id stmts_t tscope $ flatten1 rest block blocks + + -- a label here means that we should start a new block, and the + -- current block should fall through to the new block. 
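+ --
+ -- For example (an illustrative sketch, not code from this patch):
+ -- flattening [CgStmt s, CgLabel l2 scp, CgLast x] from entry l1
+ -- produces two blocks,
+ --
+ -- l1: s; goto l2
+ -- l2: x
+ --
+ -- where the explicit branch to l2 is the CmmBranch added below.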
+ flatten1 (CgLabel id tscp : stmts) block blocks + = blockJoinTail block (CmmBranch id) : + flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks + + + +---------- AGraph manipulation + +(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph +(<*>) = appOL + +catAGraphs :: [CmmAGraph] -> CmmAGraph +catAGraphs = concatOL + +-- | creates a sequence "goto id; id:" as an AGraph +mkLabel :: BlockId -> CmmTickScope -> CmmAGraph +mkLabel bid scp = unitOL (CgLabel bid scp) + +-- | creates an open AGraph from a given node +mkMiddle :: CmmNode O O -> CmmAGraph +mkMiddle middle = unitOL (CgStmt middle) + +-- | creates a closed AGraph from a given node +mkLast :: CmmNode O C -> CmmAGraph +mkLast last = unitOL (CgLast last) + +-- | A labelled code block; should end in a last node +outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph +outOfLine l (c,s) = unitOL (CgFork l c s) + +-- | allocate a fresh label for the entry point +lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph +lgraphOfAGraph g = do + u <- getUniqueM + return (labelAGraph (mkBlockId u) g) + +-- | use the given BlockId as the label of the entry point +labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph +labelAGraph lbl ag = flattenCmmAGraph lbl ag + +---------- No-ops +mkNop :: CmmAGraph +mkNop = nilOL + +mkComment :: FastString -> CmmAGraph +mkComment fs + -- SDM: generating all those comments takes time, this saved about 4% for me + | debugIsOn = mkMiddle $ CmmComment fs + | otherwise = nilOL + +---------- Assignment and store +mkAssign :: CmmReg -> CmmExpr -> CmmAGraph +mkAssign l (CmmReg r) | l == r = mkNop +mkAssign l r = mkMiddle $ CmmAssign l r + +mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkStore l r = mkMiddle $ CmmStore l r + +---------- Control transfer +mkJump :: DynFlags -> Convention -> CmmExpr + -> [CmmExpr] + -> UpdFrameOffset + -> CmmAGraph +mkJump dflags conv e actuals updfr_off = + lastWithArgs dflags Jump Old conv actuals updfr_off $ + toCall e Nothing updfr_off 0 + +-- | A jump where the caller says what the live GlobalRegs are. Used +-- for low-level hand-written Cmm. 
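+--
+-- A hypothetical use (lbl and updfr_off are made-up names for
+-- illustration): jump to a hand-written RTS entry point, declaring
+-- only R1 to be live:
+--
+-- > mkRawJump dflags (CmmLit (CmmLabel lbl)) updfr_off [VanillaReg 1 VGcPtr]
+--
+-- Unlike mkJump, no argument marshalling is generated; the register
+-- list is passed through to the CmmCall node unchanged.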
+mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg] + -> CmmAGraph +mkRawJump dflags e updfr_off vols = + lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $ + \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols + + +mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr] + -> UpdFrameOffset -> [CmmExpr] + -> CmmAGraph +mkJumpExtra dflags conv e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $ + toCall e Nothing updfr_off 0 + +mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph +mkCbranch pred ifso ifnot likely = + mkLast (CmmCondBranch pred ifso ifnot likely) + +mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph +mkSwitch e tbl = mkLast $ CmmSwitch e tbl + +mkReturn :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset + -> CmmAGraph +mkReturn dflags e actuals updfr_off = + lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkBranch :: BlockId -> CmmAGraph +mkBranch bid = mkLast (CmmBranch bid) + +mkFinalCall :: DynFlags + -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset + -> CmmAGraph +mkFinalCall dflags f _ actuals updfr_off = + lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $ + toCall f Nothing updfr_off 0 + +mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> [CmmExpr] + -> CmmAGraph +mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack = do + lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals + updfr_off extra_stack $ + toCall f (Just ret_lbl) updfr_off ret_off + +-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be +-- already on the stack). +mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> CmmAGraph +mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off = do + lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $ + toCall f (Just ret_lbl) updfr_off ret_off + +mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph +mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as + +-- | Construct a 'CmmUnwind' node for the given register and unwinding +-- expression. +mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph +mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)] + +-------------------------------------------------------------------------- + + + + +-- Why are we inserting extra blocks that simply branch to the successors? +-- Because in addition to the branch instruction, @mkBranch@ will insert +-- a necessary adjustment to the stack pointer. + + +-- For debugging purposes, we can stub out dead stack slots: +stackStubExpr :: Width -> CmmExpr +stackStubExpr w = CmmLit (CmmInt 0 w) + +-- When we copy in parameters, we usually want to put overflow +-- parameters on the stack, but sometimes we want to pass the +-- variables in their spill slots. Therefore, for copying arguments +-- and results, we provide different functions to pass the arguments +-- in an overflow area and to pass them in spill slots. 
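+--
+-- As a sketch of the first flavour (x and y standing for the formals
+-- of the function being compiled):
+--
+-- > let (arg_bytes, arg_regs, copies) = copyInOflow dflags NativeNodeCall Old [x, y] []
+--
+-- 'copies' is prepended to the proc's entry code, and 'arg_regs' says
+-- which GlobalRegs the parameters arrive in.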
+copyInOflow :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) + +copyInOflow dflags conv area formals extra_stk + = (offset, gregs, catAGraphs $ map mkMiddle nodes) + where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk + +-- Return the number of bytes used for copying arguments, as well as the +-- instructions to copy the arguments. +copyIn :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (ByteOff, [GlobalReg], [CmmNode O O]) +copyIn dflags conv area formals extra_stk + = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args)) + where + -- See Note [Width of parameters] + ci (reg, RegisterParam r@(VanillaReg {})) = + let local = CmmLocal reg + global = CmmReg (CmmGlobal r) + width = cmmRegWidth dflags local + expr + | width == wordWidth dflags = global + | width < wordWidth dflags = + CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global] + | otherwise = panic "Parameter width greater than word width" + + in CmmAssign local expr + + -- Non VanillaRegs + ci (reg, RegisterParam r) = + CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r)) + + ci (reg, StackParam off) + | isBitsType $ localRegType reg + , typeWidth (localRegType reg) < wordWidth dflags = + let + stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags)) + local = CmmLocal reg + width = cmmRegWidth dflags local + expr = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot] + in CmmAssign local expr + + | otherwise = + CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) + where ty = localRegType reg + + init_offset = widthInBytes (wordWidth dflags) -- infotable + + (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk + + (stk_size, args) = assignArgumentsPos dflags stk_off conv + localRegType formals + +-- Factoring out the common parts of the copyout functions yielded something +-- more complicated: + +data Transfer = Call | JumpRet | Jump | Ret deriving Eq + +copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr] + -> UpdFrameOffset + -> [CmmExpr] -- extra stack args + -> (Int, [GlobalReg], CmmAGraph) + +-- Generate code to move the actual parameters into the locations +-- required by the calling convention. This includes a store for the +-- return address. +-- +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters. If this is a call instruction, we adjust the offsets +-- of the other parameters. 
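+--
+-- For a Call to a young area k, for example, the return address is
+-- slotted in ahead of the arguments, giving roughly (a sketch):
+--
+-- Young k: [ CmmBlock k | arg1 | arg2 | ... ]
+--
+-- For JumpRet the return address is assumed to be on the stack
+-- already, so only the offsets are adjusted.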
+copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff + = (stk_size, regs, graph) + where + (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params) + + -- See Note [Width of parameters] + co (v, RegisterParam r@(VanillaReg {})) (rs, ms) = + let width = cmmExprWidth dflags v + value + | width == wordWidth dflags = v + | width < wordWidth dflags = + CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v] + | otherwise = panic "Parameter width greater than word width" + + in (r:rs, mkAssign (CmmGlobal r) value <*> ms) + + -- Non VanillaRegs + co (v, RegisterParam r) (rs, ms) = + (r:rs, mkAssign (CmmGlobal r) v <*> ms) + + -- See Note [Width of parameters] + co (v, StackParam off) (rs, ms) + = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms) + + width v = cmmExprWidth dflags v + value v + | isBitsType $ cmmExprType dflags v + , width v < wordWidth dflags = + CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v] + | otherwise = v + + (setRA, init_offset) = + case area of + Young id -> -- Generate a store instruction for + -- the return address if making a call + case transfer of + Call -> + ([(CmmLit (CmmBlock id), StackParam init_offset)], + widthInBytes (wordWidth dflags)) + JumpRet -> + ([], + widthInBytes (wordWidth dflags)) + _other -> + ([], 0) + Old -> ([], updfr_off) + + (extra_stack_off, stack_params) = + assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff + + args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it + (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv + (cmmExprType dflags) actuals + + +-- Note [Width of parameters] +-- +-- Consider passing a small (< word width) primitive like Int8# to a function. +-- It's actually non-trivial to do this without extending/narrowing: +-- * Global registers are considered to have native word width (i.e., 64-bits on +-- x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a +-- global register. +-- * Same problem exists with LLVM IR. +-- * Lowering gets harder since on x86-32 not every register exposes its lower +-- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding +-- 8-bit register for %edi). So we would either need to extend/narrow anyway, +-- or complicate the calling convention. +-- * Passing a small integer in a stack slot, which has native word width, +-- requires extending to word width when writing to the stack and narrowing +-- when reading off the stack (see #16258). +-- So instead, we always extend every parameter smaller than native word width +-- in copyOutOflow and then truncate it back to the expected width in copyIn. +-- Note that we do this in cmm using MO_XX_Conv to avoid requiring +-- zero-/sign-extending - it's up to a backend to handle this in a most +-- efficient way (e.g., a simple register move or a smaller size store). +-- This convention (of ignoring the upper bits) is different from some C ABIs, +-- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters. 
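+--
+-- As a concrete sketch (register and width names illustrative, 64-bit):
+-- passing an Int8# argument x becomes
+--
+-- R2 = MO_XX_Conv W8 W64 (x) -- copyOutOflow: widen to word
+-- ...
+-- x' = MO_XX_Conv W64 W8 (R2) -- copyIn: truncate back
+--
+-- with no guarantee about the upper 56 bits in between.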
+-- +-- There was some discussion about this on this PR: +-- https://github.com/ghc-proposals/ghc-proposals/pull/74 + + +mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) +mkCallEntry dflags conv formals extra_stk + = copyInOflow dflags conv Old formals extra_stk + +lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgs dflags transfer area conv actuals updfr_off last = + lastWithArgsAndExtraStack dflags transfer area conv actuals + updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: DynFlags + -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset -> [CmmExpr] + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off + extra_stack last = + copies <*> last outArgs regs + where + (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals + updfr_off extra_stack + + +noExtraStack :: [CmmExpr] +noExtraStack = [] + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff + -> ByteOff -> [GlobalReg] + -> CmmAGraph +toCall e cont updfr_off res_space arg_space regs = + mkLast $ CmmCall e cont regs arg_space res_space updfr_off diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs new file mode 100644 index 0000000000..a10db2b292 --- /dev/null +++ b/compiler/GHC/Cmm/Info.hs @@ -0,0 +1,593 @@ +{-# LANGUAGE CPP #-} +module GHC.Cmm.Info ( + mkEmptyContInfoTable, + cmmToRawCmm, + mkInfoTable, + srtEscape, + + -- info table accessors + closureInfoPtr, + entryCode, + getConstrTag, + cmmGetClosureType, + infoTable, + infoTableConstrTag, + infoTableSrtBitmap, + infoTableClosureType, + infoTablePtrs, + infoTableNonPtrs, + funInfoTable, + funInfoArity, + + -- info table sizes and offsets + stdInfoTableSizeW, + fixedInfoTableSizeW, + profInfoTableSizeW, + maxStdInfoTableSizeW, + maxRetInfoTableSizeW, + stdInfoTableSizeB, + conInfoTableSizeB, + stdSrtBitmapOffset, + stdClosureTypeOffset, + stdPtrsOffset, stdNonPtrsOffset, +) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Runtime.Layout +import GHC.Data.Bitmap +import Stream (Stream) +import qualified Stream +import GHC.Cmm.Dataflow.Collections + +import GHC.Platform +import Maybes +import DynFlags +import ErrUtils (withTimingSilent) +import Panic +import UniqSupply +import MonadUtils +import Util +import Outputable + +import Data.ByteString (ByteString) +import Data.Bits + +-- When we split at proc points, we need an empty info table. +mkEmptyContInfoTable :: CLabel -> CmmInfoTable +mkEmptyContInfoTable info_lbl + = CmmInfoTable { cit_lbl = info_lbl + , cit_rep = mkStackRep [] + , cit_prof = NoProfilingInfo + , cit_srt = Nothing + , cit_clo = Nothing } + +cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a + -> IO (Stream IO RawCmmGroup a) +cmmToRawCmm dflags cmms + = do { uniqs <- mkSplitUniqSupply 'i' + ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl]) + do_one uniqs cmm = + -- NB. strictness fixes a space leak. DO NOT REMOVE. 
+ withTimingSilent dflags (text "Cmm -> Raw Cmm") + forceRes $ + case initUs uniqs $ concatMapM (mkInfoTable dflags) cmm of + (b,uniqs') -> return (uniqs',b) + ; return (snd <$> Stream.mapAccumL_ do_one uniqs cmms) + } + + where forceRes (uniqs, rawcmms) = + uniqs `seq` foldr (\decl r -> decl `seq` r) () rawcmms + +-- Make a concrete info table, represented as a list of CmmStatic +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). +-- +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/rts/storage/InfoTables.h +-- +-- For return-points these are as follows +-- +-- Tables next to code: +-- +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- +-- * The SRT slot is only there if there is SRT info to record + +mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl] +mkInfoTable _ (CmmData sec dat) + = return [CmmData sec dat] + +mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks) + -- + -- in the non-tables-next-to-code case, procs can have at most a + -- single info table associated with the entry label of the proc. + -- + | not (tablesNextToCode dflags) + = case topInfoTable proc of -- must be at most one + -- no info table + Nothing -> + return [CmmProc mapEmpty entry_lbl live blocks] + + Just info@CmmInfoTable { cit_lbl = info_lbl } -> do + (top_decls, (std_info, extra_bits)) <- + mkInfoTableContents dflags info Nothing + let + rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info + rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits + -- + -- Separately emit info table (with the function entry + -- point as first entry) and the entry code + -- + return (top_decls ++ + [CmmProc mapEmpty entry_lbl live blocks, + mkRODataLits info_lbl + (CmmLabel entry_lbl : rel_std_info ++ rel_extra_bits)]) + + -- + -- With tables-next-to-code, we can have many info tables, + -- associated with some of the BlockIds of the proc. For each info + -- table we need to turn it into CmmStatics, and collect any new + -- CmmDecls that arise from doing so. + -- + | otherwise + = do + (top_declss, raw_infos) <- + unzip `fmap` mapM do_one_info (mapToList (info_tbls infos)) + return (concat top_declss ++ + [CmmProc (mapFromList raw_infos) entry_lbl live blocks]) + + where + do_one_info (lbl,itbl) = do + (top_decls, (std_info, extra_bits)) <- + mkInfoTableContents dflags itbl Nothing + let + info_lbl = cit_lbl itbl + rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info + rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits + -- + return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $ + reverse rel_extra_bits ++ rel_std_info)) + +----------------------------------------------------- +type InfoTableContents = ( [CmmLit] -- The standard part + , [CmmLit] ) -- The "extra bits" +-- These Lits have *not* had mkRelativeTo applied to them + +mkInfoTableContents :: DynFlags + -> CmmInfoTable + -> Maybe Int -- Override default RTS type tag? 
+ -> UniqSM ([RawCmmDecl], -- Auxiliary top decls + InfoTableContents) -- Info tbl + extra bits + +mkInfoTableContents dflags + info@(CmmInfoTable { cit_lbl = info_lbl + , cit_rep = smrep + , cit_prof = prof + , cit_srt = srt }) + mb_rts_tag + | RTSRep rts_tag rep <- smrep + = mkInfoTableContents dflags info{cit_rep = rep} (Just rts_tag) + -- Completely override the rts_tag that mkInfoTableContents would + -- otherwise compute, with the rts_tag stored in the RTSRep + -- (which in turn came from a handwritten .cmm file) + + | StackRep frame <- smrep + = do { (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame + ; let + std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit + rts_tag | Just tag <- mb_rts_tag = tag + | null liveness_data = rET_SMALL -- Fits in extra_bits + | otherwise = rET_BIG -- Does not; extra_bits is + -- a label + ; return (prof_data ++ liveness_data, (std_info, srt_label)) } + + | HeapRep _ ptrs nonptrs closure_type <- smrep + = do { let layout = packIntsCLit dflags ptrs nonptrs + ; (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (mb_srt_field, mb_layout, extra_bits, ct_data) + <- mk_pieces closure_type srt_label + ; let std_info = mkStdInfoTable dflags prof_lits + (mb_rts_tag `orElse` rtsClosureType smrep) + (mb_srt_field `orElse` srt_bitmap) + (mb_layout `orElse` layout) + ; return (prof_data ++ ct_data, (std_info, extra_bits)) } + where + mk_pieces :: ClosureTypeInfo -> [CmmLit] + -> UniqSM ( Maybe CmmLit -- Override the SRT field with this + , Maybe CmmLit -- Override the layout field with this + , [CmmLit] -- "Extra bits" for info table + , [RawCmmDecl]) -- Auxiliary data decls + mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor + = do { (descr_lit, decl) <- newStringLit con_descr + ; return ( Just (CmmInt (fromIntegral con_tag) + (halfWordWidth dflags)) + , Nothing, [descr_lit], [decl]) } + + mk_pieces Thunk srt_label + = return (Nothing, Nothing, srt_label, []) + + mk_pieces (ThunkSelector offset) _no_srt + = return (Just (CmmInt 0 (halfWordWidth dflags)), + Just (mkWordCLit dflags (fromIntegral offset)), [], []) + -- Layout known (one free var); we use the layout field for offset + + mk_pieces (Fun arity (ArgSpec fun_type)) srt_label + = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label + ; return (Nothing, Nothing, extra_bits, []) } + + mk_pieces (Fun arity (ArgGen arg_bits)) srt_label + = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits + ; let fun_type | null liveness_data = aRG_GEN + | otherwise = aRG_GEN_BIG + extra_bits = [ packIntsCLit dflags fun_type arity ] + ++ (if inlineSRT dflags then [] else [ srt_lit ]) + ++ [ liveness_lit, slow_entry ] + ; return (Nothing, Nothing, extra_bits, liveness_data) } + where + slow_entry = CmmLabel (toSlowEntryLbl info_lbl) + srt_lit = case srt_label of + [] -> mkIntCLit dflags 0 + (lit:_rest) -> ASSERT( null _rest ) lit + + mk_pieces other _ = pprPanic "mk_pieces" (ppr other) + +mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier + +packIntsCLit :: DynFlags -> Int -> Int -> CmmLit +packIntsCLit dflags a b = packHalfWordsCLit dflags + (toStgHalfWord dflags (fromIntegral a)) + (toStgHalfWord dflags (fromIntegral b)) + + +mkSRTLit :: DynFlags + -> CLabel + -> Maybe CLabel + -> ([CmmLit], -- srt_label, if any + CmmLit) -- 
srt_bitmap +mkSRTLit dflags info_lbl (Just lbl) + | inlineSRT dflags + = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags)) +mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags)) +mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags)) + + +-- | Is the SRT offset field inline in the info table on this platform? +-- +-- See the section "Referring to an SRT from the info table" in +-- Note [SRTs] in GHC.Cmm.Info.Build +inlineSRT :: DynFlags -> Bool +inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64 + && tablesNextToCode dflags + +------------------------------------------------------------------------- +-- +-- Lay out the info table and handle relative offsets +-- +------------------------------------------------------------------------- + +-- This function takes +-- * the standard info table portion (StgInfoTable) +-- * the "extra bits" (StgFunInfoExtraRev etc.) +-- * the entry label +-- * the code +-- and lays them out in memory, producing a list of RawCmmDecl + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo dflags info_lbl (CmmLabel lbl) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags) +makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags) +makeRelativeRefTo _ _ lit = lit + + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the stack layout +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (bound in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- The first two are represented with a 'Just' of a 'LocalReg'. +-- The last two with one or more 'Nothing' constructors. +-- Each 'Nothing' represents one used word. +-- +-- The head of the stack layout is the top of the stack and +-- the least-significant bit. + +mkLivenessBits :: DynFlags -> Liveness -> UniqSM (CmmLit, [RawCmmDecl]) + -- ^ Returns: + -- 1. The bitmap (literal value or label) + -- 2. Large bitmap CmmData if needed + +mkLivenessBits dflags liveness + | n_bits > mAX_SMALL_BITMAP_SIZE dflags -- does not fit in one word + = do { uniq <- getUniqueM + ; let bitmap_lbl = mkBitmapLabel uniq + ; return (CmmLabel bitmap_lbl, + [mkRODataLits bitmap_lbl lits]) } + + | otherwise -- Fits in one word + = return (mkStgWordCLit dflags bitmap_word, []) + where + n_bits = length liveness + + bitmap :: Bitmap + bitmap = mkBitmap dflags liveness + + small_bitmap = case bitmap of + [] -> toStgWord dflags 0 + [b] -> b + _ -> panic "mkLiveness" + bitmap_word = toStgWord dflags (fromIntegral n_bits) + .|. 
(small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags) + + lits = mkWordCLit dflags (fromIntegral n_bits) + : map (mkStgWordCLit dflags) bitmap + -- The first word is the size. The structure must match + -- StgLargeBitmap in includes/rts/storage/InfoTable.h + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in +-- includes/rts/storage/InfoTables.h. +-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: DynFlags + -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling) + -> Int -- Closure RTS tag + -> CmmLit -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, tag, srt] + + where + prof_info + | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr] + | otherwise = [] + + tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags) + +------------------------------------------------------------------------- +-- +-- Making string literals +-- +------------------------------------------------------------------------- + +mkProfLits :: DynFlags -> ProfilingInfo -> UniqSM ((CmmLit,CmmLit), [RawCmmDecl]) +mkProfLits dflags NoProfilingInfo = return ((zeroCLit dflags, zeroCLit dflags), []) +mkProfLits _ (ProfilingInfo td cd) + = do { (td_lit, td_decl) <- newStringLit td + ; (cd_lit, cd_decl) <- newStringLit cd + ; return ((td_lit,cd_lit), [td_decl,cd_decl]) } + +newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt) +newStringLit bytes + = do { uniq <- getUniqueM + ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) } + + +-- Misc utils + +-- | Value of the srt field of an info table when using an StgLargeSRT +srtEscape :: DynFlags -> StgHalfWord +srtEscape dflags = toStgHalfWord dflags (-1) + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is +-- enabled. +wordAligned :: DynFlags -> CmmExpr -> CmmExpr +wordAligned dflags e + | gopt Opt_AlignmentSanitisation dflags + = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e] + | otherwise + = e + +closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr dflags e = + CmmLoad (wordAligned dflags e) (bWord dflags) + +entryCode :: DynFlags -> CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode dflags e + | tablesNextToCode dflags = e + | otherwise = CmmLoad e (bWord dflags) + +getConstrTag :: DynFlags -> CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
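+--
+-- A sketch of the expression built below (64-bit, tables-next-to-code,
+-- no profiling):
+--
+-- MO_UU_Conv W32 W64 [ I32[ I64[closure_ptr] - 4 ] ]
+--
+-- i.e. follow the info pointer, step back to the srt_bitmap half-word
+-- of the info table, and widen the tag to a full word.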
+getConstrTag dflags closure_ptr
+ = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableConstrTag dflags info_table]
+ where
+ info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+cmmGetClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer, and returns the closure type
+-- obtained from the info table
+cmmGetClosureType dflags closure_ptr
+ = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableClosureType dflags info_table]
+ where
+ info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+infoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns a pointer to the first word of the standard-form
+-- info table, excluding the entry-code word (if present)
+infoTable dflags info_ptr
+ | tablesNextToCode dflags = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags)
+ | otherwise = cmmOffsetW dflags info_ptr 1 -- Past the entry code pointer
+
+infoTableConstrTag :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the constr tag
+-- field of the info table (same as the srt_bitmap field)
+infoTableConstrTag = infoTableSrtBitmap
+
+infoTableSrtBitmap :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the srt_bitmap
+-- field of the info table
+infoTableSrtBitmap dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdSrtBitmapOffset dflags)) (bHalfWord dflags)
+
+infoTableClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the closure type
+-- field of the info table.
+infoTableClosureType dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdClosureTypeOffset dflags)) (bHalfWord dflags)
+
+infoTablePtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTablePtrs dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdPtrsOffset dflags)) (bHalfWord dflags)
+
+infoTableNonPtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTableNonPtrs dflags info_tbl
+ = CmmLoad (cmmOffsetB dflags info_tbl (stdNonPtrsOffset dflags)) (bHalfWord dflags)
+
+funInfoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes the info pointer of a function,
+-- and returns a pointer to the first word of the StgFunInfoExtra struct
+-- in the info table.
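+--
+-- With tables-next-to-code the extra struct sits at *lower* addresses
+-- than the standard info table and is laid out reversed, hence the
+-- "Rev" in StgFunInfoExtraRev (a sketch):
+--
+-- | StgFunInfoExtraRev | StgInfoTable | entry code ...
+-- ^ result ^ info_ptr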
+funInfoTable dflags info_ptr + | tablesNextToCode dflags + = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags - sIZEOF_StgFunInfoExtraRev dflags) + | otherwise + = cmmOffsetW dflags info_ptr (1 + stdInfoTableSizeW dflags) + -- Past the entry code pointer + +-- Takes the info pointer of a function, returns the function's arity +funInfoArity :: DynFlags -> CmmExpr -> CmmExpr +funInfoArity dflags iptr + = cmmToWord dflags (cmmLoadIndex dflags rep fun_info (offset `div` rep_bytes)) + where + fun_info = funInfoTable dflags iptr + rep = cmmBits (widthFromBytes rep_bytes) + + (rep_bytes, offset) + | tablesNextToCode dflags = ( pc_REP_StgFunInfoExtraRev_arity pc + , oFFSET_StgFunInfoExtraRev_arity dflags ) + | otherwise = ( pc_REP_StgFunInfoExtraFwd_arity pc + , oFFSET_StgFunInfoExtraFwd_arity dflags ) + + pc = platformConstants dflags + +----------------------------------------------------------------------------- +-- +-- Info table sizes & offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: DynFlags -> WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW dflags + = fixedInfoTableSizeW + + if gopt Opt_SccProfilingOn dflags + then profInfoTableSizeW + else 0 + +fixedInfoTableSizeW :: WordOff +fixedInfoTableSizeW = 2 -- layout, type + +profInfoTableSizeW :: WordOff +profInfoTableSizeW = 2 + +maxStdInfoTableSizeW :: WordOff +maxStdInfoTableSizeW = + 1 {- entry, when !tablesNextToCode -} + + fixedInfoTableSizeW + + profInfoTableSizeW + +maxRetInfoTableSizeW :: WordOff +maxRetInfoTableSizeW = + maxStdInfoTableSizeW + + 1 {- srt label -} + +stdInfoTableSizeB :: DynFlags -> ByteOff +stdInfoTableSizeB dflags = stdInfoTableSizeW dflags * wORD_SIZE dflags + +stdSrtBitmapOffset :: DynFlags -> ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset dflags = stdInfoTableSizeB dflags - halfWordSize dflags + +stdClosureTypeOffset :: DynFlags -> ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset dflags = stdInfoTableSizeB dflags - wORD_SIZE dflags + +stdPtrsOffset, stdNonPtrsOffset :: DynFlags -> ByteOff +stdPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags +stdNonPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags + halfWordSize dflags + +conInfoTableSizeB :: DynFlags -> Int +conInfoTableSizeB dflags = stdInfoTableSizeB dflags + wORD_SIZE dflags diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs new file mode 100644 index 0000000000..1ba79befcd --- /dev/null +++ b/compiler/GHC/Cmm/Info/Build.hs @@ -0,0 +1,892 @@ +{-# LANGUAGE GADTs, BangPatterns, RecordWildCards, + GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-} + +module GHC.Cmm.Info.Build + ( CAFSet, CAFEnv, cafAnal + , doSRTs, ModuleSRTInfo, emptySRT + ) where + +import GhcPrelude hiding (succ) + +import Id +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import Module +import GHC.Platform +import Digraph +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import DynFlags +import Maybes +import Outputable +import GHC.Runtime.Layout +import UniqSupply +import CostCentre +import GHC.StgToCmm.Heap + +import Control.Monad +import Data.Map (Map) 
+import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Tuple +import Control.Monad.Trans.State +import Control.Monad.Trans.Class + + +{- Note [SRTs] + +SRTs are the mechanism by which the garbage collector can determine +the live CAFs in the program. + +Representation +^^^^^^^^^^^^^^ + ++------+ +| info | +| | +-----+---+---+---+ +| -------->|SRT_2| | | | | 0 | +|------| +-----+-|-+-|-+---+ +| | | | +| code | | | +| | v v + +An SRT is simply an object in the program's data segment. It has the +same representation as a static constructor. There are 16 +pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info, +representing SRT objects with 1-16 pointers, respectively. + +The entries of an SRT object point to static closures, which are either +- FUN_STATIC, THUNK_STATIC or CONSTR +- Another SRT (actually just a CONSTR) + +The final field of the SRT is the static link field, used by the +garbage collector to chain together static closures that it visits and +to determine whether a static closure has been visited or not. (see +Note [STATIC_LINK fields]) + +By traversing the transitive closure of an SRT, the GC will reach all +of the CAFs that are reachable from the code associated with this SRT. + +If we need to create an SRT with more than 16 entries, we build a +chain of SRT objects with all but the last having 16 entries. + ++-----+---+- -+---+---+ +|SRT16| | | | | | 0 | ++-----+-|-+- -+-|-+---+ + | | + v v + +----+---+---+---+ + |SRT2| | | | | 0 | + +----+-|-+-|-+---+ + | | + | | + v v + +Referring to an SRT from the info table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following things have SRTs: + +- Static functions (FUN) +- Static thunks (THUNK), ie. CAFs +- Continuations (RET_SMALL, etc.) + +In each case, the info table points to the SRT. + +- info->srt is zero if there's no SRT, otherwise: +- info->srt == 1 and info->f.srt_offset points to the SRT + +e.g. for a FUN with an SRT: + +StgFunInfoTable +------+ + info->f.srt_offset | ------------> offset to SRT object +StgStdInfoTable +------+ + info->layout.ptrs | ... | + info->layout.nptrs | ... | + info->srt | 1 | + info->type | ... | + |------| + +On x86_64, we optimise the info table representation further. The +offset to the SRT can be stored in 32 bits (all code lives within a +2GB region in x86_64's small memory model), so we can save a word in +the info table by storing the srt_offset in the srt field, which is +half a word. + +On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169): + +- info->srt is zero if there's no SRT, otherwise: +- info->srt is an offset from the info pointer to the SRT object + +StgStdInfoTable +------+ + info->layout.ptrs | | + info->layout.nptrs | | + info->srt | ------------> offset to SRT object + |------| + + +EXAMPLE +^^^^^^^ + +f = \x. ... g ... + where + g = \y. ... h ... c1 ... + h = \z. ... c2 ... + +c1 & c2 are CAFs + +g and h are local functions, but they have no static closures. When +we generate code for f, we start with a CmmGroup of four CmmDecls: + + [ f_closure, f_entry, g_entry, h_entry ] + +we process each CmmDecl separately in cpsTop, giving us a list of +CmmDecls. e.g. for f_entry, we might end up with + + [ f_entry, f1_ret, f2_proc ] + +where f1_ret is a return point, and f2_proc is a proc-point. 
We have +a CAFSet for each of these CmmDecls, let's suppose they are + + [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ] + [ g_entry{h_info, c1_closure} ] + [ h_entry{c2_closure} ] + +Next, we make an SRT for each of these functions: + + f_srt : [g_info] + g_srt : [h_info, c1_closure] + h_srt : [c2_closure] + +Now, for g_info and h_info, we want to refer to the SRTs for g and h +respectively, which we'll label g_srt and h_srt: + + f_srt : [g_srt] + g_srt : [h_srt, c1_closure] + h_srt : [c2_closure] + +Now, when an SRT has a single entry, we don't actually generate an SRT +closure for it, instead we just replace references to it with its +single element. So, since h_srt == c2_closure, we have + + f_srt : [g_srt] + g_srt : [c2_closure, c1_closure] + h_srt : [c2_closure] + +and the only SRT closure we generate is + + g_srt = SRT_2 [c2_closure, c1_closure] + + +Optimisations +^^^^^^^^^^^^^ + +To reduce the code size overhead and the cost of traversing SRTs in +the GC, we want to simplify SRTs where possible. We therefore apply +the following optimisations. Each has a [keyword]; search for the +keyword in the code below to see where the optimisation is +implemented. + +1. [Inline] we never create an SRT with a single entry, instead we + point to the single entry directly from the info table. + + i.e. instead of + + +------+ + | info | + | | +-----+---+---+ + | -------->|SRT_1| | | 0 | + |------| +-----+-|-+---+ + | | | + | code | | + | | v + C + + we can point directly to the closure: + + +------+ + | info | + | | + | -------->C + |------| + | | + | code | + | | + + + Furthermore, the SRT for any code that refers to this info table + can point directly to C. + + The exception to this is when we're doing dynamic linking. In that + case, if the closure is not locally defined then we can't point to + it directly from the info table, because this is the text section + which cannot contain runtime relocations. In this case we skip this + optimisation and generate the singleton SRT, because SRTs are in the + data section and *can* have relocatable references. + +2. [FUN] A static function closure can also be an SRT, we simply put + the SRT entries as fields in the static closure. This makes a lot + of sense: the static references are just like the free variables of + the FUN closure. + + i.e. instead of + + f_closure: + +-----+---+ + | | | 0 | + +- |--+---+ + | +------+ + | | info | f_srt: + | | | +-----+---+---+---+ + | | -------->|SRT_2| | | | + 0 | + `----------->|------| +-----+-|-+-|-+---+ + | | | | + | code | | | + | | v v + + + We can generate: + + f_closure: + +-----+---+---+---+ + | | | | | | | 0 | + +- |--+-|-+-|-+---+ + | | | +------+ + | v v | info | + | | | + | | 0 | + `----------->|------| + | | + | code | + | | + + + (note: we can't do this for THUNKs, because the thunk gets + overwritten when it is entered, so we wouldn't be able to share + this SRT with other info tables that want to refer to it (see + [Common] below). FUNs are immutable so don't have this problem.) + +3. [Common] Identical SRTs can be commoned up. + +4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also + refers to C (perhaps transitively), then we can omit the reference + to C from A. + + +Note that there are many other optimisations that we could do, but +aren't implemented. In general, we could omit any reference from an +SRT if everything reachable from it is also reachable from the other +fields in the SRT. Our [Filter] optimisation is a special case of +this. 
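+
+For example, with
+
+A = {B,C}
+B = {C}
+
+[Filter] drops C from A, giving A = {B}: everything reachable through
+C is already reachable through B.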
+
+Another opportunity we don't exploit is this:
+
+A = {X,Y,Z}
+B = {Y,Z}
+C = {X,B}
+
+Here we could use C = {A} and therefore [Inline] C = A.
+-}
+
+-- ---------------------------------------------------------------------
+{- Note [Invalid optimisation: shortcutting]
+
+You might think that if we have something like
+
+A's SRT = {B}
+B's SRT = {X}
+
+that we could replace the reference to B in A's SRT with X.
+
+A's SRT = {X}
+B's SRT = {X}
+
+and thereby perhaps save a little work at runtime, because we don't
+have to visit B.
+
+But this is NOT valid.
+
+Consider these cases:
+
+0. B can't be a constructor, because constructors don't have SRTs
+
+1. B is a CAF. This is the easy one. Obviously we want A's SRT to
+ point to B, so that it keeps B alive.
+
+2. B is a function. This is the tricky one. The reason we can't
+shortcut in this case is that we aren't allowed to resurrect static
+objects.
+
+== How does this cause a problem? ==
+
+The particular case that cropped up when we tried this was #15544.
+- A is a thunk
+- B is a static function
+- X is a CAF
+- suppose we GC when A is alive, and B is not otherwise reachable.
+- B is "collected", meaning that it doesn't make it onto the static
+ objects list during this GC, but nothing bad happens yet.
+- Next, suppose we enter A, and then call B. (remember that A refers to B)
+ At the entry point to B, we GC. This puts B on the stack, as part of the
+ RET_FUN stack frame that gets pushed when we GC at a function entry point.
+- This GC will now reach B
+- But because B was previously "collected", it breaks the assumption
+ that static objects are never resurrected. See Note [STATIC_LINK
+ fields] in rts/sm/Storage.h for why this is bad.
+- In practice, the GC thinks that B has already been visited, and so
+ doesn't visit X, and catastrophe ensues.
+
+== Isn't this caused by the RET_FUN business? ==
+
+Maybe, but could you prove that RET_FUN is the only way that
+resurrection can occur?
+
+So, no shortcutting.
+-}
+
+-- ---------------------------------------------------------------------
+-- Label types
+
+-- Labels that come from cafAnal can be:
+-- - _closure labels for static functions or CAFs
+-- - _info labels for dynamic functions, thunks, or continuations
+-- - _entry labels for functions or thunks
+--
+-- Meanwhile the labels on top-level blocks are _entry labels.
+--
+-- To put everything in the same namespace we convert all labels to
+-- closure labels using toClosureLbl. Note that some of these
+-- labels will not actually exist; that's ok because we're going to
+-- map them to SRTEntry later, which ranges over labels that do exist.
+--
+newtype CAFLabel = CAFLabel CLabel
+ deriving (Eq,Ord,Outputable)
+
+type CAFSet = Set CAFLabel
+type CAFEnv = LabelMap CAFSet
+
+mkCAFLabel :: CLabel -> CAFLabel
+mkCAFLabel lbl = CAFLabel (toClosureLbl lbl)
+
+-- This is a label that we can put in an SRT. It *must* be a closure label,
+-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR.
+newtype SRTEntry = SRTEntry CLabel
+ deriving (Eq, Ord, Outputable)
+
+-- ---------------------------------------------------------------------
+-- CAF analysis
+
+-- |
+-- For each code block:
+-- - collect the references reachable from this code block to FUN,
+-- THUNK or RET labels for which hasCAF == True
+--
+-- This gives us a `CAFEnv`: a mapping from code block to sets of labels
+--
+cafAnal
+ :: LabelSet -- The blocks representing continuations, ie. those
+ -- that will get RET info tables.
These labels will + -- get their own SRTs, so we don't aggregate CAFs from + -- references to these labels, we just use the label. + -> CLabel -- The top label of the proc + -> CmmGraph + -> CAFEnv +cafAnal contLbls topLbl cmmGraph = + analyzeCmmBwd cafLattice + (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty + + +cafLattice :: DataflowLattice CAFSet +cafLattice = DataflowLattice Set.empty add + where + add (OldFact old) (NewFact new) = + let !new' = old `Set.union` new + in changedIf (Set.size new' > Set.size old) new' + + +cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet +cafTransfers contLbls entry topLbl + (BlockCC eNode middle xNode) fBase = + let joined = cafsInNode xNode $! live' + !result = foldNodesBwdOO cafsInNode middle joined + + facts = mapMaybe successorFact (successors xNode) + live' = joinFacts cafLattice facts + + successorFact s + -- If this is a loop back to the entry, we can refer to the + -- entry label. + | s == entry = Just (add topLbl Set.empty) + -- If this is a continuation, we want to refer to the + -- SRT for the continuation's info table + | s `setMember` contLbls + = Just (Set.singleton (mkCAFLabel (infoTblLbl s))) + -- Otherwise, takes the CAF references from the destination + | otherwise + = lookupFact s fBase + + cafsInNode :: CmmNode e x -> CAFSet -> CAFSet + cafsInNode node set = foldExpDeep addCaf node set + + addCaf expr !set = + case expr of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set + _ -> set + add l s | hasCAF l = Set.insert (mkCAFLabel l) s + | otherwise = s + + in mapSingleton (entryLabel eNode) result + + +-- ----------------------------------------------------------------------------- +-- ModuleSRTInfo + +data ModuleSRTInfo = ModuleSRTInfo + { thisModule :: Module + -- ^ Current module being compiled. Required for calling labelDynamic. + , dedupSRTs :: Map (Set SRTEntry) SRTEntry + -- ^ previous SRTs we've emitted, so we can de-duplicate. + -- Used to implement the [Common] optimisation. + , flatSRTs :: Map SRTEntry (Set SRTEntry) + -- ^ The reverse mapping, so that we can remove redundant + -- entries. e.g. if we have an SRT [a,b,c], and we know that b + -- points to [c,d], we can omit c and emit [a,b]. + -- Used to implement the [Filter] optimisation. + } +instance Outputable ModuleSRTInfo where + ppr ModuleSRTInfo{..} = + text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs + +emptySRT :: Module -> ModuleSRTInfo +emptySRT mod = + ModuleSRTInfo + { thisModule = mod + , dedupSRTs = Map.empty + , flatSRTs = Map.empty } + +-- ----------------------------------------------------------------------------- +-- Constructing SRTs + +{- Implementation notes + +- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable + +- The entry in info_tbls corresponding to g_entry is the closure info + table, the rest are continuations. + +- Each entry in info_tbls possibly needs an SRT. We need to make a + label for each of these. 
+
+- We get the CAFSet for each entry from the CAFEnv
+
+-}
+
+-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl,
+-- where the label is
+-- - the info label for a continuation or dynamic closure
+-- - the closure label for a top-level function (not a CAF)
+getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)]
+getLabelledBlocks (CmmData _ _) = []
+getLabelledBlocks (CmmProc top_info _ _ _) =
+ [ (blockId, mkCAFLabel (cit_lbl info))
+ | (blockId, info) <- mapToList (info_tbls top_info)
+ , let rep = cit_rep info
+ , not (isStaticRep rep) || not (isThunkRep rep)
+ ]
+
+
+-- | Put the labelled blocks that we will be annotating with SRTs into
+-- dependency order. This is so that we can process them one at a
+-- time, resolving references to earlier blocks to point to their
+-- SRTs. CAFs themselves are not included here; see getCAFs below.
+depAnalSRTs
+ :: CAFEnv
+ -> [CmmDecl]
+ -> [SCC (Label, CAFLabel, Set CAFLabel)]
+depAnalSRTs cafEnv decls =
+ srtTrace "depAnalSRTs" (ppr graph) graph
+ where
+ labelledBlocks = concatMap getLabelledBlocks decls
+ labelToBlock = Map.fromList (map swap labelledBlocks)
+ graph = stronglyConnCompFromEdgedVerticesOrd
+ [ let cafs' = Set.delete lbl cafs in
+ DigraphNode (l,lbl,cafs') l
+ (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
+ | (l, lbl) <- labelledBlocks
+ , Just cafs <- [mapLookup l cafEnv] ]
+
+
+-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF.
+-- These are treated differently from other labelled blocks:
+-- - we never shortcut a reference to a CAF to the contents of its
+-- SRT, since the point of SRTs is to keep CAFs alive.
+-- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs;
+-- instead we generate their SRTs after everything else.
+getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)]
+getCAFs cafEnv decls =
+ [ (g_entry g, mkCAFLabel topLbl, cafs)
+ | CmmProc top_info topLbl _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , let rep = cit_rep info
+ , isStaticRep rep && isThunkRep rep
+ , Just cafs <- [mapLookup (g_entry g) cafEnv]
+ ]
+
+
+-- | Get the list of blocks that correspond to the entry points for
+-- FUN_STATIC closures. These are the blocks for which, if we have an
+-- SRT, we can merge it with the static closure. [FUN]
+getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)]
+getStaticFuns decls =
+ [ (g_entry g, lbl)
+ | CmmProc top_info _ _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , Just (id, _) <- [cit_clo info]
+ , let rep = cit_rep info
+ , isStaticRep rep && isFunRep rep
+ , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ ]
+
+
+-- | Maps labels from 'cafAnal' to the final CLabel that will appear
+-- in the SRT.
+-- - closures with singleton SRTs resolve to their single entry
+-- - closures with larger SRTs map to the label for that SRT
+-- - CAFs must not map to anything!
+-- - if a label maps to Nothing, we found that this label's SRT
+-- is empty, so we don't need to refer to it from other SRTs.
+type SRTMap = Map CAFLabel (Maybe SRTEntry)

+-- | resolve a CAFLabel to its SRTEntry using the SRTMap
+resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry
+resolveCAF srtMap lbl@(CAFLabel l) =
+ Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
+
+
+-- | Attach SRTs to all info tables in the CmmDecls, and add SRT
+-- declarations to the ModuleSRTInfo.
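+--
+-- A sketch of the intended use (names are illustrative), threading the
+-- ModuleSRTInfo through successive groups so that the [Common] and
+-- [Filter] optimisations can see SRTs emitted earlier:
+--
+-- > (srtInfo', decls') <- doSRTs dflags (emptySRT this_mod) [(cafEnv, decls)]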
+-- +doSRTs + :: DynFlags + -> ModuleSRTInfo + -> [(CAFEnv, [CmmDecl])] + -> IO (ModuleSRTInfo, [CmmDecl]) + +doSRTs dflags moduleSRTInfo tops = do + us <- mkSplitUniqSupply 'u' + + -- Ignore the original grouping of decls, and combine all the + -- CAFEnvs into a single CAFEnv. + let (cafEnvs, declss) = unzip tops + cafEnv = mapUnions cafEnvs + decls = concat declss + staticFuns = mapFromList (getStaticFuns decls) + + -- Put the decls in dependency order. Why? So that we can implement + -- [Inline] and [Filter]. If we need to refer to an SRT that has + -- a single entry, we use the entry itself, which means that we + -- don't need to generate the singleton SRT in the first place. But + -- to do this we need to process blocks before things that depend on + -- them. + let + sccs = depAnalSRTs cafEnv decls + cafsWithSRTs = getCAFs cafEnv decls + + -- On each strongly-connected group of decls, construct the SRT + -- closures and the SRT fields for info tables. + let result :: + [ ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) ] + ((result, _srtMap), moduleSRTInfo') = + initUs_ us $ + flip runStateT moduleSRTInfo $ + flip runStateT Map.empty $ do + nonCAFs <- mapM (doSCC dflags staticFuns) sccs + cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> + oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs + return (nonCAFs ++ cAFs) + + (declss, pairs, funSRTs) = unzip3 result + + -- Next, update the info tables with the SRTs + let + srtFieldMap = mapFromList (concat pairs) + funSRTMap = mapFromList (concat funSRTs) + decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls + + return (moduleSRTInfo', concat declss ++ decls') + + +-- | Build the SRT for a strongly-connected component of blocks +doSCC + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> SCC (Label, CAFLabel, Set CAFLabel) + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) = + oneSRT dflags staticFuns [l] [cafLbl] False cafs + +doSCC dflags staticFuns (CyclicSCC nodes) = do + -- build a single SRT for the whole cycle, see Note [recursive SRTs] + let (blockids, lbls, cafsets) = unzip3 nodes + cafs = Set.unions cafsets + oneSRT dflags staticFuns blockids lbls False cafs + + +{- Note [recursive SRTs] + +If the dependency analyser has found us a recursive group of +declarations, then we build a single SRT for the whole group, on the +grounds that everything in the group is reachable from everything +else, so we lose nothing by having a single SRT. + +However, there are a couple of wrinkles to be aware of. + +* The Set CAFLabel for this SRT will contain labels in the group +itself. The SRTMap will therefore not contain entries for these labels +yet, so we can't turn them into SRTEntries using resolveCAF. BUT we +can just remove recursive references from the Set CAFLabel before +generating the SRT - the SRT will still contain all the CAFLabels that +we need to refer to from this group's SRT. + +* That is, EXCEPT for static function closures. For the same reason +described in Note [Invalid optimisation: shortcutting], we cannot omit +references to static function closures. 
+   - But, since we will merge the SRT with one of the static function
+     closures (see [FUN]), we can omit references to *that* static
+     function closure from the SRT.
+-}
+
+-- | Build an SRT for a set of blocks
+oneSRT
+  :: DynFlags
+  -> LabelMap CLabel -- which blocks are static function entry points
+  -> [Label]         -- blocks in this set
+  -> [CAFLabel]      -- labels for those blocks
+  -> Bool            -- True <=> this SRT is for a CAF
+  -> Set CAFLabel    -- SRT for this set
+  -> StateT SRTMap
+       (StateT ModuleSRTInfo UniqSM)
+       ( [CmmDecl]             -- SRT objects we built
+       , [(Label, CLabel)]     -- SRT fields for these blocks' itbls
+       , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+       )
+
+oneSRT dflags staticFuns blockids lbls isCAF cafs = do
+  srtMap <- get
+  topSRT <- lift get
+  let
+    -- Can we merge this SRT with a FUN_STATIC closure?
+    (maybeFunClosure, otherFunLabels) =
+      case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of
+        [] -> (Nothing, [])
+        ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs)
+
+    -- Remove recursive references from the SRT, except for (all but
+    -- one of the) static functions. See Note [recursive SRTs].
+    nonRec = cafs `Set.difference`
+      (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels)
+
+    -- First resolve all the CAFLabels to SRTEntries
+    -- Implements the [Inline] optimisation.
+    resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec)
+
+    -- The set of all SRTEntries in SRTs that we refer to from here.
+    allBelow =
+      Set.unions [ lbls | caf <- resolved
+                 , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ]
+
+    -- Remove SRTEntries that are also in an SRT that we refer to.
+    -- Implements the [Filter] optimisation.
+    filtered = Set.difference (Set.fromList resolved) allBelow
+
+  srtTrace "oneSRT:"
+    (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return ()
+
+  let
+    isStaticFun = isJust maybeFunClosure
+
+    -- For a label without a closure (e.g. a continuation), we must
+    -- update the SRTMap for the label to point to a closure. It's
+    -- important that we don't do this for static functions or CAFs,
+    -- see Note [Invalid optimisation: shortcutting].
+    updateSRTMap srtEntry =
+      when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do
+        let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls]
+        put (Map.union newSRTMap srtMap)
+
+    this_mod = thisModule topSRT
+
+  case Set.toList filtered of
+    [] -> do
+      srtTrace "oneSRT: empty" (ppr lbls) $ return ()
+      updateSRTMap Nothing
+      return ([], [], [])
+
+    -- [Inline] - when we have only one entry there is no need to
+    -- build an SRT object at all, instead we put the singleton SRT
+    -- entry in the info table.
+    [one@(SRTEntry lbl)]
+      | -- Info tables refer to SRTs by offset (as noted in the section
+        -- "Referring to an SRT from the info table" of Note [SRTs]). However,
+        -- when dynamic linking is used we cannot guarantee that the offset
+        -- between the SRT and the info table will fit in the offset field.
+        -- Consequently we build a singleton SRT in this case.
+        not (labelDynamic dflags this_mod lbl)
+
+        -- MachO relocations can't express offsets between compilation units at
+        -- all, so we are always forced to build a singleton SRT in this case.
+        && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
+           || isLocalCLabel this_mod lbl) -> do
+
+        -- If we have a static function closure, then it becomes the
+        -- SRT object, and everything else points to it.
(the only way + -- we could have multiple labels here is if this is a + -- recursive group, see Note [recursive SRTs]) + case maybeFunClosure of + Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, []) + where + withLabels = + [ (b, if b == staticFunBlock then lbl else staticFunLbl) + | b <- blockids ] + Nothing -> do + updateSRTMap (Just one) + return ([], map (,lbl) blockids, []) + + cafList -> + -- Check whether an SRT with the same entries has been emitted already. + -- Implements the [Common] optimisation. + case Map.lookup filtered (dedupSRTs topSRT) of + Just srtEntry@(SRTEntry srtLbl) -> do + srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return () + updateSRTMap (Just srtEntry) + return ([], map (,srtLbl) blockids, []) + Nothing -> do + -- No duplicates: we have to build a new SRT object + srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return () + (decls, funSRTs, srtEntry) <- + case maybeFunClosure of + Just (fun,block) -> + return ( [], [(block, cafList)], SRTEntry fun ) + Nothing -> do + (decls, entry) <- lift . lift $ buildSRTChain dflags cafList + return (decls, [], entry) + updateSRTMap (Just srtEntry) + let allBelowThis = Set.union allBelow filtered + oldFlatSRTs = flatSRTs topSRT + newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs + newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT) + lift (put (topSRT { dedupSRTs = newDedupSRTs + , flatSRTs = newFlatSRTs })) + let SRTEntry lbl = srtEntry + return (decls, map (,lbl) blockids, funSRTs) + + +-- | build a static SRT object (or a chain of objects) from a list of +-- SRTEntries. +buildSRTChain + :: DynFlags + -> [SRTEntry] + -> UniqSM + ( [CmmDecl] -- The SRT object(s) + , SRTEntry -- label to use in the info table + ) +buildSRTChain _ [] = panic "buildSRT: empty" +buildSRTChain dflags cafSet = + case splitAt mAX_SRT_SIZE cafSet of + (these, []) -> do + (decl,lbl) <- buildSRT dflags these + return ([decl], lbl) + (these,those) -> do + (rest, rest_lbl) <- buildSRTChain dflags (head these : those) + (decl,lbl) <- buildSRT dflags (rest_lbl : tail these) + return (decl:rest, lbl) + where + mAX_SRT_SIZE = 16 + + +buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry) +buildSRT dflags refs = do + id <- getUniqueM + let + lbl = mkSRTLabel id + srt_n_info = mkSRTInfoLabel (length refs) + fields = + mkStaticClosure dflags srt_n_info dontCareCCS + [ CmmLabel lbl | SRTEntry lbl <- refs ] + [] -- no padding + [mkIntCLit dflags 0] -- link field + [] -- no saved info + return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) + + +-- | Update info tables with references to their SRTs. Also generate +-- static closures, splicing in SRT fields as necessary. +updInfoSRTs + :: DynFlags + -> LabelMap CLabel -- SRT labels for each block + -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures + -> CmmDecl + -> [CmmDecl] + +updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g) + | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] + | otherwise = [ proc ] + where + proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g + newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info) + updInfoTbl l info_tbl + | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf + | otherwise = info_tbl { cit_srt = mapLookup l srt_env } + + -- Generate static closures [FUN]. Note that this also generates + -- static closures for thunks (CAFs), because it's easier to treat + -- them uniformly in the code generator. 
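+    --
+    -- A made-up example (an illustration only, not GHC output): a
+    -- FUN_STATIC closure f_closure whose SRT came out as two entries
+    -- [g_closure, h_closure] gets those entries added to its payload as
+    -- extra pointer fields, and its representation is widened to match:
+    --
+    --     HeapRep sta ptrs nptrs ty  -->  HeapRep sta (ptrs + 2) nptrs ty
+    --
+    -- so no separate SRT object is emitted for f; maybeStaticClosure
+    -- below implements this.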
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl) + maybeStaticClosure + | Just info_tbl@CmmInfoTable{..} <- + mapLookup (g_entry g) (info_tbls top_info) + , Just (id, ccs) <- cit_clo + , isStaticRep cit_rep = + let + (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of + Nothing -> + -- if we don't add SRT entries to this closure, then we + -- want to set the srt field in its info table as usual + (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, []) + Just srtEntries -> srtTrace "maybeStaticFun" (ppr res) + (info_tbl { cit_rep = new_rep }, res) + where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] + fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id) + srtEntries + new_rep = case cit_rep of + HeapRep sta ptrs nptrs ty -> + HeapRep sta (ptrs + length srtEntries) nptrs ty + _other -> panic "maybeStaticFun" + lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + in + Just (newInfo, mkDataLits (Section Data lbl) lbl fields) + | otherwise = Nothing + +updInfoSRTs _ _ _ t = [t] + + +srtTrace :: String -> SDoc -> b -> b +-- srtTrace = pprTrace +srtTrace _ _ b = b diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs new file mode 100644 index 0000000000..f6dda7728c --- /dev/null +++ b/compiler/GHC/Cmm/LayoutStack.hs @@ -0,0 +1,1236 @@ +{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-} +module GHC.Cmm.LayoutStack ( + cmmLayoutStack, setInfoTableStackMap + ) where + +import GhcPrelude hiding ((<*>)) + +import GHC.StgToCmm.Utils ( callerSaveVolatileRegs, newTemp ) -- XXX layering violation +import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation + +import BasicTypes +import GHC.Cmm +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Utils +import GHC.Cmm.Graph +import ForeignCall +import GHC.Cmm.Liveness +import GHC.Cmm.ProcPoint +import GHC.Runtime.Layout +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import UniqSupply +import Maybes +import UniqFM +import Util + +import DynFlags +import FastString +import Outputable hiding ( isEmpty ) +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub) + +{- Note [Stack Layout] + +The job of this pass is to + + - replace references to abstract stack Areas with fixed offsets from Sp. + + - replace the CmmHighStackMark constant used in the stack check with + the maximum stack usage of the proc. + + - save any variables that are live across a call, and reload them as + necessary. + +Before stack allocation, local variables remain live across native +calls (CmmCall{ cmm_cont = Just _ }), and after stack allocation local +variables are clobbered by native calls. + +We want to do stack allocation so that as far as possible + - stack use is minimized, and + - unnecessary stack saves and loads are avoided. + +The algorithm we use is a variant of linear-scan register allocation, +where the stack is our register file. + +We proceed in two passes, see Note [Two pass approach] for why they are not easy +to merge into one. + +Pass 1: + + - First, we do a liveness analysis, which annotates every block with + the variables live on entry to the block. + + - We traverse blocks in reverse postorder DFS; that is, we visit at + least one predecessor of a block before the block itself. 
The + stack layout flowing from the predecessor of the block will + determine the stack layout on entry to the block. + + - We maintain a data structure + + Map Label StackMap + + which describes the contents of the stack and the stack pointer on + entry to each block that is a successor of a block that we have + visited. + + - For each block we visit: + + - Look up the StackMap for this block. + + - If this block is a proc point (or a call continuation, if we aren't + splitting proc points), we need to reload all the live variables from the + stack - but this is done in Pass 2, which calculates more precise liveness + information (see description of Pass 2). + + - Walk forwards through the instructions: + - At an assignment x = Sp[loc] + - Record the fact that Sp[loc] contains x, so that we won't + need to save x if it ever needs to be spilled. + - At an assignment x = E + - If x was previously on the stack, it isn't any more + - At the last node, if it is a call or a jump to a proc point + - Lay out the stack frame for the call (see setupStackFrame) + - emit instructions to save all the live variables + - Remember the StackMaps for all the successors + - emit an instruction to adjust Sp + - If the last node is a branch, then the current StackMap is the + StackMap for the successors. + + - Manifest Sp: replace references to stack areas in this block + with real Sp offsets. We cannot do this until we have laid out + the stack area for the successors above. + + In this phase we also eliminate redundant stores to the stack; + see elimStackStores. + + - There is one important gotcha: sometimes we'll encounter a control + transfer to a block that we've already processed (a join point), + and in that case we might need to rearrange the stack to match + what the block is expecting. (exactly the same as in linear-scan + register allocation, except here we have the luxury of an infinite + supply of temporary variables). + + - Finally, we update the magic CmmHighStackMark constant with the + stack usage of the function, and eliminate the whole stack check + if there was no stack use. (in fact this is done as part of the + main traversal, by feeding the high-water-mark output back in as + an input. I hate cyclic programming, but it's just too convenient + sometimes.) + + There are plenty of tricky details: update frames, proc points, return + addresses, foreign calls, and some ad-hoc optimisations that are + convenient to do here and effective in common cases. Comments in the + code below explain these. + +Pass 2: + +- Calculate live registers, but taking into account that nothing is live at the + entry to a proc point. + +- At each proc point and call continuation insert reloads of live registers from + the stack (they were saved by Pass 1). + + +Note [Two pass approach] + +The main reason for Pass 2 is being able to insert only the reloads that are +needed and the fact that the two passes need different liveness information. +Let's consider an example: + + ..... + \ / + D <- proc point + / \ + E F + \ / + G <- proc point + | + X + +Pass 1 needs liveness assuming that local variables are preserved across calls. +This is important because it needs to save any local registers to the stack +(e.g., if register a is used in block X, it must be saved before any native +call). 
+However, for Pass 2, where we want to reload registers from stack (in a proc +point), this is overly conservative and would lead us to generate reloads in D +for things used in X, even though we're going to generate reloads in G anyway +(since it's also a proc point). +So Pass 2 calculates liveness knowing that nothing is live at the entry to a +proc point. This means that in D we only need to reload things used in E or F. +This can be quite important, for an extreme example see testcase for #3294. + +Merging the two passes is not trivial - Pass 2 is a backward rewrite and Pass 1 +is a forward one. Furthermore, Pass 1 is creating code that uses local registers +(saving them before a call), which the liveness analysis for Pass 2 must see to +be correct. + +-} + + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point. At a continuation + it has a particular layout, like this: + + | | <- base + |-------------| + | ret0 | <- base + 8 + |-------------| + . upd frame . <- base + sm_ret_off + |-------------| + | | + . vars . + . (live/dead) . + | | <- base + sm_sp - sm_args + |-------------| + | ret1 | + . ret vals . <- base + sm_sp (<--- Sp points here) + |-------------| + +Why do we include the final return address (ret0) in our stack map? I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. + , sm_ret_off :: ByteOff + -- ^ Number of words of stack that we do not describe with an info + -- table, because it contains an update frame. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> pprUFM sm_regs ppr + + +cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph + -> UniqSM (CmmGraph, LabelMap StackMap) +cmmLayoutStack dflags procpoints entry_args + graph@(CmmGraph { g_entry = entry }) + = do + -- We need liveness info. Dead assignments are removed later + -- by the sinking pass. 
+ let liveness = cmmLocalLiveness dflags graph + blocks = revPostorder graph + + (final_stackmaps, _final_high_sp, new_blocks) <- + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout dflags procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + blocks_with_reloads <- + insertReloadsAsNeeded dflags procpoints final_stackmaps entry new_blocks + new_blocks' <- mapM (lowerSafeForeignCall dflags) blocks_with_reloads + return (ofBlockList entry new_blocks', final_stackmaps) + +-- ----------------------------------------------------------------------------- +-- Pass 1 +-- ----------------------------------------------------------------------------- + +layout :: DynFlags + -> LabelSet -- proc points + -> LabelMap CmmLocalLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> LabelMap StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( LabelMap StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks + = go blocks init_stackmap entry_args [] + where + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm acc_blocks + = do + let (entry0@(CmmEntry entry_lbl tscope), middle0, last0) = blockSplit b0 + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + -- (a) Update the stack map to include the effects of + -- assignments in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- (b) Look at the last node and if we are making a call or + -- jumping to a proc point, we must save the live + -- variables, adjust Sp, and construct the StackMaps for + -- each of the successor blocks. See handleLastNode for + -- details. + (middle1, sp_off, last1, fixup_blocks, out) + <- handleLastNode dflags procpoints liveness cont_info + acc_stackmaps stack1 tscope middle0 last0 + + -- (c) Manifest Sp: run over the nodes in the block and replace + -- CmmStackSlot with CmmLoad from Sp with a concrete offset. + -- + -- our block: + -- middle0 -- the original middle nodes + -- middle1 -- live variable saves from handleLastNode + -- Sp = Sp + sp_off -- Sp adjustment goes here + -- last1 -- the last node + -- + let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1 + + let final_blocks = + manifestSp dflags final_stackmaps stack0 sp0 final_sp_high + entry0 middle_pre sp_off last1 fixup_blocks + + let acc_stackmaps' = mapUnion acc_stackmaps out + + -- If this block jumps to the GC, then we do not take its + -- stack usage into account for the high-water mark. + -- Otherwise, if the only stack usage is in the stack-check + -- failure block itself, we will do a redundant stack + -- check. The stack has a buffer designed to accommodate + -- the largest amount of stack needed for calling the GC. 
+ -- + this_sp_hwm | isGcJump last0 = 0 + | otherwise = sp0 - sp_off + + hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out)) + + go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- Not foolproof, but GCFun is the culprit we most want to catch +isGcJump :: CmmNode O C -> Bool +isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) }) + = l == GCFun || l == GCEnter1 +isGcJump _something_else = False + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, LabelMap ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff) + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, ret_args), ret_off) + _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap. This is +-- purely for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. +-- +procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> LabelMap StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +-- C1 (some statements) +-- Sp = Sp + N +-- C2 (some more statements) +-- call f() -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. 
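+
+-- To make the Sp arithmetic concrete, here is a small worked example with
+-- made-up numbers (an illustration, not compiler output). Suppose Sp is
+-- currently at sp0 = 40, and the continuation's StackMap cont has
+-- sm_sp = 24 and sm_args = 8, with 8 bytes of arguments pushed for the
+-- call. Then, per spOffsetForCall below,
+--
+--     spOffsetForCall 40 cont 8  =  40 - (24 - 8 + 8)  =  16
+--
+-- so C1 is followed by "Sp = Sp + 16" before the call.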
+ +handleLastNode + :: DynFlags -> ProcPointSet -> LabelMap CmmLocalLive -> LabelMap ByteOff + -> LabelMap StackMap -> StackMap -> CmmTickScope + -> Block CmmNode O O + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- nodes to go *before* the Sp adjustment + , ByteOff -- amount to adjust Sp + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + , LabelMap StackMap -- stackmaps for the continuations + ) + +handleLastNode dflags procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } tscp middle last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. + CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], sp_off, last, [], mapEmpty) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } -> + return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + + CmmForeignCall{ succ = cont_lbl, .. } -> do + return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off + -- one word of args: the return address + + CmmBranch {} -> handleBranches + CmmCondBranch {} -> handleBranches + CmmSwitch {} -> handleBranches + + where + -- Calls and ForeignCalls are handled the same way: + lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff + -> ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap + ) + lastCall lbl cml_args cml_ret_args cml_ret_off + = ( assignments + , spOffsetForCall sp0 cont_stack cml_args + , last + , [] -- no new blocks + , mapSingleton lbl cont_stack ) + where + (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + + prepareStack lbl cml_ret_args cml_ret_off + | Just cont_stack <- mapLookup lbl stackmaps + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + = (fixupStack stack0 cont_stack, cont_stack) + -- Otherwise, we have to allocate the stack frame + | otherwise + = (save_assignments, new_cont_stack) + where + (new_cont_stack, save_assignments) + = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0 + + + -- For other last nodes (branches), if any of the targets is a + -- proc point, we have to set up the stack to match what the proc + -- point is expecting. + -- + handleBranches :: UniqSM ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap ) + + handleBranches + -- Note [diamond proc point] + | Just l <- futureContinuation middle + , (nub $ filter (`setMember` procpoints) $ successors last) == [l] + = do + let cont_args = mapFindWithDefault 0 l cont_info + (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) + out = mapFromList [ (l', cont_stack) + | l' <- successors last ] + return ( assigs + , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags) + , last + , [] + , out) + + | otherwise = do + pps <- mapM handleBranch (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapFindWithDefault l l lbl_map + return ( [] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + + -- For each successor of this block + handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleBranch l + -- (a) if the successor already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. 
+ | Just stack2 <- mapLookup l stackmaps + = do + let assigs = fixupStack stack0 stack2 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (b) if the successor is a proc point, save everything + -- on the stack. + | l `setMember` procpoints + = do + let cont_args = mapFindWithDefault 0 l cont_info + (stack2, assigs) = + setupStackFrame dflags l liveness (sm_ret_off stack0) + cont_args stack0 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (c) otherwise, the current StackMap is the StackMap for + -- the continuation. But we must remember to remove any + -- variables from the StackMap that are *not* live at + -- the destination, because this StackMap might be used + -- by fixupStack if this is a join point. + | otherwise = return (l, l, stack1, []) + where live = mapFindWithDefault (panic "handleBranch") l liveness + stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) } + is_live (r,_) = r `elemRegSet` live + + +makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap + -> CmmTickScope -> [CmmNode O O] + -> UniqSM (Label, [CmmBlock]) +makeFixupBlock dflags sp0 l stack tscope assigs + | null assigs && sp0 == sm_sp stack = return (l, []) + | otherwise = do + tmp_lbl <- newBlockId + let sp_off = sp0 - sm_sp stack + block = blockJoin (CmmEntry tmp_lbl tscope) + ( maybeAddSpAdj dflags sp0 sp_off + $ blockFromList assigs ) + (CmmBranch l) + return (tmp_lbl, [block]) + + +-- Sp is currently pointing to current_sp, +-- we want it to point to +-- (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args + = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map = sm_regs old_stack + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just (_,m) <- lookupUFM old_map r, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + + + +setupStackFrame + :: DynFlags + -> BlockId -- label of continuation + -> LabelMap CmmLocalLive -- liveness + -> ByteOff -- updfr + -> ByteOff -- bytes of return values on stack + -> StackMap -- current StackMap + -> (StackMap, [CmmNode O O]) + +setupStackFrame dflags lbl liveness updfr_off ret_args stack0 + = (cont_stack, assignments) + where + -- get the set of LocalRegs live in the continuation + live = mapFindWithDefault Set.empty lbl liveness + + -- the stack from the base to updfr_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. 
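+
+    -- A made-up numeric example (illustration only): with updfr_off = 8
+    -- and 16 bytes of live variables to save, allocate below might
+    -- produce stack1 with sm_sp = 24; with ret_args = 8 the continuation
+    -- then gets cont_stack with sm_sp = 32, sm_args = 8, sm_ret_off = 8.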
+ + -- everything up to updfr_off is off-limits + -- stack1 contains updfr_off, plus everything we need to save + (stack1, assignments) = allocate dflags updfr_off live stack0 + + -- And the Sp at the continuation is: + -- sm_sp stack1 + ret_args + cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args + , sm_args = ret_args + , sm_ret_off = updfr_off + } + + +-- ----------------------------------------------------------------------------- +-- Note [diamond proc point] +-- +-- This special case looks for the pattern we get from a typical +-- tagged case expression: +-- +-- Sp[young(L1)] = L1 +-- if (R1 & 7) != 0 goto L1 else goto L2 +-- L2: +-- call [R1] returns to L1 +-- L1: live: {y} +-- x = R1 +-- +-- If we let the generic case handle this, we get +-- +-- Sp[-16] = L1 +-- if (R1 & 7) != 0 goto L1a else goto L2 +-- L2: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- call [R1] returns to L1 +-- L1a: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- goto L1 +-- L1: +-- x = R1 +-- +-- The code for saving the live vars is duplicated in each branch, and +-- furthermore there is an extra jump in the fast path (assuming L1 is +-- a proc point, which it probably is if there is a heap check). +-- +-- So to fix this we want to set up the stack frame before the +-- conditional jump. How do we know when to do this, and when it is +-- safe? The basic idea is, when we see the assignment +-- +-- Sp[young(L)] = L +-- +-- we know that +-- * we are definitely heading for L +-- * there can be no more reads from another stack area, because young(L) +-- overlaps with it. +-- +-- We don't necessarily know that everything live at L is live now +-- (some might be assigned between here and the jump to L). So we +-- simplify and only do the optimisation when we see +-- +-- (1) a block containing an assignment of a return address L +-- (2) ending in a branch where one (and only) continuation goes to L, +-- and no other continuations go to proc points. +-- +-- then we allocate the stack frame for L at the end of the block, +-- before the branch. +-- +-- We could generalise (2), but that would make it a bit more +-- complicated to handle, and this currently catches the common case. + +futureContinuation :: Block CmmNode O O -> Maybe BlockId +futureContinuation middle = foldBlockNodesB f middle Nothing + where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId + f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ + = Just l + f _ r = r + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap + -> (StackMap, [CmmNode O O]) +allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0 + , sm_regs = regs0 } + = + -- we only have to save regs that are not already in a slot + let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) + regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 + in + + -- make a map of the stack + let stack = reverse $ Array.elems $ + accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $ + ret_words ++ live_words + where ret_words = + [ (x, Occupied) + | x <- [ 1 .. toWords dflags ret_off] ] + live_words = + [ (toWords dflags x, Occupied) + | (r,off) <- nonDetEltsUFM regs1, + -- See Note [Unique Determinism and code generation] + let w = localRegBytes dflags r, + x <- [ off, off - wORD_SIZE dflags .. 
off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). + let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, plusW dflags n 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = plusW dflags n 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, plusW dflags n 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] _no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords dflags r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + n' = n + localRegBytes dflags r + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = plusW dflags n (- length (takeWhile isEmpty save_stack)) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The +-- block looks like this: +-- +-- middle_pre -- the middle nodes +-- Sp = Sp + sp_off -- Sp adjustment goes here +-- last -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp + :: DynFlags + -> LabelMap StackMap -- StackMaps for other blocks + -> StackMap -- StackMap for this block + -> ByteOff -- Sp on entry to the block + -> ByteOff -- SpHigh + -> CmmNode C O -- first node + -> [CmmNode O O] -- middle + -> ByteOff -- sp_off + -> CmmNode O C -- last node + -> [CmmBlock] -- new blocks + -> [CmmBlock] -- final blocks with Sp manifest + +manifestSp dflags stackmaps stack0 sp0 sp_high + first middle_pre sp_off last fixup_blocks + = final_block : fixup_blocks' + where + area_off = getAreaOff stackmaps + + adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x + adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off) + adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off) + + final_middle = maybeAddSpAdj dflags sp0 sp_off + . 
blockFromList
+                 . map adj_pre_sp
+                 . elimStackStores stack0 stackmaps area_off
+                 $ middle_pre
+    final_last = optStackCheck (adj_post_sp last)
+
+    final_block = blockJoin first final_middle final_last
+
+    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+getAreaOff :: LabelMap StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+  case mapLookup l stackmaps of
+    Just sm -> sm_sp sm - sm_args sm
+    Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj
+  :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj dflags sp0 sp_off block =
+  add_initial_unwind $ add_adj_unwind $ adj block
+  where
+    adj block
+      | sp_off /= 0
+      = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off)
+      | otherwise = block
+    -- Add unwind pseudo-instruction at the beginning of each block to
+    -- document Sp level for debugging
+    add_initial_unwind block
+      | debugLevel dflags > 0
+      = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
+
+    -- Add unwind pseudo-instruction right after the Sp adjustment
+    -- if there is one.
+    add_adj_unwind block
+      | debugLevel dflags > 0
+      , sp_off /= 0
+      = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
+
+{- Note [SP old/young offsets]
+
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+   [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+   to be Sp + Sp(L) - Sp(L')
+-}
+
+areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+
+areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
+  = cmmOffset dflags spExpr (sp_old - area_off area - n)
+    -- Replace (CmmStackSlot area n) with an offset from Sp
+
+areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
+  = mkIntExpr dflags sp_hwm
+    -- Replace CmmHighStackMark with the number of bytes of stack used,
+    -- the sp_hwm. See Note [Stack usage] in GHC.StgToCmm.Heap
+
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _) args)
+  | falseStackCheck args
+  = zeroExpr dflags
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Ge _) args)
+  | falseStackCheck args
+  = mkIntExpr dflags 1
+    -- Replace a stack-overflow test that cannot fail with a no-op
+    -- See Note [Always false stack check]
+
+areaToSp _ _ _ _ other = other
+
+-- | Determine whether a stack check cannot fail.
+falseStackCheck :: [CmmExpr] -> Bool
+falseStackCheck [ CmmMachOp (MO_Sub _)
+                    [ CmmRegOff (CmmGlobal Sp) x_off
+                    , CmmLit (CmmInt y_lit _)]
+                , CmmReg (CmmGlobal SpLim)]
+  = fromIntegral x_off >= y_lit
+falseStackCheck _ = False
+
+-- Note [Always false stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We can optimise stack checks of the form
+--
+--   if ((Sp + x) - y < SpLim) then .. else ..
+--
+-- where x and y are non-negative integer byte offsets. Since we know
+-- that SpLim <= Sp (remember the stack grows downwards), this test must
+-- yield False if (x >= y), so we can rewrite the comparison to False.
+-- A subsequent sinking pass will later drop the dead code.
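+--
+-- A concrete instance with made-up offsets: for
+--
+--   if ((Sp + 64) - 16 < SpLim) then .. else ..
+--
+-- we have x = 64 >= y = 16, so (Sp + 64) - 16 = Sp + 48 >= Sp >= SpLim,
+-- and the condition can never be true.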
+-- Optimising this away depends on knowing that SpLim <= Sp, so it is +-- really the job of the stack layout algorithm, hence we do it now. +-- +-- The control flow optimiser may negate a conditional to increase +-- the likelihood of a fallthrough if the branch is not taken. But +-- not every conditional is inverted as the control flow optimiser +-- places some requirements on the predecessors of both branch targets. +-- So we better look for the inverted comparison too. + +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [Always false stack check] + case n of + CmmCondBranch (CmmLit (CmmInt 0 _)) _true false _ -> CmmBranch false + CmmCondBranch (CmmLit (CmmInt _ _)) true _false _ -> CmmBranch true + other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +-- Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n]. We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here. It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap + -> LabelMap StackMap + -> (Area -> ByteOff) + -> [CmmNode O O] + -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes + = go stackmap nodes + where + go _stackmap [] = [] + go stackmap (n:ns) + = case n of + CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) + | Just (_,off) <- lookupUFM (sm_regs stackmap) r + , area_off area + m == off + -> go stackmap ns + _otherwise + -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: DynFlags -> LabelMap StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g) + = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g + where + fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = + info_tbl { cit_rep = StackRep (get_liveness lbl) } + fix_info _ other = other + + get_liveness :: BlockId -> Liveness + get_liveness lbl + = case mapLookup lbl stackmaps of + Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls) + Just sm -> stackMapToLiveness dflags sm + +setInfoTableStackMap _ _ d = d + + +stackMapToLiveness :: DynFlags -> StackMap -> Liveness +stackMapToLiveness dflags StackMap{..} = + reverse $ Array.elems $ + accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1, + toWords dflags (sm_sp - sm_args)) live_words + where + live_words = [ (toWords dflags off, False) + | (r,off) <- nonDetEltsUFM sm_regs + , isGcPtrType (localRegType r) ] + -- See Note [Unique Determinism and code generation] + +-- ----------------------------------------------------------------------------- +-- Pass 2 +-- ----------------------------------------------------------------------------- + +insertReloadsAsNeeded + :: DynFlags + -> ProcPointSet + -> LabelMap StackMap + -> BlockId + -> [CmmBlock] + -> UniqSM [CmmBlock] +insertReloadsAsNeeded dflags procpoints final_stackmaps entry blocks = do + toBlockList . 
fst <$>
+      rewriteCmmBwd liveLattice rewriteCC (ofBlockList entry blocks) mapEmpty
+  where
+    rewriteCC :: RewriteFun CmmLocalLive
+    rewriteCC (BlockCC e_node middle0 x_node) fact_base0 = do
+        let entry_label = entryLabel e_node
+            stackmap = case mapLookup entry_label final_stackmaps of
+                Just sm -> sm
+                Nothing -> panic "insertReloadsAsNeeded: rewriteCC: stackmap"
+
+            -- Merge the liveness from successor blocks and analyse the last
+            -- node.
+            joined = gen_kill dflags x_node $!
+                         joinOutFacts liveLattice x_node fact_base0
+            -- What is live at the start of middle0.
+            live_at_middle0 = foldNodesBwdOO (gen_kill dflags) middle0 joined
+
+            -- If this is a procpoint we need to add the reloads, but only if
+            -- they're actually live. Furthermore, nothing is live at the entry
+            -- to a proc point.
+            (middle1, live_with_reloads)
+                | entry_label `setMember` procpoints
+                = let reloads = insertReloads dflags stackmap live_at_middle0
+                  in (foldr blockCons middle0 reloads, emptyRegSet)
+                | otherwise
+                = (middle0, live_at_middle0)
+
+            -- Final liveness for this block.
+            !fact_base2 = mapSingleton entry_label live_with_reloads
+
+        return (BlockCC e_node middle1 x_node, fact_base2)
+
+insertReloads :: DynFlags -> StackMap -> CmmLocalLive -> [CmmNode O O]
+insertReloads dflags stackmap live =
+    [ CmmAssign (CmmLocal reg)
+                -- This cmmOffset basically corresponds to manifesting
+                -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
+                (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off))
+                         (localRegType reg))
+    | (reg, reg_off) <- stackSlotRegs stackmap
+    , reg `elemRegSet` live
+    ]
+  where
+    sp_off = sm_sp stackmap
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [Lower safe foreign calls]
+
+We start with
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+   R1 = r1 -- copyIn, inserted by mkSafeCall
+   ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r  -- copyOut
+ | jump Sp[0]
+ '-----------------------
+ L1:
+   r = R1 -- copyIn, inserted by mkSafeCall
+   ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]). Note also
+that the safe foreign call is replaced by an unsafe one in the Cmm graph.
+-}
+
+lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall dflags block
+  | (entry@(CmmEntry _ tscp), middle, CmmForeignCall { .. }) <- blockSplit block
+  = do
+    -- Both 'id' and 'new_base' are KindNonPtr because they're
+    -- RTS-only objects and are not subject to garbage collection
+    id <- newTemp (bWord dflags)
+    new_base <- newTemp (cmmRegType dflags baseReg)
+    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+    save_state_code <- saveThreadState dflags
+    load_state_code <- loadThreadState dflags
+    let suspend = save_state_code <*>
+                  caller_save <*>
+                  mkMiddle (callSuspendThread dflags id intrbl)
+        midCall = mkUnsafeCall tgt res args
+        resume  = mkMiddle (callResumeThread new_base id) <*>
+                  -- Assign the result to BaseReg: we
+                  -- might now have a different Capability!
+ mkAssign baseReg (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + load_state_code + + (_, regs, copyout) = + copyOutOflow dflags NativeReturn Jump (Young succ) + (map (CmmReg . CmmLocal) res) + ret_off [] + + -- NB. after resumeThread returns, the top-of-stack probably contains + -- the stack frame for succ, but it might not: if the current thread + -- received an exception during the call, then the stack might be + -- different. Hence we continue by jumping to the top stack frame, + -- not by jumping to succ. + jump = CmmCall { cml_target = entryCode dflags $ + CmmLoad spExpr (bWord dflags) + , cml_cont = Just succ + , cml_args_regs = regs + , cml_args = widthInBytes (wordWidth dflags) + , cml_ret_args = ret_args + , cml_ret_off = ret_off } + + graph' <- lgraphOfAGraph ( suspend <*> + midCall <*> + resume <*> + copyout <*> + mkLast jump, tscp) + + case toBlockList graph' of + [one] -> let (_, middle', last) = blockSplit one + in return (blockJoin entry (middle `blockAppend` middle') last) + _ -> panic "lowerSafeForeignCall0" + + -- Block doesn't end in a safe foreign call: + | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction)) + +callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O +callSuspendThread dflags id intrbl = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "suspendThread")) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) + [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "resumeThread")) + (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) + [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff +plusW dflags b w = b + w * wORD_SIZE dflags + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = text "XXX" + ppr Empty = text "---" + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _ = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: DynFlags -> LocalReg -> ByteOff +localRegBytes dflags r + = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: DynFlags -> LocalReg -> WordOff +localRegWords dflags = toWords dflags . localRegBytes dflags + +toWords :: DynFlags -> ByteOff -> WordOff +toWords dflags x = x `quot` wORD_SIZE dflags + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = nonDetEltsUFM (sm_regs sm) + -- See Note [Unique Determinism and code generation] diff --git a/compiler/GHC/Cmm/Lexer.x b/compiler/GHC/Cmm/Lexer.x new file mode 100644 index 0000000000..d8f15b916c --- /dev/null +++ b/compiler/GHC/Cmm/Lexer.x @@ -0,0 +1,368 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004-2006 +-- +-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there +-- are a few minor differences: +-- +-- * extra keywords for our macros, and float32/float64 types +-- * global registers (Sp,Hp, etc.) 
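+--
+-- For example (an illustrative fragment, not from a real .cmm file), the
+-- input
+--
+--     if (Sp < SpLim) { goto stk_chk; }
+--
+-- is lexed to CmmT_if, CmmT_SpecChar '(', CmmT_GlobalReg Sp,
+-- CmmT_SpecChar '<', CmmT_GlobalReg SpLim, CmmT_SpecChar ')',
+-- CmmT_SpecChar '{', CmmT_goto, CmmT_Name "stk_chk", CmmT_SpecChar ';',
+-- CmmT_SpecChar '}'.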
+-- +----------------------------------------------------------------------------- + +{ +module GHC.Cmm.Lexer ( + CmmToken(..), cmmlex, + ) where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Lexer +import GHC.Cmm.Monad +import SrcLoc +import UniqFM +import StringBuffer +import FastString +import Ctype +import Util +--import TRACE + +import Data.Word +import Data.Char +} + +$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar. +$digit = [$ascdigit $unidigit] +$octit = 0-7 +$hexit = [$digit A-F a-f] + +$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar. +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar. +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$namebegin = [$large $small \. \$ \@] +$namechar = [$namebegin $digit] + +@decimal = $digit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3}) +@strchar = ($printable # [\"\\]) | @escape + +cmm :- + +$white_no_nl+ ; +^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output + +^\# (line)? { begin line_prag } + +-- single-line line pragmas, of the form +-- # <line> "<file>" <extra-stuff> \n +<line_prag> $digit+ { setLine line_prag1 } +<line_prag1> \" [^\"]* \" { setFile line_prag2 } +<line_prag2> .* { pop } + +<0> { + \n ; + + [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char } + + ".." { kw CmmT_DotDot } + "::" { kw CmmT_DoubleColon } + ">>" { kw CmmT_Shr } + "<<" { kw CmmT_Shl } + ">=" { kw CmmT_Ge } + "<=" { kw CmmT_Le } + "==" { kw CmmT_Eq } + "!=" { kw CmmT_Ne } + "&&" { kw CmmT_BoolAnd } + "||" { kw CmmT_BoolOr } + + "True" { kw CmmT_True } + "False" { kw CmmT_False } + "likely" { kw CmmT_likely} + + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } + R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } + F@decimal { global_regN FloatReg } + D@decimal { global_regN DoubleReg } + L@decimal { global_regN LongReg } + Sp { global_reg Sp } + SpLim { global_reg SpLim } + Hp { global_reg Hp } + HpLim { global_reg HpLim } + CCCS { global_reg CCCS } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } + HpAlloc { global_reg HpAlloc } + BaseReg { global_reg BaseReg } + MachSp { global_reg MachSp } + UnwindReturnReg { global_reg UnwindReturnReg } + + $namebegin $namechar* { name } + + 0 @octal { tok_octal } + @decimal { tok_decimal } + 0[xX] @hexadecimal { tok_hexadecimal } + @floating_point { strtoken tok_float } + + \" @strchar* \" { strtoken tok_string } +} + +{ +data CmmToken + = CmmT_SpecChar Char + | CmmT_DotDot + | CmmT_DoubleColon + | CmmT_Shr + | CmmT_Shl + | CmmT_Ge + | CmmT_Le + | CmmT_Eq + | CmmT_Ne + | CmmT_BoolAnd + | CmmT_BoolOr + | CmmT_CLOSURE + | CmmT_INFO_TABLE + | CmmT_INFO_TABLE_RET + | CmmT_INFO_TABLE_FUN + | CmmT_INFO_TABLE_CONSTR + | CmmT_INFO_TABLE_SELECTOR + | CmmT_else + | CmmT_export + | CmmT_section + | CmmT_goto + | CmmT_if + | CmmT_call + | CmmT_jump + | CmmT_foreign + | CmmT_never + | CmmT_prim + | CmmT_reserve + | CmmT_return + | CmmT_returns + | CmmT_import + | CmmT_switch + | CmmT_case + | CmmT_default + | CmmT_push + | CmmT_unwind + | CmmT_bits8 + | CmmT_bits16 + | CmmT_bits32 + | CmmT_bits64 + | 
CmmT_bits128 + | CmmT_bits256 + | CmmT_bits512 + | CmmT_float32 + | CmmT_float64 + | CmmT_gcptr + | CmmT_GlobalReg GlobalReg + | CmmT_Name FastString + | CmmT_String String + | CmmT_Int Integer + | CmmT_Float Rational + | CmmT_EOF + | CmmT_False + | CmmT_True + | CmmT_likely + deriving (Show) + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken) + +begin :: Int -> Action +begin code _span _str _len = do liftP (pushLexState code); lexToken + +pop :: Action +pop _span _buf _len = liftP popLexState >> lexToken + +special_char :: Action +special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf))) + +kw :: CmmToken -> Action +kw tok span _buf _len = return (L span tok) + +global_regN :: (Int -> GlobalReg) -> Action +global_regN con span buf len + = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) + where buf' = stepOn buf + n = parseUnsignedInteger buf' (len-1) 10 octDecDigit + +global_reg :: GlobalReg -> Action +global_reg r span _buf _len = return (L span (CmmT_GlobalReg r)) + +strtoken :: (String -> CmmToken) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +name :: Action +name span buf len = + case lookupUFM reservedWordsFM fs of + Just tok -> return (L span tok) + Nothing -> return (L span (CmmT_Name fs)) + where + fs = lexemeToFastString buf len + +reservedWordsFM = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "CLOSURE", CmmT_CLOSURE ), + ( "INFO_TABLE", CmmT_INFO_TABLE ), + ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), + ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), + ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), + ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), + ( "else", CmmT_else ), + ( "export", CmmT_export ), + ( "section", CmmT_section ), + ( "goto", CmmT_goto ), + ( "if", CmmT_if ), + ( "call", CmmT_call ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), + ( "never", CmmT_never ), + ( "prim", CmmT_prim ), + ( "reserve", CmmT_reserve ), + ( "return", CmmT_return ), + ( "returns", CmmT_returns ), + ( "import", CmmT_import ), + ( "switch", CmmT_switch ), + ( "case", CmmT_case ), + ( "default", CmmT_default ), + ( "push", CmmT_push ), + ( "unwind", CmmT_unwind ), + ( "bits8", CmmT_bits8 ), + ( "bits16", CmmT_bits16 ), + ( "bits32", CmmT_bits32 ), + ( "bits64", CmmT_bits64 ), + ( "bits128", CmmT_bits128 ), + ( "bits256", CmmT_bits256 ), + ( "bits512", CmmT_bits512 ), + ( "float32", CmmT_float32 ), + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "b128", CmmT_bits128 ), + ( "b256", CmmT_bits256 ), + ( "b512", CmmT_bits512 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ), + ( "likely", CmmT_likely), + ( "True", CmmT_True ), + ( "False", CmmT_False ) + ] + +tok_decimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +tok_float str = CmmT_Float $! 
readRational str + +tok_string str = CmmT_String (read str) + -- urk, not quite right, but it'll do for now + +-- ----------------------------------------------------------------------------- +-- Line pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseUnsignedInteger buf len 10 octDecDigit + liftP $ do + setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) + -- subtract one: the line number refers to the *following* line + -- trace ("setLine " ++ show line) $ do + popLexState >> pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + liftP $ do + setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState >> pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +cmmlex :: (Located CmmToken -> PD a) -> PD a +cmmlex cont = do + (L span tok) <- lexToken + --trace ("token: " ++ show tok) $ do + cont (L (RealSrcSpan span) tok) + +lexToken :: PD (RealLocated CmmToken) +lexToken = do + inp@(loc1,buf) <- getInput + sc <- liftP getLexState + case alexScan inp sc of + AlexEOF -> do let span = mkRealSrcSpan loc1 loc1 + liftP (setLastToken span 0) + return (L span CmmT_EOF) + AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,_buf2) len t -> do + setInput inp2 + let span = mkRealSrcSpan loc1 end + span `seq` liftP (setLastToken span len) + t span buf len + +-- ----------------------------------------------------------------------------- +-- Monad stuff + +-- Stuff that Alex needs to know about our input type: +type AlexInput = (RealSrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +-- backwards compatibility for Alex 2.x +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar inp = case alexGetByte inp of + Nothing -> Nothing + Just (b,i) -> c `seq` Just (c,i) + where c = chr $ fromIntegral b + +alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) +alexGetByte (loc,s) + | atEnd s = Nothing + | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s')) + where c = currentChar s + b = fromIntegral $ ord $ c + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: PD AlexInput +getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> PD () +setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } () +} diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs new file mode 100644 index 0000000000..d70fed3b9e --- /dev/null +++ b/compiler/GHC/Cmm/Lint.hs @@ -0,0 +1,261 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2011 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Lint ( + cmmLint, cmmLintGraph + ) where + +import GhcPrelude + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Liveness +import GHC.Cmm.Switch (switchTargetsToList) +import GHC.Cmm.Ppr () -- For Outputable instances +import 
Outputable +import DynFlags + +import Control.Monad (ap) + +-- Things to check: +-- - invariant on CmmBlock in GHC.Cmm.Expr (see comment there) +-- - check for branches to blocks that don't exist +-- - check types + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) + => DynFlags -> GenCmmGroup d h CmmGraph -> Maybe SDoc +cmmLint dflags tops = runCmmLint dflags (mapM_ (lintCmmDecl dflags)) tops + +cmmLintGraph :: DynFlags -> CmmGraph -> Maybe SDoc +cmmLintGraph dflags g = runCmmLint dflags (lintCmmGraph dflags) g + +runCmmLint :: Outputable a => DynFlags -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint dflags l p = + case unCL (l p) dflags of + Left err -> Just (vcat [text "Cmm lint error:", + nest 2 err, + text "Program was:", + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: DynFlags -> GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl dflags (CmmProc _ lbl _ g) + = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph dflags g +lintCmmDecl _ (CmmData {}) + = return () + + +lintCmmGraph :: DynFlags -> CmmGraph -> CmmLint () +lintCmmGraph dflags g = + cmmLocalLiveness dflags g `seq` mapM_ (lintCmmBlock labels) blocks + -- cmmLiveness throws an error if there are registers + -- live on entry to the graph (i.e. undefined + -- variables) + where + blocks = toBlockList g + labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: LabelSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block + = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do + let (_, middle, last) = blockSplit block + mapM_ lintCmmMiddle (blockToList middle) + lintCmmLast labels last + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do + _ <- lintCmmExpr expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr + return rep +lintCmmExpr expr@(CmmMachOp op args) = do + dflags <- getDynFlags + tys <- mapM lintCmmExpr args + if map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map (cmmExprType dflags) args) (machOpArgReps dflags op) +lintCmmExpr (CmmRegOff reg offset) + = do dflags <- getDynFlags + let rep = typeWidth (cmmRegType dflags reg) + lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) +lintCmmExpr expr = + do dflags <- getDynFlags + return (cmmExprType dflags expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = do dflags <- getDynFlags + return (machOpResultType dflags op tys) + +{- +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. 
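+-- As an illustrative sketch (assuming a 64-bit target, so that
+-- wORD_SIZE dflags == 8): the kind of address this check would flag is
+--
+--      CmmMachOp (MO_Add W64) [CmmReg spReg, CmmLit (CmmInt 1 W64)]
+--
+-- i.e. Sp + 1, where a byte offset has almost certainly been used in
+-- place of a word offset.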
+_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ + = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True +-} + +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of + CmmComment _ -> return () + CmmTick _ -> return () + CmmUnwind{} -> return () + + CmmAssign reg expr -> do + dflags <- getDynFlags + erep <- lintCmmExpr expr + let reg_ty = cmmRegType dflags reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + + CmmStore l r -> do + _ <- lintCmmExpr l + _ <- lintCmmExpr r + return () + + CmmUnsafeForeignCall target _formals actuals -> do + lintTarget target + mapM_ lintCmmExpr actuals + + +lintCmmLast :: LabelSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of + CmmBranch id -> checkTarget id + + CmmCondBranch e t f _ -> do + dflags <- getDynFlags + mapM_ checkTarget [t,f] + _ <- lintCmmExpr e + checkCond dflags e + + CmmSwitch e ids -> do + dflags <- getDynFlags + mapM_ checkTarget $ switchTargetsToList ids + erep <- lintCmmExpr e + if (erep `cmmEqType_ignoring_ptrhood` bWord dflags) + then return () + else cmmLintErr (text "switch scrutinee is not a word: " <> + ppr e <> text " :: " <> ppr erep) + + CmmCall { cml_target = target, cml_cont = cont } -> do + _ <- lintCmmExpr target + maybe (return ()) checkTarget cont + + CmmForeignCall tgt _ args succ _ _ _ -> do + lintTarget tgt + mapM_ lintCmmExpr args + checkTarget succ + where + checkTarget id + | setMember id labels = return () + | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {}) = return () + + +checkCond :: DynFlags -> CmmExpr -> CmmLint () +checkCond _ (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond dflags (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth dflags = return () -- constant values +checkCond _ expr + = cmmLintErr (hang (text "expression is not a conditional:") 2 + (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: DynFlags -> Either SDoc a } + deriving (Functor) + +instance Applicative CmmLint where + pure a = CmmLint (\_ -> Right a) + (<*>) = ap + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ \dflags -> + case m dflags of + Left e -> Left e + Right a -> unCL (k a) dflags + +instance HasDynFlags CmmLint where + getDynFlags = CmmLint (\dflags -> Right dflags) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (\_ -> Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ \dflags -> + case unCL thing dflags of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep 
opExpectsRep + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + +{- +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (ppr expr)) +-} + diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs new file mode 100644 index 0000000000..2b598f52e5 --- /dev/null +++ b/compiler/GHC/Cmm/Liveness.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module GHC.Cmm.Liveness + ( CmmLocalLive + , cmmLocalLiveness + , cmmGlobalLiveness + , liveLattice + , gen_kill + ) +where + +import GhcPrelude + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Ppr.Expr () -- For Outputable instances +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Label + +import Maybes +import Outputable + +----------------------------------------------------------------------------- +-- Calculating what variables are live on entry to a basic block +----------------------------------------------------------------------------- + +-- | The variables live on entry to a block +type CmmLive r = RegSet r +type CmmLocalLive = CmmLive LocalReg + +-- | The dataflow lattice +liveLattice :: Ord r => DataflowLattice (CmmLive r) +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive LocalReg) #-} +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive GlobalReg) #-} +liveLattice = DataflowLattice emptyRegSet add + where + add (OldFact old) (NewFact new) = + let !join = plusRegSet old new + in changedIf (sizeRegSet join > sizeRegSet old) join + +-- | A mapping from block labels to the variables live on entry +type BlockEntryLiveness r = LabelMap (CmmLive r) + +----------------------------------------------------------------------------- +-- | Calculated liveness info for a CmmGraph +----------------------------------------------------------------------------- + +cmmLocalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness LocalReg +cmmLocalLiveness dflags graph = + check $ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + where + entry = g_entry graph + check facts = + noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts + +cmmGlobalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness GlobalReg +cmmGlobalLiveness dflags graph = + analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + +-- | On entry to the procedure, there had better not be any LocalReg's live-in. +noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a +noLiveOnEntry bid in_fact x = + if nullRegSet in_fact then x + else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) + +gen_kill + :: (DefinerOfRegs r n, UserOfRegs r n) + => DynFlags -> n -> CmmLive r -> CmmLive r +gen_kill dflags node set = + let !afterKill = foldRegsDefd dflags deleteFromRegSet set node + in foldRegsUsed dflags extendRegSet afterKill node +{-# INLINE gen_kill #-} + +xferLive + :: forall r. 
+ ( UserOfRegs r (CmmNode O O) + , DefinerOfRegs r (CmmNode O O) + , UserOfRegs r (CmmNode O C) + , DefinerOfRegs r (CmmNode O C) + ) + => DynFlags -> TransferFun (CmmLive r) +xferLive dflags (BlockCC eNode middle xNode) fBase = + let joined = gen_kill dflags xNode $! joinOutFacts liveLattice xNode fBase + !result = foldNodesBwdOO (gen_kill dflags) middle joined + in mapSingleton (entryLabel eNode) result +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive LocalReg) #-} +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive GlobalReg) #-} diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs new file mode 100644 index 0000000000..234001545c --- /dev/null +++ b/compiler/GHC/Cmm/MachOp.hs @@ -0,0 +1,664 @@ +module GHC.Cmm.MachOp + ( MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, maybeIntComparison, machOpResultType + , machOpArgReps, maybeInvertComparison, isFloatComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot + , mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord + , mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + + -- CallishMachOp + , CallishMachOp(..), callishMachOpHints + , pprCallishMachOp + , machOpMemcpyishAlign + + -- Atomic read-modify-write + , AtomicMachOp(..) + ) +where + +import GhcPrelude + +import GHC.Cmm.Type +import Outputable +import DynFlags + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. + +Most operations are parameterised by the 'Width' that they operate on. +Some operations have separate signed and unsigned versions, and float +and integer versions. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. 
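+  -- For example (informative only): per machOpArgReps below, the value
+  -- operand of a shift has the given width but the shift amount is
+  -- always word-sized, so MO_Shl W32 shifts a 32-bit value by a
+  -- word-sized count.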
+ | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_XX_Conv Width Width -- int -> int; puts no requirements on the + -- contents of upper bits when extending; + -- narrowing is simply truncation; the only + -- expectation is that we can recover the + -- original value by applying the opposite + -- MO_XX_Conv, e.g., + -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x) + -- is equivalent to just x. + | MO_FF_Conv Width Width -- Float -> Float + + -- Vector element insertion and extraction operations + | MO_V_Insert Length Width -- Insert scalar into vector + | MO_V_Extract Length Width -- Extract scalar from vector + + -- Integer vector operations + | MO_V_Add Length Width + | MO_V_Sub Length Width + | MO_V_Mul Length Width + + -- Signed vector multiply/divide + | MO_VS_Quot Length Width + | MO_VS_Rem Length Width + | MO_VS_Neg Length Width + + -- Unsigned vector multiply/divide + | MO_VU_Quot Length Width + | MO_VU_Rem Length Width + + -- Floating point vector element insertion and extraction operations + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector + + -- Floating point vector operations + | MO_VF_Add Length Width + | MO_VF_Sub Length Width + | MO_VF_Neg Length Width -- unary negation + | MO_VF_Mul Length Width + | MO_VF_Quot Length Width + + -- Alignment check (for -falignment-sanitisation) + | MO_AlignmentCheck Int Width + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. 
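+-- As an illustrative sketch (not part of this interface): a use site
+-- would typically build
+--
+--      CmmMachOp (mo_wordAdd dflags) [e1, e2]
+--
+-- rather than hard-coding, say, CmmMachOp (MO_Add W64) [e1, e2], so that
+-- the same code-generator source works on both 32- and 64-bit targets.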
+ +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + :: DynFlags -> MachOp + +mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_32To8, mo_32To16 + :: MachOp + +mo_wordAdd dflags = MO_Add (wordWidth dflags) +mo_wordSub dflags = MO_Sub (wordWidth dflags) +mo_wordEq dflags = MO_Eq (wordWidth dflags) +mo_wordNe dflags = MO_Ne (wordWidth dflags) +mo_wordMul dflags = MO_Mul (wordWidth dflags) +mo_wordSQuot dflags = MO_S_Quot (wordWidth dflags) +mo_wordSRem dflags = MO_S_Rem (wordWidth dflags) +mo_wordSNeg dflags = MO_S_Neg (wordWidth dflags) +mo_wordUQuot dflags = MO_U_Quot (wordWidth dflags) +mo_wordURem dflags = MO_U_Rem (wordWidth dflags) + +mo_wordSGe dflags = MO_S_Ge (wordWidth dflags) +mo_wordSLe dflags = MO_S_Le (wordWidth dflags) +mo_wordSGt dflags = MO_S_Gt (wordWidth dflags) +mo_wordSLt dflags = MO_S_Lt (wordWidth dflags) + +mo_wordUGe dflags = MO_U_Ge (wordWidth dflags) +mo_wordULe dflags = MO_U_Le (wordWidth dflags) +mo_wordUGt dflags = MO_U_Gt (wordWidth dflags) +mo_wordULt dflags = MO_U_Lt (wordWidth dflags) + +mo_wordAnd dflags = MO_And (wordWidth dflags) +mo_wordOr dflags = MO_Or (wordWidth dflags) +mo_wordXor dflags = MO_Xor (wordWidth dflags) +mo_wordNot dflags = MO_Not (wordWidth dflags) +mo_wordShl dflags = MO_Shl (wordWidth dflags) +mo_wordSShr dflags = MO_S_Shr (wordWidth dflags) +mo_wordUShr dflags = MO_U_Shr (wordWidth dflags) + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord dflags = MO_UU_Conv W8 (wordWidth dflags) +mo_s_8ToWord dflags = MO_SS_Conv W8 (wordWidth dflags) +mo_u_16ToWord dflags = MO_UU_Conv W16 (wordWidth dflags) +mo_s_16ToWord dflags = MO_SS_Conv W16 (wordWidth dflags) +mo_s_32ToWord dflags = MO_SS_Conv W32 (wordWidth dflags) +mo_u_32ToWord dflags = MO_UU_Conv W32 (wordWidth dflags) + +mo_WordTo8 dflags = MO_UU_Conv (wordWidth dflags) W8 +mo_WordTo16 dflags = MO_UU_Conv (wordWidth dflags) W16 +mo_WordTo32 dflags = MO_UU_Conv (wordWidth dflags) W32 +mo_WordTo64 dflags = MO_UU_Conv (wordWidth dflags) W64 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + MO_F_Add _ -> True + MO_F_Mul _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. 
This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isAssociativeMachOp :: MachOp -> Bool
+isAssociativeMachOp mop =
+  case mop of
+        MO_Add {} -> True       -- NB: does not include
+        MO_Mul {} -> True       -- floating point!
+        MO_And {} -> True
+        MO_Or  {} -> True
+        MO_Xor {} -> True
+        _other    -> False
+
+
+-- ----------------------------------------------------------------------------
+-- isComparisonMachOp
+
+{- |
+Returns 'True' if the MachOp is a comparison.
+
+If in doubt, return 'False'.  This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isComparisonMachOp :: MachOp -> Bool
+isComparisonMachOp mop =
+  case mop of
+    MO_Eq   _  -> True
+    MO_Ne   _  -> True
+    MO_S_Ge _  -> True
+    MO_S_Le _  -> True
+    MO_S_Gt _  -> True
+    MO_S_Lt _  -> True
+    MO_U_Ge _  -> True
+    MO_U_Le _  -> True
+    MO_U_Gt _  -> True
+    MO_U_Lt _  -> True
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+{- |
+Returns @Just w@ if the operation is an integer comparison with width
+@w@, or @Nothing@ otherwise.
+-}
+maybeIntComparison :: MachOp -> Maybe Width
+maybeIntComparison mop =
+  case mop of
+    MO_Eq   w -> Just w
+    MO_Ne   w -> Just w
+    MO_S_Ge w -> Just w
+    MO_S_Le w -> Just w
+    MO_S_Gt w -> Just w
+    MO_S_Lt w -> Just w
+    MO_U_Ge w -> Just w
+    MO_U_Le w -> Just w
+    MO_U_Gt w -> Just w
+    MO_U_Lt w -> Just w
+    _ -> Nothing
+
+isFloatComparison :: MachOp -> Bool
+isFloatComparison mop =
+  case mop of
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+-- -----------------------------------------------------------------------------
+-- Inverting conditions
+
+-- Sometimes it's useful to be able to invert the sense of a
+-- condition.  Not all conditional tests are invertible: in
+-- particular, floating point conditionals cannot be inverted, because
+-- there exist floating-point values which return False for both senses
+-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)).
+
+maybeInvertComparison :: MachOp -> Maybe MachOp
+maybeInvertComparison op
+  = case op of  -- None of these Just cases include floating point
+        MO_Eq r   -> Just (MO_Ne r)
+        MO_Ne r   -> Just (MO_Eq r)
+        MO_U_Lt r -> Just (MO_U_Ge r)
+        MO_U_Gt r -> Just (MO_U_Le r)
+        MO_U_Le r -> Just (MO_U_Gt r)
+        MO_U_Ge r -> Just (MO_U_Lt r)
+        MO_S_Lt r -> Just (MO_S_Ge r)
+        MO_S_Gt r -> Just (MO_S_Le r)
+        MO_S_Le r -> Just (MO_S_Gt r)
+        MO_S_Ge r -> Just (MO_S_Lt r)
+        _other    -> Nothing
+
+-- ----------------------------------------------------------------------------
+-- machOpResultType
+
+{- |
+Returns the MachRep of the result of a MachOp.
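+
+For example, a comparison such as MO_S_Lt produces a word-sized result
+(see comparisonResultRep below), whereas MO_Add preserves the type of its
+first argument, so GC-pointer-hood is not lost.  (This is an informative
+summary of the cases below.)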
+-} +machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType +machOpResultType dflags mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep dflags + MO_Ne {} -> comparisonResultRep dflags + MO_S_Ge {} -> comparisonResultRep dflags + MO_S_Le {} -> comparisonResultRep dflags + MO_S_Gt {} -> comparisonResultRep dflags + MO_S_Lt {} -> comparisonResultRep dflags + + MO_U_Ge {} -> comparisonResultRep dflags + MO_U_Le {} -> comparisonResultRep dflags + MO_U_Gt {} -> comparisonResultRep dflags + MO_U_Lt {} -> comparisonResultRep dflags + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep dflags + MO_F_Ne {} -> comparisonResultRep dflags + MO_F_Ge {} -> comparisonResultRep dflags + MO_F_Le {} -> comparisonResultRep dflags + MO_F_Gt {} -> comparisonResultRep dflags + MO_F_Lt {} -> comparisonResultRep dflags + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_XX_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + + MO_V_Insert l w -> cmmVec l (cmmBits w) + MO_V_Extract _ w -> cmmBits w + + MO_V_Add l w -> cmmVec l (cmmBits w) + MO_V_Sub l w -> cmmVec l (cmmBits w) + MO_V_Mul l w -> cmmVec l (cmmBits w) + + MO_VS_Quot l w -> cmmVec l (cmmBits w) + MO_VS_Rem l w -> cmmVec l (cmmBits w) + MO_VS_Neg l w -> cmmVec l (cmmBits w) + + MO_VU_Quot l w -> cmmVec l (cmmBits w) + MO_VU_Rem l w -> cmmVec l (cmmBits w) + + MO_VF_Insert l w -> cmmVec l (cmmFloat w) + MO_VF_Extract _ w -> cmmFloat w + + MO_VF_Add l w -> cmmVec l (cmmFloat w) + MO_VF_Sub l w -> cmmVec l (cmmFloat w) + MO_VF_Mul l w -> cmmVec l (cmmFloat w) + MO_VF_Quot l w -> cmmVec l (cmmFloat w) + MO_VF_Neg l w -> cmmVec l (cmmFloat w) + + MO_AlignmentCheck _ _ -> ty1 + where + (ty1:_) = tys + +comparisonResultRep :: DynFlags -> CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. 
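+--
+-- As a sketch of that check (see lintCmmExpr in GHC.Cmm.Lint): an
+-- application (CmmMachOp op args) is accepted only when
+--
+--      map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op
+--
+-- holds for the DynFlags in force.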
+ +machOpArgReps :: DynFlags -> MachOp -> [Width] +machOpArgReps dflags op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r, wordWidth dflags] + MO_U_Shr r -> [r, wordWidth dflags] + MO_S_Shr r -> [r, wordWidth dflags] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_XX_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags] + + MO_V_Add _ r -> [r,r] + MO_V_Sub _ r -> [r,r] + MO_V_Mul _ r -> [r,r] + + MO_VS_Quot _ r -> [r,r] + MO_VS_Rem _ r -> [r,r] + MO_VS_Neg _ r -> [r] + + MO_VU_Quot _ r -> [r,r] + MO_VU_Rem _ r -> [r,r] + + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] + + MO_VF_Add _ r -> [r,r] + MO_VF_Sub _ r -> [r,r] + MO_VF_Mul _ r -> [r,r] + MO_VF_Quot _ r -> [r,r] + MO_VF_Neg _ r -> [r] + + MO_AlignmentCheck _ r -> [r] + +----------------------------------------------------------------------------- +-- CallishMachOp +----------------------------------------------------------------------------- + +-- CallishMachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. +data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Asinh + | MO_F64_Acosh + | MO_F64_Atanh + | MO_F64_Log + | MO_F64_Log1P + | MO_F64_Exp + | MO_F64_ExpM1 + | MO_F64_Fabs + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Asinh + | MO_F32_Acosh + | MO_F32_Atanh + | MO_F32_Log + | MO_F32_Log1P + | MO_F32_Exp + | MO_F32_ExpM1 + | MO_F32_Fabs + | MO_F32_Sqrt + + | MO_UF_Conv Width + + | MO_S_Mul2 Width + | MO_S_QuotRem Width + | MO_U_QuotRem Width + | MO_U_QuotRem2 Width + | MO_Add2 Width + | MO_AddWordC Width + | MO_SubWordC Width + | MO_AddIntC Width + | MO_SubIntC Width + | MO_U_Mul2 Width + + | MO_ReadBarrier + | MO_WriteBarrier + | MO_Touch -- Keep variables live (when using interior pointers) + + -- Prefetch + | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not + -- program behavior. + -- the Int can be 0-3. Needs to be known at compile time + -- to interact with code generation correctly. 
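+                       -- As a hedged, backend-specific illustration
+                       -- (assuming the x86 back end): level 0 maps to a
+                       -- non-temporal prefetch and level 3 to the
+                       -- strongest locality hint (prefetcht0).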
+                       -- TODO: add support for prefetch WRITES,
+                       -- currently only exposes prefetch reads, which
+                       -- would cover the majority of use cases in GHC anyway
+
+
+  -- These three MachOps are parameterised by the known alignment
+  -- of the destination and source (for memcpy/memmove) pointers.
+  -- This information may be used for optimisation in backends.
+  | MO_Memcpy Int
+  | MO_Memset Int
+  | MO_Memmove Int
+  | MO_Memcmp Int
+
+  | MO_PopCnt Width
+  | MO_Pdep Width
+  | MO_Pext Width
+  | MO_Clz Width
+  | MO_Ctz Width
+
+  | MO_BSwap Width
+  | MO_BRev Width
+
+  -- Atomic read-modify-write.
+  | MO_AtomicRMW Width AtomicMachOp
+  | MO_AtomicRead Width
+  | MO_AtomicWrite Width
+  | MO_Cmpxchg Width
+  deriving (Eq, Show)
+
+-- | The operation to perform atomically.
+data AtomicMachOp =
+      AMO_Add
+    | AMO_Sub
+    | AMO_And
+    | AMO_Nand
+    | AMO_Or
+    | AMO_Xor
+      deriving (Eq, Show)
+
+pprCallishMachOp :: CallishMachOp -> SDoc
+pprCallishMachOp mo = text (show mo)
+
+callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
+callishMachOpHints op = case op of
+  MO_Memcpy _  -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memset _  -> ([], [AddrHint,NoHint,NoHint])
+  MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memcmp _  -> ([], [AddrHint, AddrHint, NoHint])
+  _            -> ([],[])
+  -- empty lists indicate NoHint
+
+-- | The alignment of a 'memcpy'-ish operation.
+machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
+machOpMemcpyishAlign op = case op of
+  MO_Memcpy  align -> Just align
+  MO_Memset  align -> Just align
+  MO_Memmove align -> Just align
+  MO_Memcmp  align -> Just align
+  _                -> Nothing
diff --git a/compiler/GHC/Cmm/Monad.hs b/compiler/GHC/Cmm/Monad.hs
new file mode 100644
index 0000000000..6b8d00a118
--- /dev/null
+++ b/compiler/GHC/Cmm/Monad.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+-- A Parser monad with access to the 'DynFlags'.
+--
+-- The 'P' monad only has access to the subset of 'DynFlags'
+-- required for parsing Haskell.
+
+-- The parser for C-- requires access to a lot more of the 'DynFlags',
+-- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
+-----------------------------------------------------------------------------
+module GHC.Cmm.Monad (
+    PD(..)
+  , liftP
+  ) where
+
+import GhcPrelude
+
+import Control.Monad
+import qualified Control.Monad.Fail as MonadFail
+
+import DynFlags
+import Lexer
+
+newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a }
+
+instance Functor PD where
+  fmap = liftM
+
+instance Applicative PD where
+  pure = returnPD
+  (<*>) = ap
+
+instance Monad PD where
+  (>>=) = thenPD
+#if !MIN_VERSION_base(4,13,0)
+  fail = MonadFail.fail
+#endif
+
+instance MonadFail.MonadFail PD where
+  fail = failPD
+
+liftP :: P a -> PD a
+liftP (P f) = PD $ \_ s -> f s
+
+returnPD :: a -> PD a
+returnPD = liftP . return
+
+thenPD :: PD a -> (a -> PD b) -> PD b
+(PD m) `thenPD` k = PD $ \d s ->
+  case m d s of
+    POk s1 a   -> unPD (k a) d s1
+    PFailed s1 -> PFailed s1
+
+failPD :: String -> PD a
+failPD = liftP .
fail + +instance HasDynFlags PD where + getDynFlags = PD $ \d s -> POk s d diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs new file mode 100644 index 0000000000..bb74647910 --- /dev/null +++ b/compiler/GHC/Cmm/Node.hs @@ -0,0 +1,724 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ExplicitForAll #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} + + +-- CmmNode type for representation using Hoopl graphs. + +module GHC.Cmm.Node ( + CmmNode(..), CmmFormal, CmmActual, CmmTickish, + UpdFrameOffset, Convention(..), + ForeignConvention(..), ForeignTarget(..), foreignTargetHints, + CmmReturnInfo(..), + mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, + mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors, + + -- * Tick scopes + CmmTickScope(..), isTickSubScope, combineTickScopes, + ) where + +import GhcPrelude hiding (succ) + +import GHC.Platform.Regs +import GHC.Cmm.Expr +import GHC.Cmm.Switch +import DynFlags +import FastString +import ForeignCall +import Outputable +import GHC.Runtime.Layout +import CoreSyn (Tickish) +import qualified Unique as U + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import Data.Maybe +import Data.List (tails,sortBy) +import Unique (nonDetCmpUnique) +import Util + + +------------------------ +-- CmmNode + +#define ULabel {-# UNPACK #-} !Label + +data CmmNode e x where + CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O + + CmmComment :: FastString -> CmmNode O O + + -- Tick annotation, covering Cmm code in our tick scope. We only + -- expect non-code @Tickish@ at this point (e.g. @SourceNote@). + -- See Note [CmmTick scoping details] + CmmTick :: !CmmTickish -> CmmNode O O + + -- Unwind pseudo-instruction, encoding stack unwinding + -- instructions for a debugger. This describes how to reconstruct + -- the "old" value of a register if we want to navigate the stack + -- up one frame. Having unwind information for @Sp@ will allow the + -- debugger to "walk" the stack. + -- + -- See Note [What is this unwinding business?] in Debug + CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O + + CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O + -- Assign to register + + CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O + -- Assign to memory location. Size is + -- given by cmmExprType of the rhs. + + CmmUnsafeForeignCall :: -- An unsafe foreign call; + -- see Note [Foreign calls] + -- Like a "fat machine instruction"; can occur + -- in the middle of a block + ForeignTarget -> -- call target + [CmmFormal] -> -- zero or more results + [CmmActual] -> -- zero or more arguments + CmmNode O O + -- Semantics: clobbers any GlobalRegs for which callerSaves r == True + -- See Note [Unsafe foreign calls clobber caller-save registers] + -- + -- Invariant: the arguments and the ForeignTarget must not + -- mention any registers for which GHC.Platform.callerSaves + -- is True. See Note [Register Parameter Passing]. 
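+  --
+  -- As an illustrative sketch (the names dst, src and n are
+  -- hypothetical): a memcpy emitted by the code generator appears
+  -- here as
+  --
+  --      CmmUnsafeForeignCall (PrimTarget (MO_Memcpy 8)) [] [dst, src, n]
+  --
+  -- a known machine operation with no results and three arguments.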
+ + CmmBranch :: ULabel -> CmmNode O C + -- Goto another block in the same procedure + + CmmCondBranch :: { -- conditional branch + cml_pred :: CmmExpr, + cml_true, cml_false :: ULabel, + cml_likely :: Maybe Bool -- likely result of the conditional, + -- if known + } -> CmmNode O C + + CmmSwitch + :: CmmExpr -- Scrutinee, of some integral type + -> SwitchTargets -- Cases. See [Note SwitchTargets] + -> CmmNode O C + + CmmCall :: { -- A native call or tail call + cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! + + cml_cont :: Maybe Label, + -- Label of continuation (Nothing for return or tail call) + -- + -- Note [Continuation BlockIds]: these BlockIds are called + -- Continuation BlockIds, and are the only BlockIds that can + -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or + -- (CmmStackSlot (Young b) _). + + cml_args_regs :: [GlobalReg], + -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed + -- to the call. This is essential information for the + -- native code generator's register allocator; without + -- knowing which GlobalRegs are live it has to assume that + -- they are all live. This list should only include + -- GlobalRegs that are mapped to real machine registers on + -- the target platform. + + cml_args :: ByteOff, + -- Byte offset, from the *old* end of the Area associated with + -- the Label (if cml_cont = Nothing, then Old area), of + -- youngest outgoing arg. Set the stack pointer to this before + -- transferring control. + -- (NB: an update frame might also have been stored in the Old + -- area, but it'll be in an older part than the args.) + + cml_ret_args :: ByteOff, + -- For calls *only*, the byte offset for youngest returned value + -- This is really needed at the *return* point rather than here + -- at the call, but in practice it's convenient to record it here. + + cml_ret_off :: ByteOff + -- For calls *only*, the byte offset of the base of the frame that + -- must be described by the info table for the return point. + -- The older words are an update frames, which have their own + -- info-table and layout information + + -- From a liveness point of view, the stack words older than + -- cml_ret_off are treated as live, even if the sequel of + -- the call goes into a loop. + } -> CmmNode O C + + CmmForeignCall :: { -- A safe foreign call; see Note [Foreign calls] + -- Always the last node of a block + tgt :: ForeignTarget, -- call target and convention + res :: [CmmFormal], -- zero or more results + args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing] + succ :: ULabel, -- Label of continuation + ret_args :: ByteOff, -- same as cml_ret_args + ret_off :: ByteOff, -- same as cml_ret_off + intrbl:: Bool -- whether or not the call is interruptible + } -> CmmNode O C + +{- Note [Foreign calls] +~~~~~~~~~~~~~~~~~~~~~~~ +A CmmUnsafeForeignCall is used for *unsafe* foreign calls; +a CmmForeignCall call is used for *safe* foreign calls. + +Unsafe ones are mostly easy: think of them as a "fat machine +instruction". In particular, they do *not* kill all live registers, +just the registers they return to (there was a bit of code in GHC that +conservatively assumed otherwise.) However, see [Register parameter passing]. + +Safe ones are trickier. 
A safe foreign call
+     r = f(x)
+ultimately expands to
+     push "return address"          -- Never used to return to;
+                                    -- just points to an info table
+     save registers into TSO
+     call suspendThread
+     r = f(x)                       -- Make the call
+     call resumeThread
+     restore registers
+     pop "return address"
+We cannot "lower" a safe foreign call to this sequence of Cmms, because
+after we've saved Sp all the Cmm optimiser's assumptions are broken.
+
+Note that a safe foreign call needs an info table.
+
+So Safe Foreign Calls must remain as last nodes until the stack is
+made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above
+sequence.
+-}
+
+{- Note [Unsafe foreign calls clobber caller-save registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A foreign call is defined to clobber any GlobalRegs that are mapped to
+caller-saves machine registers (according to the prevailing C ABI).
+GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
+
+This is a design choice that makes it easier to generate code later.
+We could instead choose to say that foreign calls do *not* clobber
+caller-saves regs, but then we would have to figure out which regs
+were live across the call later and insert some saves/restores.
+
+Furthermore when we generate code we never have any GlobalRegs live
+across a call, because they are always copied-in to LocalRegs and
+copied-out again before making a call/jump.  So all we have to do is
+avoid any code motion that would make a caller-saves GlobalReg live
+across a foreign call during subsequent optimisations.
+-}
+
+{- Note [Register parameter passing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On certain architectures, some registers are utilized for parameter
+passing in the C calling convention.  For example, in the x86-64 Linux
+convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
+argument passing.  These are registers R3-R6, which our generated
+code may also be using; as a result, it's necessary to save these
+values before doing a foreign call.  This is done during initial
+code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils.  However,
+one result of doing this is that the contents of these registers
+may mysteriously change if referenced inside the arguments.  This
+is dangerous, so you'll need to disable inlining much in the same
+way as is done in GHC.Cmm.Opt currently.  We should fix this!
+-}
+
+---------------------------------------------
+-- Eq instance of CmmNode
+
+deriving instance Eq (CmmNode e x)
+
+----------------------------------------------
+-- Hoopl instances of CmmNode
+
+instance NonLocal CmmNode where
+  entryLabel (CmmEntry l _) = l
+
+  successors (CmmBranch l) = [l]
+  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
+  successors (CmmSwitch _ ids) = switchTargetsToList ids
+  successors (CmmCall {cml_cont=l}) = maybeToList l
+  successors (CmmForeignCall {succ=l}) = [l]
+
+
+--------------------------------------------------
+-- Various helper types
+
+type CmmActual = CmmExpr
+type CmmFormal = LocalReg
+
+type UpdFrameOffset = ByteOff
+
+-- | A convention maps a list of values (function arguments or return
+-- values) to registers or stack locations.
+data Convention
+  = NativeDirectCall
+       -- ^ top-level Haskell functions use @NativeDirectCall@, which
+       -- maps arguments to registers starting with R2, according to
+       -- how many registers are available on the platform.
This + -- convention ignores R1, because for a top-level function call + -- the function closure is implicit, and doesn't need to be passed. + | NativeNodeCall + -- ^ non-top-level Haskell functions, which pass the address of + -- the function closure in R1 (regardless of whether R1 is a + -- real register or not), and the rest of the arguments in + -- registers or on the stack. + | NativeReturn + -- ^ a native return. The convention for returns depends on + -- how many values are returned: for just one value returned, + -- the appropriate register is used (R1, F1, etc.). regardless + -- of whether it is a real register or not. For multiple + -- values returned, they are mapped to registers or the stack. + | Slow + -- ^ Slow entry points: all args pushed on the stack + | GC + -- ^ Entry to the garbage collector: uses the node reg! + -- (TODO: I don't think we need this --SDM) + deriving( Eq ) + +data ForeignConvention + = ForeignConvention + CCallConv -- Which foreign-call convention + [ForeignHint] -- Extra info about the args + [ForeignHint] -- Extra info about the result + CmmReturnInfo + deriving Eq + +data CmmReturnInfo + = CmmMayReturn + | CmmNeverReturns + deriving ( Eq ) + +data ForeignTarget -- The target of a foreign call + = ForeignTarget -- A foreign procedure + CmmExpr -- Its address + ForeignConvention -- Its calling convention + | PrimTarget -- A possibly-side-effecting machine operation + CallishMachOp -- Which one + deriving Eq + +foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint]) +foreignTargetHints target + = ( res_hints ++ repeat NoHint + , arg_hints ++ repeat NoHint ) + where + (res_hints, arg_hints) = + case target of + PrimTarget op -> callishMachOpHints op + ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) -> + (res_hints, arg_hints) + +-------------------------------------------------- +-- Instances of register and slot users / definers + +instance UserOfRegs LocalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt} -> fold f z tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. UserOfRegs LocalReg a + => (b -> LocalReg -> b) -> b -> a -> b + fold f z n = foldRegsUsed dflags f z n + +instance UserOfRegs GlobalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. 
UserOfRegs GlobalReg a + => (b -> GlobalReg -> b) -> b -> a -> b + fold f z n = foldRegsUsed dflags f z n + +instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where + -- The (Ord r) in the context is necessary here + -- See Note [Recursive superclasses] in TcInstDcls + foldRegsUsed _ _ !z (PrimTarget _) = z + foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e + +instance DefinerOfRegs LocalReg (CmmNode e x) where + foldRegsDefd dflags f !z n = case n of + CmmAssign lhs _ -> fold f z lhs + CmmUnsafeForeignCall _ fs _ -> fold f z fs + CmmForeignCall {res=res} -> fold f z res + _ -> z + where fold :: forall a b. DefinerOfRegs LocalReg a + => (b -> LocalReg -> b) -> b -> a -> b + fold f z n = foldRegsDefd dflags f z n + +instance DefinerOfRegs GlobalReg (CmmNode e x) where + foldRegsDefd dflags f !z n = case n of + CmmAssign lhs _ -> fold f z lhs + CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt) + CmmCall {} -> fold f z activeRegs + CmmForeignCall {} -> fold f z activeRegs + -- See Note [Safe foreign calls clobber STG registers] + _ -> z + where fold :: forall a b. DefinerOfRegs GlobalReg a + => (b -> GlobalReg -> b) -> b -> a -> b + fold f z n = foldRegsDefd dflags f z n + + platform = targetPlatform dflags + activeRegs = activeStgRegs platform + activeCallerSavesRegs = filter (callerSaves platform) activeRegs + + foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = [] + foreignTargetRegs _ = activeCallerSavesRegs + +-- Note [Safe foreign calls clobber STG registers] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- During stack layout phase every safe foreign call is expanded into a block +-- that contains unsafe foreign call (instead of safe foreign call) and ends +-- with a normal call (See Note [Foreign calls]). This means that we must +-- treat safe foreign call as if it was a normal call (because eventually it +-- will be). This is important if we try to run sinking pass before stack +-- layout phase. Consider this example of what might go wrong (this is cmm +-- code from stablename001 test). Here is code after common block elimination +-- (before stack layout): +-- +-- c1q6: +-- _s1pf::P64 = R1; +-- _c1q8::I64 = performMajorGC; +-- I64[(young<c1q9> + 8)] = c1q9; +-- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...) +-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; +-- c1q9: +-- I64[(young<c1qb> + 8)] = c1qb; +-- R1 = _s1pc::P64; +-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; +-- +-- If we run sinking pass now (still before stack layout) we will get this: +-- +-- c1q6: +-- I64[(young<c1q9> + 8)] = c1q9; +-- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...) +-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8; +-- c1q9: +-- I64[(young<c1qb> + 8)] = c1qb; +-- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call +-- R1 = _s1pc::P64; +-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8; +-- +-- Notice that _s1pf was sunk past a foreign call. 
When we run stack layout, the
+-- safe call to performMajorGC will be turned into:
+--
+-- c1q6:
+--    _s1pc::P64 = P64[Sp + 8];
+--    I64[Sp - 8] = c1q9;
+--    Sp = Sp - 8;
+--    I64[I64[CurrentTSO + 24] + 16] = Sp;
+--    P64[CurrentNursery + 8] = Hp + 8;
+--    (_u1qI::I64) = call "ccall" arg hints:  [PtrHint,]
+--                        result hints:  [PtrHint] suspendThread(BaseReg, 0);
+--    call "ccall" arg hints:  []  result hints:  [] performMajorGC();
+--    (_u1qJ::I64) = call "ccall" arg hints:  [PtrHint]
+--                        result hints:  [PtrHint] resumeThread(_u1qI::I64);
+--    BaseReg = _u1qJ::I64;
+--    _u1qK::P64 = CurrentTSO;
+--    _u1qL::P64 = I64[_u1qK::P64 + 24];
+--    Sp = I64[_u1qL::P64 + 16];
+--    SpLim = _u1qL::P64 + 192;
+--    HpAlloc = 0;
+--    Hp = I64[CurrentNursery + 8] - 8;
+--    HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
+--    call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
+-- c1q9:
+--    I64[(young<c1qb> + 8)] = c1qb;
+--    _s1pf::P64 = R1;         <------ INCORRECT!
+--    R1 = _s1pc::P64;
+--    call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that c1q6 now ends with a call.  Sinking _s1pf::P64 = R1 past that
+-- call is clearly incorrect.  This is what would happen if we assumed that
+-- a safe foreign call has the same semantics as an unsafe foreign call.  To
+-- prevent this we need to treat a safe foreign call as if it was a normal call.
+
+-----------------------------------
+-- mapping Expr in GHC.Cmm.Node
+
+mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
+mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c
+mapForeignTarget _   m@(PrimTarget _)    = m
+
+wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
+-- Take a transformer on expressions and apply it recursively.
+-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
+-- then uses f to rewrite the resulting expression
+wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
+wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
+wrapRecExp f e                 = f e
+
+mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExp _ f@(CmmEntry{})                     = f
+mapExp _ m@(CmmComment _)                   = m
+mapExp _ m@(CmmTick _)                      = m
+mapExp f   (CmmUnwind regs)                 = CmmUnwind (map (fmap (fmap f)) regs)
+mapExp f   (CmmAssign r e)                  = CmmAssign r (f e)
+mapExp f   (CmmStore addr e)                = CmmStore (f addr) (f e)
+mapExp f   (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
+mapExp _ l@(CmmBranch _)                    = l
+mapExp f   (CmmCondBranch e ti fi l)        = CmmCondBranch (f e) ti fi l
+mapExp f   (CmmSwitch e ids)                = CmmSwitch (f e) ids
+mapExp f n@CmmCall {cml_target=tgt}         = n{cml_target = f tgt}
+mapExp f   (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
+
+mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExpDeep f = mapExp $ wrapRecExp f
+
+------------------------------------------------------------------------
+-- mapping Expr in GHC.Cmm.Node, but not performing allocation if no changes
+
+mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
+mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
+mapForeignTargetM _ (PrimTarget _)      = Nothing
+
+wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
+-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
+-- then gives f a chance to rewrite the resulting expression
+wrapRecExpM f n@(CmmMachOp
op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) +wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr) +wrapRecExpM f e = f e + +mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpM _ (CmmEntry{}) = Nothing +mapExpM _ (CmmComment _) = Nothing +mapExpM _ (CmmTick _) = Nothing +mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs +mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e +mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e] +mapExpM _ (CmmBranch _) = Nothing +mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e +mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e +mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt +mapExpM f (CmmUnsafeForeignCall tgt fs as) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as)) + Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as +mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl) + Nothing -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as + +-- share as much as possible +mapListM :: (a -> Maybe a) -> [a] -> Maybe [a] +mapListM f xs = let (b, r) = mapListT f xs + in if b then Just r else Nothing + +mapListJ :: (a -> Maybe a) -> [a] -> [a] +mapListJ f xs = snd (mapListT f xs) + +mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a]) +mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs)) + where g (_, y, Nothing) (True, ys) = (True, y:ys) + g (_, _, Just y) (True, ys) = (True, y:ys) + g (ys', _, Nothing) (False, _) = (False, ys') + g (_, _, Just y) (False, ys) = (True, y:ys) + +mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpDeepM f = mapExpM $ wrapRecExpM f + +----------------------------------- +-- folding Expr in GHC.Cmm.Node + +foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z +foldExpForeignTarget exp (ForeignTarget e _) z = exp e z +foldExpForeignTarget _ (PrimTarget _) z = z + +-- Take a folder on expressions and apply it recursively. +-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad +-- itself, delegating all the other CmmExpr forms to 'f'. 
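+--
+-- A hypothetical use (a sketch, not part of this patch): collecting every
+-- literal mentioned in a node, including literals nested inside loads and
+-- MachOp arguments:
+--
+--      nodeLits :: CmmNode e x -> [CmmLit]
+--      nodeLits node = foldExpDeep addLit node []
+--        where addLit (CmmLit l) acc = l : acc
+--              addLit _          acc = acc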
+wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es +wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z) +wrapRecExpf f e z = f e z + +foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExp _ (CmmEntry {}) z = z +foldExp _ (CmmComment {}) z = z +foldExp _ (CmmTick {}) z = z +foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs) +foldExp f (CmmAssign _ e) z = f e z +foldExp f (CmmStore addr e) z = f addr $ f e z +foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as +foldExp _ (CmmBranch _) z = z +foldExp f (CmmCondBranch e _ _ _) z = f e z +foldExp f (CmmSwitch e _) z = f e z +foldExp f (CmmCall {cml_target=tgt}) z = f tgt z +foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args + +foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExpDeep f = foldExp (wrapRecExpf f) + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l +mapSuccessors f (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets f ids) +mapSuccessors _ n = n + +mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C + -> (CmmNode O C, [a]) +mapCollectSuccessors f (CmmBranch bid) + = let (bid', acc) = f bid in (CmmBranch bid', [acc]) +mapCollectSuccessors f (CmmCondBranch p y n l) + = let (bidt, acct) = f y + (bidf, accf) = f n + in (CmmCondBranch p bidt bidf l, [accf, acct]) +mapCollectSuccessors f (CmmSwitch e ids) + = let lbls = switchTargetsToList ids :: [Label] + lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a) + in ( CmmSwitch e + (mapSwitchTargets + (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids) + , map snd (mapElems lblMap) + ) +mapCollectSuccessors _ n = (n, []) + +-- ----------------------------------------------------------------------------- + +-- | Tickish in Cmm context (annotations only) +type CmmTickish = Tickish () + +-- | Tick scope identifier, allowing us to reason about what +-- annotations in a Cmm block should scope over. We especially take +-- care to allow optimisations to reorganise blocks without losing +-- tick association in the process. +data CmmTickScope + = GlobalScope + -- ^ The global scope is the "root" of the scope graph. Every + -- scope is a sub-scope of the global scope. It doesn't make sense + -- to add ticks to this scope. On the other hand, this means that + -- setting this scope on a block means no ticks apply to it. + + | SubScope !U.Unique CmmTickScope + -- ^ Constructs a new sub-scope to an existing scope. This allows + -- us to translate Core-style scoping rules (see @tickishScoped@) + -- into the Cmm world. Suppose the following code: + -- + -- tick<1> case ... of + -- A -> tick<2> ... + -- B -> tick<3> ... + -- + -- We want the top-level tick annotation to apply to blocks + -- generated for the A and B alternatives. We can achieve that by + -- generating tick<1> into a block with scope a, while the code + -- for alternatives A and B gets generated into sub-scopes a/b and + -- a/c respectively. + + | CombinedScope CmmTickScope CmmTickScope + -- ^ A combined scope scopes over everything that the two given + -- scopes cover. It is therefore a sub-scope of either scope. This + -- is required for optimisations. 
Consider common block elimination:
+ --
+ --     A -> tick<2> case ... of
+ --            C -> [common]
+ --     B -> tick<3> case ... of
+ --            D -> [common]
+ --
+ -- We will generate code for the C and D alternatives, and figure
+ -- out afterwards that it's actually common code. Scoping rules
+ -- dictate that the resulting common block needs to be covered by
+ -- both tick<2> and tick<3>, therefore we need to construct a
+ -- scope that is a child of *both* scopes. Combined scopes let us
+ -- do exactly that: if we assign the scopes a/c and b/d to the
+ -- common-ed up blocks, the new block can get the combined tick
+ -- scope a/c+b/d, which both tick<2> and tick<3> apply to.
+
+-- Note [CmmTick scoping details]:
+--
+-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
+-- same block. Note that as a result of this, optimisations making
+-- tick scopes more specific can *reduce* the amount of code a tick
+-- scopes over. Fixing this would require a separate @CmmTickScope@
+-- field for @CmmTick@. Right now we do not do this simply because I
+-- couldn't find an example where it actually mattered -- multiple
+-- blocks within the same scope generally jump to each other, which
+-- prevents common block elimination from happening in the first
+-- place. But this is not a strong reason, so if Cmm optimisations
+-- become more involved in future this might have to be revisited.
+
+-- | Output all scope paths.
+scopeToPaths :: CmmTickScope -> [[U.Unique]]
+scopeToPaths GlobalScope = [[]]
+scopeToPaths (SubScope u s) = map (u:) (scopeToPaths s)
+scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2
+
+-- | Returns the head uniques of the scopes. This is based on the
+-- assumption that the @Unique@ of @SubScope@ identifies the
+-- underlying super-scope. Used for efficient equality and comparison,
+-- see below.
+scopeUniques :: CmmTickScope -> [U.Unique]
+scopeUniques GlobalScope = []
+scopeUniques (SubScope u _) = [u]
+scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2
+
+-- Equality and ordering are based on the head uniques defined above.
+-- We take care to short-cut the (extremely) common cases.
+instance Eq CmmTickScope where
+  GlobalScope    == GlobalScope     = True
+  GlobalScope    == _               = False
+  _              == GlobalScope     = False
+  (SubScope u _) == (SubScope u' _) = u == u'
+  (SubScope _ _) == _               = False
+  _              == (SubScope _ _)  = False
+  scope          == scope'          =
+    sortBy nonDetCmpUnique (scopeUniques scope) ==
+    sortBy nonDetCmpUnique (scopeUniques scope')
+    -- This is still deterministic because
+    -- the order is the same for equal lists
+
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+-- See Note [No Ord for Unique]
+instance Ord CmmTickScope where
+  compare GlobalScope GlobalScope = EQ
+  compare GlobalScope _           = LT
+  compare _ GlobalScope           = GT
+  compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
+  compare scope scope' = cmpList nonDetCmpUnique
+    (sortBy nonDetCmpUnique $ scopeUniques scope)
+    (sortBy nonDetCmpUnique $ scopeUniques scope')
+
+instance Outputable CmmTickScope where
+  ppr GlobalScope     = text "global"
+  ppr (SubScope us GlobalScope)
+                      = ppr us
+  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
+  ppr combined        = parens $ hcat $ punctuate (char '+') $
+                        map (hcat . punctuate (char '/') . map ppr . reverse) $
+                        scopeToPaths combined
+
+-- | Checks whether two tick scopes are sub-scopes of each other. True
+-- if the two scopes are equal.
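+--
+-- For example (with hypothetical uniques a, b, c, d, writing a/c for
+-- SubScope c (SubScope a GlobalScope) and similarly b/d):
+--
+--   isTickSubScope (a/c) (SubScope a GlobalScope)     == True
+--   isTickSubScope (SubScope a GlobalScope) (a/c)     == False
+--   isTickSubScope (a/c) (CombinedScope (a/c) (b/d))  == True
+--
+-- i.e. sub-scopes sit inside their parents, and each part of a
+-- combined scope sits inside the combination.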
+isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool +isTickSubScope = cmp + where cmp _ GlobalScope = True + cmp GlobalScope _ = False + cmp (CombinedScope s1 s2) s' = cmp s1 s' && cmp s2 s' + cmp s (CombinedScope s1' s2') = cmp s s1' || cmp s s2' + cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s' + +-- | Combine two tick scopes. The new scope should be sub-scope of +-- both parameters. We simplify automatically if one tick scope is a +-- sub-scope of the other already. +combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope +combineTickScopes s1 s2 + | s1 `isTickSubScope` s2 = s1 + | s2 `isTickSubScope` s1 = s2 + | otherwise = CombinedScope s1 s2 diff --git a/compiler/GHC/Cmm/Opt.hs b/compiler/GHC/Cmm/Opt.hs new file mode 100644 index 0000000000..1db37ae58c --- /dev/null +++ b/compiler/GHC/Cmm/Opt.hs @@ -0,0 +1,423 @@ +----------------------------------------------------------------------------- +-- +-- Cmm optimisation +-- +-- (c) The University of Glasgow 2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Opt ( + constantFoldNode, + constantFoldExpr, + cmmMachOpFold, + cmmMachOpFoldM + ) where + +import GhcPrelude + +import GHC.Cmm.Utils +import GHC.Cmm +import DynFlags +import Util + +import Outputable +import GHC.Platform + +import Data.Bits +import Data.Maybe + + +constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x +constantFoldNode dflags = mapExp (constantFoldExpr dflags) + +constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr +constantFoldExpr dflags = wrapRecExp f + where f (CmmMachOp op args) = cmmMachOpFold dflags op args + f (CmmRegOff r 0) = CmmReg r + f e = e + +-- ----------------------------------------------------------------------------- +-- MachOp constant folder + +-- Now, try to constant-fold the MachOps. The arguments have already +-- been optimized and folded. + +cmmMachOpFold + :: DynFlags + -> MachOp -- The operation from an CmmMachOp + -> [CmmExpr] -- The optimized arguments + -> CmmExpr + +cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args) + +-- Returns Nothing if no changes, useful for Hoopl, also reduces +-- allocation! +cmmMachOpFoldM + :: DynFlags + -> MachOp + -> [CmmExpr] + -> Maybe CmmExpr + +cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] + = Just $ case op of + MO_S_Neg _ -> CmmLit (CmmInt (-x) rep) + MO_Not _ -> CmmLit (CmmInt (complement x) rep) + + -- these are interesting: we must first narrow to the + -- "from" type, in order to truncate to the correct size. + -- The final narrow/widen to the destination type + -- is implicit in the CmmLit. 
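+  -- A worked instance (illustrative): folding MO_SS_Conv W8 W32 over
+  -- the literal 255 first narrows at the source width, and
+  -- narrowS W8 255 == -1, so the clause below produces
+  -- CmmLit (CmmInt (-1) W32): the sign-extension to the wider type
+  -- falls out of re-reading the narrowed literal at W32.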
+ MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + + _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op + + +-- Eliminate conversion NOPs +cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x +cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x + +-- Eliminate nested conversions where possible +cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]] + | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, + Just (_, rep3,signed2) <- isIntConversion conv_outer + = case () of + -- widen then narrow to the same size is a nop + _ | rep1 < rep2 && rep1 == rep3 -> Just x + -- Widen then narrow to different size: collapse to single conversion + -- but remember to use the signedness from the widening, just in case + -- the final conversion is a widen. + | rep1 < rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested widenings: collapse if the signedness is the same + | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested narrowings: collapse + | rep1 > rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x] + | otherwise -> + Nothing + where + isIntConversion (MO_UU_Conv rep1 rep2) + = Just (rep1,rep2,False) + isIntConversion (MO_SS_Conv rep1 rep2) + = Just (rep1,rep2,True) + isIntConversion _ = Nothing + + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv + +-- ToDo: a narrow of a load can be collapsed into a narrow load, right? +-- but what if the architecture only supports word-sized loads, should +-- we do the transformation anyway? + +cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] + = case mop of + -- for comparisons: don't forget to narrow the arguments before + -- comparing, since they might be out of range. + MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags)) + MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags)) + + MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags)) + MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags)) + MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags)) + MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags)) + + MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags)) + MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags)) + MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags)) + MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags)) + + MO_Add r -> Just $ CmmLit (CmmInt (x + y) r) + MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r) + MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r) + MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r) + MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r) + MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r) + MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r) + + MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r) + MO_Or r -> Just $ CmmLit (CmmInt (x .|. 
y) r) + MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r) + + MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r) + MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) + MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r) + + _ -> Nothing + + where + x_u = narrowU xrep x + y_u = narrowU xrep y + x_s = narrowS xrep x + y_s = narrowS xrep y + + +-- When possible, shift the constants to the right-hand side, so that we +-- can match for strength reductions. Note that the code generator will +-- also assume that constants have been shifted to the right when +-- possible. + +cmmMachOpFoldM dflags op [x@(CmmLit _), y] + | not (isLit y) && isCommutableMachOp op + = Just (cmmMachOpFold dflags op [y, x]) + +-- Turn (a+b)+c into a+(b+c) where possible. Because literals are +-- moved to the right, it is more likely that we will find +-- opportunities for constant folding when the expression is +-- right-associated. +-- +-- ToDo: this appears to introduce a quadratic behaviour due to the +-- nested cmmMachOpFold. Can we fix this? +-- +-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 +-- is also a lit (otherwise arg1 would be on the right). If we +-- put arg1 on the left of the rearranged expression, we'll get into a +-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... +-- +-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the +-- PicBaseReg from the corresponding label (or label difference). +-- +cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3] + | mop2 `associates_with` mop1 + && not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]]) + where + MO_Add{} `associates_with` MO_Sub{} = True + mop1 `associates_with` mop2 = + mop1 == mop2 && isAssociativeMachOp mop1 + +-- special case: (a - b) + c ==> a + (c - b) +cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] + | not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]]) + +-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N) +-- +-- this is better because lit+N is a single link-time constant (e.g. a +-- CmmLabelOff), so the right-hand expression needs only one +-- instruction, whereas the left needs two. This happens when pointer +-- tagging gives us label+offset, and PIC turns the label into +-- PicBaseReg + label. 
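+-- A concrete shape of this (illustrative; 'lbl' is a placeholder label):
+-- pointer tagging may leave us with
+--      (PicBaseReg + lbl) + 2
+-- which the equation below reassociates to
+--      PicBaseReg + (lbl + 2)
+-- so the parenthesised part folds to a single CmmLabelOff.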
+-- +cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit] + , CmmLit (CmmInt n rep) ] + | isPicReg pic + = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ] + where off = fromIntegral (narrowS rep n) + +-- Make a RegOff if we can +cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) + +-- Fold label(+/-)offset into a CmmLit where possible + +cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) + + +-- Comparison of literal with widened operand: perform the comparison +-- at the smaller width, as long as the literal is within range. + +-- We can't do the reverse trick, when the operand is narrowed: +-- narrowing throws away bits from the operand, there's no way to do +-- the same comparison at the larger size. + +cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] + | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try + platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64], + -- if the operand is widened: + Just (rep, signed, narrow_fn) <- maybe_conversion conv, + -- and this is a comparison operation: + Just narrow_cmp <- maybe_comparison cmp rep signed, + -- and the literal fits in the smaller size: + i == narrow_fn rep i + -- then we can do the comparison at the smaller size + = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)]) + where + maybe_conversion (MO_UU_Conv from to) + | to > from + = Just (from, False, narrowU) + maybe_conversion (MO_SS_Conv from to) + | to > from + = Just (from, True, narrowS) + + -- don't attempt to apply this optimisation when the source + -- is a float; see #1916 + maybe_conversion _ = Nothing + + -- careful (#2080): if the original comparison was signed, but + -- we were doing an unsigned widen, then we must do an + -- unsigned comparison at the smaller size. + maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) + maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) + maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) + maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) + maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) + maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) + maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) + maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) + maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) + maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) + maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) + maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) + maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) + maybe_comparison _ _ _ = Nothing + +-- We can often do something with constants of 0 and 1 ... 
+-- See Note [Comparison operators] + +cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] + = case mop of + -- Arithmetic + MO_Add _ -> Just x -- x + 0 = x + MO_Sub _ -> Just x -- x - 0 = x + MO_Mul _ -> Just y -- x * 0 = 0 + + -- Logical operations + MO_And _ -> Just y -- x & 0 = 0 + MO_Or _ -> Just x -- x | 0 = x + MO_Xor _ -> Just x -- x `xor` 0 = x + + -- Shifts + MO_Shl _ -> Just x -- x << 0 = x + MO_S_Shr _ -> Just x -- ditto shift-right + MO_U_Shr _ -> Just x + + -- Comparisons; these ones are trickier + -- See Note [Comparison operators] + MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y + MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y + MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y + MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto + MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 + MO_S_Lt _ | isComparisonExpr x -> Just zero + MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 + MO_S_Ge _ | isComparisonExpr x -> Just one + + MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y + MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] + = case mop of + -- Arithmetic: x*1 = x, etc + MO_Mul _ -> Just x + MO_S_Quot _ -> Just x + MO_U_Quot _ -> Just x + MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + + -- Comparisons; trickier + -- See Note [Comparison operators] + MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y + MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y + MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y + MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto + MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 + MO_S_Gt _ | isComparisonExpr x -> Just zero + MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 + MO_S_Le _ | isComparisonExpr x -> Just one + MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y + MO_S_Ge _ | isComparisonExpr x -> Just x + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +-- Now look for multiplication/division by powers of 2 (integers). + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] + = case mop of + MO_Mul rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Quot rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Rem rep + | Just _ <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) + MO_S_Quot rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + Just (cmmMachOpFold dflags (MO_S_Shr rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) + MO_S_Rem rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). + -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) + -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. 
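+  -- A worked instance of the fused form (illustrative): for x `rem` 8
+  -- (p = 3) the rewrite below produces, schematically,
+  --      x - ((x + bias) .&. (-8))
+  -- where bias (built by signedQuotRemHelper) is 7 for negative x and
+  -- 0 otherwise. E.g. x = -13: (-13 + 7) .&. (-8) == -8, and
+  -- -13 - (-8) == -5 == -13 `rem` 8.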
+        Just (cmmMachOpFold dflags (MO_Sub rep)
+                [x, cmmMachOpFold dflags (MO_And rep)
+                      [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
+    _ -> Nothing
+  where
+    -- In contrast with unsigned integers, for signed ones
+    -- shift right is not the same as quot, because it rounds
+    -- to minus infinity, whereas quot rounds toward zero.
+    -- To fix this up, we add one less than the divisor to the
+    -- dividend if it is a negative number.
+    --
+    -- to avoid a test/jump, we use the following sequence:
+    --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
+    --      x2 = x1 & (divisor-1)
+    --      result = x + x2
+    -- this could be done a bit more simply using conditional moves,
+    -- but we're processor independent here.
+    --
+    -- we optimise the divide-by-2 case slightly, generating
+    --      x1 = x >> word_size-1  (unsigned)
+    --      return = x + x1
+    signedQuotRemHelper :: Width -> Integer -> CmmExpr
+    signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+      where
+        bits = fromIntegral (widthInBits rep) - 1
+        shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+        x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+        x2 = if p == 1 then x1 else
+             CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
+
+-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
+-- Unfortunately this needs a unique supply because x might not be a
+-- register. See #2253 (program 6) for an example.
+
+
+-- Anything else is just too hard.
+
+cmmMachOpFoldM _ _ _ = Nothing
+
+{- Note [Comparison operators]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+   CmmCondBranch ((x>#y) == 1) t f
+we really want to convert to
+   CmmCondBranch (x>#y) t f
+
+That's what the constant-folding operations on comparison operators do above.
+-}
+
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+isPicReg :: CmmExpr -> Bool
+isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
+isPicReg _ = False
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
new file mode 100644
index 0000000000..d7235d0167
--- /dev/null
+++ b/compiler/GHC/Cmm/Parser.y
@@ -0,0 +1,1442 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 2004-2012
+--
+-- Parser for concrete Cmm.
+--
+-----------------------------------------------------------------------------
+
+{- -----------------------------------------------------------------------------
+Note [Syntax of .cmm files]
+
+NOTE: You are very much on your own in .cmm. There is very little
+error checking at all:
+
+  * Type errors are detected by the (optional) -dcmm-lint pass; if you
+    don't turn this on then a type error will likely result in a panic
+    from the native code generator.
+
+  * Passing the wrong number of arguments, or arguments of the wrong
+    type, is not detected.
+
+There are two ways to write .cmm code:
+
+ (1) High-level Cmm code delegates the stack handling to GHC, and
+     never explicitly mentions Sp or registers.
+
+ (2) Low-level Cmm manages the stack itself, and must know about
+     calling conventions.
+
+Whether you want high-level or low-level Cmm is indicated by the
+presence of an argument list on a procedure.
For example: + +foo ( gcptr a, bits32 b ) +{ + // this is high-level cmm code + + if (b > 0) { + // we can make tail calls passing arguments: + jump stg_ap_0_fast(a); + } + + push (stg_upd_frame_info, a) { + // stack frames can be explicitly pushed + + (x,y) = call wibble(a,b,3,4); + // calls pass arguments and return results using the native + // Haskell calling convention. The code generator will automatically + // construct a stack frame and an info table for the continuation. + + return (x,y); + // we can return multiple values from the current proc + } +} + +bar +{ + // this is low-level cmm code, indicated by the fact that we did not + // put an argument list on bar. + + x = R1; // the calling convention is explicit: better be careful + // that this works on all platforms! + + jump %ENTRY_CODE(Sp(0)) +} + +Here is a list of rules for high-level and low-level code. If you +break the rules, you get a panic (for using a high-level construct in +a low-level proc), or wrong code (when using low-level code in a +high-level proc). This stuff isn't checked! (TODO!) + +High-level only: + + - tail-calls with arguments, e.g. + jump stg_fun (arg1, arg2); + + - function calls: + (ret1,ret2) = call stg_fun (arg1, arg2); + + This makes a call with the NativeNodeCall convention, and the + values are returned to the following code using the NativeReturn + convention. + + - returning: + return (ret1, ret2) + + These use the NativeReturn convention to return zero or more + results to the caller. + + - pushing stack frames: + push (info_ptr, field1, ..., fieldN) { ... statements ... } + + - reserving temporary stack space: + + reserve N = x { ... } + + this reserves an area of size N (words) on the top of the stack, + and binds its address to x (a local register). Typically this is + used for allocating temporary storage for passing to foreign + functions. + + Note that if you make any native calls or invoke the GC in the + scope of the reserve block, you are responsible for ensuring that + the stack you reserved is laid out correctly with an info table. + +Low-level only: + + - References to Sp, R1-R8, F1-F4 etc. + + NB. foreign calls may clobber the argument registers R1-R8, F1-F4 + etc., so ensure they are saved into variables around foreign + calls. + + - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp + directly. + +Both high-level and low-level code can use a raw tail-call: + + jump stg_fun [R1,R2] + +NB. you *must* specify the list of GlobalRegs that are passed via a +jump, otherwise the register allocator will assume that all the +GlobalRegs are dead at the jump. + + +Calling Conventions +------------------- + +High-level procedures use the NativeNode calling convention, or the +NativeReturn convention if the 'return' keyword is used (see Stack +Frames below). + +Low-level procedures implement their own calling convention, so it can +be anything at all. + +If a low-level procedure implements the NativeNode calling convention, +then it can be called by high-level code using an ordinary function +call. In general this is hard to arrange because the calling +convention depends on the number of physical registers available for +parameter passing, but there are two cases where the calling +convention is platform-independent: + + - Zero arguments. + + - One argument of pointer or non-pointer word type; this is always + passed in R1 according to the NativeNode convention. + + - Returning a single value; these conventions are fixed and platform + independent. 
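+
+For instance (an illustrative sketch; stg_example is not a real RTS
+symbol), a low-level procedure that takes its single word-sized
+argument in R1 matches the NativeNodeCall convention for the
+one-argument case, so high-level code can invoke it with an ordinary
+call on any platform:
+
+  foo ( gcptr a )
+  {
+      call stg_example (a);   // 'a' is passed in R1
+      ...
+  }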
+ + +Stack Frames +------------ + +A stack frame is written like this: + +INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN ) + return ( arg1, ..., argM ) +{ + ... code ... +} + +where field1 ... fieldN are the fields of the stack frame (with types) +arg1...argN are the values returned to the stack frame (with types). +The return values are assumed to be passed according to the +NativeReturn convention. + +On entry to the code, the stack frame looks like: + + |----------| + | fieldN | + | ... | + | field1 | + |----------| + | info_ptr | + |----------| + | argN | + | ... | <- Sp + +and some of the args may be in registers. + +We prepend the code by a copyIn of the args, and assign all the stack +frame fields to their formals. The initial "arg offset" for stack +layout purposes consists of the whole stack frame plus any args that +might be on the stack. + +A tail-call may pass a stack frame to the callee using the following +syntax: + +jump f (info_ptr, field1,..,fieldN) (arg1,..,argN) + +where info_ptr and field1..fieldN describe the stack frame, and +arg1..argN are the arguments passed to f using the NativeNodeCall +convention. Note if a field is longer than a word (e.g. a D_ on +a 32-bit machine) then the call will push as many words as +necessary to the stack to accommodate it (e.g. 2). + + +----------------------------------------------------------------------------- -} + +{ +{-# LANGUAGE TupleSections #-} + +module GHC.Cmm.Parser ( parseCmmFile ) where + +import GhcPrelude + +import GHC.StgToCmm.ExtCode +import GHC.Cmm.CallConv +import GHC.StgToCmm.Prof +import GHC.StgToCmm.Heap +import GHC.StgToCmm.Monad hiding ( getCode, getCodeR, getCodeScoped, emitLabel, emit + , emitStore, emitAssign, emitOutOfLine, withUpdFrameOff + , getUpdFrameOff ) +import qualified GHC.StgToCmm.Monad as F +import GHC.StgToCmm.Utils +import GHC.StgToCmm.Foreign +import GHC.StgToCmm.Expr +import GHC.StgToCmm.Closure +import GHC.StgToCmm.Layout hiding (ArgRep(..)) +import GHC.StgToCmm.Ticky +import GHC.StgToCmm.Bind ( emitBlackHoleCode, emitUpdateFrame ) +import CoreSyn ( Tickish(SourceNote) ) + +import GHC.Cmm.Opt +import GHC.Cmm.Graph +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch ( mkSwitchTargets ) +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.Lexer +import GHC.Cmm.CLabel +import GHC.Cmm.Monad +import GHC.Runtime.Layout +import Lexer + +import CostCentre +import ForeignCall +import Module +import GHC.Platform +import Literal +import Unique +import UniqFM +import SrcLoc +import DynFlags +import ErrUtils +import StringBuffer +import FastString +import Panic +import Constants +import Outputable +import BasicTypes +import Bag ( emptyBag, unitBag ) +import Var + +import Control.Monad +import Data.Array +import Data.Char ( ord ) +import System.Exit +import Data.Maybe +import qualified Data.Map as M +import qualified Data.ByteString.Char8 as BS8 + +#include "HsVersions.h" +} + +%expect 0 + +%token + ':' { L _ (CmmT_SpecChar ':') } + ';' { L _ (CmmT_SpecChar ';') } + '{' { L _ (CmmT_SpecChar '{') } + '}' { L _ (CmmT_SpecChar '}') } + '[' { L _ (CmmT_SpecChar '[') } + ']' { L _ (CmmT_SpecChar ']') } + '(' { L _ (CmmT_SpecChar '(') } + ')' { L _ (CmmT_SpecChar ')') } + '=' { L _ (CmmT_SpecChar '=') } + '`' { L _ (CmmT_SpecChar '`') } + '~' { L _ (CmmT_SpecChar '~') } + '/' { L _ (CmmT_SpecChar '/') } + '*' { L _ (CmmT_SpecChar '*') } + '%' { L _ (CmmT_SpecChar '%') } + '-' { L _ (CmmT_SpecChar '-') } + '+' { L _ (CmmT_SpecChar '+') } + '&' { L _ (CmmT_SpecChar '&') } + '^' 
{ L _ (CmmT_SpecChar '^') } + '|' { L _ (CmmT_SpecChar '|') } + '>' { L _ (CmmT_SpecChar '>') } + '<' { L _ (CmmT_SpecChar '<') } + ',' { L _ (CmmT_SpecChar ',') } + '!' { L _ (CmmT_SpecChar '!') } + + '..' { L _ (CmmT_DotDot) } + '::' { L _ (CmmT_DoubleColon) } + '>>' { L _ (CmmT_Shr) } + '<<' { L _ (CmmT_Shl) } + '>=' { L _ (CmmT_Ge) } + '<=' { L _ (CmmT_Le) } + '==' { L _ (CmmT_Eq) } + '!=' { L _ (CmmT_Ne) } + '&&' { L _ (CmmT_BoolAnd) } + '||' { L _ (CmmT_BoolOr) } + + 'True' { L _ (CmmT_True ) } + 'False' { L _ (CmmT_False) } + 'likely'{ L _ (CmmT_likely)} + + 'CLOSURE' { L _ (CmmT_CLOSURE) } + 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } + 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } + 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } + 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } + 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } + 'else' { L _ (CmmT_else) } + 'export' { L _ (CmmT_export) } + 'section' { L _ (CmmT_section) } + 'goto' { L _ (CmmT_goto) } + 'if' { L _ (CmmT_if) } + 'call' { L _ (CmmT_call) } + 'jump' { L _ (CmmT_jump) } + 'foreign' { L _ (CmmT_foreign) } + 'never' { L _ (CmmT_never) } + 'prim' { L _ (CmmT_prim) } + 'reserve' { L _ (CmmT_reserve) } + 'return' { L _ (CmmT_return) } + 'returns' { L _ (CmmT_returns) } + 'import' { L _ (CmmT_import) } + 'switch' { L _ (CmmT_switch) } + 'case' { L _ (CmmT_case) } + 'default' { L _ (CmmT_default) } + 'push' { L _ (CmmT_push) } + 'unwind' { L _ (CmmT_unwind) } + 'bits8' { L _ (CmmT_bits8) } + 'bits16' { L _ (CmmT_bits16) } + 'bits32' { L _ (CmmT_bits32) } + 'bits64' { L _ (CmmT_bits64) } + 'bits128' { L _ (CmmT_bits128) } + 'bits256' { L _ (CmmT_bits256) } + 'bits512' { L _ (CmmT_bits512) } + 'float32' { L _ (CmmT_float32) } + 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } + + GLOBALREG { L _ (CmmT_GlobalReg $$) } + NAME { L _ (CmmT_Name $$) } + STRING { L _ (CmmT_String $$) } + INT { L _ (CmmT_Int $$) } + FLOAT { L _ (CmmT_Float $$) } + +%monad { PD } { >>= } { return } +%lexer { cmmlex } { L _ CmmT_EOF } +%name cmmParse cmm +%tokentype { Located CmmToken } + +-- C-- operator precedences, taken from the C-- spec +%right '||' -- non-std extension, called %disjoin in C-- +%right '&&' -- non-std extension, called %conjoin in C-- +%right '!' +%nonassoc '>=' '>' '<=' '<' '!=' '==' +%left '|' +%left '^' +%left '&' +%left '>>' '<<' +%left '-' '+' +%left '/' '*' '%' +%right '~' + +%% + +cmm :: { CmmParse () } + : {- empty -} { return () } + | cmmtop cmm { do $1; $2 } + +cmmtop :: { CmmParse () } + : cmmproc { $1 } + | cmmdata { $1 } + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + {% liftP . withThisPackage $ \pkg -> + do lits <- sequence $6; + staticClosure pkg $3 $5 (map getLit lits) } + +-- The only static closures in the RTS are dummy closures like +-- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need +-- to provide the full generality of static closures here. +-- In particular: +-- * CCS can always be CCS_DONT_CARE +-- * closure is always extern +-- * payload is always empty +-- * we can derive closure and info table labels from a single NAME + +cmmdata :: { CmmParse () } + : 'section' STRING '{' data_label statics '}' + { do lbl <- $4; + ss <- sequence $5; + code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) } + +data_label :: { CmmParse CLabel } + : NAME ':' + {% liftP . 
withThisPackage $ \pkg -> + return (mkCmmDataLabel pkg $1) } + +statics :: { [CmmParse [CmmStatic]] } + : {- empty -} { [] } + | static statics { $1 : $2 } + +static :: { CmmParse [CmmStatic] } + : type expr ';' { do e <- $2; + return [CmmStaticLit (getLit e)] } + | type ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1))] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + (fromIntegral $3)] } + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1) * + fromIntegral $3)] } + | 'CLOSURE' '(' NAME lits ')' + { do { lits <- sequence $4 + ; dflags <- getDynFlags + ; return $ map CmmStaticLit $ + mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) + -- mkForeignLabel because these are only used + -- for CHARLIKE and INTLIKE closures in the RTS. + dontCareCCS (map getLit lits) [] [] [] } } + -- arrays of closures required for the CHARLIKE & INTLIKE arrays + +lits :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | ',' expr lits { $2 : $3 } + +cmmproc :: { CmmParse () } + : info maybe_conv maybe_formals maybe_body + { do ((entry_ret_label, info, stk_formals, formals), agraph) <- + getCodeScoped $ loopDecls $ do { + (entry_ret_label, info, stk_formals) <- $1; + dflags <- getDynFlags; + formals <- sequence (fromMaybe [] $3); + withName (showSDoc dflags (ppr entry_ret_label)) + $4; + return (entry_ret_label, info, stk_formals, formals) } + let do_layout = isJust $3 + code (emitProcWithStackFrame $2 info + entry_ret_label stk_formals formals agraph + do_layout ) } + +maybe_conv :: { Convention } + : {- empty -} { NativeNodeCall } + | 'return' { NativeReturn } + +maybe_body :: { CmmParse () } + : ';' { return () } + | '{' body '}' { withSourceNote $1 $3 $2 } + +info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } + : NAME + {% liftP . withThisPackage $ \pkg -> + do newFunctionName $1 pkg + return (mkCmmCodeLabel pkg $1, Nothing, []) } + + + | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, closure type, description, type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) Thunk + -- not really Thunk, but that makes the info table + -- we want. + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' + -- ptrs, nptrs, closure type, description, type, fun type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + ty = Fun 0 (ArgSpec (fromIntegral $15)) + -- Arity zero, arg_type $15 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + -- we leave most of the fields zero here. This is only used + -- to generate the BCO info table in the RTS at the moment. + + | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, tag, closure type, description, type + {% liftP . 
withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $13 $15 + ty = Constr (fromIntegral $9) -- Tag + (BS8.pack $13) + rep = mkRTSRep (fromIntegral $11) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing }, + []) } + + -- If profiling is on, this string gets duplicated, + -- but that's the way the old code did it we can fix it some other time. + + | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' + -- selector, closure type, description, type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $9 $11 + ty = ThunkSelector (fromIntegral $5) + rep = mkRTSRep (fromIntegral $7) $ + mkHeapRep dflags False 0 0 ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ')' + -- closure type (no live regs) + {% liftP . withThisPackage $ \pkg -> + do let prof = NoProfilingInfo + rep = mkRTSRep (fromIntegral $5) $ mkStackRep [] + return (mkCmmRetLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')' + -- closure type, live regs + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + live <- sequence $7 + let prof = NoProfilingInfo + -- drop one for the info pointer + bitmap = mkLiveness dflags (drop 1 live) + rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap + return (mkCmmRetLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + live) } + +body :: { CmmParse () } + : {- empty -} { return () } + | decl body { do $1; $2 } + | stmt body { do $1; $2 } + +decl :: { CmmParse () } + : type names ';' { mapM_ (newLocal $1) $2 } + | 'import' importNames ';' { mapM_ newImport $2 } + | 'export' names ';' { return () } -- ignore exports + + +-- an imported function name, with optional packageId +importNames + :: { [(FastString, CLabel)] } + : importName { [$1] } + | importName ',' importNames { $1 : $3 } + +importName + :: { (FastString, CLabel) } + + -- A label imported without an explicit packageId. + -- These are taken to come from some foreign, unnamed package. + : NAME + { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } + + -- as previous 'NAME', but 'IsData' + | 'CLOSURE' NAME + { ($2, mkForeignLabel $2 Nothing ForeignLabelInExternalPackage IsData) } + + -- A label imported with an explicit packageId. + | STRING NAME + { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) } + + +names :: { [FastString] } + : NAME { [$1] } + | NAME ',' names { $1 : $3 } + +stmt :: { CmmParse () } + : ';' { return () } + + | NAME ':' + { do l <- newLabel $1; emitLabel l } + + + + | lreg '=' expr ';' + { do reg <- $1; e <- $3; withSourceNote $2 $4 (emitAssign reg e) } + | type '[' expr ']' '=' expr ';' + { withSourceNote $2 $7 (doStore $1 $3 $6) } + + -- Gah! We really want to say "foreign_results" but that causes + -- a shift/reduce conflict with assignment. We either + -- we expand out the no-result and single result cases or + -- we tweak the syntax to avoid the conflict. 
The later + -- option is taken here because the other way would require + -- multiple levels of expanding and get unwieldy. + | foreign_results 'foreign' STRING foreignLabel '(' cmm_hint_exprs0 ')' safety opt_never_returns ';' + {% foreignCall $3 $1 $4 $6 $8 $9 } + | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';' + {% primCall $1 $4 $6 } + -- stmt-level macros, stealing syntax from ordinary C-- function calls. + -- Perhaps we ought to use the %%-form? + | NAME '(' exprs0 ')' ';' + {% stmtMacro $1 $3 } + | 'switch' maybe_range expr '{' arms default '}' + { do as <- sequence $5; doSwitch $2 $3 as $6 } + | 'goto' NAME ';' + { do l <- lookupLabel $2; emit (mkBranch l) } + | 'return' '(' exprs0 ')' ';' + { doReturn $3 } + | 'jump' expr vols ';' + { doRawJump $2 $3 } + | 'jump' expr '(' exprs0 ')' ';' + { doJumpWithStack $2 [] $4 } + | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';' + { doJumpWithStack $2 $4 $7 } + | 'call' expr '(' exprs0 ')' ';' + { doCall $2 [] $4 } + | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';' + { doCall $6 $2 $8 } + | 'if' bool_expr cond_likely 'goto' NAME + { do l <- lookupLabel $5; cmmRawIf $2 l $3 } + | 'if' bool_expr cond_likely '{' body '}' else + { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 } + | 'push' '(' exprs0 ')' maybe_body + { pushStackFrame $3 $5 } + | 'reserve' expr '=' lreg maybe_body + { reserveStackFrame $2 $4 $5 } + | 'unwind' unwind_regs ';' + { $2 >>= code . emitUnwind } + +unwind_regs + :: { CmmParse [(GlobalReg, Maybe CmmExpr)] } + : GLOBALREG '=' expr_or_unknown ',' unwind_regs + { do e <- $3; rest <- $5; return (($1, e) : rest) } + | GLOBALREG '=' expr_or_unknown + { do e <- $3; return [($1, e)] } + +-- | Used by unwind to indicate unknown unwinding values. +expr_or_unknown + :: { CmmParse (Maybe CmmExpr) } + : 'return' + { do return Nothing } + | expr + { do e <- $1; return (Just e) } + +foreignLabel :: { CmmParse CmmExpr } + : NAME { return (CmmLit (CmmLabel (mkForeignLabel $1 Nothing ForeignLabelInThisPackage IsFunction))) } + +opt_never_returns :: { CmmReturnInfo } + : { CmmMayReturn } + | 'never' 'returns' { CmmNeverReturns } + +bool_expr :: { CmmParse BoolExpr } + : bool_op { $1 } + | expr { do e <- $1; return (BoolTest e) } + +bool_op :: { CmmParse BoolExpr } + : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolAnd e1 e2) } + | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolOr e1 e2) } + | '!' bool_expr { do e <- $2; return (BoolNot e) } + | '(' bool_op ')' { $2 } + +safety :: { Safety } + : {- empty -} { PlayRisky } + | STRING {% parseSafety $1 } + +vols :: { [GlobalReg] } + : '[' ']' { [] } + | '[' '*' ']' {% do df <- getDynFlags + ; return (realArgRegsCover df) } + -- All of them. See comment attached + -- to realArgRegsCover + | '[' globals ']' { $2 } + +globals :: { [GlobalReg] } + : GLOBALREG { [$1] } + | GLOBALREG ',' globals { $1 : $3 } + +maybe_range :: { Maybe (Integer,Integer) } + : '[' INT '..' 
INT ']' { Just ($2, $4) } + | {- empty -} { Nothing } + +arms :: { [CmmParse ([Integer],Either BlockId (CmmParse ()))] } + : {- empty -} { [] } + | arm arms { $1 : $2 } + +arm :: { CmmParse ([Integer],Either BlockId (CmmParse ())) } + : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } + +arm_body :: { CmmParse (Either BlockId (CmmParse ())) } + : '{' body '}' { return (Right (withSourceNote $1 $3 $2)) } + | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } + +ints :: { [Integer] } + : INT { [ $1 ] } + | INT ',' ints { $1 : $3 } + +default :: { Maybe (CmmParse ()) } + : 'default' ':' '{' body '}' { Just (withSourceNote $3 $5 $4) } + -- taking a few liberties with the C-- syntax here; C-- doesn't have + -- 'default' branches + | {- empty -} { Nothing } + +-- Note: OldCmm doesn't support a first class 'else' statement, though +-- CmmNode does. +else :: { CmmParse () } + : {- empty -} { return () } + | 'else' '{' body '}' { withSourceNote $2 $4 $3 } + +cond_likely :: { Maybe Bool } + : '(' 'likely' ':' 'True' ')' { Just True } + | '(' 'likely' ':' 'False' ')' { Just False } + | {- empty -} { Nothing } + + +-- we have to write this out longhand so that Happy's precedence rules +-- can kick in. +expr :: { CmmParse CmmExpr } + : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } + | expr '*' expr { mkMachOp MO_Mul [$1,$3] } + | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } + | expr '-' expr { mkMachOp MO_Sub [$1,$3] } + | expr '+' expr { mkMachOp MO_Add [$1,$3] } + | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } + | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } + | expr '&' expr { mkMachOp MO_And [$1,$3] } + | expr '^' expr { mkMachOp MO_Xor [$1,$3] } + | expr '|' expr { mkMachOp MO_Or [$1,$3] } + | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } + | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } + | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } + | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } + | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } + | expr '==' expr { mkMachOp MO_Eq [$1,$3] } + | '~' expr { mkMachOp MO_Not [$2] } + | '-' expr { mkMachOp MO_S_Neg [$2] } + | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; + return (mkMachOp mo [$1,$5]) } } + | expr0 { $1 } + +expr0 :: { CmmParse CmmExpr } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } + | STRING { do s <- code (newStringCLit $1); + return (CmmLit s) } + | reg { $1 } + | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } + | '(' expr ')' { $2 } + + +-- leaving out the type of a literal gives you the native word size in C-- +maybe_ty :: { CmmType } + : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags } + | '::' type { $2 } + +cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] } + : {- empty -} { [] } + | cmm_hint_exprs { $1 } + +cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } + +cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) } + : expr { do e <- $1; + return (e, inferCmmHint e) } + | expr STRING {% do h <- parseCmmHint $2; + return $ do + e <- $1; return (e, h) } + +exprs0 :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | exprs { $1 } + +exprs :: { [CmmParse CmmExpr] } + : expr { [ $1 ] } + | expr ',' exprs { $1 : $3 } + +reg :: { CmmParse CmmExpr } + : NAME { lookupName $1 } + | GLOBALREG { return (CmmReg (CmmGlobal $1)) } + +foreign_results :: { [CmmParse (LocalReg, ForeignHint)] } + : {- empty -} 
{ [] } + | '(' foreign_formals ')' '=' { $2 } + +foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] } + : foreign_formal { [$1] } + | foreign_formal ',' { [$1] } + | foreign_formal ',' foreign_formals { $1 : $3 } + +foreign_formal :: { CmmParse (LocalReg, ForeignHint) } + : local_lreg { do e <- $1; return (e, inferCmmHint (CmmReg (CmmLocal e))) } + | STRING local_lreg {% do h <- parseCmmHint $1; + return $ do + e <- $2; return (e,h) } + +local_lreg :: { CmmParse LocalReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg (CmmLocal r) -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } + +lreg :: { CmmParse CmmReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg r -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } + | GLOBALREG { return (CmmGlobal $1) } + +maybe_formals :: { Maybe [CmmParse LocalReg] } + : {- empty -} { Nothing } + | '(' formals0 ')' { Just $2 } + +formals0 :: { [CmmParse LocalReg] } + : {- empty -} { [] } + | formals { $1 } + +formals :: { [CmmParse LocalReg] } + : formal ',' { [$1] } + | formal { [$1] } + | formal ',' formals { $1 : $3 } + +formal :: { CmmParse LocalReg } + : type NAME { newLocal $1 $2 } + +type :: { CmmType } + : 'bits8' { b8 } + | typenot8 { $1 } + +typenot8 :: { CmmType } + : 'bits16' { b16 } + | 'bits32' { b32 } + | 'bits64' { b64 } + | 'bits128' { b128 } + | 'bits256' { b256 } + | 'bits512' { b512 } + | 'float32' { f32 } + | 'float64' { f64 } + | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags } + +{ +section :: String -> SectionType +section "text" = Text +section "data" = Data +section "rodata" = ReadOnlyData +section "relrodata" = RelocatableReadOnlyData +section "bss" = UninitialisedData +section s = OtherSection s + +mkString :: String -> CmmStatic +mkString s = CmmString (BS8.pack s) + +-- | +-- Given an info table, decide what the entry convention for the proc +-- is. That is, for an INFO_TABLE_RET we want the return convention, +-- otherwise it is a NativeNodeCall. +-- +infoConv :: Maybe CmmInfoTable -> Convention +infoConv Nothing = NativeNodeCall +infoConv (Just info) + | isStackRep (cit_rep info) = NativeReturn + | otherwise = NativeNodeCall + +-- mkMachOp infers the type of the MachOp from the type of its first +-- argument. We assume that this is correct: for MachOps that don't have +-- symmetrical args (e.g. shift ops), the first arg determines the type of +-- the op. 
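+--
+-- For instance, the grammar rule  expr '+' expr  above elaborates to
+-- mkMachOp MO_Add [$1,$3]; if the left operand has type b64, the
+-- result is CmmMachOp (MO_Add W64) [e1,e2], whatever the width of
+-- the right operand.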
+mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr +mkMachOp fn args = do + dflags <- getDynFlags + arg_exprs <- sequence args + return (CmmMachOp (fn (typeWidth (cmmExprType dflags (head arg_exprs)))) arg_exprs) + +getLit :: CmmExpr -> CmmLit +getLit (CmmLit l) = l +getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r +getLit _ = panic "invalid literal" -- TODO messy failure + +nameToMachOp :: FastString -> PD (Width -> MachOp) +nameToMachOp name = + case lookupUFM machOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just m -> return m + +exprOp :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse CmmExpr) +exprOp name args_code = do + dflags <- getDynFlags + case lookupUFM (exprMacros dflags) name of + Just f -> return $ do + args <- sequence args_code + return (f args) + Nothing -> do + mo <- nameToMachOp name + return $ mkMachOp mo args_code + +exprMacros :: DynFlags -> UniqFM ([CmmExpr] -> CmmExpr) +exprMacros dflags = listToUFM [ + ( fsLit "ENTRY_CODE", \ [x] -> entryCode dflags x ), + ( fsLit "INFO_PTR", \ [x] -> closureInfoPtr dflags x ), + ( fsLit "STD_INFO", \ [x] -> infoTable dflags x ), + ( fsLit "FUN_INFO", \ [x] -> funInfoTable dflags x ), + ( fsLit "GET_ENTRY", \ [x] -> entryCode dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_STD_INFO", \ [x] -> infoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_FUN_INFO", \ [x] -> funInfoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "INFO_TYPE", \ [x] -> infoTableClosureType dflags x ), + ( fsLit "INFO_PTRS", \ [x] -> infoTablePtrs dflags x ), + ( fsLit "INFO_NPTRS", \ [x] -> infoTableNonPtrs dflags x ) + ] + +-- we understand a subset of C-- primitives: +machOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "add", MO_Add ), + ( "sub", MO_Sub ), + ( "eq", MO_Eq ), + ( "ne", MO_Ne ), + ( "mul", MO_Mul ), + ( "neg", MO_S_Neg ), + ( "quot", MO_S_Quot ), + ( "rem", MO_S_Rem ), + ( "divu", MO_U_Quot ), + ( "modu", MO_U_Rem ), + + ( "ge", MO_S_Ge ), + ( "le", MO_S_Le ), + ( "gt", MO_S_Gt ), + ( "lt", MO_S_Lt ), + + ( "geu", MO_U_Ge ), + ( "leu", MO_U_Le ), + ( "gtu", MO_U_Gt ), + ( "ltu", MO_U_Lt ), + + ( "and", MO_And ), + ( "or", MO_Or ), + ( "xor", MO_Xor ), + ( "com", MO_Not ), + ( "shl", MO_Shl ), + ( "shrl", MO_U_Shr ), + ( "shra", MO_S_Shr ), + + ( "fadd", MO_F_Add ), + ( "fsub", MO_F_Sub ), + ( "fneg", MO_F_Neg ), + ( "fmul", MO_F_Mul ), + ( "fquot", MO_F_Quot ), + + ( "feq", MO_F_Eq ), + ( "fne", MO_F_Ne ), + ( "fge", MO_F_Ge ), + ( "fle", MO_F_Le ), + ( "fgt", MO_F_Gt ), + ( "flt", MO_F_Lt ), + + ( "lobits8", flip MO_UU_Conv W8 ), + ( "lobits16", flip MO_UU_Conv W16 ), + ( "lobits32", flip MO_UU_Conv W32 ), + ( "lobits64", flip MO_UU_Conv W64 ), + + ( "zx16", flip MO_UU_Conv W16 ), + ( "zx32", flip MO_UU_Conv W32 ), + ( "zx64", flip MO_UU_Conv W64 ), + + ( "sx16", flip MO_SS_Conv W16 ), + ( "sx32", flip MO_SS_Conv W32 ), + ( "sx64", flip MO_SS_Conv W64 ), + + ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode + ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode + ( "f2i8", flip MO_FS_Conv W8 ), + ( "f2i16", flip MO_FS_Conv W16 ), + ( "f2i32", flip MO_FS_Conv W32 ), + ( "f2i64", flip MO_FS_Conv W64 ), + ( "i2f32", flip MO_SF_Conv W32 ), + ( "i2f64", flip MO_SF_Conv W64 ) + ] + +callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr])) +callishMachOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "read_barrier", (MO_ReadBarrier,)), + ( "write_barrier", (MO_WriteBarrier,)), + ( "memcpy", memcpyLikeTweakArgs 
MO_Memcpy ), + ( "memset", memcpyLikeTweakArgs MO_Memset ), + ( "memmove", memcpyLikeTweakArgs MO_Memmove ), + ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), + + ("prefetch0", (MO_Prefetch_Data 0,)), + ("prefetch1", (MO_Prefetch_Data 1,)), + ("prefetch2", (MO_Prefetch_Data 2,)), + ("prefetch3", (MO_Prefetch_Data 3,)), + + ( "popcnt8", (MO_PopCnt W8,)), + ( "popcnt16", (MO_PopCnt W16,)), + ( "popcnt32", (MO_PopCnt W32,)), + ( "popcnt64", (MO_PopCnt W64,)), + + ( "pdep8", (MO_Pdep W8,)), + ( "pdep16", (MO_Pdep W16,)), + ( "pdep32", (MO_Pdep W32,)), + ( "pdep64", (MO_Pdep W64,)), + + ( "pext8", (MO_Pext W8,)), + ( "pext16", (MO_Pext W16,)), + ( "pext32", (MO_Pext W32,)), + ( "pext64", (MO_Pext W64,)), + + ( "cmpxchg8", (MO_Cmpxchg W8,)), + ( "cmpxchg16", (MO_Cmpxchg W16,)), + ( "cmpxchg32", (MO_Cmpxchg W32,)), + ( "cmpxchg64", (MO_Cmpxchg W64,)) + + -- ToDo: the rest, maybe + -- edit: which rest? + -- also: how do we tell CMM Lint how to type check callish macops? + ] + where + memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr]) + memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument" + memcpyLikeTweakArgs op args@(_:_) = + (op align, args') + where + args' = init args + align = case last args of + CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger + e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e) + -- The alignment of memcpy-ish operations must be a + -- compile-time constant. We verify this here, passing it around + -- in the MO_* constructor. In order to do this, however, we + -- must intercept the arguments in primCall. + +parseSafety :: String -> PD Safety +parseSafety "safe" = return PlaySafe +parseSafety "unsafe" = return PlayRisky +parseSafety "interruptible" = return PlayInterruptible +parseSafety str = fail ("unrecognised safety: " ++ str) + +parseCmmHint :: String -> PD ForeignHint +parseCmmHint "ptr" = return AddrHint +parseCmmHint "signed" = return SignedHint +parseCmmHint str = fail ("unrecognised hint: " ++ str) + +-- labels are always pointers, so we might as well infer the hint +inferCmmHint :: CmmExpr -> ForeignHint +inferCmmHint (CmmLit (CmmLabel _)) = AddrHint +inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint +inferCmmHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CCCS = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg (VanillaReg _ VGcPtr) = True +isPtrGlobalReg _ = False + +happyError :: PD a +happyError = PD $ \_ s -> unP srcParseFail s + +-- ----------------------------------------------------------------------------- +-- Statement-level macros + +stmtMacro :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse ()) +stmtMacro fun args_code = do + case lookupUFM stmtMacros fun of + Nothing -> fail ("unknown macro: " ++ unpackFS fun) + Just fcode -> return $ do + args <- sequence args_code + code (fcode args) + +stmtMacros :: UniqFM ([CmmExpr] -> FCode ()) +stmtMacros = listToUFM [ + ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), + ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), + + ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), + ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ), + + -- completely generic heap and stack checks, for use in high-level cmm. 
+ ( fsLit "HP_CHK_GEN", \[bytes] -> + heapStackCheckGen Nothing (Just bytes) ), + ( fsLit "STK_CHK_GEN", \[] -> + heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ), + + -- A stack check for a fixed amount of stack. Sounds a bit strange, but + -- we use the stack for a bit of temporary storage in a couple of primops + ( fsLit "STK_CHK_GEN_N", \[bytes] -> + heapStackCheckGen (Just bytes) Nothing ), + + -- A stack check on entry to a thunk, where the argument is the thunk pointer. + ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())), + + ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), + ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), + + ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), + ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), + + ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), + ( fsLit "SET_HDR", \[ptr,info,ccs] -> + emitSetDynHdr ptr info ccs ), + ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] -> + tickyAllocPrim hdr goods slop ), + ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> + tickyAllocPAP goods slop ), + ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> + tickyAllocThunk goods slop ), + ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode reg ) + ] + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode () +emitPushUpdateFrame sp e = do + dflags <- getDynFlags + emitUpdateFrame dflags sp mkUpdInfoLabel e + +pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse () +pushStackFrame fields body = do + dflags <- getDynFlags + exprs <- sequence fields + updfr_off <- getUpdFrameOff + let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old + [] updfr_off exprs + emit g + withUpdFrameOff new_updfr_off body + +reserveStackFrame + :: CmmParse CmmExpr + -> CmmParse CmmReg + -> CmmParse () + -> CmmParse () +reserveStackFrame psize preg body = do + dflags <- getDynFlags + old_updfr_off <- getUpdFrameOff + reg <- preg + esize <- psize + let size = case constantFoldExpr dflags esize of + CmmLit (CmmInt n _) -> n + _other -> pprPanic "CmmParse: not a compile-time integer: " + (ppr esize) + let frame = old_updfr_off + wORD_SIZE dflags * fromIntegral size + emitAssign reg (CmmStackSlot Old frame) + withUpdFrameOff frame body + +profilingInfo dflags desc_str ty_str + = if not (gopt Opt_SccProfilingOn dflags) + then NoProfilingInfo + else ProfilingInfo (BS8.pack desc_str) (BS8.pack ty_str) + +staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse () +staticClosure pkg cl_label info payload + = do dflags <- getDynFlags + let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] + code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits + +foreignCall + :: String + -> [CmmParse (LocalReg, ForeignHint)] + -> CmmParse CmmExpr + -> [CmmParse (CmmExpr, ForeignHint)] + -> Safety + -> CmmReturnInfo + -> PD (CmmParse ()) +foreignCall conv_string results_code expr_code args_code safety ret + = do conv <- case conv_string of + "C" -> return CCallConv + "stdcall" -> return StdCallConv + _ -> fail ("unknown calling convention: " ++ conv_string) + return $ do + dflags <- getDynFlags + results <- sequence results_code + expr <- expr_code + args <- sequence args_code + let + expr' = adjCallTarget dflags conv expr args + (arg_exprs, arg_hints) = unzip args + (res_regs, res_hints) = unzip results + fc = ForeignConvention conv arg_hints res_hints ret + target = ForeignTarget expr' fc + _ <- code $ emitForeignCall safety res_regs target arg_exprs + return () + + +doReturn 
:: [CmmParse CmmExpr] -> CmmParse () +doReturn exprs_code = do + dflags <- getDynFlags + exprs <- sequence exprs_code + updfr_off <- getUpdFrameOff + emit (mkReturnSimple dflags exprs updfr_off) + +mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple dflags actuals updfr_off = + mkReturn dflags e actuals updfr_off + where e = entryCode dflags (CmmLoad (CmmStackSlot Old updfr_off) + (gcWord dflags)) + +doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse () +doRawJump expr_code vols = do + dflags <- getDynFlags + expr <- expr_code + updfr_off <- getUpdFrameOff + emit (mkRawJump dflags expr updfr_off vols) + +doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr] + -> [CmmParse CmmExpr] -> CmmParse () +doJumpWithStack expr_code stk_code args_code = do + dflags <- getDynFlags + expr <- expr_code + stk_args <- sequence stk_code + args <- sequence args_code + updfr_off <- getUpdFrameOff + emit (mkJumpExtra dflags NativeNodeCall expr args updfr_off stk_args) + +doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr] + -> CmmParse () +doCall expr_code res_code args_code = do + dflags <- getDynFlags + expr <- expr_code + args <- sequence args_code + ress <- sequence res_code + updfr_off <- getUpdFrameOff + c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off [] + emit c + +adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ] + -> CmmExpr +-- On Windows, we have to add the '@N' suffix to the label when making +-- a call with the stdcall calling convention. +adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args + | platformOS (targetPlatform dflags) == OSMinGW32 + = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) + where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) + -- c.f. CgForeignCall.emitForeignCall +adjCallTarget _ _ expr _ + = expr + +primCall + :: [CmmParse (CmmFormal, ForeignHint)] + -> FastString + -> [CmmParse CmmExpr] + -> PD (CmmParse ()) +primCall results_code name args_code + = case lookupUFM callishMachOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just f -> return $ do + results <- sequence results_code + args <- sequence args_code + let (p, args') = f args + code (emitPrimCall (map fst results) p args') + +doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse () +doStore rep addr_code val_code + = do dflags <- getDynFlags + addr <- addr_code + val <- val_code + -- if the specified store type does not match the type of the expr + -- on the rhs, then we insert a coercion that will cause the type + -- mismatch to be flagged by cmm-lint. If we don't do this, then + -- the store will happen at the wrong type, and the error will not + -- be noticed. + let val_width = typeWidth (cmmExprType dflags val) + rep_width = typeWidth rep + let coerce_val + | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] + | otherwise = val + emitStore addr coerce_val + +-- ----------------------------------------------------------------------------- +-- If-then-else and boolean expressions + +data BoolExpr + = BoolExpr `BoolAnd` BoolExpr + | BoolExpr `BoolOr` BoolExpr + | BoolNot BoolExpr + | BoolTest CmmExpr + +-- ToDo: smart constructors which simplify the boolean expression. 
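The ToDo above asks for smart constructors. A minimal sketch of what such simplifying constructors could look like, assuming only the BoolExpr type defined above; the names bNot, bAnd and bOr are hypothetical and not part of this patch:

    -- Hypothetical simplifying constructors for BoolExpr (illustration
    -- only): they cancel double negation and apply De Morgan's laws so
    -- that emitCond sees fewer BoolNot wrappers. BoolTest leaves are
    -- left untouched.
    bNot :: BoolExpr -> BoolExpr
    bNot (BoolNot e) = e                 -- not (not e) ==> e
    bNot e           = BoolNot e

    bAnd :: BoolExpr -> BoolExpr -> BoolExpr
    bAnd (BoolNot x) (BoolNot y) = bNot (bOr x y)    -- De Morgan
    bAnd x y                     = x `BoolAnd` y

    bOr :: BoolExpr -> BoolExpr -> BoolExpr
    bOr (BoolNot x) (BoolNot y) = bNot (bAnd x y)    -- De Morgan
    bOr x y                     = x `BoolOr` y

Callers would build conditions with bAnd/bOr/bNot instead of the raw constructors; since the result type is unchanged, emitCond would need no changes.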
+ +cmmIfThenElse cond then_part else_part likely = do + then_id <- newBlockId + join_id <- newBlockId + c <- cond + emitCond c then_id likely + else_part + emit (mkBranch join_id) + emitLabel then_id + then_part + -- fall through to join + emitLabel join_id + +cmmRawIf cond then_id likely = do + c <- cond + emitCond c then_id likely + +-- 'emitCond cond true_id' emits code to test whether the cond is true, +-- branching to true_id if so, and falling through otherwise. +emitCond (BoolTest e) then_id likely = do + else_id <- newBlockId + emit (mkCbranch e then_id else_id likely) + emitLabel else_id +emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely + | Just op' <- maybeInvertComparison op + = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely) +emitCond (BoolNot e) then_id likely = do + else_id <- newBlockId + emitCond e else_id likely + emit (mkBranch then_id) + emitLabel else_id +emitCond (e1 `BoolOr` e2) then_id likely = do + emitCond e1 then_id likely + emitCond e2 then_id likely +emitCond (e1 `BoolAnd` e2) then_id likely = do + -- we'd like to invert one of the conditionals here to avoid an + -- extra branch instruction, but we can't use maybeInvertComparison + -- here because we can't look too closely at the expression since + -- we're in a loop. + and_id <- newBlockId + else_id <- newBlockId + emitCond e1 and_id likely + emit (mkBranch else_id) + emitLabel and_id + emitCond e2 then_id likely + emitLabel else_id + +-- ----------------------------------------------------------------------------- +-- Source code notes + +-- | Generate a source note spanning from "a" to "b" (inclusive), then +-- proceed with parsing. This allows debugging tools to reason about +-- locations in Cmm code. +withSourceNote :: Located a -> Located b -> CmmParse c -> CmmParse c +withSourceNote a b parse = do + name <- getName + case combineSrcSpans (getLoc a) (getLoc b) of + RealSrcSpan span -> code (emitTick (SourceNote span name)) >> parse + _other -> parse + +-- ----------------------------------------------------------------------------- +-- Table jumps + +-- We use a simplified form of C-- switch statements for now. A +-- switch statement always compiles to a table jump. Each arm can +-- specify a list of values (not ranges), and there can be a single +-- default branch. The range of the table is given either by the +-- optional range on the switch (eg. switch [0..7] {...}), or by +-- the minimum/maximum values from the branches. 
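The restricted switch form described above maps directly onto mkSwitchTargets, which doSwitch below calls. As a sketch, here are the targets built for a hypothetical source-level switch [0..7] { case 0, 1: goto L1; case 7: goto L2; default: goto Ldef; }, assuming the qualified Data.Map import (M) and the GHC.Cmm.Switch API that this file already uses; exampleTargets is an illustrative name, not part of this patch:

    -- Hypothetical example: each arm's literal values map to its block,
    -- the declared [0..7] range bounds the jump table, and values 2..6
    -- fall through to the default block.
    exampleTargets :: BlockId -> BlockId -> BlockId -> SwitchTargets
    exampleTargets l1 l2 ldef =
      mkSwitchTargets False        -- scrutinee treated as unsigned
                      (0, 7)       -- range from 'switch [0..7]'
                      (Just ldef)  -- the default branch
                      (M.fromList [(0, l1), (1, l1), (7, l2)])

With no default and no explicit range, the table bounds would instead come from the smallest and largest case values.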
+ +doSwitch :: Maybe (Integer,Integer) + -> CmmParse CmmExpr + -> [([Integer],Either BlockId (CmmParse ()))] + -> Maybe (CmmParse ()) -> CmmParse () +doSwitch mb_range scrut arms deflt + = do + -- Compile code for the default branch + dflt_entry <- + case deflt of + Nothing -> return Nothing + Just e -> do b <- forkLabelledCode e; return (Just b) + + -- Compile each case branch + table_entries <- mapM emitArm arms + let table = M.fromList (concat table_entries) + + dflags <- getDynFlags + let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range + + expr <- scrut + -- ToDo: check for out of range and jump to default if necessary + emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table) + where + emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)] + emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] + emitArm (ints,Right code) = do + blockid <- forkLabelledCode code + return [ (i,blockid) | i <- ints ] + +forkLabelledCode :: CmmParse () -> CmmParse BlockId +forkLabelledCode p = do + (_,ag) <- getCodeScoped p + l <- newBlockId + emitOutOfLine l ag + return l + +-- ----------------------------------------------------------------------------- +-- Putting it all together + +-- The initial environment: we define some constants that the compiler +-- knows about here. +initEnv :: DynFlags -> Env +initEnv dflags = listToUFM [ + ( fsLit "SIZEOF_StgHeader", + VarN (CmmLit (CmmInt (fromIntegral (fixedHdrSize dflags)) (wordWidth dflags)) )), + ( fsLit "SIZEOF_StgInfoTable", + VarN (CmmLit (CmmInt (fromIntegral (stdInfoTableSizeB dflags)) (wordWidth dflags)) )) + ] + +parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe CmmGroup) +parseCmmFile dflags filename = withTiming dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ()) $ do + buf <- hGetStringBuffer filename + let + init_loc = mkRealSrcLoc (mkFastString filename) 1 1 + init_state = (mkPState dflags buf init_loc) { lex_state = [0] } + -- reset the lex_state: the Lexer monad leaves some stuff + -- in there we don't want. + case unPD cmmParse dflags init_state of + PFailed pst -> + return (getMessages pst dflags, Nothing) + POk pst code -> do + st <- initC + let fcode = getCmm $ unEC code "global" (initEnv dflags) [] >> return () + (cmm,_) = runC dflags no_module st fcode + let ms = getMessages pst dflags + if (errorsFound dflags ms) + then return (ms, Nothing) + else return (ms, Just cmm) + where + no_module = panic "parseCmmFile: no module" +} diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs new file mode 100644 index 0000000000..6db9e23ee1 --- /dev/null +++ b/compiler/GHC/Cmm/Pipeline.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE BangPatterns #-} + +module GHC.Cmm.Pipeline ( + -- | Converts C-- with an implicit stack and native C-- calls into + -- optimized, CPS converted and native-call-less C--. The latter + -- C-- can be used to generate assembly. 
+ cmmPipeline +) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Lint +import GHC.Cmm.Info.Build +import GHC.Cmm.CommonBlockElim +import GHC.Cmm.Switch.Implement +import GHC.Cmm.ProcPoint +import GHC.Cmm.ContFlowOpt +import GHC.Cmm.LayoutStack +import GHC.Cmm.Sink +import GHC.Cmm.Dataflow.Collections + +import UniqSupply +import DynFlags +import ErrUtils +import HscTypes +import Control.Monad +import Outputable +import GHC.Platform + +----------------------------------------------------------------------------- +-- | Top level driver for C-- pipeline +----------------------------------------------------------------------------- + +cmmPipeline + :: HscEnv -- Compilation env including + -- dynamic flags: -dcmm-lint -ddump-cmm-cps + -> ModuleSRTInfo -- Info about SRTs generated so far + -> CmmGroup -- Input C-- with Procedures + -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C-- + +cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $ + do let dflags = hsc_dflags hsc_env + + tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog + + (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops + dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms) + + return (srtInfo, cmms) + + where forceRes (info, group) = + info `seq` foldr (\decl r -> decl `seq` r) () group + + dflags = hsc_dflags hsc_env + +cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) +cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) +cpsTop hsc_env proc = + do + ----------- Control-flow optimisations ---------------------------------- + + -- The first round of control-flow optimisation speeds up the + -- later passes by removing lots of empty blocks, so we do it + -- even when optimisation isn't turned on. 
+ -- + CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-} + return $ cmmCfgOptsProc splitting_proc_points proc + dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + let !TopInfo {stack_info=StackInfo { arg_space = entry_off + , do_layout = do_layout }} = h + + ----------- Eliminate common blocks ------------------------------------- + g <- {-# SCC "elimCommonBlocks" #-} + condPass Opt_CmmElimCommonBlocks elimCommonBlocks g + Opt_D_dump_cmm_cbe "Post common block elimination" + + -- Any work storing block Labels must be performed _after_ + -- elimCommonBlocks + + ----------- Implement switches ------------------------------------------ + g <- {-# SCC "createSwitchPlans" #-} + runUniqSM $ cmmImplementSwitchPlans dflags g + dump Opt_D_dump_cmm_switch "Post switch plan" g + + ----------- Proc points ------------------------------------------------- + let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g + proc_points <- + if splitting_proc_points + then do + pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $ + minimalProcPointSet (targetPlatform dflags) call_pps g + dumpWith dflags Opt_D_dump_cmm_proc "Proc points" + FormatCMM (ppr l $$ ppr pp $$ ppr g) + return pp + else + return call_pps + + ----------- Layout the stack and manifest Sp ---------------------------- + (g, stackmaps) <- + {-# SCC "layoutStack" #-} + if do_layout + then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g + else return (g, mapEmpty) + dump Opt_D_dump_cmm_sp "Layout Stack" g + + ----------- Sink and inline assignments -------------------------------- + g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout] + condPass Opt_CmmSink (cmmSink dflags) g + Opt_D_dump_cmm_sink "Sink assignments" + + ------------- CAF analysis ---------------------------------------------- + let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g + dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv) + + g <- if splitting_proc_points + then do + ------------- Split into separate procedures ----------------------- + let pp_map = {-# SCC "procPointAnalysis" #-} + procPointAnalysis proc_points g + dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" + FormatCMM (ppr pp_map) + g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $ + splitAtProcPoints dflags l call_pps proc_points pp_map + (CmmProc h l v g) + dumps Opt_D_dump_cmm_split "Post splitting" g + return g + else do + -- attach info tables to return points + return $ [attachContInfoTables call_pps (CmmProc h l v g)] + + ------------- Populate info tables with stack info ----------------- + g <- {-# SCC "setInfoTableStackMap" #-} + return $ map (setInfoTableStackMap dflags stackmaps) g + dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g + + ----------- Control-flow optimisations ----------------------------- + g <- {-# SCC "cmmCfgOpts(2)" #-} + return $ if optLevel dflags >= 1 + then map (cmmCfgOptsProc splitting_proc_points) g + else g + g <- return (map removeUnreachableBlocksProc g) + -- See Note [unreachable blocks] + dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + return (cafEnv, g) + + where dflags = hsc_dflags hsc_env + platform = targetPlatform dflags + dump = dumpGraph dflags + + dumps flag name + = mapM_ (dumpWith dflags flag name FormatCMM . ppr) + + condPass flag pass g dumpflag dumpname = + if gopt flag dflags + then do + g <- return $ pass g + dump dumpflag dumpname g + return g + else return g + + -- we don't need to split proc points for the NCG, unless + -- tablesNextToCode is off. 
The latter is because we have no
+        -- label to put on info tables for basic blocks that are not
+        -- the entry point.
+        splitting_proc_points = hscTarget dflags /= HscAsm
+                             || not (tablesNextToCode dflags)
+                             || -- Note [inconsistent-pic-reg]
+                                usingInconsistentPicReg
+        usingInconsistentPicReg
+           = case (platformArch platform, platformOS platform, positionIndependent dflags)
+             of   (ArchX86, OSDarwin, pic) -> pic
+                  _                        -> False
+
+-- Note [Sinking after stack layout]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- In the past we considered running the sinking pass also before stack
+-- layout, but after making some measurements we realized that:
+--
+--   a) running sinking only before stack layout produces slower
+--      code than running sinking only after stack layout
+--
+--   b) running sinking both before and after stack layout produces
+--      code that has the same performance as when running sinking
+--      only after stack layout.
+--
+-- In other words, sinking before stack layout doesn't buy us anything.
+--
+-- An interesting question is "why is it better to run sinking after
+-- stack layout"? It seems that the major reason is the stores and loads
+-- generated by stack layout. Consider this code before stack layout:
+--
+--   c1E:
+--       _c1C::P64 = R3;
+--       _c1B::P64 = R2;
+--       _c1A::P64 = R1;
+--       I64[(young<c1D> + 8)] = c1D;
+--       call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--   c1D:
+--       R3 = _c1C::P64;
+--       R2 = _c1B::P64;
+--       R1 = _c1A::P64;
+--       call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- The stack layout pass will save all local variables live across a call
+-- (_c1C, _c1B and _c1A in this example) on the stack just before
+-- making a call and reload them from the stack after returning from a
+-- call:
+--
+--   c1E:
+--       _c1C::P64 = R3;
+--       _c1B::P64 = R2;
+--       _c1A::P64 = R1;
+--       I64[Sp - 32] = c1D;
+--       P64[Sp - 24] = _c1A::P64;
+--       P64[Sp - 16] = _c1B::P64;
+--       P64[Sp - 8]  = _c1C::P64;
+--       Sp = Sp - 32;
+--       call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--   c1D:
+--       _c1A::P64 = P64[Sp + 8];
+--       _c1B::P64 = P64[Sp + 16];
+--       _c1C::P64 = P64[Sp + 24];
+--       R3 = _c1C::P64;
+--       R2 = _c1B::P64;
+--       R1 = _c1A::P64;
+--       Sp = Sp + 32;
+--       call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- If we don't run the sinking pass after stack layout we are basically
+-- left with such code. However, running sinking on this code can lead
+-- to significant improvements:
+--
+--   c1E:
+--       I64[Sp - 32] = c1D;
+--       P64[Sp - 24] = R1;
+--       P64[Sp - 16] = R2;
+--       P64[Sp - 8]  = R3;
+--       Sp = Sp - 32;
+--       call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--   c1D:
+--       R3 = P64[Sp + 24];
+--       R2 = P64[Sp + 16];
+--       R1 = P64[Sp + 8];
+--       Sp = Sp + 32;
+--       call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- Now we only have 9 assignments instead of 15.
+--
+-- There is one case when running sinking before stack layout could
+-- be beneficial. Consider this:
+--
+--   L1:
+--      x = y
+--      call f() returns L2
+--   L2: ...x...y...
+--
+-- Since both x and y are live across a call to f, they will be stored
+-- on the stack during stack layout and restored after the call:
+--
+--   L1:
+--      x = y
+--      P64[Sp - 24] = L2
+--      P64[Sp - 16] = x
+--      P64[Sp - 8]  = y
+--      Sp = Sp - 24
+--      call f() returns L2
+--   L2:
+--      y = P64[Sp + 16]
+--      x = P64[Sp + 8]
+--      Sp = Sp + 24
+--      ...x...y...
+--
+-- However, if we run sinking before stack layout we would propagate x
+-- to its usage place (both x and y must be local registers for this to
+-- be possible - global registers cannot be floated past a call):
+--
+--   L1:
+--      x = y
+--      call f() returns L2
+--   L2: ...y...y...
+--
+-- This makes x dead at the call to f(). If we ran stack layout now
+-- we would generate fewer stores and loads:
+--
+--   L1:
+--      x = y
+--      P64[Sp - 16] = L2
+--      P64[Sp - 8]  = y
+--      Sp = Sp - 16
+--      call f() returns L2
+--   L2:
+--      y = P64[Sp + 8]
+--      Sp = Sp + 16
+--      ...y...y...
+--
+-- But since we don't see any benefits from running sinking before stack
+-- layout, this situation probably doesn't arise too often in practice.
+--
+
+{- Note [inconsistent-pic-reg]
+
+On x86/Darwin, PIC is implemented by inserting a sequence like
+
+    call 1f
+ 1: popl %reg
+
+at the proc entry point, and then referring to labels as offsets from
+%reg.  If we don't split proc points, then we could have many entry
+points in a proc that would need this sequence, and each entry point
+would then get a different value for %reg.  If there are any join
+points, then at the join point we don't have a consistent value for
+%reg, so we don't know how to refer to labels.
+
+Hence, on x86/Darwin, we have to split proc points, and then each proc
+point will get its own PIC initialisation sequence.
+
+This isn't an issue on x86/ELF, where the sequence is
+
+    call 1f
+ 1: popl %reg
+    addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg
+
+so %reg always has a consistent value: the address of
+_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
+
+-}
+
+{- Note [unreachable blocks]
+
+The control-flow optimiser sometimes leaves unreachable blocks behind
+containing junk code.  These aren't necessarily a problem, but
+removing them is good because it might save time in the native code
+generator later.
+
+-}
+
+runUniqSM :: UniqSM a -> IO a
+runUniqSM m = do
+  us <- mkSplitUniqSupply 'u'
+  return (initUs_ us m)
+
+
+dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO ()
+dumpGraph dflags flag name g = do
+  when (gopt Opt_DoCmmLinting dflags) $ do_lint g
+  dumpWith dflags flag name FormatCMM (ppr g)
+ where
+  do_lint g = case cmmLintGraph dflags g of
+                 Just err -> do { fatalErrorMsg dflags err
+                                ; ghcExit dflags 1
+                                }
+                 Nothing  -> return ()
+
+dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO ()
+dumpWith dflags flag txt fmt sdoc = do
+  dumpIfSet_dyn dflags flag txt fmt sdoc
+  when (not (dopt flag dflags)) $
+    -- If `-ddump-cmm-verbose -ddump-to-file` is specified,
+    -- dump each Cmm pipeline stage output to a separate file. See #16930.
+    when (dopt Opt_D_dump_cmm_verbose dflags)
+      $ dumpAction dflags (mkDumpStyle dflags alwaysQualify)
+          (dumpOptionsFromFlag flag) txt fmt sdoc
+  dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc
diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs
new file mode 100644
index 0000000000..891cbd9c6d
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr.hs
@@ -0,0 +1,309 @@
+{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts, FlexibleInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of Cmm as (a superset of) C--
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+--
+-- This is where we walk over CmmNode emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs + +module GHC.Cmm.Ppr + ( module GHC.Cmm.Ppr.Decl + , module GHC.Cmm.Ppr.Expr + ) +where + +import GhcPrelude hiding (succ) + +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import DynFlags +import FastString +import Outputable +import GHC.Cmm.Ppr.Decl +import GHC.Cmm.Ppr.Expr +import Util + +import BasicTypes +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph + +------------------------------------------------- +-- Outputable instances + +instance Outputable CmmStackInfo where + ppr = pprStackInfo + +instance Outputable CmmTopInfo where + ppr = pprTopInfo + + +instance Outputable (CmmNode e x) where + ppr = pprNode + +instance Outputable Convention where + ppr = pprConvention + +instance Outputable ForeignConvention where + ppr = pprForeignConvention + +instance Outputable ForeignTarget where + ppr = pprForeignTarget + +instance Outputable CmmReturnInfo where + ppr = pprReturnInfo + +instance Outputable (Block CmmNode C C) where + ppr = pprBlock +instance Outputable (Block CmmNode C O) where + ppr = pprBlock +instance Outputable (Block CmmNode O C) where + ppr = pprBlock +instance Outputable (Block CmmNode O O) where + ppr = pprBlock + +instance Outputable (Graph CmmNode e x) where + ppr = pprGraph + +instance Outputable CmmGraph where + ppr = pprCmmGraph + +---------------------------------------------------------- +-- Outputting types Cmm contains + +pprStackInfo :: CmmStackInfo -> SDoc +pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) = + text "arg_space: " <> ppr arg_space <+> + text "updfr_space: " <> ppr updfr_space + +pprTopInfo :: CmmTopInfo -> SDoc +pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) = + vcat [text "info_tbls: " <> ppr info_tbl, + text "stack_info: " <> ppr stack_info] + +---------------------------------------------------------- +-- Outputting blocks and graphs + +pprBlock :: IndexedCO x SDoc SDoc ~ SDoc + => Block CmmNode e x -> IndexedCO e SDoc SDoc +pprBlock block + = foldBlockNodesB3 ( ($$) . ppr + , ($$) . (nest 4) . ppr + , ($$) . (nest 4) . ppr + ) + block + empty + +pprGraph :: Graph CmmNode e x -> SDoc +pprGraph GNil = empty +pprGraph (GUnit block) = ppr block +pprGraph (GMany entry body exit) + = text "{" + $$ nest 2 (pprMaybeO entry $$ (vcat $ map ppr $ bodyToBlockList body) $$ pprMaybeO exit) + $$ text "}" + where pprMaybeO :: Outputable (Block CmmNode e x) + => MaybeO ex (Block CmmNode e x) -> SDoc + pprMaybeO NothingO = empty + pprMaybeO (JustO block) = ppr block + +pprCmmGraph :: CmmGraph -> SDoc +pprCmmGraph g + = text "{" <> text "offset" + $$ nest 2 (vcat $ map ppr blocks) + $$ text "}" + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, + -- so pretty-printed Cmm will omit any unreachable blocks. This can + -- sometimes be confusing. 
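As a small usage sketch (hypothetical, not part of this patch): with the orphan instances above in scope, a whole graph renders like any other Outputable value, producing the "{ offset ... }" layout emitted by pprCmmGraph; cmmGraphToString is an illustrative name, and showSDoc comes from Outputable:

    -- Hypothetical helper: render a CmmGraph to a String through the
    -- Outputable instance defined above.
    cmmGraphToString :: DynFlags -> CmmGraph -> String
    cmmGraphToString dflags g = showSDoc dflags (ppr g)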
+ +--------------------------------------------- +-- Outputting CmmNode and types which it contains + +pprConvention :: Convention -> SDoc +pprConvention (NativeNodeCall {}) = text "<native-node-call-convention>" +pprConvention (NativeDirectCall {}) = text "<native-direct-call-convention>" +pprConvention (NativeReturn {}) = text "<native-ret-convention>" +pprConvention Slow = text "<slow-convention>" +pprConvention GC = text "<gc-convention>" + +pprForeignConvention :: ForeignConvention -> SDoc +pprForeignConvention (ForeignConvention c args res ret) = + doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret + +pprReturnInfo :: CmmReturnInfo -> SDoc +pprReturnInfo CmmMayReturn = empty +pprReturnInfo CmmNeverReturns = text "never returns" + +pprForeignTarget :: ForeignTarget -> SDoc +pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn + where + ppr_target :: CmmExpr -> SDoc + ppr_target t@(CmmLit _) = ppr t + ppr_target fn' = parens (ppr fn') + +pprForeignTarget (PrimTarget op) + -- HACK: We're just using a ForeignLabel to get this printed, the label + -- might not really be foreign. + = ppr + (CmmLabel (mkForeignLabel + (mkFastString (show op)) + Nothing ForeignLabelInThisPackage IsFunction)) + +pprNode :: CmmNode e x -> SDoc +pprNode node = pp_node <+> pp_debug + where + pp_node :: SDoc + pp_node = sdocWithDynFlags $ \dflags -> case node of + -- label: + CmmEntry id tscope -> lbl <> colon <+> + (sdocWithDynFlags $ \dflags -> + ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope)) + where + lbl = if gopt Opt_SuppressUniques dflags + then text "_lbl_" + else ppr id + + -- // text + CmmComment s -> text "//" <+> ftext s + + -- //tick bla<...> + CmmTick t -> ppUnless (gopt Opt_SuppressTicks dflags) $ + text "//tick" <+> ppr t + + -- unwind reg = expr; + CmmUnwind regs -> + text "unwind " + <> commafy (map (\(r,e) -> ppr r <+> char '=' <+> ppr e) regs) <> semi + + -- reg = expr; + CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = sdocWithDynFlags $ \dflags -> + ppr ( cmmExprType dflags expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + CmmUnsafeForeignCall target results args -> + hsep [ ppUnless (null results) $ + parens (commafy $ map ppr results) <+> equals, + text "call", + ppr target <> parens (commafy $ map ppr args) <> semi] + + -- goto label; + CmmBranch ident -> text "goto" <+> ppr ident <> semi + + -- if (expr) goto t; else goto f; + CmmCondBranch expr t f l -> + hsep [ text "if" + , parens(ppr expr) + , case l of + Nothing -> empty + Just b -> parens (text "likely:" <+> ppr b) + , text "goto" + , ppr t <> semi + , text "else goto" + , ppr f <> semi + ] + + CmmSwitch expr ids -> + hang (hsep [ text "switch" + , range + , if isTrivialCmmExpr expr + then ppr expr + else parens (ppr expr) + , text "{" + ]) + 4 (vcat (map ppCase cases) $$ def) $$ rbrace + where + (cases, mbdef) = switchTargetsFallThrough ids + ppCase (is,l) = hsep + [ text "case" + , commafy $ map integer is + , text ": goto" + , ppr l <> semi + ] + def | Just l <- mbdef = hsep + [ text "default:" + , braces (text "goto" <+> ppr l <> semi) + ] + | otherwise = empty + + range = brackets $ hsep [integer lo, text "..", integer hi] + where (lo,hi) = switchTargetsRange ids + + CmmCall tgt k regs out res updfr_off -> + hcat [ text "call", space + , pprFun tgt, parens (interpp'SP regs), space + , 
returns <+>
+                 text "args: " <> ppr out <> comma <+>
+                 text "res: " <> ppr res <> comma <+>
+                 text "upd: " <> ppr updfr_off
+             , semi ]
+          where pprFun f@(CmmLit _) = ppr f
+                pprFun f = parens (ppr f)
+
+                returns
+                  | Just r <- k = text "returns to" <+> ppr r <> comma
+                  | otherwise   = empty
+
+      CmmForeignCall {tgt=t, res=rs, args=as, succ=s, ret_args=a, ret_off=u, intrbl=i} ->
+          hcat $ (if i then [text "interruptible", space] else []) ++
+               [ text "foreign call", space
+               , ppr t, text "(...)", space
+               , text "returns to" <+> ppr s
+                    <+> text "args:" <+> parens (ppr as)
+                    <+> text "ress:" <+> parens (ppr rs)
+               , text "ret_args:" <+> ppr a
+               , text "ret_off:" <+> ppr u
+               , semi ]
+
+    pp_debug :: SDoc
+    pp_debug =
+      if not debugIsOn then empty
+      else case node of
+             CmmEntry {}             -> empty -- Looks terrible with text "  // CmmEntry"
+             CmmComment {}           -> empty -- Looks also terrible with text "  // CmmComment"
+             CmmTick {}              -> empty
+             CmmUnwind {}            -> text "  // CmmUnwind"
+             CmmAssign {}            -> text "  // CmmAssign"
+             CmmStore {}             -> text "  // CmmStore"
+             CmmUnsafeForeignCall {} -> text "  // CmmUnsafeForeignCall"
+             CmmBranch {}            -> text "  // CmmBranch"
+             CmmCondBranch {}        -> text "  // CmmCondBranch"
+             CmmSwitch {}            -> text "  // CmmSwitch"
+             CmmCall {}              -> text "  // CmmCall"
+             CmmForeignCall {}       -> text "  // CmmForeignCall"
+
+    commafy :: [SDoc] -> SDoc
+    commafy xs = hsep $ punctuate comma xs
diff --git a/compiler/GHC/Cmm/Ppr/Decl.hs b/compiler/GHC/Cmm/Ppr/Decl.hs
new file mode 100644
index 0000000000..2544e6a0d3
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Decl.hs
@@ -0,0 +1,169 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Decl + ( writeCmms, pprCmms, pprCmmGroup, pprSection, pprStatic + ) +where + +import GhcPrelude + +import GHC.Cmm.Ppr.Expr +import GHC.Cmm + +import DynFlags +import Outputable +import FastString + +import Data.List +import System.IO + +import qualified Data.ByteString as BS + + +pprCmms :: (Outputable info, Outputable g) + => [GenCmmGroup CmmStatics info g] -> SDoc +pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) + where + separator = space $$ text "-------------------" $$ space + +writeCmms :: (Outputable info, Outputable g) + => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO () +writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms) + +----------------------------------------------------------------------------- + +instance (Outputable d, Outputable info, Outputable i) + => Outputable (GenCmmDecl d info i) where + ppr t = pprTop t + +instance Outputable CmmStatics where + ppr = pprStatics + +instance Outputable CmmStatic where + ppr = pprStatic + +instance Outputable CmmInfoTable where + ppr = pprInfoTable + + +----------------------------------------------------------------------------- + +pprCmmGroup :: (Outputable d, Outputable info, Outputable g) + => GenCmmGroup d info g -> SDoc +pprCmmGroup tops + = vcat $ intersperse blankLine $ map pprTop tops + +-- -------------------------------------------------------------------------- +-- Top level `procedure' blocks. +-- +pprTop :: (Outputable d, Outputable info, Outputable i) + => GenCmmDecl d info i -> SDoc + +pprTop (CmmProc info lbl live graph) + + = vcat [ ppr lbl <> lparen <> rparen <+> lbrace <+> text "// " <+> ppr live + , nest 8 $ lbrace <+> ppr info $$ rbrace + , nest 4 $ ppr graph + , rbrace ] + +-- -------------------------------------------------------------------------- +-- We follow [1], 4.5 +-- +-- section "data" { ... } +-- +pprTop (CmmData section ds) = + (hang (pprSection section <+> lbrace) 4 (ppr ds)) + $$ rbrace + +-- -------------------------------------------------------------------------- +-- Info tables. + +pprInfoTable :: CmmInfoTable -> SDoc +pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep + , cit_prof = prof_info + , cit_srt = srt }) + = vcat [ text "label: " <> ppr lbl + , text "rep: " <> ppr rep + , case prof_info of + NoProfilingInfo -> empty + ProfilingInfo ct cd -> + vcat [ text "type: " <> text (show (BS.unpack ct)) + , text "desc: " <> text (show (BS.unpack cd)) ] + , text "srt: " <> ppr srt ] + +instance Outputable ForeignHint where + ppr NoHint = empty + ppr SignedHint = quotes(text "signed") +-- ppr AddrHint = quotes(text "address") +-- Temp Jan08 + ppr AddrHint = (text "PtrHint") + +-- -------------------------------------------------------------------------- +-- Static data. 
+-- Strings are printed as C strings, and we print them as I8[],
+-- following C--
+--
+pprStatics :: CmmStatics -> SDoc
+pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
+
+pprStatic :: CmmStatic -> SDoc
+pprStatic s = case s of
+    CmmStaticLit lit   -> nest 4 $ text "const" <+> pprLit lit <> semi
+    CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
+    CmmString s'       -> nest 4 $ text "I8[]" <+> text (show s')
+
+-- --------------------------------------------------------------------------
+-- data sections
+--
+pprSection :: Section -> SDoc
+pprSection (Section t suffix) =
+  section <+> doubleQuotes (pprSectionType t <+> char '.' <+> ppr suffix)
+  where
+    section = text "section"
+
+pprSectionType :: SectionType -> SDoc
+pprSectionType s = doubleQuotes (ptext t)
+ where
+  t = case s of
+    Text                    -> sLit "text"
+    Data                    -> sLit "data"
+    ReadOnlyData            -> sLit "readonly"
+    ReadOnlyData16          -> sLit "readonly16"
+    RelocatableReadOnlyData -> sLit "relreadonly"
+    UninitialisedData       -> sLit "uninitialised"
+    CString                 -> sLit "cstring"
+    OtherSection s'         -> sLit s' -- Not actually a literal though.
diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs
new file mode 100644
index 0000000000..53a335e561
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Expr.hs
@@ -0,0 +1,286 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Expr + ( pprExpr, pprLit + ) +where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Outputable +import DynFlags + +import Data.Maybe +import Numeric ( fromRat ) + +----------------------------------------------------------------------------- + +instance Outputable CmmExpr where + ppr e = pprExpr e + +instance Outputable CmmReg where + ppr e = pprReg e + +instance Outputable CmmLit where + ppr l = pprLit l + +instance Outputable LocalReg where + ppr e = pprLocalReg e + +instance Outputable Area where + ppr e = pprArea e + +instance Outputable GlobalReg where + ppr e = pprGlobalReg e + +-- -------------------------------------------------------------------------- +-- Expressions +-- + +pprExpr :: CmmExpr -> SDoc +pprExpr e + = sdocWithDynFlags $ \dflags -> + case e of + CmmRegOff reg i -> + pprExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) + where rep = typeWidth (cmmRegType dflags reg) + CmmLit lit -> pprLit lit + _other -> pprExpr1 e + +-- Here's the precedence table from GHC.Cmm.Parser: +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +-- %left '|' +-- %left '^' +-- %left '&' +-- %left '>>' '<<' +-- %left '-' '+' +-- %left '/' '*' '%' +-- %right '~' + +-- We just cope with the common operators for now, the rest will get +-- a default conservative behaviour. + +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +pprExpr1, pprExpr7, pprExpr8 :: CmmExpr -> SDoc +pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op + = pprExpr7 x <+> doc <+> pprExpr7 y +pprExpr1 e = pprExpr7 e + +infixMachOp1, infixMachOp7, infixMachOp8 :: MachOp -> Maybe SDoc + +infixMachOp1 (MO_Eq _) = Just (text "==") +infixMachOp1 (MO_Ne _) = Just (text "!=") +infixMachOp1 (MO_Shl _) = Just (text "<<") +infixMachOp1 (MO_U_Shr _) = Just (text ">>") +infixMachOp1 (MO_U_Ge _) = Just (text ">=") +infixMachOp1 (MO_U_Le _) = Just (text "<=") +infixMachOp1 (MO_U_Gt _) = Just (char '>') +infixMachOp1 (MO_U_Lt _) = Just (char '<') +infixMachOp1 _ = Nothing + +-- %left '-' '+' +pprExpr7 (CmmMachOp (MO_Add rep1) [x, CmmLit (CmmInt i rep2)]) | i < 0 + = pprExpr7 (CmmMachOp (MO_Sub rep1) [x, CmmLit (CmmInt (negate i) rep2)]) +pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op + = pprExpr7 x <+> doc <+> pprExpr8 y +pprExpr7 e = pprExpr8 e + +infixMachOp7 (MO_Add _) = Just (char '+') +infixMachOp7 (MO_Sub _) = Just (char '-') +infixMachOp7 _ = Nothing + +-- %left '/' '*' '%' +pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op + = pprExpr8 x <+> doc <+> pprExpr9 y +pprExpr8 e = pprExpr9 e + +infixMachOp8 (MO_U_Quot _) = Just (char '/') +infixMachOp8 (MO_Mul _) = Just (char '*') +infixMachOp8 (MO_U_Rem _) = Just (char '%') +infixMachOp8 _ = Nothing + +pprExpr9 :: CmmExpr -> SDoc +pprExpr9 e = + case e of + CmmLit lit -> pprLit1 lit + CmmLoad expr rep -> ppr rep <> brackets (ppr expr) + CmmReg reg -> ppr reg + CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) + CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off) + CmmMachOp mop args -> genMachOp mop args + +genMachOp :: MachOp -> [CmmExpr] -> SDoc +genMachOp mop args + | Just doc <- infixMachOp mop = case args of + -- dyadic + [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y + + -- unary + [x] -> doc <> pprExpr9 x + + _ -> pprTrace "GHC.Cmm.Ppr.Expr.genMachOp: machop with strange number of args" + (pprMachOp mop <+> + parens (hcat $ punctuate comma (map pprExpr args))) + empty + + | isJust 
(infixMachOp1 mop) + || isJust (infixMachOp7 mop) + || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args)) + + | otherwise = char '%' <> ppr_op <> parens (commafy (map pprExpr args)) + where ppr_op = text (map (\c -> if c == ' ' then '_' else c) + (show mop)) + -- replace spaces in (show mop) with underscores, + +-- +-- Unsigned ops on the word size of the machine get nice symbols. +-- All else get dumped in their ugly format. +-- +infixMachOp :: MachOp -> Maybe SDoc +infixMachOp mop + = case mop of + MO_And _ -> Just $ char '&' + MO_Or _ -> Just $ char '|' + MO_Xor _ -> Just $ char '^' + MO_Not _ -> Just $ char '~' + MO_S_Neg _ -> Just $ char '-' -- there is no unsigned neg :) + _ -> Nothing + +-- -------------------------------------------------------------------------- +-- Literals. +-- To minimise line noise we adopt the convention that if the literal +-- has the natural machine word size, we do not append the type +-- +pprLit :: CmmLit -> SDoc +pprLit lit = sdocWithDynFlags $ \dflags -> + case lit of + CmmInt i rep -> + hcat [ (if i < 0 then parens else id)(integer i) + , ppUnless (rep == wordWidth dflags) $ + space <> dcolon <+> ppr rep ] + + CmmFloat f rep -> hsep [ double (fromRat f), dcolon, ppr rep ] + CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>' + CmmLabel clbl -> ppr clbl + CmmLabelOff clbl i -> ppr clbl <> ppr_offset i + CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-' + <> ppr clbl2 <> ppr_offset i + CmmBlock id -> ppr id + CmmHighStackMark -> text "<highSp>" + +pprLit1 :: CmmLit -> SDoc +pprLit1 lit@(CmmLabelOff {}) = parens (pprLit lit) +pprLit1 lit = pprLit lit + +ppr_offset :: Int -> SDoc +ppr_offset i + | i==0 = empty + | i>=0 = char '+' <> int i + | otherwise = char '-' <> int (-i) + +-- -------------------------------------------------------------------------- +-- Registers, whether local (temps) or global +-- +pprReg :: CmmReg -> SDoc +pprReg r + = case r of + CmmLocal local -> pprLocalReg local + CmmGlobal global -> pprGlobalReg global + +-- +-- We only print the type of the local reg if it isn't wordRep +-- +pprLocalReg :: LocalReg -> SDoc +pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags -> +-- = ppr rep <> char '_' <> ppr uniq +-- Temp Jan08 + char '_' <> pprUnique dflags uniq <> + (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh + then dcolon <> ptr <> ppr rep + else dcolon <> ptr <> ppr rep) + where + pprUnique dflags unique = + if gopt Opt_SuppressUniques dflags + then text "_locVar_" + else ppr unique + ptr = empty + --if isGcPtrType rep + -- then doubleQuotes (text "ptr") + -- else empty + +-- Stack areas +pprArea :: Area -> SDoc +pprArea Old = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] + +-- needs to be kept in syn with CmmExpr.hs.GlobalReg +-- +pprGlobalReg :: GlobalReg -> SDoc +pprGlobalReg gr + = case gr of + VanillaReg n _ -> char 'R' <> int n +-- Temp Jan08 +-- VanillaReg n VNonGcPtr -> char 'R' <> int n +-- VanillaReg n VGcPtr -> char 'P' <> int n + FloatReg n -> char 'F' <> int n + DoubleReg n -> char 'D' <> int n + LongReg n -> char 'L' <> int n + XmmReg n -> text "XMM" <> int n + YmmReg n -> text "YMM" <> int n + ZmmReg n -> text "ZMM" <> int n + Sp -> text "Sp" + SpLim -> text "SpLim" + Hp -> text "Hp" + HpLim -> text "HpLim" + MachSp -> text "MachSp" + UnwindReturnReg-> text "UnwindReturnReg" + CCCS -> text "CCCS" + CurrentTSO -> text "CurrentTSO" + CurrentNursery -> text "CurrentNursery" + HpAlloc -> text "HpAlloc" + EagerBlackholeInfo -> text 
"stg_EAGER_BLACKHOLE_info" + GCEnter1 -> text "stg_gc_enter_1" + GCFun -> text "stg_gc_fun" + BaseReg -> text "BaseReg" + PicBaseReg -> text "PicBaseReg" + +----------------------------------------------------------------------------- + +commafy :: [SDoc] -> SDoc +commafy xs = fsep $ punctuate comma xs diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs new file mode 100644 index 0000000000..00a7a73d89 --- /dev/null +++ b/compiler/GHC/Cmm/ProcPoint.hs @@ -0,0 +1,496 @@ +{-# LANGUAGE GADTs, DisambiguateRecordFields, BangPatterns #-} + +module GHC.Cmm.ProcPoint + ( ProcPointSet, Status(..) + , callProcPoints, minimalProcPointSet + , splitAtProcPoints, procPointAnalysis + , attachContInfoTables + ) +where + +import GhcPrelude hiding (last, unzip, succ, zip) + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Ppr () -- For Outputable instances +import GHC.Cmm.Utils +import GHC.Cmm.Info +import GHC.Cmm.Liveness +import GHC.Cmm.Switch +import Data.List (sortBy) +import Maybes +import Control.Monad +import Outputable +import GHC.Platform +import UniqSupply +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +-- Compute a minimal set of proc points for a control-flow graph. + +-- Determine a protocol for each proc point (which live variables will +-- be passed as arguments and which will be on the stack). + +{- +A proc point is a basic block that, after CPS transformation, will +start a new function. The entry block of the original function is a +proc point, as is the continuation of each function call. +A third kind of proc point arises if we want to avoid copying code. +Suppose we have code like the following: + + f() { + if (...) { ..1..; call foo(); ..2..} + else { ..3..; call bar(); ..4..} + x = y + z; + return x; + } + +The statement 'x = y + z' can be reached from two different proc +points: the continuations of foo() and bar(). We would prefer not to +put a copy in each continuation; instead we would like 'x = y + z' to +be the start of a new procedure to which the continuations can jump: + + f_cps () { + if (...) { ..1..; push k_foo; jump foo_cps(); } + else { ..3..; push k_bar; jump bar_cps(); } + } + k_foo() { ..2..; jump k_join(y, z); } + k_bar() { ..4..; jump k_join(y, z); } + k_join(y, z) { x = y + z; return x; } + +You might think then that a criterion to make a node a proc point is +that it is directly reached by two distinct proc points. (Note +[Direct reachability].) But this criterion is a bit too simple; for +example, 'return x' is also reached by two proc points, yet there is +no point in pulling it out of k_join. A good criterion would be to +say that a node should be made a proc point if it is reached by a set +of proc points that is different than its immediate dominator. NR +believes this criterion can be shown to produce a minimum set of proc +points, and given a dominator tree, the proc points can be chosen in +time linear in the number of blocks. Lacking a dominator analysis, +however, we turn instead to an iterative solution, starting with no +proc points and adding them according to these rules: + + 1. The entry block is a proc point. + 2. The continuation of a call is a proc point. + 3. A node is a proc point if it is directly reached by more proc + points than one of its predecessors. 
+ +Because we don't understand the problem very well, we apply rule 3 at +most once per iteration, then recompute the reachability information. +(See Note [No simple dataflow].) The choice of the new proc point is +arbitrary, and I don't know if the choice affects the final solution, +so I don't know if the number of proc points chosen is the +minimum---but the set will be minimal. + + + +Note [Proc-point analysis] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Given a specified set of proc-points (a set of block-ids), "proc-point +analysis" figures out, for every block, which proc-point it belongs to. +All the blocks belonging to proc-point P will constitute a single +top-level C procedure. + +A non-proc-point block B "belongs to" a proc-point P iff B is +reachable from P without going through another proc-point. + +Invariant: a block B should belong to at most one proc-point; if it +belongs to two, that's a bug. + +Note [Non-existing proc-points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On some architectures it might happen that the list of proc-points +computed before stack layout pass will be invalidated by the stack +layout. This will happen if stack layout removes from the graph +blocks that were determined to be proc-points. Later on in the pipeline +we use list of proc-points to perform [Proc-point analysis], but +if a proc-point does not exist anymore then we will get compiler panic. +See #8205. +-} + +type ProcPointSet = LabelSet + +data Status + = ReachedBy ProcPointSet -- set of proc points that directly reach the block + | ProcPoint -- this block is itself a proc point + +instance Outputable Status where + ppr (ReachedBy ps) + | setNull ps = text "<not-reached>" + | otherwise = text "reached by" <+> + (hsep $ punctuate comma $ map ppr $ setElems ps) + ppr ProcPoint = text "<procpt>" + +-------------------------------------------------- +-- Proc point analysis + +-- Once you know what the proc-points are, figure out +-- what proc-points each block is reachable from +-- See Note [Proc-point analysis] +procPointAnalysis :: ProcPointSet -> CmmGraph -> LabelMap Status +procPointAnalysis procPoints cmmGraph@(CmmGraph {g_graph = graph}) = + analyzeCmmFwd procPointLattice procPointTransfer cmmGraph initProcPoints + where + initProcPoints = + mkFactBase + procPointLattice + [ (id, ProcPoint) + | id <- setElems procPoints + -- See Note [Non-existing proc-points] + , id `setMember` labelsInGraph + ] + labelsInGraph = labelsDefined graph + +procPointTransfer :: TransferFun Status +procPointTransfer block facts = + let label = entryLabel block + !fact = case getFact procPointLattice label facts of + ProcPoint -> ReachedBy $! setSingleton label + f -> f + result = map (\id -> (id, fact)) (successors block) + in mkFactBase procPointLattice result + +procPointLattice :: DataflowLattice Status +procPointLattice = DataflowLattice unreached add_to + where + unreached = ReachedBy setEmpty + add_to (OldFact ProcPoint) _ = NotChanged ProcPoint + add_to _ (NewFact ProcPoint) = Changed ProcPoint -- because of previous case + add_to (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) + | setSize union > setSize p = Changed (ReachedBy union) + | otherwise = NotChanged (ReachedBy p) + where + union = setUnion p' p + +---------------------------------------------------------------------- + +-- It is worth distinguishing two sets of proc points: those that are +-- induced by calls in the original graph and those that are +-- introduced because they're reachable from multiple proc points. 
+-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. +callProcPoints :: CmmGraph -> ProcPointSet +callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g + where add :: LabelSet -> CmmBlock -> LabelSet + add set b = case lastNode b of + CmmCall {cml_cont = Just k} -> setInsert k set + CmmForeignCall {succ=k} -> setInsert k set + _ -> set + +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph + -> UniqSM ProcPointSet +-- Given the set of successors of calls (which must be proc-points) +-- figure out the minimal set of necessary proc-points +minimalProcPointSet platform callProcPoints g + = extendPPSet platform g (revPostorder g) callProcPoints + +extendPPSet + :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet +extendPPSet platform g blocks procPoints = + let env = procPointAnalysis procPoints g + add pps block = let id = entryLabel block + in case mapLookup id env of + Just ProcPoint -> setInsert id pps + _ -> pps + procPoints' = foldlGraphBlocks add setEmpty g + newPoints = mapMaybe ppSuccessor blocks + newPoint = listToMaybe newPoints + ppSuccessor b = + let nreached id = case mapLookup id env `orElse` + pprPanic "no ppt" (ppr id <+> ppr b) of + ProcPoint -> 1 + ReachedBy ps -> setSize ps + block_procpoints = nreached (entryLabel b) + -- | Looking for a successor of b that is reached by + -- more proc points than b and is not already a proc + -- point. If found, it can become a proc point. + newId succ_id = not (setMember succ_id procPoints') && + nreached succ_id > block_procpoints + in listToMaybe $ filter newId $ successors b + + in case newPoint of + Just id -> + if setMember id procPoints' + then panic "added old proc pt" + else extendPPSet platform g blocks (setInsert id procPoints') + Nothing -> return procPoints' + + +-- At this point, we have found a set of procpoints, each of which should be +-- the entry point of a procedure. +-- Now, we create the procedure for each proc point, +-- which requires that we: +-- 1. build a map from proc points to the blocks reachable from the proc point +-- 2. turn each branch to a proc point into a jump +-- 3. turn calls and returns into jumps +-- 4. build info tables for the procedures -- and update the info table for +-- the SRTs in the entry procedure as well. +-- Input invariant: A block should only be reachable from a single ProcPoint. +-- ToDo: use the _ret naming convention that the old code generator +-- used. 
-- EZY +splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> LabelMap Status -> + CmmDecl -> UniqSM [CmmDecl] +splitAtProcPoints dflags entry_label callPPs procPoints procMap + (CmmProc (TopInfo {info_tbls = info_tbls}) + top_l _ g@(CmmGraph {g_entry=entry})) = + do -- Build a map from procpoints to the blocks they reach + let add_block + :: LabelMap (LabelMap CmmBlock) + -> CmmBlock + -> LabelMap (LabelMap CmmBlock) + add_block graphEnv b = + case mapLookup bid procMap of + Just ProcPoint -> add graphEnv bid bid b + Just (ReachedBy set) -> + case setElems set of + [] -> graphEnv + [id] -> add graphEnv id bid b + _ -> panic "Each block should be reachable from only one ProcPoint" + Nothing -> graphEnv + where bid = entryLabel b + add graphEnv procId bid b = mapInsert procId graph' graphEnv + where graph = mapLookup procId graphEnv `orElse` mapEmpty + graph' = mapInsert bid b graph + + let liveness = cmmGlobalLiveness dflags g + let ppLiveness pp = filter isArgReg $ + regSetToList $ + expectJust "ppLiveness" $ mapLookup pp liveness + + graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g + + -- Build a map from proc point BlockId to pairs of: + -- * Labels for their new procedures + -- * Labels for the info tables of their new procedures (only if + -- the proc point is a callPP) + -- Due to common blockification, we may overestimate the set of procpoints. + let add_label map pp = mapInsert pp lbls map + where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls)) + | otherwise = (block_lbl, guard (setMember pp callPPs) >> + Just info_table_lbl) + where block_lbl = blockLbl pp + info_table_lbl = infoTblLbl pp + + procLabels :: LabelMap (CLabel, Maybe CLabel) + procLabels = foldl' add_label mapEmpty + (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) + + -- In each new graph, add blocks jumping off to the new procedures, + -- and replace branches to procpoints with branches to the jump-off blocks + let add_jump_block + :: (LabelMap Label, [CmmBlock]) + -> (Label, CLabel) + -> UniqSM (LabelMap Label, [CmmBlock]) + add_jump_block (env, bs) (pp, l) = + do bid <- liftM mkBlockId getUniqueM + let b = blockJoin (CmmEntry bid GlobalScope) emptyBlock jump + live = ppLiveness pp + jump = CmmCall (CmmLit (CmmLabel l)) Nothing live 0 0 0 + return (mapInsert pp bid env, b : bs) + + add_jumps + :: LabelMap CmmGraph + -> (Label, LabelMap CmmBlock) + -> UniqSM (LabelMap CmmGraph) + add_jumps newGraphEnv (ppId, blockEnv) = + do let needed_jumps = -- find which procpoints we currently branch to + mapFoldr add_if_branch_to_pp [] blockEnv + add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] + add_if_branch_to_pp block rst = + case lastNode block of + CmmBranch id -> add_if_pp id rst + CmmCondBranch _ ti fi _ -> add_if_pp ti (add_if_pp fi rst) + CmmSwitch _ ids -> foldr add_if_pp rst $ switchTargetsToList ids + _ -> rst + + -- when jumping to a PP that has an info table, if + -- tablesNextToCode is off we must jump to the entry + -- label instead. 
+ jump_label (Just info_lbl) _ + | tablesNextToCode dflags = info_lbl + | otherwise = toEntryLbl info_lbl + jump_label Nothing block_lbl = block_lbl + + add_if_pp id rst = case mapLookup id procLabels of + Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst + Nothing -> rst + (jumpEnv, jumpBlocks) <- + foldM add_jump_block (mapEmpty, []) needed_jumps + -- update the entry block + let b = expectJust "block in env" $ mapLookup ppId blockEnv + blockEnv' = mapInsert ppId b blockEnv + -- replace branches to procpoints with branches to jumps + blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' + -- add the jump blocks to the graph + blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks + let g' = ofBlockMap ppId blockEnv''' + -- pprTrace "g' pre jumps" (ppr g') $ do + return (mapInsert ppId g' newGraphEnv) + + graphEnv <- foldM add_jumps mapEmpty $ mapToList graphEnv + + let to_proc (bid, g) + | bid == entry + = CmmProc (TopInfo {info_tbls = info_tbls, + stack_info = stack_info}) + top_l live g' + | otherwise + = case expectJust "pp label" $ mapLookup bid procLabels of + (lbl, Just info_lbl) + -> CmmProc (TopInfo { info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl) + , stack_info=stack_info}) + lbl live g' + (lbl, Nothing) + -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info}) + lbl live g' + where + g' = replacePPIds g + live = ppLiveness (g_entry g') + stack_info = StackInfo { arg_space = 0 + , updfr_space = Nothing + , do_layout = True } + -- cannot use panic, this is printed by -ddump-cmm + + -- References to procpoint IDs can now be replaced with the + -- infotable's label + replacePPIds g = {-# SCC "replacePPIds" #-} + mapGraphNodes (id, mapExp repl, mapExp repl) g + where repl e@(CmmLit (CmmBlock bid)) = + case mapLookup bid procLabels of + Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl) + _ -> e + repl e = e + + -- The C back end expects to see return continuations before the + -- call sites. Here, we sort them in reverse order -- it gets + -- reversed later. + let (_, block_order) = + foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) + (revPostorder g) + add_block_num (i, map) block = + (i + 1, mapInsert (entryLabel block) i map) + sort_fn (bid, _) (bid', _) = + compare (expectJust "block_order" $ mapLookup bid block_order) + (expectJust "block_order" $ mapLookup bid' block_order) + procs <- return $ map to_proc $ sortBy sort_fn $ mapToList graphEnv + return -- pprTrace "procLabels" (ppr procLabels) + -- pprTrace "splitting graphs" (ppr procs) + procs +splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from GHC.Cmm.ProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: LabelMap BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg + = {-# SCC "replaceBranches" #-} + ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg + where + f block = replaceLastNode block $ last (lastNode block) + + last :: CmmNode O C -> CmmNode O C + last (CmmBranch id) = CmmBranch (lookup id) + last (CmmCondBranch e ti fi l) = CmmCondBranch e (lookup ti) (lookup fi) l + last (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets lookup ids) + last l@(CmmCall {}) = l { cml_cont = Nothing } + -- NB. remove the continuation of a CmmCall, since this + -- label will now be in a different CmmProc. Not only + -- is this tidier, it stops CmmLint from complaining. 
+ last l@(CmmForeignCall {}) = l + lookup id = fmap lookup (mapLookup id env) `orElse` id + -- XXX: this is a recursive lookup, it follows chains + -- until the lookup returns Nothing, at which point we + -- return the last BlockId + +-- -------------------------------------------------------------- +-- Not splitting proc points: add info tables for continuations + +attachContInfoTables :: ProcPointSet -> CmmDecl -> CmmDecl +attachContInfoTables call_proc_points (CmmProc top_info top_l live g) + = CmmProc top_info{info_tbls = info_tbls'} top_l live g + where + info_tbls' = mapUnion (info_tbls top_info) $ + mapFromList [ (l, mkEmptyContInfoTable (infoTblLbl l)) + | l <- setElems call_proc_points + , l /= g_entry g ] +attachContInfoTables _ other_decl + = other_decl + +---------------------------------------------------------------- + +{- +Note [Direct reachability] + +Block B is directly reachable from proc point P iff control can flow +from P to B without passing through an intervening proc point. +-} + +---------------------------------------------------------------- + +{- +Note [No simple dataflow] + +Sadly, it seems impossible to compute the proc points using a single +dataflow pass. One might attempt to use this simple lattice: + + data Location = Unknown + | InProc BlockId -- node is in procedure headed by the named proc point + | ProcPoint -- node is itself a proc point + +At a join, a node in two different blocks becomes a proc point. +The difficulty is that the change of information during iterative +computation may promote a node prematurely. Here's a program that +illustrates the difficulty: + + f () { + entry: + .... + L1: + if (...) { ... } + else { ... } + + L2: if (...) { g(); goto L1; } + return x + y; + } + +The only proc-point needed (besides the entry) is L1. But in an +iterative analysis, consider what happens to L2. On the first pass +through, it rises from Unknown to 'InProc entry', but when L1 is +promoted to a proc point (because it's the successor of g()), L1's +successors will be promoted to 'InProc L1'. The problem hits when the +new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'. +The join operation makes it a proc point when in fact it needn't be, +because its immediate dominator L1 is already a proc point and there +are no other proc points that directly reach L2. +-} + + + +{- Note [Separate Adams optimization] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It may be worthwhile to attempt the Adams optimization by rewriting +the graph before the assignment of proc-point protocols. Here are a +couple of rules: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: // no CopyIn node here L: CopyIn c ress; + + +And when c == c' and ress == ress', this also: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: CopyIn c' ress' L: CopyIn c' ress' ; + +In both cases the goal is to eliminate k. 
+-} diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs new file mode 100644 index 0000000000..8e231df300 --- /dev/null +++ b/compiler/GHC/Cmm/Sink.hs @@ -0,0 +1,854 @@ +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Sink ( + cmmSink + ) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Opt +import GHC.Cmm.Liveness +import GHC.Cmm.Utils +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Platform.Regs +import GHC.Platform (isARM, platformArch) + +import DynFlags +import Unique +import UniqFM + +import qualified Data.IntSet as IntSet +import Data.List (partition) +import qualified Data.Set as Set +import Data.Maybe + +-- Compact sets for membership tests of local variables. + +type LRegSet = IntSet.IntSet + +emptyLRegSet :: LRegSet +emptyLRegSet = IntSet.empty + +nullLRegSet :: LRegSet -> Bool +nullLRegSet = IntSet.null + +insertLRegSet :: LocalReg -> LRegSet -> LRegSet +insertLRegSet l = IntSet.insert (getKey (getUnique l)) + +elemLRegSet :: LocalReg -> LRegSet -> Bool +elemLRegSet l = IntSet.member (getKey (getUnique l)) + +-- ----------------------------------------------------------------------------- +-- Sinking and inlining + +-- This is an optimisation pass that +-- (a) moves assignments closer to their uses, to reduce register pressure +-- (b) pushes assignments into a single branch of a conditional if possible +-- (c) inlines assignments to registers that are mentioned only once +-- (d) discards dead assignments +-- +-- This tightens up lots of register-heavy code. It is particularly +-- helpful in the Cmm generated by the Stg->Cmm code generator, in +-- which every function starts with a copyIn sequence like: +-- +-- x1 = R1 +-- x2 = Sp[8] +-- x3 = Sp[16] +-- if (Sp - 32 < SpLim) then L1 else L2 +-- +-- we really want to push the x1..x3 assignments into the L2 branch. +-- +-- Algorithm: +-- +-- * Start by doing liveness analysis. +-- +-- * Keep a list of assignments A; earlier ones may refer to later ones. +-- Currently we only sink assignments to local registers, because we don't +-- have liveness information about global registers. +-- +-- * Walk forwards through the graph, look at each node N: +-- +-- * If it is a dead assignment, i.e. assignment to a register that is +-- not used after N, discard it. +-- +-- * Try to inline based on current list of assignments +-- * If any assignments in A (1) occur only once in N, and (2) are +-- not live after N, inline the assignment and remove it +-- from A. +-- +-- * If an assignment in A is cheap (RHS is local register), then +-- inline the assignment and keep it in A in case it is used afterwards. +-- +-- * Otherwise don't inline. +-- +-- * If N is assignment to a local register pick up the assignment +-- and add it to A. +-- +-- * If N is not an assignment to a local register: +-- * remove any assignments from A that conflict with N, and +-- place them before N in the current block. We call this +-- "dropping" the assignments. +-- +-- * An assignment conflicts with N if it: +-- - assigns to a register mentioned in N +-- - mentions a register assigned by N +-- - reads from memory written by N +-- * do this recursively, dropping dependent assignments +-- +-- * At an exit node: +-- * drop any assignments that are live on more than one successor +-- and are not trivial +-- * if any successor has more than one predecessor (a join-point), +-- drop everything live in that successor. 
Since we only propagate +-- assignments that are not dead at the successor, we will therefore +-- eliminate all assignments dead at this point. Thus analysis of a +-- join-point will always begin with an empty list of assignments. +-- +-- +-- As a result of above algorithm, sinking deletes some dead assignments +-- (transitively, even). This isn't as good as removeDeadAssignments, +-- but it's much cheaper. + +-- ----------------------------------------------------------------------------- +-- things that we aren't optimising very well yet. +-- +-- ----------- +-- (1) From GHC's FastString.hashStr: +-- +-- s2ay: +-- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp; +-- c2gn: +-- R1 = _s2au::I64; +-- call (I64[Sp])(R1) args: 8, res: 0, upd: 8; +-- c2gp: +-- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128, +-- 4091); +-- _s2an::I64 = _s2an::I64 + 1; +-- _s2au::I64 = _s2cO::I64; +-- goto s2ay; +-- +-- a nice loop, but we didn't eliminate the silly assignment at the end. +-- See Note [dependent assignments], which would probably fix this. +-- This is #8336. +-- +-- ----------- +-- (2) From stg_atomically_frame in PrimOps.cmm +-- +-- We have a diamond control flow: +-- +-- x = ... +-- | +-- / \ +-- A B +-- \ / +-- | +-- use of x +-- +-- Now x won't be sunk down to its use, because we won't push it into +-- both branches of the conditional. We certainly do have to check +-- that we can sink it past all the code in both A and B, but having +-- discovered that, we could sink it to its use. +-- + +-- ----------------------------------------------------------------------------- + +type Assignment = (LocalReg, CmmExpr, AbsMem) + -- Assignment caches AbsMem, an abstraction of the memory read by + -- the RHS of the assignment. + +type Assignments = [Assignment] + -- A sequence of assignments; kept in *reverse* order + -- So the list [ x=e1, y=e2 ] means the sequence of assignments + -- y = e2 + -- x = e1 + +cmmSink :: DynFlags -> CmmGraph -> CmmGraph +cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks + where + liveness = cmmLocalLiveness dflags graph + getLive l = mapFindWithDefault Set.empty l liveness + + blocks = revPostorder graph + + join_pts = findJoinPoints blocks + + sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock] + sink _ [] = [] + sink sunk (b:bs) = + -- pprTrace "sink" (ppr lbl) $ + blockJoin first final_middle final_last : sink sunk' bs + where + lbl = entryLabel b + (first, middle, last) = blockSplit b + + succs = successors last + + -- Annotate the middle nodes with the registers live *after* + -- the node. This will help us decide whether we can inline + -- an assignment in the current node or not. + live = Set.unions (map getLive succs) + live_middle = gen_kill dflags last live + ann_middles = annotate dflags live_middle (blockToList middle) + + -- Now sink and inline in this block + (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk) + fold_last = constantFoldNode dflags last + (final_last, assigs') = tryToInline dflags live fold_last assigs + + -- We cannot sink into join points (successors with more than + -- one predecessor), so identify the join points and the set + -- of registers live in them. + (joins, nonjoins) = partition (`mapMember` join_pts) succs + live_in_joins = Set.unions (map getLive joins) + + -- We do not want to sink an assignment into multiple branches, + -- so identify the set of registers live in multiple successors. 
+ -- This is made more complicated because when we sink an assignment + -- into one branch, this might change the set of registers that are + -- now live in multiple branches. + init_live_sets = map getLive nonjoins + live_in_multi live_sets r = + case filter (Set.member r) live_sets of + (_one:_two:_) -> True + _ -> False + + -- Now, drop any assignments that we will not sink any further. + (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs' + + drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets') + where + should_drop = conflicts dflags a final_last + || not (isTrivial dflags rhs) && live_in_multi live_sets r + || r `Set.member` live_in_joins + + live_sets' | should_drop = live_sets + | otherwise = map upd live_sets + + upd set | r `Set.member` set = set `Set.union` live_rhs + | otherwise = set + + live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs + + final_middle = foldl' blockSnoc middle' dropped_last + + sunk' = mapUnion sunk $ + mapFromList [ (l, filterAssignments dflags (getLive l) assigs'') + | l <- succs ] + +{- TODO: enable this later, when we have some good tests in place to + measure the effect and tune it. + +-- small: an expression we don't mind duplicating +isSmall :: CmmExpr -> Bool +isSmall (CmmReg (CmmLocal _)) = True -- +isSmall (CmmLit _) = True +isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y +isSmall (CmmRegOff (CmmLocal _) _) = True +isSmall _ = False +-} + +-- +-- We allow duplication of trivial expressions: registers (both local and +-- global) and literals. +-- +isTrivial :: DynFlags -> CmmExpr -> Bool +isTrivial _ (CmmReg (CmmLocal _)) = True +isTrivial dflags (CmmReg (CmmGlobal r)) = -- see Note [Inline GlobalRegs?] + if isARM (platformArch (targetPlatform dflags)) + then True -- CodeGen.Platform.ARM does not have globalRegMaybe + else isJust (globalRegMaybe (targetPlatform dflags) r) + -- GlobalRegs that are loads from BaseReg are not trivial +isTrivial _ (CmmLit _) = True +isTrivial _ _ = False + +-- +-- annotate each node with the set of registers live *after* the node +-- +annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)] +annotate dflags live nodes = snd $ foldr ann (live,[]) nodes + where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes) + +-- +-- Find the blocks that have multiple successors (join points) +-- +findJoinPoints :: [CmmBlock] -> LabelMap Int +findJoinPoints blocks = mapFilter (>1) succ_counts + where + all_succs = concatMap successors blocks + + succ_counts :: LabelMap Int + succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs + +-- +-- filter the list of assignments to remove any assignments that +-- are not live in a continuation. +-- +filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments +filterAssignments dflags live assigs = reverse (go assigs []) + where go [] kept = kept + go (a@(r,_,_):as) kept | needed = go as (a:kept) + | otherwise = go as kept + where + needed = r `Set.member` live + || any (conflicts dflags a) (map toNode kept) + -- Note that we must keep assignments that are + -- referred to by other assignments we have + -- already kept. + +-- ----------------------------------------------------------------------------- +-- Walk through the nodes of a block, sinking and inlining assignments +-- as we go. +-- +-- On input we pass in a: +-- * list of nodes in the block +-- * a list of assignments that appeared *before* this block and +-- that are being sunk. 
+-- +-- On output we get: +-- * a new block +-- * a list of assignments that will be placed *after* that block. +-- + +walk :: DynFlags + -> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with + -- the set of registers live *after* + -- this node. + + -> Assignments -- The current list of + -- assignments we are sinking. + -- Earlier assignments may refer + -- to later ones. + + -> ( Block CmmNode O O -- The new block + , Assignments -- Assignments to sink further + ) + +walk dflags nodes assigs = go nodes emptyBlock assigs + where + go [] block as = (block, as) + go ((live,node):ns) block as + | shouldDiscard node live = go ns block as + -- discard dead assignment + | Just a <- shouldSink dflags node2 = go ns block (a : as1) + | otherwise = go ns block' as' + where + node1 = constantFoldNode dflags node + + (node2, as1) = tryToInline dflags live node1 as + + (dropped, as') = dropAssignmentsSimple dflags + (\a -> conflicts dflags a node2) as1 + + block' = foldl' blockSnoc block dropped `blockSnoc` node2 + + +-- +-- Heuristic to decide whether to pick up and sink an assignment +-- Currently we pick up all assignments to local registers. It might +-- be profitable to sink assignments to global regs too, but the +-- liveness analysis doesn't track those (yet) so we can't. +-- +shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment +shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e) + where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e +shouldSink _ _other = Nothing + +-- +-- discard dead assignments. This doesn't do as good a job as +-- removeDeadAssignments, because it would need multiple passes +-- to get all the dead code, but it catches the common case of +-- superfluous reloads from the stack that the stack allocator +-- leaves behind. +-- +-- Also we catch "r = r" here. You might think it would fall +-- out of inlining, but the inliner will see that r is live +-- after the instruction and choose not to inline r in the rhs. +-- +shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool +shouldDiscard node live + = case node of + CmmAssign r (CmmReg r') | r == r' -> True + CmmAssign (CmmLocal r) _ -> not (r `Set.member` live) + _otherwise -> False + + +toNode :: Assignment -> CmmNode O O +toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs + +dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) () + +dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignments dflags should_drop state assigs + = (dropped, reverse kept) + where + (dropped,kept) = go state assigs [] [] + + go _ [] dropped kept = (dropped, kept) + go state (assig : rest) dropped kept + | conflict = go state' rest (toNode assig : dropped) kept + | otherwise = go state' rest dropped (assig:kept) + where + (dropit, state') = should_drop assig state + conflict = dropit || any (conflicts dflags assig) dropped + + +-- ----------------------------------------------------------------------------- +-- Try to inline assignments into a node. +-- This also does constant folding for primpops, since +-- inlining opens up opportunities for doing so. + +tryToInline + :: DynFlags + -> LocalRegSet -- set of registers live after this + -- node. We cannot inline anything + -- that is live after the node, unless + -- it is small enough to duplicate. 
+   -> CmmNode O x       -- The node to inline into
+   -> Assignments       -- Assignments to inline
+   -> (
+        CmmNode O x     -- New node
+      , Assignments     -- Remaining assignments
+      )
+
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
+ where
+  usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
+  usages = foldLocalRegsUsed dflags addUsage emptyUFM node
+
+  go _usages node _skipped [] = (node, [])
+
+  go usages node skipped (a@(l,rhs,_) : rest)
+   | cannot_inline           = dont_inline
+   | occurs_none             = discard  -- Note [discard during inlining]
+   | occurs_once             = inline_and_discard
+   | isTrivial dflags rhs    = inline_and_keep
+   | otherwise               = dont_inline
+   where
+        inline_and_discard = go usages' inl_node skipped rest
+          where usages' = foldLocalRegsUsed dflags addUsage usages rhs
+
+        discard = go usages node skipped rest
+
+        dont_inline        = keep node     -- don't inline the assignment, keep it
+        inline_and_keep    = keep inl_node -- inline the assignment, keep it
+
+        keep node' = (final_node, a : rest')
+          where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
+                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
+                                            usages rhs
+                -- we must not inline anything that is mentioned in the RHS
+                -- of a binding that we have already skipped, so we set the
+                -- usages of the regs on the RHS to 2.
+
+        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
+                        || l `elemLRegSet` skipped
+                        || not (okToInline dflags rhs node)
+
+        l_usages = lookupUFM usages l
+        l_live   = l `elemRegSet` live
+
+        occurs_once = not l_live && l_usages == Just 1
+        occurs_none = not l_live && l_usages == Nothing
+
+        inl_node = improveConditional (mapExpDeep inl_exp node)
+
+        inl_exp :: CmmExpr -> CmmExpr
+        -- inl_exp is where the inlining actually takes place!
+        inl_exp (CmmReg    (CmmLocal l'))     | l == l' = rhs
+        inl_exp (CmmRegOff (CmmLocal l') off) | l == l'
+                    = cmmOffset dflags rhs off
+                    -- re-constant fold after inlining
+        inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args
+        inl_exp other = other
+
+
+{- Note [improveConditional]
+
+cmmMachOpFold tries to simplify conditionals to turn things like
+  (a == b) != 1
+into
+  (a != b)
+but there's one case it can't handle: when the comparison is over
+floating-point values, we can't invert it, because floating-point
+comparisons aren't invertible (because of NaNs).
+
+But we *can* optimise this conditional by swapping the true and false
+branches. Given
+  CmmCondBranch ((a >## b) != 1) t f
+we can turn it into
+  CmmCondBranch (a >## b) f t
+
+So here we catch conditionals that weren't optimised by cmmMachOpFold,
+and apply the above transformation to eliminate the comparison against 1.
+
+It's tempting to just turn every != into == and then let cmmMachOpFold
+do its thing, but that risks changing a nice fall-through conditional
+into one that requires two jumps (see swapcond_last in
+GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases where
+we can eliminate a comparison.
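+
+A clarifying aside: the result of a Cmm comparison is always 0 or 1, so
+"x != 1", "x <u 1" and "x <s 1" all amount to testing "x == 0". That is
+why neLike below accepts MO_U_Lt and MO_S_Lt in addition to MO_Ne.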
+-} +improveConditional :: CmmNode O x -> CmmNode O x +improveConditional + (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l) + | neLike mop, isComparisonExpr x + = CmmCondBranch x f t (fmap not l) + where + neLike (MO_Ne _) = True + neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike _ = False +improveConditional other = other + +-- Note [dependent assignments] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- If our assignment list looks like +-- +-- [ y = e, x = ... y ... ] +-- +-- We cannot inline x. Remember this list is really in reverse order, +-- so it means x = ... y ...; y = e +-- +-- Hence if we inline x, the outer assignment to y will capture the +-- reference in x's right hand side. +-- +-- In this case we should rename the y in x's right-hand side, +-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ] +-- Now we can go ahead and inline x. +-- +-- For now we do nothing, because this would require putting +-- everything inside UniqSM. +-- +-- One more variant of this (#7366): +-- +-- [ y = e, y = z ] +-- +-- If we don't want to inline y = e, because y is used many times, we +-- might still be tempted to inline y = z (because we always inline +-- trivial rhs's). But of course we can't, because y is equal to e, +-- not z. + +-- Note [discard during inlining] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Opportunities to discard assignments sometimes appear after we've +-- done some inlining. Here's an example: +-- +-- x = R1; +-- y = P64[x + 7]; +-- z = P64[x + 15]; +-- /* z is dead */ +-- R1 = y & (-8); +-- +-- The x assignment is trivial, so we inline it in the RHS of y, and +-- keep both x and y. z gets dropped because it is dead, then we +-- inline y, and we have a dead assignment to x. If we don't notice +-- that x is dead in tryToInline, we end up retaining it. + +addUsage :: UniqFM Int -> LocalReg -> UniqFM Int +addUsage m r = addToUFM_C (+) m r 1 + +regsUsedIn :: LRegSet -> CmmExpr -> Bool +regsUsedIn ls _ | nullLRegSet ls = False +regsUsedIn ls e = wrapRecExpf f e False + where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True + f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True + f _ z = z + +-- we don't inline into CmmUnsafeForeignCall if the expression refers +-- to global registers. This is a HACK to avoid global registers +-- clashing with C argument-passing registers, really the back-end +-- ought to be able to handle it properly, but currently neither PprC +-- nor the NCG can do it. See Note [Register parameter passing] +-- See also GHC.StgToCmm.Foreign.load_args_into_temps. +okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool +okToInline dflags expr node@(CmmUnsafeForeignCall{}) = + not (globalRegistersConflict dflags expr node) +okToInline _ _ _ = True + +-- ----------------------------------------------------------------------------- + +-- | @conflicts (r,e) node@ is @False@ if and only if the assignment +-- @r = e@ can be safely commuted past statement @node@. +conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool +conflicts dflags (r, rhs, addr) node + + -- (1) node defines registers used by rhs of assignment. This catches + -- assignments and all three kinds of calls. 
See Note [Sinking and calls] + | globalRegistersConflict dflags rhs node = True + | localRegistersConflict dflags rhs node = True + + -- (2) node uses register defined by assignment + | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True + + -- (3) a store to an address conflicts with a read of the same memory + | CmmStore addr' e <- node + , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True + + -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively + | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True + | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True + | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True + + -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap] + | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True + + -- (6) native calls clobber any memory + | CmmCall{} <- node, memConflicts addr AnyMem = True + + -- (7) otherwise, no conflict + | otherwise = False + +-- Returns True if node defines any global registers that are used in the +-- Cmm expression +globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool +globalRegistersConflict dflags expr node = + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr) + False node + +-- Returns True if node defines any local registers that are used in the +-- Cmm expression +localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool +localRegistersConflict dflags expr node = + foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr) + False node + +-- Note [Sinking and calls] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall) +-- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after +-- stack layout (see Note [Sinking after stack layout]) which leads to two +-- invariants related to calls: +-- +-- a) during stack layout phase all safe foreign calls are turned into +-- unsafe foreign calls (see Note [Lower safe foreign calls]). This +-- means that we will never encounter CmmForeignCall node when running +-- sinking after stack layout +-- +-- b) stack layout saves all variables live across a call on the stack +-- just before making a call (remember we are not sinking assignments to +-- stack): +-- +-- L1: +-- x = R1 +-- P64[Sp - 16] = L2 +-- P64[Sp - 8] = x +-- Sp = Sp - 16 +-- call f() returns L2 +-- L2: +-- +-- We will attempt to sink { x = R1 } but we will detect conflict with +-- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even +-- checking whether it conflicts with { call f() }. In this way we will +-- never need to check any assignment conflicts with CmmCall. Remember +-- that we still need to check for potential memory conflicts. +-- +-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes +-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]). +-- This assumption holds only when we do sinking after stack layout. If we run +-- it before stack layout we need to check for possible conflicts with all three +-- kinds of calls. Our `conflicts` function does that by using a generic +-- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and +-- UserOfRegs typeclasses. +-- + +-- An abstraction of memory read or written. 
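+-- For example, an 8-byte load from Sp+8 is abstracted as SpMem 8 8,
+-- while a load from a statically unknown stack address is StackMem.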
+data AbsMem + = NoMem -- no memory accessed + | AnyMem -- arbitrary memory + | HeapMem -- definitely heap memory + | StackMem -- definitely stack memory + | SpMem -- <size>[Sp+n] + {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +-- Having SpMem is important because it lets us float loads from Sp +-- past stores to Sp as long as they don't overlap, and this helps to +-- unravel some long sequences of +-- x1 = [Sp + 8] +-- x2 = [Sp + 16] +-- ... +-- [Sp + 8] = xi +-- [Sp + 16] = xj +-- +-- Note that SpMem is invalidated if Sp is changed, but the definition +-- of 'conflicts' above handles that. + +-- ToDo: this won't currently fix the following commonly occurring code: +-- x1 = [R1 + 8] +-- x2 = [R1 + 16] +-- .. +-- [Hp - 8] = x1 +-- [Hp - 16] = x2 +-- .. + +-- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that +-- assignments to [Hp + n] do not conflict with any other heap memory, +-- but this is tricky to nail down. What if we had +-- +-- x = Hp + n +-- [x] = ... +-- +-- the store to [x] should be "new heap", not "old heap". +-- Furthermore, you could imagine that if we started inlining +-- functions in Cmm then there might well be reads of heap memory +-- that was written in the same basic block. To take advantage of +-- non-aliasing of heap memory we will have to be more clever. + +-- Note [Foreign calls clobber heap] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- It is tempting to say that foreign calls clobber only +-- non-heap/stack memory, but unfortunately we break this invariant in +-- the RTS. For example, in stg_catch_retry_frame we call +-- stmCommitNestedTransaction() which modifies the contents of the +-- TRec it is passed (this actually caused incorrect code to be +-- generated). +-- +-- Since the invariant is true for the majority of foreign calls, +-- perhaps we ought to have a special annotation for calls that can +-- modify heap/stack memory. For now we just use the conservative +-- definition here. +-- +-- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and +-- therefore we should never float any memory operations across one of +-- these calls. 
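+
+-- As an illustrative sketch of the definitions below (not an exhaustive
+-- specification):
+--
+--   memConflicts (SpMem 8 8) (SpMem 16 8) == False  -- disjoint stack slots
+--   memConflicts (SpMem 8 8) (SpMem 12 8) == True   -- overlapping stack slots
+--   memConflicts (SpMem 8 8) StackMem     == True   -- unknown stack address
+--   memConflicts HeapMem     StackMem     == False  -- heap and stack are disjoint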
+ + +bothMems :: AbsMem -> AbsMem -> AbsMem +bothMems NoMem x = x +bothMems x NoMem = x +bothMems HeapMem HeapMem = HeapMem +bothMems StackMem StackMem = StackMem +bothMems (SpMem o1 w1) (SpMem o2 w2) + | o1 == o2 = SpMem o1 (max w1 w2) + | otherwise = StackMem +bothMems SpMem{} StackMem = StackMem +bothMems StackMem SpMem{} = StackMem +bothMems _ _ = AnyMem + +memConflicts :: AbsMem -> AbsMem -> Bool +memConflicts NoMem _ = False +memConflicts _ NoMem = False +memConflicts HeapMem StackMem = False +memConflicts StackMem HeapMem = False +memConflicts SpMem{} HeapMem = False +memConflicts HeapMem SpMem{} = False +memConflicts (SpMem o1 w1) (SpMem o2 w2) + | o1 < o2 = o1 + w1 > o2 + | otherwise = o2 + w2 > o1 +memConflicts _ _ = True + +exprMem :: DynFlags -> CmmExpr -> AbsMem +exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr) +exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es) +exprMem _ _ = NoMem + +loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem +loadAddr dflags e w = + case e of + CmmReg r -> regAddr dflags r 0 w + CmmRegOff r i -> regAddr dflags r i w + _other | regUsedIn dflags spReg e -> StackMem + | otherwise -> AnyMem + +regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem +regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w) +regAddr _ (CmmGlobal Hp) _ _ = HeapMem +regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps +regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself +regAddr _ _ _ _ = AnyMem + +{- +Note [Inline GlobalRegs?] + +Should we freely inline GlobalRegs? + +Actually it doesn't make a huge amount of difference either way, so we +*do* currently treat GlobalRegs as "trivial" and inline them +everywhere, but for what it's worth, here is what I discovered when I +(SimonM) looked into this: + +Common sense says we should not inline GlobalRegs, because when we +have + + x = R1 + +the register allocator will coalesce this assignment, generating no +code, and simply record the fact that x is bound to $rbx (or +whatever). Furthermore, if we were to sink this assignment, then the +range of code over which R1 is live increases, and the range of code +over which x is live decreases. All things being equal, it is better +for x to be live than R1, because R1 is a fixed register whereas x can +live in any register. So we should neither sink nor inline 'x = R1'. + +However, not inlining GlobalRegs can have surprising +consequences. e.g. (cgrun020) + + c3EN: + _s3DB::P64 = R1; + _c3ES::P64 = _s3DB::P64 & 7; + if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV; + c3EU: + _s3DD::P64 = P64[_s3DB::P64 + 6]; + _s3DE::P64 = P64[_s3DB::P64 + 14]; + I64[Sp - 8] = c3F0; + R1 = _s3DE::P64; + P64[Sp] = _s3DD::P64; + +inlining the GlobalReg gives: + + c3EN: + if (R1 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + _s3DD::P64 = P64[R1 + 6]; + R1 = P64[R1 + 14]; + P64[Sp] = _s3DD::P64; + +but if we don't inline the GlobalReg, instead we get: + + _s3DB::P64 = R1; + if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + R1 = P64[_s3DB::P64 + 14]; + P64[Sp] = P64[_s3DB::P64 + 6]; + +This looks better - we managed to inline _s3DD - but in fact it +generates an extra reg-reg move: + +.Lc3EU: + movq $c3F0_info,-8(%rbp) + movq %rbx,%rax + movq 14(%rbx),%rbx + movq 6(%rax),%rax + movq %rax,(%rbp) + +because _s3DB is now live across the R1 assignment, we lost the +benefit of coalescing. + +Who is at fault here? 
Perhaps if we knew that _s3DB was an alias for
+R1, then we would not sink a reference to _s3DB past the R1
+assignment. Or perhaps we *should* do that - we might gain by sinking
+it, despite losing the coalescing opportunity.
+
+Sometimes not inlining global registers wins by virtue of the rule
+about not inlining into arguments of a foreign call, e.g. (T7163) this
+is what happens when we inline F1:
+
+  _s3L2::F32 = F1;
+  _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32);
+  (_s3L7::F32) = call "ccall" arg hints:  []  result hints:  [] rintFloat(_c3O3::F32);
+
+but if we don't inline F1:
+
+  (_s3L7::F32) = call "ccall" arg hints:  []  result hints:  [] rintFloat(%MO_F_Mul_W32(_s3L2::F32,
+                                                                                        10.0 :: W32));
+-}
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
new file mode 100644
index 0000000000..e89fadfd2e
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -0,0 +1,502 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch (
+     SwitchTargets,
+     mkSwitchTargets,
+     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
+     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
+     switchTargetsToList, eqSwitchTargetWith,
+
+     SwitchPlan(..),
+     targetSupportsSwitch,
+     createSwitchPlan,
+  ) where
+
+import GhcPrelude
+
+import Outputable
+import DynFlags
+import GHC.Cmm.Dataflow.Label (Label)
+
+import Data.Maybe
+import Data.List (groupBy)
+import Data.Function (on)
+import qualified Data.Map as M
+
+-- Note [Cmm Switches, the general plan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Compiling a high-level switch statement, as it comes out of an STG case
+-- expression, for example, allows for a surprising number of design decisions.
+-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
+-- well as from the actual code generation.
+--
+-- The overall plan is:
+--  * The Stg → Cmm transformation creates a single `SwitchTargets` value in
+--    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
+--    At this stage, it is unsuitable for code generation.
+--  * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
+--    switch statements with code that is suitable for code generation, i.e.
+--    a nice balanced tree of decisions with dense jump tables in the leaves.
+--    The actual planning of this tree is performed in pure code in
+--    createSwitchPlan in this module. See Note [createSwitchPlan].
+--  * The actual code generation will not do any further processing and
+--    implements each CmmSwitch with a jump table.
+--
+-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
+-- statements alone, as we can turn a SwitchTargets value into a nice
+-- switch statement in LLVM or C, respectively, and leave the rest to the
+-- compiler.
+--
+-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] for why the two
+-- modules are separated.
+
+-----------------------------------------------------------------------------
+-- Note [Magic Constants in GHC.Cmm.Switch]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- There are a lot of heuristics here that depend on magic values where it is
+-- hard to determine the "best" value (for whatever that means). These are the
+-- magic values:
+
+-- | Number of consecutive default values allowed in a jump table. If there are
+-- more of them, the jump tables are split.
+--
+-- Currently 7, as it costs 7 words of additional code when a jump table is
+-- split (at least on x64, determined experimentally).
+maxJumpTableHole :: Integer
+maxJumpTableHole = 7
+
+-- | Minimum size of a jump table. If the number of entries is smaller, the
+-- switch is implemented using conditionals.
+-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
+minJumpTableSize :: Int
+minJumpTableSize = 5
+
+-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
+minJumpTableOffset :: Integer
+minJumpTableOffset = 2
+
+
+-----------------------------------------------------------------------------
+-- Switch Targets
+
+-- Note [SwitchTargets]
+-- ~~~~~~~~~~~~~~~~~~~~
+--
+-- The branches of a switch are stored in a SwitchTargets, which consists of an
+-- (optional) default jump target, and a map from values to jump targets.
+--
+-- If the default jump target is absent, the behaviour of the switch outside the
+-- values of the map is undefined.
+--
+-- We use an Integer for the keys of the map so that it can be used in switches
+-- on unsigned as well as signed integers.
+--
+-- The map may be empty (we prune out-of-range branches here, so the pruning
+-- itself may leave it empty).
+--
+-- Before code generation, the table needs to be brought into a form where all
+-- entries are non-negative, so that it can be compiled into a jump table.
+-- See switchTargetsToTable.
+
+
+-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
+-- value, and knows whether the value is signed, the possible range, an
+-- optional default value and a map from values to jump labels.
+data SwitchTargets =
+    SwitchTargets
+        Bool                       -- Signed values
+        (Integer, Integer)         -- Range
+        (Maybe Label)              -- Default value
+        (M.Map Integer Label)      -- The branches
+    deriving (Show, Eq)
+
+-- | The smart constructor mkSwitchTargets normalises the map a bit:
+--  * No entries outside the range
+--  * No entries equal to the default
+--  * No default if all elements have explicit values
+mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
+mkSwitchTargets signed range@(lo,hi) mbdef ids
+    = SwitchTargets signed range mbdef' ids'
+  where
+    ids' = dropDefault $ restrict ids
+    mbdef' | defaultNeeded = mbdef
+           | otherwise     = Nothing
+
+    -- Drop entries outside the range, if there is a range
+    restrict = restrictMap (lo,hi)
+
+    -- Drop entries that equal the default, if there is a default
+    dropDefault | Just l <- mbdef = M.filter (/= l)
+                | otherwise       = id
+
+    -- Check if the default is still needed
+    defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
+
+
+-- | Changes all labels mentioned in the SwitchTargets value
+mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
+mapSwitchTargets f (SwitchTargets signed range mbdef branches)
+    = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
+
+-- | Returns the list of non-default branches of the SwitchTargets value
+switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
+switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
+
+-- | Return the default label of the SwitchTargets value
+switchTargetsDefault :: SwitchTargets -> Maybe Label
+switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
+
+-- | Return the range of the SwitchTargets value
+switchTargetsRange :: SwitchTargets -> (Integer, Integer)
+switchTargetsRange (SwitchTargets _ range _ _) = range
+
+-- | Return whether this is used for a signed value
+switchTargetsSigned :: SwitchTargets -> Bool
+switchTargetsSigned (SwitchTargets signed _ _ _) = signed
+
+-- | switchTargetsToTable creates a dense jump table, usable for code
generation. +-- +-- Also returns an offset to add to the value; the list is 0-based on the +-- result of that addition. +-- +-- The conversion from Integer to Int is a bit of a wart, as the actual +-- scrutinee might be an unsigned word, but it just works, due to wrap-around +-- arithmetic (as verified by the CmmSwitchTest test case). +switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) +switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) + = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) + where + labelFor i = case M.lookup i branches of Just l -> Just l + Nothing -> mbdef + start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] + | otherwise = lo + +-- Note [Jump Table Offset] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Usually, the code for a jump table starting at x will first subtract x from +-- the value, to avoid a large amount of empty entries. But if x is very small, +-- the extra entries are no worse than the subtraction in terms of code size, and +-- not having to do the subtraction is quicker. +-- +-- I.e. instead of +-- _u20N: +-- leaq -1(%r14),%rax +-- jmp *_n20R(,%rax,8) +-- _n20R: +-- .quad _c20p +-- .quad _c20q +-- do +-- _u20N: +-- jmp *_n20Q(,%r14,8) +-- +-- _n20Q: +-- .quad 0 +-- .quad _c20p +-- .quad _c20q +-- .quad _c20r + +-- | The list of all labels occurring in the SwitchTargets value. +switchTargetsToList :: SwitchTargets -> [Label] +switchTargetsToList (SwitchTargets _ _ mbdef branches) + = maybeToList mbdef ++ M.elems branches + +-- | Groups cases with equal targets, suitable for pretty-printing to a +-- c-like switch statement with fall-through semantics. +switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) +switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) + where + groups = map (\xs -> (map fst xs, snd (head xs))) $ + groupBy ((==) `on` snd) $ + M.toList branches + +-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim" +eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool +eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = + signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) + where + goMB Nothing Nothing = True + goMB (Just l1) (Just l2) = l1 `eq` l2 + goMB _ _ = False + goList [] [] = True + goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 + goList _ _ = False + +----------------------------------------------------------------------------- +-- Code generation for Switches + + +-- | A SwitchPlan abstractly describes how a Switch statement ought to be +-- implemented. See Note [createSwitchPlan] +data SwitchPlan + = Unconditionally Label + | IfEqual Integer Label SwitchPlan + | IfLT Bool Integer SwitchPlan SwitchPlan + | JumpTable SwitchTargets + deriving Show +-- +-- Note [createSwitchPlan] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- A SwitchPlan describes how a Switch statement is to be broken down into +-- smaller pieces suitable for code generation. +-- +-- createSwitchPlan creates such a switch plan, in these steps: +-- 1. It splits the switch statement at segments of non-default values that +-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch] +-- 2. Too small jump tables should be avoided, so we break up smaller pieces +-- in breakTooSmall. +-- 3. 
We fill in the segments between those pieces with a jump to the default
+--    label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
+-- 4. We find and replace two less-than branches by a single equal-to test in
+--    findSingleValues
+-- 5. The pieces thus collected are assembled into a balanced binary tree.
+
+{-
+  Note [Two alts + default]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Discussion and a bit more info at #14644
+
+When dealing with a switch of the form:
+switch(e) {
+  case 1: goto l1;
+  case 3000: goto l2;
+  default: goto ldef;
+}
+
+If we treat it as a sparse jump table we would generate:
+
+if (e > 3000) //Check if value is outside of the jump table.
+    goto ldef;
+else {
+    if (e < 3000) { //Compare to upper value
+        if(e != 1) //Compare to remaining value
+            goto ldef;
+          else
+            goto l1;
+    }
+    else
+        goto l2;
+}
+
+Instead we special-case this to:
+
+if (e==1) goto l1;
+else if (e==3000) goto l2;
+else goto ldef;
+
+This means we have:
+* Fewer comparisons for 1 and <3000
+* Unchanged for 3000
+* One more for >3000
+
+This improves code in a few ways:
+* One comparison fewer means smaller code, which helps with cache.
+* It exchanges a taken jump for two jumps not taken in the >3000 case.
+  Jumps not taken are cheaper (see Agner's guides), making this about as fast.
+* For all other cases the first range check is removed, making it faster.
+
+The end result is that the change is not measurably slower for the case
+>3000 and faster for the other cases.
+
+This makes running this kind of match in an inner loop cheaper by 10-20%
+depending on the data.
+In nofib this improves wheel-sieve1 by 4-9% depending on problem
+size.
+
+We could also add a second conditional jump after the comparison to
+keep the range check like this:
+    cmp 3000, rArgument
+    jg <default>
+    je <branch 2>
+While this is fairly cheap, it made no big difference for the >3000 case
+and slowed down all other cases, making it not worthwhile.
+-}
+
+
+-- | Does the target support switch out of the box? Then leave this to the
+-- target!
+targetSupportsSwitch :: HscTarget -> Bool
+targetSupportsSwitch HscC = True
+targetSupportsSwitch HscLlvm = True
+targetSupportsSwitch _ = False
+
+-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
+-- down into smaller pieces suitable for code generation.
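+--
+-- For instance (an illustrative sketch, not compiler output): a dense
+-- switch over the six values 0..5 with no default becomes a single
+-- 'JumpTable' plan, whereas {1 -> l1, 3000 -> l2} with default ldef
+-- becomes IfEqual 1 l1 (IfEqual 3000 l2 (Unconditionally ldef)); see
+-- Note [Two alts + default].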
+createSwitchPlan :: SwitchTargets -> SwitchPlan
+-- Let's do the common case of a singleton map quickly and efficiently (#10677)
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+    | [(x, l)] <- M.toList m
+    = IfEqual x l (Unconditionally defLabel)
+-- And another common case, matching "booleans"
+createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
+    | [(x1, l1), (_x2,l2)] <- M.toAscList m
+    -- Checking that |range| = 2 is enough if we have two unique literals
+    , hi - lo == 1
+    = IfEqual x1 l1 (Unconditionally l2)
+-- See Note [Two alts + default]
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+    | [(x1, l1), (x2,l2)] <- M.toAscList m
+    = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
+createSwitchPlan (SwitchTargets signed range mbdef m) =
+    -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
+    plan
+  where
+    pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
+    flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
+    plan = buildTree signed $ flatPlan
+
+
+---
+--- Step 1: Splitting at large holes
+---
+splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
+splitAtHoles _ m | M.null m = []
+splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
+  where
+    holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
+    nonHoles = reassocTuples lo holes hi
+
+    (lo,_) = M.findMin m
+    (hi,_) = M.findMax m
+
+---
+--- Step 2: Avoid small jump tables
+---
+-- We do not want jump tables below a certain size. This breaks them up
+-- (into singleton maps, for now).
+breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
+breakTooSmall m
+  | M.size m > minJumpTableSize = [m]
+  | otherwise                   = [M.singleton k v | (k,v) <- M.toList m]
+
+---
+--- Step 3: Fill in the blanks
+---
+
+-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer in between every
+-- two entries, dividing the range.
+-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
+-- the expression is < n, and plan2 otherwise.
+
+type FlatSwitchPlan = SeparatedList Integer SwitchPlan
+
+mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
+
+-- If we have no default (i.e.
undefined where there is no entry), we can +-- branch at the minimum of each map +mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty +mkFlatSwitchPlan signed Nothing _ (m:ms) + = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) + +-- If we have a default, we have to interleave segments that jump +-- to the default between the maps +mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) + where + go (lo,hi) [] + | lo > hi = [] + | otherwise = [(lo, Unconditionally l)] + go (lo,hi) (m:ms) + | lo < min + = (lo, Unconditionally l) : go (min,hi) (m:ms) + | lo == min + = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms + | otherwise + = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) + where + min = fst (M.findMin m) + max = fst (M.findMax m) + + +mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan +mkLeafPlan signed mbdef m + | [(_,l)] <- M.toList m -- singleton map + = Unconditionally l + | otherwise + = JumpTable $ mkSwitchTargets signed (min,max) mbdef m + where + min = fst (M.findMin m) + max = fst (M.findMax m) + +--- +--- Step 4: Reduce the number of branches using == +--- + +-- A sequence of three unconditional jumps, with the outer two pointing to the +-- same value and the bounds off by exactly one can be improved +findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan +findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) + | l == l3 && i + 1 == i' + = findSingleValues (IfEqual i l2 (Unconditionally l), xs) +findSingleValues (p, (i,p'):xs) + = (p,i) `consSL` findSingleValues (p', xs) +findSingleValues (p, []) + = (p, []) + +--- +--- Step 5: Actually build the tree +--- + +-- Build a balanced tree from a separated list +buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan +buildTree _ (p,[]) = p +buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) + where + (sl1, m, sl2) = divideSL sl + + + +-- +-- Utility data type: Non-empty lists with extra markers in between each +-- element: +-- + +type SeparatedList b a = (a, [(b,a)]) + +consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a +consSL (a, b) (a', xs) = (a, (b,a'):xs) + +divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) +divideSL (_,[]) = error "divideSL: Singleton SeparatedList" +divideSL (p,xs) = ((p, xs1), m, (p', xs2)) + where + (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs + +-- +-- Other Utilities +-- + +restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b +restrictMap (lo,hi) m = mid + where (_, mid_hi) = M.split (lo-1) m + (mid, _) = M.split (hi+1) mid_hi + +-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] +reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] +reassocTuples initial [] last + = [(initial,last)] +reassocTuples initial ((a,b):tuples) last + = (initial,a) : reassocTuples b tuples last + +-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- I (Joachim) separated the two somewhat closely related modules +-- +-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy +-- for implementing a Cmm switch (createSwitchPlan), and +-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification, +-- +-- for these reasons: +-- +-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. 
+--    it does not depend on any GHC-specific modules at all (with the
+--    exception of Outputable and GHC.Cmm.Dataflow (Label)).
+--  * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very
+--    high in the dependency tree.
+--  * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is
+--    abstract, but used in GHC.Cmm.Node.
+--  * Because GHC.Cmm.Switch is low in the dependency tree, the separation
+--    allows for more parallelism when building GHC.
+--  * The interaction between the modules is very explicit and easy to
+--    understand, due to the small and simple interface.
diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs
new file mode 100644
index 0000000000..dfac116764
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch/Implement.hs
@@ -0,0 +1,116 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch.Implement
+  ( cmmImplementSwitchPlans
+  )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import UniqSupply
+import DynFlags
+
+--
+-- This module replaces Switch statements as generated by the Stg -> Cmm
+-- transformation, which might be huge and sparse and hence unsuitable for
+-- assembly code, by proper constructs (if-then-else trees, dense jump
+-- tables).
+--
+-- The actual, abstract strategy is determined by createSwitchPlan in
+-- GHC.Cmm.Switch and returned as a SwitchPlan; this module contains just
+-- the implementation in terms of Cmm code. See Note [Cmm Switches, the
+-- general plan] in GHC.Cmm.Switch.
+--
+-- This division into different modules serves both to clearly separate
+-- concerns and because createSwitchPlan needs access to the constructors
+-- of SwitchTargets, a data type exported abstractly by GHC.Cmm.Switch.
+--
+
+-- | Traverses the 'CmmGraph', making sure that 'CmmSwitch' nodes are
+-- suitable for code generation.
+cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph
+cmmImplementSwitchPlans dflags g
+    -- Switch generation done by backend (LLVM/C)
+  | targetSupportsSwitch (hscTarget dflags) = return g
+  | otherwise = do
+    blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g)
+    return $ ofBlockList (g_entry g) blocks'
+
+visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock]
+visitSwitches dflags block
+  | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block
+  = do
+    let plan = createSwitchPlan ids
+    -- See Note [Floating switch expressions]
+    (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr
+
+    (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan
+
+    let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail
+
+    return $ block' : newBlocks
+
+  | otherwise
+  = return [block]
+
+-- Note [Floating switch expressions]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-- When we translate a sparse switch into a search tree we would like
+-- to compute the value we compare against only once.
+
+-- For this purpose we assign the switch expression to a local register
+-- and then use this register when constructing the actual binary tree.
+
+-- This is important as the expression could contain expensive code like
+-- memory loads or divisions, which we REALLY don't want to duplicate.
+
+-- This happened in parts of the handwritten RTS Cmm code. See also #16933.
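+
+-- As a small illustration (the register name and rendering below are
+-- schematic, not taken from an actual dump): if the scrutinee is the
+-- load I64[Sp + 8], we first emit
+--     _u1::I64 = I64[Sp + 8];
+-- and then build the whole if-then-else tree over _u1, so the memory
+-- load happens exactly once.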
+
+-- See Note [Floating switch expressions]
+floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr)
+floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg)
+floatSwitchExpr dflags expr = do
+  (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM
+  return (BMiddle assign, expr')
+
+
+-- Implementing a switch plan (returning a tail block)
+implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock])
+implementSwitchPlan dflags scope expr = go
+  where
+    go (Unconditionally l)
+      = return (emptyBlock `blockJoinTail` CmmBranch l, [])
+    go (JumpTable ids)
+      = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, [])
+    go (IfLT signed i ids1 ids2)
+      = do
+        (bid1, newBlocks1) <- go' ids1
+        (bid2, newBlocks2) <- go' ids2
+
+        let lt | signed    = cmmSLtWord
+               | otherwise = cmmULtWord
+            scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i
+            lastNode = CmmCondBranch scrut bid1 bid2 Nothing
+            lastBlock = emptyBlock `blockJoinTail` lastNode
+        return (lastBlock, newBlocks1 ++ newBlocks2)
+    go (IfEqual i l ids2)
+      = do
+        (bid2, newBlocks2) <- go' ids2
+
+        let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i
+            lastNode = CmmCondBranch scrut bid2 l Nothing
+            lastBlock = emptyBlock `blockJoinTail` lastNode
+        return (lastBlock, newBlocks2)
+
+    -- Same, but returning a label to branch to
+    go' (Unconditionally l)
+      = return (l, [])
+    go' p
+      = do
+        bid <- mkBlockId `fmap` getUniqueM
+        (last, newBlocks) <- go p
+        let block = CmmEntry bid scope `blockJoinHead` last
+        return (bid, block : newBlocks)
diff --git a/compiler/GHC/Cmm/Type.hs b/compiler/GHC/Cmm/Type.hs
new file mode 100644
index 0000000000..867a260078
--- /dev/null
+++ b/compiler/GHC/Cmm/Type.hs
@@ -0,0 +1,432 @@
+module GHC.Cmm.Type
+  ( CmmType -- Abstract
+  , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
+  , cInt
+  , cmmBits, cmmFloat
+  , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
+  , isFloatType, isGcPtrType, isBitsType
+  , isWord32, isWord64, isFloat64, isFloat32
+
+  , Width(..)
+  , widthInBits, widthInBytes, widthInLog, widthFromBytes
+  , wordWidth, halfWordWidth, cIntWidth
+  , halfWordMask
+  , narrowU, narrowS
+  , rEP_CostCentreStack_mem_alloc
+  , rEP_CostCentreStack_scc_count
+  , rEP_StgEntCounter_allocs
+  , rEP_StgEntCounter_allocd
+
+  , ForeignHint(..)
+
+  , Length
+  , vec, vec2, vec4, vec8, vec16
+  , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
+  , cmmVec
+  , vecLength, vecElemType
+  , isVecType
+  )
+where
+
+
+import GhcPrelude
+
+import DynFlags
+import FastString
+import Outputable
+
+import Data.Word
+import Data.Int
+
+-----------------------------------------------------------------------------
+--              CmmType
+-----------------------------------------------------------------------------
+
+  -- NOTE: CmmType is an abstract type, not exported from this
+  --       module, so you can easily change its representation
+  --
+  -- However, Width is exported in a concrete way,
+  -- and is used extensively in pattern-matching
+
+data CmmType    -- The important one!
+  = CmmType CmmCat Width
+
+data CmmCat                -- "Category" (not exported)
+   = GcPtrCat              -- GC pointer
+   | BitsCat               -- Non-pointer
+   | FloatCat              -- Float
+   | VecCat Length CmmCat  -- Vector
+   deriving( Eq )
+        -- See Note [Signed vs unsigned] at the end
+
+instance Outputable CmmType where
+  ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
+
+instance Outputable CmmCat where
+  ppr FloatCat       = text "F"
+  ppr GcPtrCat       = text "P"
+  ppr BitsCat        = text "I"
+  ppr (VecCat n cat) = ppr cat <> text "x" <> ppr n <> text "V"
+
+-- Why is CmmType stratified? For native code generation,
+-- most of the time you just want to know what sort of register
+-- to put the thing in, and for this you need to know how
+-- many bits the thing has, and whether it goes in a floating-point
+-- register. By contrast, the distinction between GcPtr and
+-- GcNonPtr is of interest to only a few parts of the code generator.
+
+-------- Equality on CmmType --------------
+-- CmmType is *not* an instance of Eq; sometimes we care about the
+-- Gc/NonGc distinction, and sometimes we don't
+-- So we use an explicit function to force you to think about it
+cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
+cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1 == c2 && w1 == w2
+
+cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
+  -- This equality is temporary; used in CmmLint
+  -- but the RTS files are not yet well-typed wrt pointers
+cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
+   = c1 `weak_eq` c2 && w1 == w2
+   where
+     weak_eq :: CmmCat -> CmmCat -> Bool
+     FloatCat         `weak_eq` FloatCat         = True
+     FloatCat         `weak_eq` _other           = False
+     _other           `weak_eq` FloatCat         = False
+     (VecCat l1 cat1) `weak_eq` (VecCat l2 cat2) = l1 == l2
+                                                   && cat1 `weak_eq` cat2
+     (VecCat {})      `weak_eq` _other           = False
+     _other           `weak_eq` (VecCat {})      = False
+     _word1           `weak_eq` _word2           = True -- Ignores GcPtr
+
+--- Simple operations on CmmType -----
+typeWidth :: CmmType -> Width
+typeWidth (CmmType _ w) = w
+
+cmmBits, cmmFloat :: Width -> CmmType
+cmmBits  = CmmType BitsCat
+cmmFloat = CmmType FloatCat
+
+-------- Common CmmTypes ------------
+-- Floats and words of specific widths
+b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType
+b8   = cmmBits W8
+b16  = cmmBits W16
+b32  = cmmBits W32
+b64  = cmmBits W64
+b128 = cmmBits W128
+b256 = cmmBits W256
+b512 = cmmBits W512
+f32  = cmmFloat W32
+f64  = cmmFloat W64
+
+-- CmmTypes of native word widths
+bWord :: DynFlags -> CmmType
+bWord dflags = cmmBits (wordWidth dflags)
+
+bHalfWord :: DynFlags -> CmmType
+bHalfWord dflags = cmmBits (halfWordWidth dflags)
+
+gcWord :: DynFlags -> CmmType
+gcWord dflags = CmmType GcPtrCat (wordWidth dflags)
+
+cInt :: DynFlags -> CmmType
+cInt dflags = cmmBits (cIntWidth dflags)
+
+------------ Predicates ----------------
+isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
+isFloatType (CmmType FloatCat _) = True
+isFloatType _other               = False
+
+isGcPtrType (CmmType GcPtrCat _) = True
+isGcPtrType _other               = False
+
+isBitsType (CmmType BitsCat _) = True
+isBitsType _                   = False
+
+isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
+-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
+-- isFloat32 and 64 are obvious
+
+isWord64 (CmmType BitsCat  W64) = True
+isWord64 (CmmType GcPtrCat W64) = True
+isWord64 _other                 = False
+
+isWord32 (CmmType BitsCat  W32) = True
+isWord32 (CmmType GcPtrCat W32) = True
+isWord32 _other                 = False
+
+isFloat32 (CmmType FloatCat W32) = True
+isFloat32 _other                 = False
+
+isFloat64 (CmmType FloatCat W64) = True
+isFloat64 _other                 = False
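+
+-- A few concrete cases (assuming a 64-bit target; illustrative only):
+--   ppr b32              renders as I32
+--   ppr (gcWord dflags)  renders as P64
+--   ppr vec4f32          renders as Fx4V128
+-- and cmmEqType (gcWord dflags) (bWord dflags) is False, while
+-- cmmEqType_ignoring_ptrhood treats the two as equal, since both are
+-- word-sized and neither is a float.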
+
+-----------------------------------------------------------------------------
+--              Width
+-----------------------------------------------------------------------------
+
+data Width = W8 | W16 | W32 | W64
+           | W128
+           | W256
+           | W512
+           deriving (Eq, Ord, Show)
+
+instance Outputable Width where
+  ppr rep = ptext (mrStr rep)
+
+mrStr :: Width -> PtrString
+mrStr = sLit . show
+
+
+-------- Common Widths ------------
+wordWidth :: DynFlags -> Width
+wordWidth dflags
+  | wORD_SIZE dflags == 4 = W32
+  | wORD_SIZE dflags == 8 = W64
+  | otherwise             = panic "MachOp.wordRep: Unknown word size"
+
+halfWordWidth :: DynFlags -> Width
+halfWordWidth dflags
+  | wORD_SIZE dflags == 4 = W16
+  | wORD_SIZE dflags == 8 = W32
+  | otherwise             = panic "MachOp.halfWordRep: Unknown word size"
+
+halfWordMask :: DynFlags -> Integer
+halfWordMask dflags
+  | wORD_SIZE dflags == 4 = 0xFFFF
+  | wORD_SIZE dflags == 8 = 0xFFFFFFFF
+  | otherwise             = panic "MachOp.halfWordMask: Unknown word size"
+
+-- cIntWidth is the Width for a C-language 'int'
+cIntWidth :: DynFlags -> Width
+cIntWidth dflags = case cINT_SIZE dflags of
+                     4 -> W32
+                     8 -> W64
+                     s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
+
+widthInBits :: Width -> Int
+widthInBits W8   = 8
+widthInBits W16  = 16
+widthInBits W32  = 32
+widthInBits W64  = 64
+widthInBits W128 = 128
+widthInBits W256 = 256
+widthInBits W512 = 512
+
+
+widthInBytes :: Width -> Int
+widthInBytes W8   = 1
+widthInBytes W16  = 2
+widthInBytes W32  = 4
+widthInBytes W64  = 8
+widthInBytes W128 = 16
+widthInBytes W256 = 32
+widthInBytes W512 = 64
+
+
+widthFromBytes :: Int -> Width
+widthFromBytes 1  = W8
+widthFromBytes 2  = W16
+widthFromBytes 4  = W32
+widthFromBytes 8  = W64
+widthFromBytes 16 = W128
+widthFromBytes 32 = W256
+widthFromBytes 64 = W512
+
+widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
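+
+-- For instance, widthInBytes W64 == 8 and widthFromBytes 8 == W64; the
+-- two functions are inverses on the widths listed above, and anything
+-- else (e.g. widthFromBytes 3) is a panic.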
+
+-- log_2 of the width in bytes, useful for generating shifts.
+widthInLog :: Width -> Int
+widthInLog W8   = 0
+widthInLog W16  = 1
+widthInLog W32  = 2
+widthInLog W64  = 3
+widthInLog W128 = 4
+widthInLog W256 = 5
+widthInLog W512 = 6
+
+
+-- widening / narrowing
+
+narrowU :: Width -> Integer -> Integer
+narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
+narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
+narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
+narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
+narrowU _ _ = panic "narrowU"
+
+narrowS :: Width -> Integer -> Integer
+narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
+narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
+narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
+narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
+narrowS _ _ = panic "narrowS"
+
+-----------------------------------------------------------------------------
+--              SIMD
+-----------------------------------------------------------------------------
+
+type Length = Int
+
+vec :: Length -> CmmType -> CmmType
+vec l (CmmType cat w) = CmmType (VecCat l cat) vecw
+  where
+    vecw :: Width
+    vecw = widthFromBytes (l * widthInBytes w)
+
+vec2, vec4, vec8, vec16 :: CmmType -> CmmType
+vec2  = vec 2
+vec4  = vec 4
+vec8  = vec 8
+vec16 = vec 16
+
+vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
+vec2f64 = vec 2 f64
+vec2b64 = vec 2 b64
+vec4f32 = vec 4 f32
+vec4b32 = vec 4 b32
+vec8b16 = vec 8 b16
+vec16b8 = vec 16 b8
+
+cmmVec :: Int -> CmmType -> CmmType
+cmmVec n (CmmType cat w) =
+  CmmType (VecCat n cat) (widthFromBytes (n * widthInBytes w))
+
+vecLength :: CmmType -> Length
+vecLength (CmmType (VecCat l _) _) = l
+vecLength _                        = panic "vecLength: not a vector"
+
+vecElemType :: CmmType -> CmmType
+vecElemType (CmmType (VecCat l cat) w) = CmmType cat scalw
+  where
+    scalw :: Width
+    scalw = widthFromBytes (widthInBytes w `div` l)
+vecElemType _ = panic "vecElemType: not a vector"
+
+isVecType :: CmmType -> Bool
+isVecType (CmmType (VecCat {}) _) = True
+isVecType _                       = False
+
+-------------------------------------------------------------------------
+-- Hints
+
+-- Hints are extra type information we attach to the arguments and
+-- results of a foreign call, where more type information is sometimes
+-- needed by the ABI to make the correct kind of call.
+
+data ForeignHint
+  = NoHint | AddrHint | SignedHint
+  deriving( Eq )
+        -- Used to give extra per-argument or per-result
+        -- information needed by foreign calling conventions
+
+-------------------------------------------------------------------------
+
+-- These don't really belong here, but I don't know where best to
+-- put them.
+
+rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
+rEP_CostCentreStack_mem_alloc dflags
+  = cmmBits (widthFromBytes (pc_REP_CostCentreStack_mem_alloc pc))
+  where pc = platformConstants dflags
+
+rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
+rEP_CostCentreStack_scc_count dflags
+  = cmmBits (widthFromBytes (pc_REP_CostCentreStack_scc_count pc))
+  where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocs :: DynFlags -> CmmType
+rEP_StgEntCounter_allocs dflags
+  = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc))
+  where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocd :: DynFlags -> CmmType
+rEP_StgEntCounter_allocd dflags
+  = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc))
+  where pc = platformConstants dflags
+
+-------------------------------------------------------------------------
+{- Note [Signed vs unsigned]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
+Should a CmmType include a signed vs. unsigned distinction?
+
+This is very much like a "hint" in C-- terminology: it isn't necessary
+in order to generate correct code, but it might be useful in that the
+compiler can generate better code if it has access to higher-level
+hints about data. This is important at call boundaries, because the
+definition of a function is not visible at all of its call sites, so
+the compiler cannot infer the hints.
+
+Here in Cmm, we're taking a slightly different approach. We include
+the int vs. float hint in the CmmType, because (a) the majority of
+platforms have a strong distinction between float and int registers,
+and (b) we don't want to do any heavyweight hint-inference in the
+native code backend in order to get good code. We're treating the
+hint more like a type: our Cmm is always completely consistent with
+respect to hints. All coercions between float and int are explicit.
+
+What about the signed vs. unsigned hint? This information might be
+useful if we want to keep sub-word-sized values in word-size
+registers, which we must do if we only have word-sized registers.
+
+On such a system, there are two straightforward conventions for
+representing sub-word-sized values:
+
+(a) Leave the upper bits undefined. Comparison operations must
+    sign- or zero-extend both operands before comparing them,
+    depending on whether the comparison is signed or unsigned.
+
+(b) Always keep the values sign- or zero-extended as appropriate.
+    Arithmetic operations must narrow the result to the appropriate
+    size.
+
+A clever compiler might not use either (a) or (b) exclusively; instead
+it would attempt to minimize the coercions by analysis: the same kind
+of analysis that propagates hints around. In Cmm we don't want to
+have to do this, so we plump for having richer types and keeping the
+type information consistent.
+
+If signed/unsigned hints are missing from CmmType, then the only
+choice we have is (a), because we don't know whether the result of an
+operation should be sign- or zero-extended.
+
+Many architectures have extending load operations, which work well
+with (b). To make use of them with (a), you need to know whether the
+value is going to be sign- or zero-extended by an enclosing comparison
+(for example), which involves knowing about the context. This is
+doable but more complex.
+
+Further complicating the issue is foreign calls: a foreign calling
+convention can specify that signed 8-bit quantities are passed as
+sign-extended 32 bit quantities, for example (this is the case on the
+PowerPC). So we *do* need sign information on foreign call arguments.
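+
+As a tiny illustration of the two conventions (the operation names
+here are made up): under (a), a signed comparison of two 8-bit values
+held in word-sized registers must extend both operands first, roughly
+    slt( sx8(x), sx8(y) )
+whereas under (b) both values are already kept sign-extended, so a
+plain slt(x, y) suffices, at the cost of narrowing after every
+arithmetic operation.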
+ +Pros for adding signed vs. unsigned to CmmType: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the CmmType for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different CmmType in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represented by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs new file mode 100644 index 0000000000..d879c7b82f --- /dev/null +++ b/compiler/GHC/Cmm/Utils.hs @@ -0,0 +1,607 @@ +{-# LANGUAGE GADTs, RankNTypes #-} +{-# LANGUAGE BangPatterns #-} + +----------------------------------------------------------------------------- +-- +-- Cmm utilities. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Utils( + -- CmmType + primRepCmmType, slotCmmType, slotForeignHint, + typeCmmType, typeForeignHint, primRepForeignHint, + + -- CmmLit + zeroCLit, mkIntCLit, + mkWordCLit, packHalfWordsCLit, + mkByteStringCLit, + mkDataLits, mkRODataLits, + mkStgWordCLit, + + -- CmmExpr + mkIntExpr, zeroExpr, + mkLblExpr, + cmmRegOff, cmmOffset, cmmLabelOff, cmmOffsetLit, cmmOffsetExpr, + cmmRegOffB, cmmOffsetB, cmmLabelOffB, cmmOffsetLitB, cmmOffsetExprB, + cmmRegOffW, cmmOffsetW, cmmLabelOffW, cmmOffsetLitW, cmmOffsetExprW, + cmmIndex, cmmIndexExpr, cmmLoadIndex, cmmLoadIndexW, + cmmNegate, + cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, + cmmToWord, + + cmmMkAssign, + + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, + + baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, + currentTSOExpr, currentNurseryExpr, cccsExpr, + + -- Statics + blankWord, + + -- Tagging + cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, + cmmConstrTag1, + + -- Overlap and usage + regsOverlap, regUsedIn, + + -- Liveness and bitmaps + mkLiveness, + + -- * Operations that probably don't belong here + modifyGraph, + + ofBlockMap, toBlockMap, + ofBlockList, toBlockList, bodyToBlockList, + toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, + + -- * Ticks + blockTicks + ) where + +import GhcPrelude + +import TyCon ( PrimRep(..), PrimElemRep(..) 
) +import GHC.Types.RepType ( UnaryType, SlotTy (..), typePrimRep1 ) + +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import Outputable +import DynFlags +import Unique +import GHC.Platform.Regs + +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Data.Bits +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +--------------------------------------------------- +-- +-- CmmTypes +-- +--------------------------------------------------- + +primRepCmmType :: DynFlags -> PrimRep -> CmmType +primRepCmmType _ VoidRep = panic "primRepCmmType:VoidRep" +primRepCmmType dflags LiftedRep = gcWord dflags +primRepCmmType dflags UnliftedRep = gcWord dflags +primRepCmmType dflags IntRep = bWord dflags +primRepCmmType dflags WordRep = bWord dflags +primRepCmmType _ Int8Rep = b8 +primRepCmmType _ Word8Rep = b8 +primRepCmmType _ Int16Rep = b16 +primRepCmmType _ Word16Rep = b16 +primRepCmmType _ Int32Rep = b32 +primRepCmmType _ Word32Rep = b32 +primRepCmmType _ Int64Rep = b64 +primRepCmmType _ Word64Rep = b64 +primRepCmmType dflags AddrRep = bWord dflags +primRepCmmType _ FloatRep = f32 +primRepCmmType _ DoubleRep = f64 +primRepCmmType _ (VecRep len rep) = vec len (primElemRepCmmType rep) + +slotCmmType :: DynFlags -> SlotTy -> CmmType +slotCmmType dflags PtrSlot = gcWord dflags +slotCmmType dflags WordSlot = bWord dflags +slotCmmType _ Word64Slot = b64 +slotCmmType _ FloatSlot = f32 +slotCmmType _ DoubleSlot = f64 + +primElemRepCmmType :: PrimElemRep -> CmmType +primElemRepCmmType Int8ElemRep = b8 +primElemRepCmmType Int16ElemRep = b16 +primElemRepCmmType Int32ElemRep = b32 +primElemRepCmmType Int64ElemRep = b64 +primElemRepCmmType Word8ElemRep = b8 +primElemRepCmmType Word16ElemRep = b16 +primElemRepCmmType Word32ElemRep = b32 +primElemRepCmmType Word64ElemRep = b64 +primElemRepCmmType FloatElemRep = f32 +primElemRepCmmType DoubleElemRep = f64 + +typeCmmType :: DynFlags -> UnaryType -> CmmType +typeCmmType dflags ty = primRepCmmType dflags (typePrimRep1 ty) + +primRepForeignHint :: PrimRep -> ForeignHint +primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" +primRepForeignHint LiftedRep = AddrHint +primRepForeignHint UnliftedRep = AddrHint +primRepForeignHint IntRep = SignedHint +primRepForeignHint Int8Rep = SignedHint +primRepForeignHint Int16Rep = SignedHint +primRepForeignHint Int32Rep = SignedHint +primRepForeignHint Int64Rep = SignedHint +primRepForeignHint WordRep = NoHint +primRepForeignHint Word8Rep = NoHint +primRepForeignHint Word16Rep = NoHint +primRepForeignHint Word32Rep = NoHint +primRepForeignHint Word64Rep = NoHint +primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg +primRepForeignHint FloatRep = NoHint +primRepForeignHint DoubleRep = NoHint +primRepForeignHint (VecRep {}) = NoHint + +slotForeignHint :: SlotTy -> ForeignHint +slotForeignHint PtrSlot = AddrHint +slotForeignHint WordSlot = NoHint +slotForeignHint Word64Slot = NoHint +slotForeignHint FloatSlot = NoHint +slotForeignHint DoubleSlot = NoHint + +typeForeignHint :: UnaryType -> ForeignHint +typeForeignHint = primRepForeignHint . typePrimRep1 + +--------------------------------------------------- +-- +-- CmmLit +-- +--------------------------------------------------- + +-- XXX: should really be Integer, since Int doesn't necessarily cover +-- the full range of target Ints. 
+mkIntCLit :: DynFlags -> Int -> CmmLit +mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags) + +mkIntExpr :: DynFlags -> Int -> CmmExpr +mkIntExpr dflags i = CmmLit $! mkIntCLit dflags i + +zeroCLit :: DynFlags -> CmmLit +zeroCLit dflags = CmmInt 0 (wordWidth dflags) + +zeroExpr :: DynFlags -> CmmExpr +zeroExpr dflags = CmmLit (zeroCLit dflags) + +mkWordCLit :: DynFlags -> Integer -> CmmLit +mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) + +mkByteStringCLit + :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt) +-- We have to make a top-level decl for the string, +-- and return a literal pointing to it +mkByteStringCLit lbl bytes + = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) + where + -- This can not happen for String literals (as there \NUL is replaced by + -- C0 80). However, it can happen with Addr# literals. + sec = if 0 `BS.elem` bytes then ReadOnlyData else CString + +mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a data-segment data block +mkDataLits section lbl lits + = CmmData section (Statics lbl $ map CmmStaticLit lits) + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a read-only data block +mkRODataLits lbl lits + = mkDataLits section lbl lits + where + section | any needsRelocation lits = Section RelocatableReadOnlyData lbl + | otherwise = Section ReadOnlyData lbl + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStgWordCLit :: DynFlags -> StgWord -> CmmLit +mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) + +packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit dflags lower_half_word upper_half_word + = if wORDS_BIGENDIAN dflags + then mkWordCLit dflags ((l `shiftL` halfWordSizeInBits dflags) .|. u) + else mkWordCLit dflags (l .|. 
(u `shiftL` halfWordSizeInBits dflags)) + where l = fromStgHalfWord lower_half_word + u = fromStgHalfWord upper_half_word + +--------------------------------------------------- +-- +-- CmmExpr +-- +--------------------------------------------------- + +mkLblExpr :: CLabel -> CmmExpr +mkLblExpr lbl = CmmLit (CmmLabel lbl) + +cmmOffsetExpr :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- assumes base and offset have the same CmmType +cmmOffsetExpr dflags e (CmmLit (CmmInt n _)) = cmmOffset dflags e (fromInteger n) +cmmOffsetExpr dflags e byte_off = CmmMachOp (MO_Add (cmmExprWidth dflags e)) [e, byte_off] + +cmmOffset :: DynFlags -> CmmExpr -> Int -> CmmExpr +cmmOffset _ e 0 = e +cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off +cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) +cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) +cmmOffset _ (CmmStackSlot area off) byte_off + = CmmStackSlot area (off - byte_off) + -- note stack area offsets increase towards lower addresses +cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 + = CmmMachOp (MO_Add rep) + [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] +cmmOffset dflags expr byte_off + = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] + where + width = cmmExprWidth dflags expr + +-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. +cmmRegOff :: CmmReg -> Int -> CmmExpr +cmmRegOff reg 0 = CmmReg reg +cmmRegOff reg byte_off = CmmRegOff reg byte_off + +cmmOffsetLit :: CmmLit -> Int -> CmmLit +cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off +cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) +cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off + = CmmLabelDiffOff l1 l2 (m+byte_off) w +cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep +cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) + +cmmLabelOff :: CLabel -> Int -> CmmLit +-- Smart constructor for CmmLabelOff +cmmLabelOff lbl 0 = CmmLabel lbl +cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off + +-- | Useful for creating an index into an array, with a statically known offset. +-- The type is the element type; used for making the multiplier +cmmIndex :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> Int -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndex dflags width base idx = cmmOffset dflags base (idx * widthInBytes width) + +-- | Useful for creating an index into an array, with an unknown offset. 
+cmmIndexExpr :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> CmmExpr -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndexExpr dflags width base (CmmLit (CmmInt n _)) = cmmIndex dflags width base (fromInteger n) +cmmIndexExpr dflags width base idx = + cmmOffsetExpr dflags base byte_off + where + idx_w = cmmExprWidth dflags idx + byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr dflags (widthInLog width)] + +cmmLoadIndex :: DynFlags -> CmmType -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex dflags ty expr ix = CmmLoad (cmmIndex dflags (typeWidth ty) expr ix) ty + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: DynFlags -> CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets + +cmmOffsetExprW :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW dflags e (CmmLit (CmmInt n _)) = cmmOffsetW dflags e (fromInteger n) +cmmOffsetExprW dflags e wd_off = cmmIndexExpr dflags (wordWidth dflags) e wd_off + +cmmOffsetW :: DynFlags -> CmmExpr -> WordOff -> CmmExpr +cmmOffsetW dflags e n = cmmOffsetB dflags e (wordsToBytes dflags n) + +cmmRegOffW :: DynFlags -> CmmReg -> WordOff -> CmmExpr +cmmRegOffW dflags reg wd_off = cmmRegOffB reg (wordsToBytes dflags wd_off) + +cmmOffsetLitW :: DynFlags -> CmmLit -> WordOff -> CmmLit +cmmOffsetLitW dflags lit wd_off = cmmOffsetLitB lit (wordsToBytes dflags wd_off) + +cmmLabelOffW :: DynFlags -> CLabel -> WordOff -> CmmLit +cmmLabelOffW dflags lbl wd_off = cmmLabelOffB lbl (wordsToBytes dflags wd_off) + +cmmLoadIndexW :: DynFlags -> CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW dflags base off ty = CmmLoad (cmmOffsetW dflags base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord + :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord dflags e1 e2 = CmmMachOp (mo_wordOr dflags) [e1, e2] +cmmAndWord dflags e1 e2 = CmmMachOp (mo_wordAnd dflags) [e1, e2] +cmmNeWord dflags e1 e2 = CmmMachOp (mo_wordNe dflags) [e1, e2] +cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2] +cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2] +cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2] +cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2] +cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2] +cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2] +cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2] +cmmSubWord dflags e1 e2 = CmmMachOp (mo_wordSub dflags) [e1, e2] +cmmMulWord dflags e1 e2 = CmmMachOp (mo_wordMul dflags) [e1, e2] +cmmQuotWord dflags e1 e2 = CmmMachOp (mo_wordUQuot dflags) [e1, e2] + +cmmNegate :: DynFlags -> CmmExpr -> CmmExpr +cmmNegate _ (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate dflags e = CmmMachOp (MO_S_Neg (cmmExprWidth dflags e)) [e] + +blankWord :: DynFlags -> CmmStatic +blankWord dflags = CmmUninitialised (wORD_SIZE dflags) + +cmmToWord :: DynFlags -> CmmExpr -> CmmExpr 
+cmmToWord dflags e
+  | w == word = e
+  | otherwise = CmmMachOp (MO_UU_Conv w word) [e]
+  where
+    w = cmmExprWidth dflags e
+    word = wordWidth dflags
+
+cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr)
+cmmMkAssign dflags expr uq =
+  let !ty = cmmExprType dflags expr
+      reg = (CmmLocal (LocalReg uq ty))
+  in  (CmmAssign reg expr, CmmReg reg)
+
+
+---------------------------------------------------
+--
+--      CmmExpr predicates
+--
+---------------------------------------------------
+
+isTrivialCmmExpr :: CmmExpr -> Bool
+isTrivialCmmExpr (CmmLoad _ _)      = False
+isTrivialCmmExpr (CmmMachOp _ _)    = False
+isTrivialCmmExpr (CmmLit _)         = True
+isTrivialCmmExpr (CmmReg _)         = True
+isTrivialCmmExpr (CmmRegOff _ _)    = True
+isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot"
+
+hasNoGlobalRegs :: CmmExpr -> Bool
+hasNoGlobalRegs (CmmLoad e _)              = hasNoGlobalRegs e
+hasNoGlobalRegs (CmmMachOp _ es)           = all hasNoGlobalRegs es
+hasNoGlobalRegs (CmmLit _)                 = True
+hasNoGlobalRegs (CmmReg (CmmLocal _))      = True
+hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True
+hasNoGlobalRegs _                          = False
+
+isLit :: CmmExpr -> Bool
+isLit (CmmLit _) = True
+isLit _          = False
+
+isComparisonExpr :: CmmExpr -> Bool
+isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
+isComparisonExpr _                = False
+
+---------------------------------------------------
+--
+--      Tagging
+--
+---------------------------------------------------
+
+-- Tag bits mask
+cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr
+cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags)
+cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags))
+
+-- Used to untag a possibly tagged pointer
+-- A static label need not be untagged
+cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
+cmmUntag _ e@(CmmLit (CmmLabel _)) = e
+-- Default case
+cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags)
+
+-- Test if a closure pointer is tagged (i.e. has non-zero tag bits)
+cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags)
+
+-- Get the constructor tag, but one-based.
+cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
+
+
+-----------------------------------------------------------------------------
+-- Overlap and usage
+
+-- | Returns True if the two STG registers overlap on the specified
+-- platform, in the sense that writing to one will clobber the
+-- other. This includes the case that the two registers are the same
+-- STG register. See Note [Overlapping global registers] for details.
+regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
+regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
+  | Just real  <- globalRegMaybe (targetPlatform dflags) g,
+    Just real' <- globalRegMaybe (targetPlatform dflags) g',
+    real == real'
+  = True
+regsOverlap _ reg reg' = reg == reg'
+
+-- | Returns True if the STG register is used by the expression, in
+-- the sense that a store to the register might affect the value of
+-- the expression.
+--
+-- We must check for overlapping registers and not just equal
+-- registers here, otherwise CmmSink may incorrectly reorder
+-- assignments that conflict due to overlap. See #10521 and Note
+-- [Overlapping global registers].
+regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool +regUsedIn dflags = regUsedIn_ where + _ `regUsedIn_` CmmLit _ = False + reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es + _ `regUsedIn_` CmmStackSlot _ _ = False + +-------------------------------------------- +-- +-- mkLiveness +-- +--------------------------------------------- + +mkLiveness :: DynFlags -> [LocalReg] -> Liveness +mkLiveness _ [] = [] +mkLiveness dflags (reg:regs) + = bits ++ mkLiveness dflags regs + where + sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1) + `quot` wORD_SIZE dflags + -- number of words, rounded up + bits = replicate sizeW is_non_ptr -- True <=> Non Ptr + + is_non_ptr = not $ isGcPtrType (localRegType reg) + + +-- ============================================== - +-- ============================================== - +-- ============================================== - + +--------------------------------------------------- +-- +-- Manipulating CmmGraphs +-- +--------------------------------------------------- + +modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' +modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} + +toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body + +ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} + +toBlockList :: CmmGraph -> [CmmBlock] +toBlockList g = mapElems $ toBlockMap g + +-- | like 'toBlockList', but the entry block always comes first +toBlockListEntryFirst :: CmmGraph -> [CmmBlock] +toBlockListEntryFirst g + | mapNull m = [] + | otherwise = entry_block : others + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + others = filter ((/= entry_id) . entryLabel) (mapElems m) + +-- | Like 'toBlockListEntryFirst', but we strive to ensure that we order blocks +-- so that the false case of a conditional jumps to the next block in the output +-- list of blocks. This matches the way OldCmm blocks were output since in +-- OldCmm the false case was a fallthrough, whereas in Cmm conditional branches +-- have both true and false successors. Block ordering can make a big difference +-- in performance in the LLVM backend. Note that we rely crucially on the order +-- of successors returned for CmmCondBranch by the NonLocal instance for CmmNode +-- defined in cmm/CmmNode.hs. 
-GBM +toBlockListEntryFirstFalseFallthrough :: CmmGraph -> [CmmBlock] +toBlockListEntryFirstFalseFallthrough g + | mapNull m = [] + | otherwise = dfs setEmpty [entry_block] + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + + dfs :: LabelSet -> [CmmBlock] -> [CmmBlock] + dfs _ [] = [] + dfs visited (block:bs) + | id `setMember` visited = dfs visited bs + | otherwise = block : dfs (setInsert id visited) bs' + where id = entryLabel block + bs' = foldr add_id bs (successors block) + add_id id bs = case mapLookup id m of + Just b -> b : bs + Nothing -> bs + +ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph +ofBlockList entry blocks = CmmGraph { g_entry = entry + , g_graph = GMany NothingO body NothingO } + where body = foldr addBlock emptyBody blocks + +bodyToBlockList :: Body CmmNode -> [CmmBlock] +bodyToBlockList body = mapElems body + +mapGraphNodes :: ( CmmNode C O -> CmmNode C O + , CmmNode O O -> CmmNode O O + , CmmNode O C -> CmmNode O C) + -> CmmGraph -> CmmGraph +mapGraphNodes funs@(mf,_,_) g = + ofBlockMap (entryLabel $ mf $ CmmEntry (g_entry g) GlobalScope) $ + mapMap (mapBlock3' funs) $ toBlockMap g + +mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph +mapGraphNodes1 f = modifyGraph (mapGraph f) + + +foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a +foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g + +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) + +------------------------------------------------- +-- Tick utilities + +-- | Extract all tick annotations from the given block +blockTicks :: Block CmmNode C C -> [CmmTickish] +blockTicks b = reverse $ foldBlockNodesF goStmt b [] + where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish] + goStmt (CmmTick t) ts = t:ts + goStmt _other ts = ts + + +-- ----------------------------------------------------------------------------- +-- Access to common global registers + +baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr, + spLimExpr, hpLimExpr, cccsExpr :: CmmExpr +baseExpr = CmmReg baseReg +spExpr = CmmReg spReg +spLimExpr = CmmReg spLimReg +hpExpr = CmmReg hpReg +hpLimExpr = CmmReg hpLimReg +currentTSOExpr = CmmReg currentTSOReg +currentNurseryExpr = CmmReg currentNurseryReg +cccsExpr = CmmReg cccsReg diff --git a/compiler/GHC/Cmm/cmm-notes b/compiler/GHC/Cmm/cmm-notes new file mode 100644 index 0000000000..d664a195b7 --- /dev/null +++ b/compiler/GHC/Cmm/cmm-notes @@ -0,0 +1,184 @@ +More notes (Aug 11) +~~~~~~~~~~~~~~~~~~ +* CmmInfo.cmmToRawCmm expands info tables to their representations + (needed for .cmm files as well as the code generators) + +* Why is FCode a lazy monad? That makes it inefficient. + We want laziness to get code out one procedure at a time, + but not at the instruction level. + UPDATE (31/5/2016): FCode is strict since 09afcc9b. + +Things we did + * Remove CmmCvt.graphToZgraph (Conversion from old to new Cmm reps) + * Remove HscMain.optionallyConvertAndOrCPS (converted old Cmm to + new, ran pipeline, and converted back) + * Remove CmmDecl. Put its types in Cmm. Import Cmm into OldCmm + so it can get those types. + + +More notes (June 11) +~~~~~~~~~~~~~~~~~~~~ + +* In CmmContFlowOpt.branchChainElim, can a single block be the + successor of two calls? 
+ +* Check in ClosureInfo: + -- NB: Results here should line up with the results of SMRep.rtsClosureType + +More notes (May 11) +~~~~~~~~~~~~~~~~~~~ +In CmmNode, consider splitting CmmCall into two: call and jump + +Notes on new codegen (Aug 10) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Things to do: + - Proc points pass all arguments on the stack, adding more code and + slowing down things a lot. We either need to fix this or even better + would be to get rid of proc points. + + - Sort out Label, LabelMap, LabelSet versus BlockId, BlockEnv, BlockSet + dichotomy. Mostly this means global replace, but we also need to make + Label an instance of Outputable (probably in the Outputable module). + + EZY: We should use Label, since that's the terminology Hoopl uses. + + - AsmCodeGen has a generic Cmm optimiser; move this into new pipeline + EZY (2011-04-16): The mini-inliner has been generalized and ported, + but the constant folding and other optimizations need to still be + ported. + + - AsmCodeGen has post-native-cg branch eliminator (shortCutBranches); + we ultimately want to share this with the Cmm branch eliminator. + + - At the moment, references to global registers like Hp are "lowered" + late (in CgUtils.fixStgRegisters). We should do this early, in the + new native codegen, much in the way that we lower calling conventions. + Might need to be a bit sophisticated about aliasing. + + - Move to new Cmm rep: + * Make native CG consume New Cmm; + * Convert Old Cmm->New Cmm to keep old path alive + * Produce New Cmm when reading in .cmm files + + - Top-level SRT threading is a bit ugly + + - See "CAFs" below; we want to totally refactor the way SRTs are calculated + + - Garbage-collect https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cps + moving good stuff into + https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/new-code-gen-pipeline + + - Currently AsmCodeGen top level calls AsmCodeGen.cmmToCmm, which is a small + C-- optimiser. It has quite a lot of boilerplate folding code in AsmCodeGen + (cmmBlockConFold, cmmStmtConFold, cmmExprConFold), before calling out to + CmmOpt. ToDo: see what optimisations are being done; and do them before + AsmCodeGen. + + - If we stick CAF and stack liveness info on a LastCall node (not LastRet/Jump) + then all CAF and stack liveness stuff be completed before we split + into separate C procedures. + + Short term: + compute and attach liveness into LastCall + right at end, split, cvt to old rep + [must split before cvt, because old rep is not expressive enough] + + Longer term: + when old rep disappears, + move the whole splitting game into the C back end *only* + (guided by the procpoint set) + +---------------------------------------------------- + Proc-points +---------------------------------------------------- + +Consider this program, which has a diamond control flow, +with a call on one branch + fn(p,x) { + h() + if b then { ... f(x) ...; q=5; goto J } + else { ...; q=7; goto J } + J: ..p...q... + } +then the join point J is a "proc-point". So, is 'p' passed to J +as a parameter? Or, if 'p' was saved on the stack anyway, perhaps +to keep it alive across the call to h(), maybe 'p' gets communicated +to J that way. This is an awkward choice. (We think that we currently +never pass variables to join points via arguments.) + +Furthermore, there is *no way* to pass q to J in a register (other +than a parameter register). + +What we want is to do register allocation across the whole caboodle. 
+Then we could drop all the code that deals with the above awkward
+decisions about spilling variables across proc-points.
+
+Note that J doesn't need an info table.
+
+What we really want is for each LastCall (not LastJump/Ret)
+to have an info table.   Note that ProcPoints that are not successors
+of calls don't need an info table.
+
+Figuring out proc-points
+~~~~~~~~~~~~~~~~~~~~~~~~
+Proc-points are identified by
+GHC.Cmm.ProcPoint.minimalProcPointSet/extendPPSet. Although there isn't
+that much code, JD thinks that it could be done much more nicely using
+a dominator analysis, using the Dataflow Engine.
+
+----------------------------------------------------
+        CAFs
+----------------------------------------------------
+
+* The code for a procedure f may refer to either the *closure*
+  or the *entry point* of another top-level procedure g.
+  If f is live, then so is g. f's SRT must include g's closure.
+
+* The CLabel for the entry-point/closure reveals whether g is
+  a CAF (or refers to CAFs).  See the IdLabel constructor of CLabel.
+
+* The CAF-ness of the original top-level definitions is figured out
+  (by GHC.Iface.Tidy) before we generate C--. This CafInfo is only set for
+  top-level Ids; nested bindings stay with MayHaveCafRefs.
+
+* Currently an SRT contains (only) pointers to (top-level) closures.
+
+* Consider this Core code
+        f = \x -> let g = \y -> ...x...y...h1...
+                  in ...h2...g...
+  and suppose that h1, h2 have IdInfo of MayHaveCafRefs.
+  Therefore, so will f, but g will not (since it's nested).
+
+  This generates C-- roughly like this:
+     f_closure: .word f_entry
+     f_entry() [info-tbl-for-f] { ...jump g_entry...jump h2... }
+     g_entry() [info-tbl-for-g] { ...jump h1... }
+
+  Note that there is no top-level closure for g (only an info table).
+  This fact (whether or not there is a top-level closure) is recorded
+  in the InfoTable attached to the CmmProc for f, g.
+  INVARIANT:
+     Any out-of-Group references to an IdLabel go to
+     a Proc whose InfoTable says "I have a top-level closure".
+     Equivalently:
+     A CmmProc whose InfoTable says "I do not have a top-level
+     closure" is referred to only from its own Group.
+
+* So: info-tbl-for-f must have an SRT that keeps h1,h2 alive
+      info-tbl-for-g must have an SRT that keeps h1 (only) alive
+
+  But if we just look for the free CAF refs, we get:
+        f   h2 (only)
+        g   h1
+
+  So we need to do a transitive closure thing to flesh out
+  f's keep-alive refs to include h1.
+
+* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a
+  CmmInfoTable attached to each CmmProc.  CmmPipeline.toTops actually does
+  the attaching, right at the end of the pipeline.  The C_SRT part
+  gives offsets within a single, shared table of closure pointers.
+
+* DECIDED: we can generate SRTs based on the final Cmm program
+  without knowledge of how it is generated.
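+
+* A minimal sketch of that transitive-closure step (hypothetical types
+  and names; the real SRT pass tracks considerably more):
+
+    import qualified Data.Map as Map
+    import qualified Data.Set as Set
+
+    type CafEnv lbl = Map.Map lbl (Set.Set lbl)
+
+    -- Iterate "add the refs of my refs" until a fixed point is reached.
+    transitiveCafRefs :: Ord lbl => CafEnv lbl -> CafEnv lbl
+    transitiveCafRefs env
+      | env' == env = env
+      | otherwise   = transitiveCafRefs env'
+      where
+        env' = Map.map expand env
+        expand refs =
+          Set.unions (refs : [ Map.findWithDefault Set.empty r env
+                             | r <- Set.toList refs ])
+
+  Starting from f |-> {g,h2} and g |-> {h1}, the fixed point gives
+  f |-> {g,h1,h2}, so f's SRT keeps h1 alive as well.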